From 8e12eedbb33750b47f5a7023c223854b5ab067f0 Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Sun, 27 Jan 2019 19:51:05 +0300 Subject: [PATCH 0001/2229] Initial commit --- .gitmodules | 3 +++ CMakeLists.txt | 1 + cmake/find_cassandra.cmake | 12 ++++++++++++ contrib/CMakeLists.txt | 9 ++++++++- contrib/cassandra | 1 + dbms/src/Common/config.h.in | 1 + dbms/src/Dictionaries/CMakeLists.txt | 6 +++++- .../Dictionaries/CassandraDBDictionarySource.cpp | 1 + .../Dictionaries/CassandraDBDictionarySource.h | 16 ++++++++++++++++ 9 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 cmake/find_cassandra.cmake create mode 160000 contrib/cassandra create mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.cpp create mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.h diff --git a/.gitmodules b/.gitmodules index 24211b6707e..86100baa3dc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git +[submodule "contrib/cassandra"] + path = contrib/cassandra + url = https://github.com/datastax/cpp-driver.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 98c3643f055..b8f8d803585 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,6 +236,7 @@ include (cmake/find_protobuf.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) +include (cmake/find_cassandra.cmake) if (ENABLE_TESTS) include (cmake/find_gtest.cmake) endif () diff --git a/cmake/find_cassandra.cmake b/cmake/find_cassandra.cmake new file mode 100644 index 00000000000..3e4bb3ca373 --- /dev/null +++ b/cmake/find_cassandra.cmake @@ -0,0 +1,12 @@ +if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + message (WARNING "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init --recursive") + else() + set (CASSANDRA_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") + set (CASSANDRA_LIBRARY cassandra) + set (USE_CASSANDRA 1) + + message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") + endif() +endif() \ No newline at end of file diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0c4b6c15287..4a4add1f095 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -219,5 +219,12 @@ if (USE_INTERNAL_HDFS3_LIBRARY) endif () if (USE_BASE64) - add_subdirectory (base64-cmake) + add_subdirectory(base64-cmake) endif() + +if (USE_CASSANDRA) + # TODO osfavstov: cassandra/CMakeLists.txt change + # 5: set(CASS_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/cassandra"); + # 10: include(${ClickHouse_SOURCE_DIR}/contrib/cassandra/cmake/modules/CppDriver.cmake) + add_subdirectory(cassandra) +endif() \ No newline at end of file diff --git a/contrib/cassandra b/contrib/cassandra new file mode 160000 index 00000000000..fd9b73d4acf --- /dev/null +++ b/contrib/cassandra @@ -0,0 +1 @@ +Subproject commit fd9b73d4acfd85293ab304be64e2e1e2109e521d diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 0c756841f2e..42ff3e43ec2 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -18,6 +18,7 @@ #cmakedefine01 USE_XXHASH #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_PROTOBUF +#cmakedefine01 USE_CASSANDRA #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index d7f85a5c7eb..534f49bc5bb 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -11,7 +11,7 @@ generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) -add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources}) +add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources} CassandraDBDictionarySource.cpp CassandraDBDictionarySource.h) target_link_libraries(clickhouse_dictionaries PRIVATE clickhouse_common_io pocoext ${MYSQLXX_LIBRARY} ${BTRIE_LIBRARIES}) if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) @@ -36,4 +36,8 @@ if(USE_POCO_MONGODB) target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_MongoDB_LIBRARY}) endif() +if(USE_CASSANDRA) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${CASSANDRA_INCLUDE_DIR}) +endif() + add_subdirectory(Embedded) diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp new file mode 100644 index 00000000000..084ef283107 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp @@ -0,0 +1 @@ +#include "CassandraDBDictionarySource.h" \ No newline at end of file diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.h b/dbms/src/Dictionaries/CassandraDBDictionarySource.h new file mode 100644 index 00000000000..a9a43b026b0 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDBDictionarySource.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +#if USE_CASSANDRA + +# include "DictionaryStructure.h" +# include "IDictionarySource.h" +# include + +namespace DB +{ + +} + +#endif From 
ac46a3a976b511b16501b3c3df75f56667efba61 Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Sat, 16 Feb 2019 13:11:49 +0300 Subject: [PATCH 0002/2229] Add basic realisation --- dbms/src/Dictionaries/CMakeLists.txt | 2 +- .../CassandraBlockInputStream.cpp | 158 ++++++++++++++++++ .../Dictionaries/CassandraBlockInputStream.h | 38 +++++ .../CassandraDBDictionarySource.cpp | 1 - .../CassandraDBDictionarySource.h | 16 -- .../CassandraDictionarySource.cpp | 114 +++++++++++++ .../Dictionaries/CassandraDictionarySource.h | 58 +++++++ 7 files changed, 369 insertions(+), 18 deletions(-) create mode 100644 dbms/src/Dictionaries/CassandraBlockInputStream.cpp create mode 100644 dbms/src/Dictionaries/CassandraBlockInputStream.h delete mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.cpp delete mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.h create mode 100644 dbms/src/Dictionaries/CassandraDictionarySource.cpp create mode 100644 dbms/src/Dictionaries/CassandraDictionarySource.h diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index 534f49bc5bb..21b82a49154 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -11,7 +11,7 @@ generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) -add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources} CassandraDBDictionarySource.cpp CassandraDBDictionarySource.h) +add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources}) target_link_libraries(clickhouse_dictionaries PRIVATE clickhouse_common_io pocoext ${MYSQLXX_LIBRARY} ${BTRIE_LIBRARIES}) if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp new file mode 100644 index 00000000000..8493cf8a6e5 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp @@ -0,0 +1,158 @@ +#include +#include + +#if USE_CASSANDRA + +# include "CassandraBlockInputStream.h" +#include "CassandraBlockInputStream.h" + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CASSANDRA_INTERNAL_ERROR; +} + +CassandraBlockInputStream::CassandraBlockInputStream( + CassSession *session, + const std::string &query_str, + const DB::Block &sample_block, + const size_t max_block_size) + : session{session} + , query_str{query_str} + , max_block_size{max_block_size} +{ + CassStatement * statement = cass_statement_new(query_str.c_str(), 0); + CassFuture* future = cass_session_execute(session, statement); + + const CassResult * result = cass_future_get_result(future); + cass_statement_free(statement); + + if (result == nullptr) { +// CassError error_code = cass_future_error_code(future); + const char* error_message; + size_t error_message_length; + cass_future_error_message(future, &error_message, &error_message_length); + + throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; + } + + cass_future_free(future); + + this->result = result; + + description.init(sample_block); +} + +CassandraBlockInputStream::~CassandraBlockInputStream() { + if (iterator != nullptr) + cass_iterator_free(iterator); + cass_result_free(result); +} + +namespace +{ + 
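+    /// Reads one CassValue from the current row into `column`, dispatching on
+    /// the expected ClickHouse type to the matching cass_value_get_* accessor
+    /// of the DataStax cpp-driver.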
using ValueType = ExternalResultDescription::ValueType; + + void insertValue(IColumn & column, const ValueType type, const CassValue * value) + { + switch (type) + { + case ValueType::UInt8: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt16: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt32: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt64: + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int8: + { + cass_int8_t _value; + cass_value_get_int8(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int16: + { + cass_int16_t _value; + cass_value_get_int16(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int32: + { + cass_int32_t _value; + cass_value_get_int32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int64: + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Float32: + { + cass_float_t _value; + cass_value_get_float(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Float64: + { + cass_double_t _value; + cass_value_get_double(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::String: + { + const char* _value; + size_t _value_length; + cass_value_get_string + static_cast(column).insertData(value.data(), value.size()); + break; + } + case ValueType::Date: + static_cast(column).insertValue(UInt16(value.getDate().getDayNum())); + break; + case ValueType::DateTime: + static_cast(column).insertValue(UInt32(value.getDateTime())); + break; + case ValueType::UUID: + static_cast(column).insert(parse(value.data(), value.size())); + break; + } + } + + void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } +} + + +} +#endif diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h new file mode 100644 index 00000000000..fdf9954d18f --- /dev/null +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include "ExternalResultDescription.h" + + +namespace DB +{ +/// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining + class CassandraBlockInputStream final : public IBlockInputStream + { + public: + CassandraBlockInputStream( + CassSession * session, + const std::string & query_str, + const Block & sample_block, + const size_t max_block_size); + ~CassandraBlockInputStream() override; + + String getName() const override { return "Cassandra"; } + + Block getHeader() const override { return description.sample_block.cloneEmpty(); } + + private: + Block readImpl() override; + + CassSession * session, + const std::string & query_str; + const size_t max_block_size; + ExternalResultDescription description; + const CassResult * result; + CassIterator * iterator = nullptr; + }; + +} diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp deleted file 
mode 100644 index 084ef283107..00000000000 --- a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "CassandraDBDictionarySource.h" \ No newline at end of file diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.h b/dbms/src/Dictionaries/CassandraDBDictionarySource.h deleted file mode 100644 index a9a43b026b0..00000000000 --- a/dbms/src/Dictionaries/CassandraDBDictionarySource.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -#if USE_CASSANDRA - -# include "DictionaryStructure.h" -# include "IDictionarySource.h" -# include - -namespace DB -{ - -} - -#endif diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp new file mode 100644 index 00000000000..81cc3e9d85c --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -0,0 +1,114 @@ +#include "CassandraDictionarySource.h" +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + +namespace DB +{ + namespace ErrorCodes + { + extern const int SUPPORT_IS_DISABLED; + } + + void registerDictionarySourceCassandra(DictionarySourceFactory & factory) + { + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */) -> DictionarySourcePtr { +#if USE_CASSANDRA + return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); +#else + (void)dict_struct; + (void)config; + (void)config_prefix; + (void)sample_block; + throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("cassandra", createTableSource); + } + +} + +#if USE_CASSANDRA + +# include + +namespace DB +{ +namespace ErrorCodes { + extern const int UNSUPPORTED_METHOD; + extern const int WRONG_PASSWORD; +} + +static const size_t max_block_size = 8192; + +CassandraDictionarySource::CassandraDictionarySource( + const DB::DictionaryStructure &dict_struct, + const std::string &host, + UInt16 port, + const std::string &user, + const std::string &password, + const std::string &method, + const std::string &db, + const DB::Block &sample_block) + : dict_struct{dict_struct} + , host{host} + , port{port} + , user{user} + , password{password} + , method{method} + , db{db} + , sample_block{sample_block} + , cluster{cass_cluster_new()} + , session{cass_session_new()} +{ + cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); +} + +CassandraDictionarySource::CassandraDictionarySource( + const DB::DictionaryStructure &dict_struct, + const Poco::Util::AbstractConfiguration &config, + const std::string &config_prefix, + DB::Block &sample_block) + : CassandraDictionarySource( + dict_struct, + config.getString(config_prefix + ".host"), + config.getUInt(config_prefix + ".port"), + config.getString(config_prefix + ".user", ""), + config.getString(config_prefix + ".password", ""), + config.getString(config_prefix + ".method", ""), + config.getString(config_prefix + ".db", ""), + sample_block) +{ +} + +CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySource & other) + : CassandraDictionarySource{other.dict_struct, + other.host, + other.port, + other.user, + other.password, + other.method, + other.db, + other.sample_block} +{ +} + +CassandraDictionarySource::~CassandraDictionarySource() { + 
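+    /// Release the driver handles acquired in the constructor.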
cass_session_free(session); + cass_cluster_free(cluster); +} + +std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { + return host + (port != 0 ? ":" + std::to_string(port) : ""); +} + +BlockInputStreamPtr CassandraDict + + +} + +#endif diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h new file mode 100644 index 00000000000..2d5e81a2733 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -0,0 +1,58 @@ +#pragma once + +#include + +#if USE_CASSANDRA + +# include "DictionaryStructure.h" +# include "IDictionarySource.h" +# include + +namespace DB +{ +class CassandraDictionarySource final : public IDictionarySource { + CassandraDictionarySource( + const DictionaryStructure & dict_struct, + const std::string & host, + UInt16 port, + const std::string & user, + const std::string & password, + const std::string & method, + const std::string & db, + const Block & sample_block); + +public: + CassandraDictionarySource( + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block); + + CassandraDictionarySource(const CassandraDictionarySource & other); + + ~CassandraDictionarySource() override; + + BlockInputStreamPtr loadAll() override; + + BlockInputStreamPtr loadIds(const std::vector & ids) override; + + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + +private: + static std::string toConnectionString(const std::string& host, const UInt16 port); + + const DictionaryStructure dict_struct; + const std::string host; + const UInt16 port; + const std::string user; + const std::string password; + const std::string method; + const std::string db; + Block sample_block; + + CassCluster * cluster; + CassSession * session; +}; +} + +#endif From 0e047e9abcd873f18eb0cf42471dc41e19c6ef0d Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Thu, 11 Apr 2019 11:05:01 +0200 Subject: [PATCH 0003/2229] Base code for testing added --- ci/jobs/quick-build/run.sh | 2 +- .../CassandraBlockInputStream.cpp | 87 ++++++++++++++----- .../Dictionaries/CassandraBlockInputStream.h | 1 + .../src/Dictionaries/registerDictionaries.cpp | 2 + dbms/tests/integration/helpers/cluster.py | 20 ++++- .../helpers/docker_compose_cassandra.yml | 7 ++ dbms/tests/integration/image/Dockerfile | 2 +- .../external_sources.py | 20 +++++ .../test_external_dictionaries/test.py | 5 +- .../dicts/external_dicts_dict_sources.md | 16 +++- 10 files changed, 132 insertions(+), 30 deletions(-) create mode 100644 dbms/tests/integration/helpers/docker_compose_cassandra.yml diff --git a/ci/jobs/quick-build/run.sh b/ci/jobs/quick-build/run.sh index 9e8fe9353d6..c7d8470d358 100755 --- a/ci/jobs/quick-build/run.sh +++ b/ci/jobs/quick-build/run.sh @@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse BUILD_TYPE=Debug ENABLE_EMBEDDED_COMPILER=0 -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0" +CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D 
ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_CASSANDRA=0" [[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp index 8493cf8a6e5..ac90ac8b528 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp @@ -1,10 +1,13 @@ #include #include +#include +#include +#include #if USE_CASSANDRA # include "CassandraBlockInputStream.h" -#include "CassandraBlockInputStream.h" +# include "CassandraBlockInputStream.h" namespace DB @@ -25,23 +28,8 @@ CassandraBlockInputStream::CassandraBlockInputStream( , max_block_size{max_block_size} { CassStatement * statement = cass_statement_new(query_str.c_str(), 0); - CassFuture* future = cass_session_execute(session, statement); - - const CassResult * result = cass_future_get_result(future); - cass_statement_free(statement); - - if (result == nullptr) { -// CassError error_code = cass_future_error_code(future); - const char* error_message; - size_t error_message_length; - cass_future_error_message(future, &error_message, &error_message_length); - - throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; - } - - cass_future_free(future); - - this->result = result; + cass_statement_set_paging_size(statement, max_block_size) + this->has_more_pages = cass_true; description.init(sample_block); } @@ -134,23 +122,78 @@ namespace { const char* _value; size_t _value_length; - cass_value_get_string - static_cast(column).insertData(value.data(), value.size()); + cass_value_get_string(value, &_value, &_value_length); + static_cast(column).insertData(_value, _value_length); break; } case ValueType::Date: - static_cast(column).insertValue(UInt16(value.getDate().getDayNum())); + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); break; + } case ValueType::DateTime: - static_cast(column).insertValue(UInt32(value.getDateTime())); + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); break; + } case ValueType::UUID: + { + CassUuid _value; + cass_value_get_uuid(value, &_value); static_cast(column).insert(parse(value.data(), value.size())); break; + } } } void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } + + Block CassandraBlockInputStream::readImpl() + { + if (has_more_pages) + return {}; + + CassFuture* query_future = cass_session_execute(session, statement); + + const CassResult* result = cass_future_get_result(query_future); + + if (result == nullptr) { + const char* error_message; + size_t error_message_length; + cass_future_error_message(future, &error_message, &error_message_length); + + throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; + } + + const CassRow* row = cass_result_first_row(result); + const CassValue* map = cass_row_get_column(row, 0); + CassIterator* iterator = cass_iterator_from_map(map); + while (cass_iterator_next(iterator)) { + const CassValue* _key = cass_iterator_get_map_key(iterator); + const CassValue* _value = cass_iterator_get_map_value(iterator); + for (const auto &[value, idx]: {{_key, 0}, {_value, 1}}) { + if (description.types[idx].second) { + ColumnNullable & column_nullable = static_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), 
description.types[idx].first, value); + column_nullable.getNullMapData().emplace_back(0); + } else { + insertValue(*columns[idx], description.types[idx].first, value); + } + } + } + + has_more_pages = cass_result_has_more_pages(result); + + if (has_more_pages) { + cass_statement_set_paging_state(statement, result); + } + + cass_result_free(result); + } } diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h index fdf9954d18f..005c6f69b75 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.h +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -32,6 +32,7 @@ namespace DB const size_t max_block_size; ExternalResultDescription description; const CassResult * result; + cass_bool_t has_more_pages; CassIterator * iterator = nullptr; }; diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp index 1a8c5a7be7b..93df888d519 100644 --- a/dbms/src/Dictionaries/registerDictionaries.cpp +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -7,6 +7,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & source_factory); void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory); +void registerDictionarySourceCassandra(DictionarySourceFactory & source_factory); void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory); @@ -30,6 +31,7 @@ void registerDictionaries() registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); registerDictionarySourceMongoDB(source_factory); + registerDictionarySourceCassandra(source_factory); registerDictionarySourceXDBC(source_factory); registerDictionarySourceJDBC(source_factory); registerDictionarySourceExecutable(source_factory); diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 7ee2c1f9f54..38f09a72524 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -92,6 +92,7 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_cassandra_cmd = [] self.pre_zookeeper_commands = [] self.instances = {} self.with_zookeeper = False @@ -101,6 +102,7 @@ class ClickHouseCluster: self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False + self.with_cassandra = False self.docker_client = None self.is_up = False @@ -112,7 +114,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_cassandra=False, hostname=None, 
env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -130,7 +132,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, self.base_configs_dir, self.server_bin_path, + self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_cassandra, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) @@ -185,6 +187,13 @@ class ClickHouseCluster: self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mongo.yml')] + if with_cassandra and not self.with_cassandra: + self.with_cassandra = True + self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_cassandra.yml')]) + self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_cassandra.yml')] + + return instance @@ -316,6 +325,10 @@ class ClickHouseCluster: subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) self.wait_mongo_to_start(30) + if self.with_cassandra and self.base_cassandra_cmd: + subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate']) + time.sleep(10) + subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) start_deadline = time.time() + 20.0 # seconds @@ -414,7 +427,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, base_configs_dir, server_bin_path, odbc_bridge_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_cassandra, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -439,6 +452,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka self.with_mongo = with_mongo + self.with_cassandra = with_cassandra self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, 'docker_compose.yml') diff --git a/dbms/tests/integration/helpers/docker_compose_cassandra.yml b/dbms/tests/integration/helpers/docker_compose_cassandra.yml new file mode 100644 index 00000000000..bb6a0221c54 --- /dev/null +++ b/dbms/tests/integration/helpers/docker_compose_cassandra.yml @@ -0,0 +1,7 @@ +version: '2.2' +services: + cassandra1: + image: cassandra + restart: always + ports: + - 6340:6349 diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 1dd5c1713b2..0b3cdd5a65f 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ 
/etc/localtime && echo $TZ > /etc/timezone
-RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal
+RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal cassandra-driver
 
 ENV DOCKER_CHANNEL stable
 ENV DOCKER_VERSION 17.09.1-ce
diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py
index 71dc05ca78c..e0adb8c187b 100644
--- a/dbms/tests/integration/test_external_dictionaries/external_sources.py
+++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py
@@ -2,6 +2,7 @@ import warnings
 
 import pymysql.cursors
 import pymongo
+import cassandra.cluster
 from tzlocal import get_localzone
 import datetime
 import os
@@ -372,3 +373,22 @@ class SourceHTTP(SourceHTTPBase):
 class SourceHTTPS(SourceHTTPBase):
     def _get_schema(self):
         return "https"
+
+class SourceCassandra(ExternalSource):
+    def get_source_str(self, table_name):
+        return '''
+            <cassandra>
+                <host>{host}</host>
+                <port>{port}</port>
+            </cassandra>
+        '''.format(
+            host=self.docker_hostname,
+            port=self.docker_port,
+        )
+
+    def prepare(self, structure, table_name, cluster):
+        self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port)
+        self.prepared = True
+
+    def load_data(self, data, table_name):
+        for ro
diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py
index 314ec26a106..0bb76ce171e 100644
--- a/dbms/tests/integration/test_external_dictionaries/test.py
+++ b/dbms/tests/integration/test_external_dictionaries/test.py
@@ -5,7 +5,7 @@ import time
 from helpers.cluster import ClickHouseCluster
 from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout
 from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed, SourceMongo
-from external_sources import SourceHTTP, SourceHTTPS
+from external_sources import SourceHTTP, SourceHTTPS, SourceCassandra
 
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@@ -79,6 +79,7 @@ LAYOUTS = [
 ]
 
 SOURCES = [
+    SourceCassandra("Cassandra", "localhost", "6340", "cassandra1", "6349", "", ""),
     SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"),
     SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"),
     SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""),
@@ -120,7 +121,7 @@ def setup_module(module):
     for fname in os.listdir(dict_configs_path):
         main_configs.append(os.path.join(dict_configs_path, fname))
     cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
-    node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True)
+    node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_cassandra=True)
     cluster.add_instance('clickhouse1')
 
 @pytest.fixture(scope="module")
diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md
index 8fb2145ecaf..cda41f8294b 100644
--- a/docs/en/query_language/dicts/external_dicts_dict_sources.md
+++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md
@@ -30,6 +30,7 @@ Types of sources (`source_type`):
 - [MySQL](#dicts-external_dicts_dict_sources-mysql)
 - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
 - [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
+- [Cassandra](#dicts-external_dicts_dict_sources-cassandra)
 - [ODBC](#dicts-external_dicts_dict_sources-odbc)
@@ -143,7 +144,7 @@ PASSWORD = test
 If you then make a query such as
 
 ```
-SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); 
+SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db');
 ```
 
 ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`.
@@ -421,4 +422,17 @@ Setting fields:
 - `db` – Name of the database.
 - `collection` – Name of the collection.
 
+### Cassandra {#dicts-external_dicts_dict_sources-cassandra}
+
+Example of settings:
+
+```xml
+<source>
+    <cassandra>
+        <host>localhost</host>
+        <port>6349</port>
+    </cassandra>
+</source>
+```
+
 [Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_sources/)

From 3175caa1c0a4c517080b7a1c2d1ccac697318bf1 Mon Sep 17 00:00:00 2001
From: Gleb-Tretyakov
Date: Thu, 23 May 2019 00:09:29 +0300
Subject: [PATCH 0004/2229] Fix compilation errors

---
 .../CassandraBlockInputStream.cpp             | 37 ++++++++++++-------
 .../Dictionaries/CassandraBlockInputStream.h  |  8 ++--
 .../CassandraDictionarySource.cpp             | 10 ++++-
 .../Dictionaries/CassandraDictionarySource.h  | 17 +++++++++
 4 files changed, 53 insertions(+), 19 deletions(-)

diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp
index ac90ac8b528..f76c9dd93f6 100644
--- a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp
+++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp
@@ -1,11 +1,14 @@
-#include
-#include
-#include
-#include
-#include
+# include
+# include
+# include
+# include
+# include
+# include
+# include
 
 #if USE_CASSANDRA
 
+# include
 # include "CassandraBlockInputStream.h"
 # include "CassandraBlockInputStream.h"
 
 
 namespace DB
 {
@@ -24,11 +27,11 @@ CassandraBlockInputStream::CassandraBlockInputStream(
     const DB::Block &sample_block,
     const size_t max_block_size)
     : session{session}
+    , statement{cass_statement_new(query_str.c_str(), 0)}
     , query_str{query_str}
     , max_block_size{max_block_size}
 {
-    CassStatement * statement = cass_statement_new(query_str.c_str(), 0);
-    cass_statement_set_paging_size(statement, max_block_size)
+    cass_statement_set_paging_size(statement, max_block_size);
     this->has_more_pages = cass_true;
 
     description.init(sample_block);
 }
@@ -134,33 +133,37 @@ namespace
             {
                 cass_int64_t _value;
                 cass_value_get_int64(value, &_value);
-                static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)});
+                static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME
                 break;
             }
             case ValueType::DateTime:
             {
                 cass_int64_t _value;
                 cass_value_get_int64(value, &_value);
-                static_cast(column).insertValue(_value);
+                static_cast(column).insertValue(_value);
                 break;
             }
             case ValueType::UUID:
             {
                 CassUuid _value;
                 cass_value_get_uuid(value, &_value);
-                static_cast(column).insert(parse(value.data(), value.size()));
+                std::array uuid_str;
+                cass_uuid_string(_value, uuid_str.data());
+                static_cast(column).insert(parse(uuid_str.data(), uuid_str.size()));
                 break;
             }
         }
     }
+}
 
-    void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
+    // void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
 
     Block CassandraBlockInputStream::readImpl()
     {
         if (has_more_pages)
             return {};
 
+        MutableColumns columns(description.sample_block.columns());
         CassFuture* query_future = cass_session_execute(session, statement);
 
         const CassResult* result =
cass_future_get_result(query_future); @@ -164,7 +171,7 @@ namespace if (result == nullptr) { const char* error_message; size_t error_message_length; - cass_future_error_message(future, &error_message, &error_message_length); + cass_future_error_message(query_future, &error_message, &error_message_length); throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } @@ -175,7 +182,8 @@ namespace while (cass_iterator_next(iterator)) { const CassValue* _key = cass_iterator_get_map_key(iterator); const CassValue* _value = cass_iterator_get_map_value(iterator); - for (const auto &[value, idx]: {{_key, 0}, {_value, 1}}) { + auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; + for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { ColumnNullable & column_nullable = static_cast(*columns[idx]); insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); @@ -193,8 +201,9 @@ namespace } cass_result_free(result); + + return description.sample_block.cloneWithColumns(std::move(columns)); } -} } diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h index 005c6f69b75..2b7c3b68744 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.h +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include #include -#include "ExternalResultDescription.h" +#include namespace DB @@ -27,8 +26,9 @@ namespace DB private: Block readImpl() override; - CassSession * session, - const std::string & query_str; + CassSession * session; + CassStatement * statement; + String query_str; const size_t max_block_size; ExternalResultDescription description; const CassResult * result; diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp index 81cc3e9d85c..ad89d5e2e43 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.cpp +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -35,6 +35,8 @@ namespace DB #if USE_CASSANDRA # include +# include +# include "CassandraBlockInputStream.h" namespace DB { @@ -106,7 +108,13 @@ std::string CassandraDictionarySource::toConnectionString(const std::string &hos return host + (port != 0 ? ":" + std::to_string(port) : ""); } -BlockInputStreamPtr CassandraDict +BlockInputStreamPtr CassandraDictionarySource::loadAll() { + return std::make_shared(nullptr, "", sample_block, max_block_size); +} + +std::string CassandraDictionarySource::toString() const { + return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? 
" " : " " + user + '@') + */ host + ':' + DB::toString(port); +} } diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h index 2d5e81a2733..60c503fc4da 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.h +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if USE_CASSANDRA @@ -34,9 +35,25 @@ public: BlockInputStreamPtr loadAll() override; + bool supportsSelectiveLoad() const override { return true; } + + bool isModified() const override { return true; } + + ///Not yet supported + bool hasUpdateField() const override { return false; } + + DictionarySourcePtr clone() const override { return std::make_unique(*this); } + BlockInputStreamPtr loadIds(const std::vector & ids) override; BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + + BlockInputStreamPtr loadUpdatedAll() override + { + throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; + } + + std::string toString() const override; private: static std::string toConnectionString(const std::string& host, const UInt16 port); From a82f4925ec3189eb7eef1ebca48dfa65b35042c4 Mon Sep 17 00:00:00 2001 From: favstovol Date: Thu, 30 May 2019 02:01:25 +0300 Subject: [PATCH 0005/2229] Add stubs --- contrib/cassandra-cmake/CMakeLists.txt | 0 dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Dictionaries/CassandraDictionarySource.cpp | 5 +++-- dbms/src/Dictionaries/CassandraDictionarySource.h | 12 +++++++++--- 4 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 contrib/cassandra-cmake/CMakeLists.txt diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 093f98d22a1..04a7e72e0c8 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -427,6 +427,7 @@ namespace ErrorCodes extern const int BAD_TTL_EXPRESSION = 450; extern const int BAD_TTL_FILE = 451; extern const int SETTING_CONSTRAINT_VIOLATION = 452; + extern const int CASSANDRA_INTERNAL_ERROR = 453; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp index ad89d5e2e43..cfd21510e69 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.cpp +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -17,7 +17,7 @@ namespace DB Block & sample_block, const Context & /* context */) -> DictionarySourcePtr { #if USE_CASSANDRA - return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); + return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else (void)dict_struct; (void)config; @@ -40,7 +40,8 @@ namespace DB namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int UNSUPPORTED_METHOD; extern const int WRONG_PASSWORD; } diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h index 60c503fc4da..2bdd476951a 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.h +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -44,9 +44,15 @@ public: DictionarySourcePtr clone() const override { return std::make_unique(*this); } - BlockInputStreamPtr loadIds(const 
std::vector & ids) override; + BlockInputStreamPtr loadIds(const std::vector & /* ids */) override + { + throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + } - BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override + { + throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + } BlockInputStreamPtr loadUpdatedAll() override { @@ -56,7 +62,7 @@ public: std::string toString() const override; private: - static std::string toConnectionString(const std::string& host, const UInt16 port); + static std::string toConnectionString(const std::string & host, const UInt16 port); const DictionaryStructure dict_struct; const std::string host; From c1af83d239a7c518b58bf333cf7aa54d3bc27d6b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 25 Oct 2019 21:06:08 +0300 Subject: [PATCH 0006/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 80 ++++ dbms/src/Dictionaries/SSDCacheDictionary.h | 365 ++++++++++++++++++ .../src/Dictionaries/SSDCacheDictionary.inc.h | 45 +++ 3 files changed, 490 insertions(+) create mode 100644 dbms/src/Dictionaries/SSDCacheDictionary.cpp create mode 100644 dbms/src/Dictionaries/SSDCacheDictionary.h create mode 100644 dbms/src/Dictionaries/SSDCacheDictionary.inc.h diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp new file mode 100644 index 00000000000..15a160fcd2f --- /dev/null +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -0,0 +1,80 @@ +#include "SSDCacheDictionary.h" + +#include + +namespace DB +{ + +BlockFile::BlockFile(size_t file_id, const std::string & file_name, const Block & header, size_t buffer_size) + : id(file_id), file_name(file_name), buffer_size(buffer_size), out_file(file_name, buffer_size), in_file(file_name), header(header), buffer(header.cloneEmptyColumns()) +{ +} + +void BlockFile::appendBlock(const Block & block) +{ + size_t bytes = 0; + const auto new_columns = block.getColumns(); + if (new_columns.size() != buffer.size()) + { + throw Exception("Wrong size of block in BlockFile::appendBlock(). It's a bug.", ErrorCodes::TYPE_MISMATCH); + } + + const auto id_column = typeid_cast(new_columns.front().get()); + if (!id_column) + throw Exception{"id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; + + size_t start_size = buffer.front()->size(); + for (size_t i = 0; i < header.columns(); ++i) + { + buffer[i]->insertRangeFrom(*new_columns[i], 0, new_columns[i]->size()); + bytes += buffer[i]->byteSize(); + } + + const auto & ids = id_column->getData(); + for (size_t i = 0; i < new_columns.size(); ++i) + { + key_to_file_offset[ids[i]] = start_size + i; + } + + if (bytes >= buffer_size) + { + flush(); + } +} + +void BlockFile::flush() +{ + const auto id_column = typeid_cast(buffer.front().get()); + if (!id_column) + throw Exception{"id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; + const auto & ids = id_column->getData(); + + key_to_file_offset[ids[0]] = out_file.getPositionInFile() + (1ULL << FILE_OFFSET_SIZE); + size_t prev_size = 0; + for (size_t row = 0; row < buffer.front()->size(); ++row) + { + key_to_file_offset[ids[row]] = key_to_file_offset[ids[row ? 
row - 1 : 0]] + prev_size; + prev_size = 0; + for (size_t col = 0; col < header.columns(); ++col) + { + const auto & column = buffer[col]; + const auto & type = header.getByPosition(col).type; + type->serializeBinary(*column, row, out_file); + if (type->getTypeId() != TypeIndex::String) { + prev_size += column->sizeOfValueIfFixed(); + } else { + prev_size += column->getDataAt(row).size + sizeof(UInt64); + } + } + } + + if (out_file.hasPendingData()) { + out_file.sync(); + } + + buffer = header.cloneEmptyColumns(); +} + + + +} diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h new file mode 100644 index 00000000000..cd913142321 --- /dev/null +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -0,0 +1,365 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "DictionaryStructure.h" +#include "IDictionary.h" +#include "IDictionarySource.h" +#include +#include + + +namespace DB +{ + +constexpr size_t OFFSET_MASK = ~0xffff000000000000; +constexpr size_t FILE_ID_SIZE = 16; +constexpr size_t FILE_OFFSET_SIZE = sizeof(size_t) * 8 - FILE_ID_SIZE; + + +class SSDCacheDictionary; + +class BlockFile +{ +public: + using Offset = size_t; + using Offsets = std::vector; + + BlockFile(size_t file_id, const std::string & file_name, const Block & header, size_t buffer_size = 4 * 1024 * 1024); + + void appendBlock(const Block & block); + + template + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + + template + void getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const; + + // TODO:: getString + +private: + void flush(); + + size_t id; + std::string file_name; + size_t buffer_size; + + WriteBufferFromFile out_file; // 4MB + mutable ReadBufferFromFile in_file; // ssd page size TODO:: adaptive buffer (read two if there less than pagesize bytes) + + /// Block structure: Key, (Default + TTL), Attr1, Attr2, ... + Block header; + + std::unordered_map key_to_file_offset; + MutableColumns buffer; +}; + + +class BlockFilesController +{ + BlockFilesController(const std::string & path) : path(path) { + } + + void appendBlock(const Block& block) { + file->appendBlock(block); + } + + template + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + + template + void getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const { + file->getValue(column, ids, out, not_found); + } + + // getString(); + +private: + const std::string path; + std::unique_ptr file; +}; + + +class SSDCacheDictionary final : public IDictionary +{ +public: + SSDCacheDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_, + const size_t size_); + + std::string getName() const override { return name; } + + std::string getTypeName() const override { return "SSDCache"; } + + size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? string_arena->size() : 0); } // TODO: ? 
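+    /// NOTE: counts only in-memory allocations; the on-disk part of the cache is not accounted for yet.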
+ + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } + + double getHitRate() const override + { + return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); + } + + size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } + + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } // TODO: fix + + bool isCached() const override { return true; } + + std::shared_ptr clone() const override + { + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, size); + } + + const IDictionarySource * getSource() const override { return source_ptr.get(); } + + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + + const DictionaryStructure & getStructure() const override { return dict_struct; } + + bool isInjective(const std::string & attribute_name) const override + { + return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; + } + + bool hasHierarchy() const override { return hierarchical_attribute; } + + void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; + + void isInVectorVector( + const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; + void isInConstantVector(const Key child_id, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; + + std::exception_ptr getLastException() const override; + + template + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + +#define DECLARE(TYPE) \ + void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void + getString(const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) + const; + +#define DECLARE(TYPE) \ +void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; + + void has(const PaddedPODArray & ids, 
PaddedPODArray & out) const override; + + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + +private: + template + using ContainerType = Value[]; + template + using ContainerPtrType = std::unique_ptr>; + + struct CellMetadata final + { + using time_point_t = std::chrono::system_clock::time_point; + using time_point_rep_t = time_point_t::rep; + using time_point_urep_t = std::make_unsigned_t; + + static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits::max(); + static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; + + UInt64 id; + /// Stores both expiration time and `is_default` flag in the most significant bit + time_point_urep_t data; + + /// Sets expiration time, resets `is_default` flag to false + time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } + void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } + + bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } + void setDefault() { data |= IS_DEFAULT_MASK; } + }; + + struct Attribute final + { + AttributeUnderlyingType type; + std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> + null_values; + std::variant< + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType, + ContainerPtrType> + arrays; + }; + + void createAttributes(); + + Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + + template + void getItemsNumberImpl( + Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; + + template + void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; + + template + void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; + + PaddedPODArray getCachedIds() const; + + bool isEmptyCell(const UInt64 idx) const; + + size_t getCellIdx(const Key id) const; + + void setDefaultAttributeValue(Attribute & attribute, const Key idx) const; + + void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const; + + Attribute & getAttribute(const std::string & attribute_name) const; + + struct FindResult + { + const size_t cell_idx; + const bool valid; + const bool outdated; + }; + + FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; + + template + void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; + + const std::string name; + const DictionaryStructure dict_struct; + mutable DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; + Logger * const log; + + mutable std::shared_mutex rw_lock; + + /// Actual size will be increased to match power of 2 + const size_t size; + + /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111) + const size_t size_overlap_mask; + + /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3 + static constexpr size_t max_collision_length = 10; + + const size_t zero_cell_idx{getCellIdx(0)}; + std::map 
attribute_index_by_name; + mutable std::vector attributes; + mutable std::vector cells; + Attribute * hierarchical_attribute = nullptr; + std::unique_ptr string_arena; + + mutable std::exception_ptr last_exception; + mutable size_t error_count = 0; + mutable std::chrono::system_clock::time_point backoff_end_time; + + mutable pcg64 rnd_engine; + + mutable size_t bytes_allocated = 0; + mutable std::atomic element_count{0}; + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; +}; + +} + + +#include "SSDCacheDictionary.inc.h" \ No newline at end of file diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.inc.h b/dbms/src/Dictionaries/SSDCacheDictionary.inc.h new file mode 100644 index 00000000000..73e94cb5b6d --- /dev/null +++ b/dbms/src/Dictionaries/SSDCacheDictionary.inc.h @@ -0,0 +1,45 @@ +#pragma once + +namespace DB { + +template +void BlockFile::getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const +{ + std::vector> offsets; + offsets.reserve(ids.size()); + + for (size_t i = 0; i < ids.size(); ++i) + { + auto it = key_to_file_offset.find(ids[i]); + if (it != std::end(key_to_file_offset)) + { + offsets.emplace_back(it->second, i); + } + else + { + not_found.push_back(i); + } + } + std::sort(std::begin(offsets), std::end(offsets)); + + Field field; + for (const auto & [offset, index] : offsets) + { + if (offset & OFFSET_MASK) + { + in_file.seek(offset && !OFFSET_MASK); + for (size_t col = 0; col < column; ++col) + { + const auto & type = header.getByPosition(column).type; + type->deserializeBinary(field, in_file); + } + } + else + { + buffer[column]->get(offset, field); + } + out[index] = DB::get(field); + } +} + +} \ No newline at end of file From 443a5ca9c13a41faef4e04b42c6f3b83bc0f2f43 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 1 Jan 2020 20:40:46 +0300 Subject: [PATCH 0007/2229] changes --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 283 ++++++++++++++++++- dbms/src/Dictionaries/SSDCacheDictionary.h | 201 +++++-------- 2 files changed, 344 insertions(+), 140 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 15a160fcd2f..44ca8b8bbe7 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -1,20 +1,36 @@ #include "SSDCacheDictionary.h" #include +#include +#include +#include +#include +#include +#include +#include namespace DB { -BlockFile::BlockFile(size_t file_id, const std::string & file_name, const Block & header, size_t buffer_size) - : id(file_id), file_name(file_name), buffer_size(buffer_size), out_file(file_name, buffer_size), in_file(file_name), header(header), buffer(header.cloneEmptyColumns()) +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; + extern const int BAD_ARGUMENTS; + extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; + extern const int TOO_SMALL_BUFFER_SIZE; +} + +CachePartition::CachePartition(const std::string & file_name, const Block & header, size_t buffer_size) + : file_name(file_name), buffer_size(buffer_size), out_file(file_name, buffer_size), header(header), buffer(header.cloneEmptyColumns()) { } -void BlockFile::appendBlock(const Block & block) +void CachePartition::appendBlock(const Block & block) { size_t bytes = 0; const auto new_columns = block.getColumns(); - if (new_columns.size() != buffer.size()) + if (new_columns.size() != header.columns()) { throw Exception("Wrong size of block in 
BlockFile::appendBlock(). It's a bug.", ErrorCodes::TYPE_MISMATCH); } @@ -42,7 +58,7 @@ void BlockFile::appendBlock(const Block & block) } } -void BlockFile::flush() +void CachePartition::flush() { const auto id_column = typeid_cast(buffer.front().get()); if (!id_column) @@ -75,6 +91,263 @@ void BlockFile::flush() buffer = header.cloneEmptyColumns(); } +SSDCacheDictionary::SSDCacheDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_, + const std::string & path, + const size_t partition_max_size) + : name(name_) + , dict_struct(dict_struct_) + , source_ptr(std::move(source_ptr_)) + , dict_lifetime(dict_lifetime_) + , storage(path, partition_max_size) +{ + if (!this->source_ptr->supportsSelectiveLoad()) + throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; + createAttributes(); +} + +#define DECLARE(TYPE) \ + void SSDCacheDictionary::get##TYPE( \ + const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ +\ + const auto null_value = std::get(attributes[index].null_value); \ +\ + getItemsNumberImpl( \ + attribute_name, \ + ids, \ + out, \ + [&](const size_t) { return null_value; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +#define DECLARE(TYPE) \ + void SSDCacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ +\ + getItemsNumberImpl( \ + attribute_name, \ + ids, \ + out, \ + [&](const size_t row) { return def[row]; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +#define DECLARE(TYPE) \ + void SSDCacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const PaddedPODArray & ids, \ + const TYPE def, \ + ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ +\ + getItemsNumberImpl( \ + attribute_name, \ + ids, \ + out, \ + [&](const size_t) { return def; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +template +void SSDCacheDictionary::getItemsNumberImpl( + const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const +{ + std::unordered_map> not_found_ids; + 
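+    /// Collect, for every id the storage cannot serve, all output rows that
+    /// asked for it: the same key may occur many times in `ids`, so the single
+    /// source round-trip below can backfill every waiting row at once.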
storage.getValue(attribute_name, ids, out, not_found_ids); + if (not_found_ids.empty()) + return; + + std::vector required_ids(not_found_ids.size()); + std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + + update( + required_ids, + [&](const auto id, const auto & attribute_value) + { + for (const size_t row : not_found_ids[id]) + out[row] = static_cast(attribute_value); + }, + [&](const auto id) + { + for (const size_t row : not_found_ids[id]) + out[row] = get_default(row); + }); +} + +void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const +{ + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + const auto null_value = StringRef{std::get(attribute.null_value)}; + + getItemsString(attribute_name, ids, out, [&](const size_t) { return null_value; }); +} + +void SSDCacheDictionary::getString( + const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const +{ + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + getItemsString(attribute_name, ids, out, [&](const size_t row) { return def->getDataAt(row); }); +} + +void SSDCacheDictionary::getString( + const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const +{ + auto & attribute = getAttribute(attribute_name); + checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + + getItemsString(attribute_name, ids, out, [&](const size_t) { return StringRef{def}; }); +} + +template +void SSDCacheDictionary::getItemsString(const std::string & attribute_name, const PaddedPODArray & ids, + ColumnString * out, DefaultGetter && get_default) const +{ + UNUSED(attribute_name); + UNUSED(ids); + UNUSED(out); + UNUSED(get_default); +} + +size_t SSDCacheDictionary::getAttributeIndex(const std::string & attr_name) const +{ + auto it = attribute_index_by_name.find(attr_name); + if (it == std::end(attribute_index_by_name)) + throw Exception{"Attribute `" + name + "` does not exist.", ErrorCodes::BAD_ARGUMENTS}; + return it->second; +} + +SSDCacheDictionary::Attribute & SSDCacheDictionary::getAttribute(const std::string & attr_name) +{ + return attributes[getAttributeIndex(attr_name)]; +} + +const SSDCacheDictionary::Attribute & SSDCacheDictionary::getAttribute(const std::string & attr_name) const +{ + return attributes[getAttributeIndex(attr_name)]; +} + +template +SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value) +{ + Attribute attr{type, {}}; + attr.null_value = static_cast(null_value.get>()); + bytes_allocated += sizeof(T); + return attr; +} + +template <> +SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value) +{ + Attribute attr{type, {}}; + attr.null_value = null_value.get(); + bytes_allocated += sizeof(StringRef); + //if (!string_arena) + // string_arena = std::make_unique(); + return attr; +} + +SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +{ + switch (type) + { +#define DISPATCH(TYPE) \ +case 
AttributeUnderlyingType::ut##TYPE: \ + return createAttributeWithTypeImpl(type, null_value); + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) + DISPATCH(String) +#undef DISPATCH + } +} + +void SSDCacheDictionary::createAttributes() +{ + attributes.resize(dict_struct.attributes.size()); + for (size_t i = 0; i < dict_struct.attributes.size(); ++i) + { + const auto & attribute = dict_struct.attributes[i]; + + attribute_index_by_name.emplace(attribute.name, i); + attributes.push_back(createAttributeWithType(attribute.type, attribute.null_value)); + + if (attribute.hierarchical) + throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), + ErrorCodes::TYPE_MISMATCH}; + } +} } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index cd913142321..04777bd2587 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -29,33 +29,32 @@ constexpr size_t FILE_OFFSET_SIZE = sizeof(size_t) * 8 - FILE_ID_SIZE; class SSDCacheDictionary; -class BlockFile +class CachePartition { public: using Offset = size_t; using Offsets = std::vector; - BlockFile(size_t file_id, const std::string & file_name, const Block & header, size_t buffer_size = 4 * 1024 * 1024); + CachePartition(const std::string & file_name, const Block & header = {}, size_t buffer_size = 4 * 1024 * 1024); void appendBlock(const Block & block); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - template - void getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const; + template + void getValue(const std::string & attribute_name, const PaddedPODArray & ids, + ResultArrayType & out, std::unordered_map> & not_found) const; // TODO:: getString private: void flush(); - size_t id; std::string file_name; size_t buffer_size; WriteBufferFromFile out_file; // 4MB - mutable ReadBufferFromFile in_file; // ssd page size TODO:: adaptive buffer (read two if there less than pagesize bytes) /// Block structure: Key, (Default + TTL), Attr1, Attr2, ... 
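+    /// The header drives both uses of the buffer: cloneEmptyColumns() gives
+    /// the write buffer its columns, and the column types drive the per-row
+    /// serializeBinary() calls in flush(). The (Default + TTL) slot is
+    /// reserved per-row metadata that nothing writes yet.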
Block header; @@ -65,28 +64,38 @@ private: }; -class BlockFilesController +class CacheStorage { - BlockFilesController(const std::string & path) : path(path) { + CacheStorage(const std::string & path_, size_t partition_max_size_) + : path(path_) + , partition_max_size(partition_max_size_) + { + partition = std::make_unique(path); } - void appendBlock(const Block& block) { - file->appendBlock(block); + void appendBlock(const Block& block) + { + partition->appendBlock(block); } template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - template - void getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const { - file->getValue(column, ids, out, not_found); + template + void getValue(const std::string & attribute_name, const PaddedPODArray & ids, + ResultArrayType & out, std::unordered_map> & not_found) const + { + partition->getValue(attribute_name, ids, out, not_found); } // getString(); + //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; + private: const std::string path; - std::unique_ptr file; + const size_t partition_max_size; + std::unique_ptr partition; }; @@ -98,13 +107,14 @@ public: const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - const size_t size_); + const std::string & path, + const size_t partition_max_size_); std::string getName() const override { return name; } std::string getTypeName() const override { return "SSDCache"; } - size_t getBytesAllocated() const override { return bytes_allocated + (string_arena ? string_arena->size() : 0); } // TODO: ? + size_t getBytesAllocated() const override { return 0; } // TODO: ? size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } @@ -115,13 +125,13 @@ public: size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } // TODO: fix + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / max_size; } // TODO: fix - bool isCached() const override { return true; } + bool supportUpdates() const override { return true; } std::shared_ptr clone() const override { - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, size); + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, max_size); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -132,19 +142,14 @@ public: bool isInjective(const std::string & attribute_name) const override { - return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; + return dict_struct.attributes[getAttributeIndex(attribute_name)].injective; } - bool hasHierarchy() const override { return hierarchical_attribute; } + bool hasHierarchy() const override { return false; } - void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const override; + void toParent(const PaddedPODArray & /* ids */, PaddedPODArray & /* out */ ) const override {} - void isInVectorVector( - const PaddedPODArray & child_ids, const PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; - void isInVectorConstant(const PaddedPODArray & child_ids, const Key ancestor_id, PaddedPODArray & out) const override; - void isInConstantVector(const Key child_id, const 
PaddedPODArray & ancestor_ids, PaddedPODArray & out) const override; - - std::exception_ptr getLastException() const override; + std::exception_ptr getLastException() const override { return last_exception; } template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -196,7 +201,7 @@ public: const; #define DECLARE(TYPE) \ -void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; + void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, const TYPE def, ResultArrayType & out) const; DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -220,139 +225,65 @@ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & i BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: - template - using ContainerType = Value[]; - template - using ContainerPtrType = std::unique_ptr>; - - struct CellMetadata final - { - using time_point_t = std::chrono::system_clock::time_point; - using time_point_rep_t = time_point_t::rep; - using time_point_urep_t = std::make_unsigned_t; - - static constexpr UInt64 EXPIRES_AT_MASK = std::numeric_limits::max(); - static constexpr UInt64 IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; - - UInt64 id; - /// Stores both expiration time and `is_default` flag in the most significant bit - time_point_urep_t data; - - /// Sets expiration time, resets `is_default` flag to false - time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } - void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } - - bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } - void setDefault() { data |= IS_DEFAULT_MASK; } - }; - - struct Attribute final + struct Attribute { AttributeUnderlyingType type; std::variant< - UInt8, - UInt16, - UInt32, - UInt64, - UInt128, - Int8, - Int16, - Int32, - Int64, - Decimal32, - Decimal64, - Decimal128, - Float32, - Float64, - String> - null_values; - std::variant< - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType, - ContainerPtrType> - arrays; + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String> null_value; }; + using Attributes = std::vector; - void createAttributes(); + size_t getAttributeIndex(const std::string & attr_name) const; + Attribute & getAttribute(const std::string & attr_name); + const Attribute & getAttribute(const std::string & attr_name) const; + template + Attribute createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value); Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + void createAttributes(); template void getItemsNumberImpl( - Attribute & attribute, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; - + const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template - void getItemsString(Attribute & attribute, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; + void getItemsString(const std::string & attribute_name, const PaddedPODArray & ids, + 
ColumnString * out, DefaultGetter && get_default) const; template - void update(const std::vector & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const; - - PaddedPODArray getCachedIds() const; - - bool isEmptyCell(const UInt64 idx) const; - - size_t getCellIdx(const Key id) const; - - void setDefaultAttributeValue(Attribute & attribute, const Key idx) const; - - void setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const; - - Attribute & getAttribute(const std::string & attribute_name) const; - - struct FindResult - { - const size_t cell_idx; - const bool valid; - const bool outdated; - }; - - FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const; - - template - void isInImpl(const PaddedPODArray & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; - + void update(const std::vector & requested_ids, PresentIdHandler && on_updated, + AbsentIdHandler && on_id_not_found) const; + const std::string name; const DictionaryStructure dict_struct; mutable DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + + CacheStorage storage; Logger * const log; mutable std::shared_mutex rw_lock; - /// Actual size will be increased to match power of 2 - const size_t size; - - /// all bits to 1 mask (size - 1) (0b1000 - 1 = 0b111) - const size_t size_overlap_mask; - - /// Max tries to find cell, overlaped with mask: if size = 16 and start_cell=10: will try cells: 10,11,12,13,14,15,0,1,2,3 - static constexpr size_t max_collision_length = 10; - - const size_t zero_cell_idx{getCellIdx(0)}; std::map attribute_index_by_name; - mutable std::vector attributes; - mutable std::vector cells; - Attribute * hierarchical_attribute = nullptr; - std::unique_ptr string_arena; + Attributes attributes; mutable std::exception_ptr last_exception; mutable size_t error_count = 0; mutable std::chrono::system_clock::time_point backoff_end_time; - mutable pcg64 rnd_engine; - mutable size_t bytes_allocated = 0; mutable std::atomic element_count{0}; mutable std::atomic hit_count{0}; From 3bbb73e37caeb9764d5761dabac31f183aac3e1a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 2 Jan 2020 22:33:19 +0300 Subject: [PATCH 0008/2229] update --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 160 ++++++++++++++++++- dbms/src/Dictionaries/SSDCacheDictionary.h | 105 +++++++----- 2 files changed, 222 insertions(+), 43 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 44ca8b8bbe7..61f3933f178 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -1,14 +1,34 @@ #include "SSDCacheDictionary.h" #include -#include #include +#include +#include #include #include #include #include #include +namespace ProfileEvents +{ + extern const Event DictCacheKeysRequested; + extern const Event DictCacheKeysRequestedMiss; + extern const Event DictCacheKeysRequestedFound; + extern const Event DictCacheKeysExpired; + extern const Event DictCacheKeysNotFound; + extern const Event DictCacheKeysHit; + extern const Event DictCacheRequestTimeNs; + extern const Event DictCacheRequests; + extern const Event DictCacheLockWriteNs; + extern const Event DictCacheLockReadNs; +} + +namespace CurrentMetrics +{ + extern const Metric DictCacheRequests; +} + namespace DB { @@ -21,8 +41,8 @@ namespace ErrorCodes extern const int TOO_SMALL_BUFFER_SIZE; } -CachePartition::CachePartition(const std::string & 
file_name, const Block & header, size_t buffer_size)
-    : file_name(file_name), buffer_size(buffer_size), out_file(file_name, buffer_size), header(header), buffer(header.cloneEmptyColumns())
+CachePartition::CachePartition(CacheStorage & storage_, const size_t file_id_, const size_t max_size_, const size_t buffer_size_)
+    : storage(storage_), file_id(file_id_), max_size(max_size_), buffer_size(buffer_size_)
 {
 }
@@ -91,6 +111,124 @@ void CachePartition::flush()
     buffer = header.cloneEmptyColumns();
 }
+
+template <typename PresentIdHandler, typename AbsentIdHandler>
+std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector<Key> & requested_ids,
+    PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found)
+{
+    CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests};
+    ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size());
+
+    std::unordered_map<Key, UInt8> remaining_ids{requested_ids.size()};
+    for (const auto id : requested_ids)
+        remaining_ids.insert({id, 0});
+
+    const auto now = std::chrono::system_clock::now();
+
+    const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
+
+    if (now > backoff_end_time)
+    {
+        try
+        {
+            if (update_error_count)
+            {
+                /// Recover after error: we have to clone the source here because
+                /// it could keep connections which should be reset after error.
+                source_ptr = source_ptr->clone();
+            }
+
+            Stopwatch watch;
+            auto stream = source_ptr->loadIds(requested_ids);
+            stream->readPrefix();
+
+            while (const auto block = stream->read())
+            {
+                const auto id_column = typeid_cast<const ColumnUInt64 *>(block.safeGetByPosition(0).column.get());
+                if (!id_column)
+                    throw Exception{"Id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH};
+
+                const auto & ids = id_column->getData();
+
+                /// cache column pointers
+                const auto column_ptrs = ext::map<std::vector>(
+                    ext::range(0, dictionary.getAttributes().size()),
+                    [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); });
+
+                for (const auto i : ext::range(0, ids.size()))
+                {
+                    const auto id = ids[i];
+
+                    on_updated(id, i, column_ptrs);
+                    /// mark corresponding id as found
+                    remaining_ids[id] = 1;
+                }
+
+                /// TODO: Add TTL to block
+                partitions[0]->appendBlock(block);
+            }
+
+            stream->readSuffix();
+
+            update_error_count = 0;
+            last_update_exception = std::exception_ptr{};
+            backoff_end_time = std::chrono::system_clock::time_point{};
+
+            ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
+        }
+        catch (...)
+        {
+            ++update_error_count;
+            last_update_exception = std::current_exception();
+            backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, update_error_count));
+
+            tryLogException(last_update_exception, log, "Could not update cache dictionary '" + dictionary.getName() +
+                "', next update is scheduled at " + ext::to_string(backoff_end_time));
+        }
+    }
+
+    size_t not_found_num = 0, found_num = 0;
+
+    /// Check which ids have not been found and require setting null_value
+    auto mutable_columns = header.cloneEmptyColumns();
+    for (const auto & id_found_pair : remaining_ids)
+    {
+        if (id_found_pair.second)
+        {
+            ++found_num;
+            continue;
+        }
+        ++not_found_num;
+
+        const auto id = id_found_pair.first;
+
+        if (update_error_count)
+        {
+            /// TODO: use the old (expired) values here instead of failing.
+
+            /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`.
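+            /// (The in-memory CacheDictionary falls back to expired cells in
+            /// this case; the TODO above is about doing the same here once
+            /// per-row TTLs are actually stored.)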
+ std::rethrow_exception(last_update_exception); + } + + /// TODO: Add TTL + + /// Set null_value for each attribute + const auto & attributes = dictionary.getAttributes(); + for (size_t i = 0; i < attributes.size(); ++i) + { + const auto & attribute = attributes[i]; + mutable_columns[i].insert(attribute.null_value); + } + + /// inform caller that the cell has not been found + on_id_not_found(id); + } + partitions[0]->appendBlock(header.cloneWithColumns(std::move(mutable_columns))); + + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); + ProfileEvents::increment(ProfileEvents::DictCacheRequests); +} + SSDCacheDictionary::SSDCacheDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, @@ -102,7 +240,7 @@ SSDCacheDictionary::SSDCacheDictionary( , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) - , storage(path, partition_max_size) + , storage(*this, path, 1, partition_max_size) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -209,6 +347,8 @@ template void SSDCacheDictionary::getItemsNumberImpl( const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { + const auto attribute_index = getAttributeIndex(attribute_index); + std::unordered_map> not_found_ids; storage.getValue(attribute_name, ids, out, not_found_ids); if (not_found_ids.empty()) @@ -217,12 +357,13 @@ void SSDCacheDictionary::getItemsNumberImpl( std::vector required_ids(not_found_ids.size()); std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); - update( + storage.update( + source_ptr, required_ids, - [&](const auto id, const auto & attribute_value) + [&](const auto id, const auto row, const auto & attributes) { for (const size_t row : not_found_ids[id]) - out[row] = static_cast(attribute_value); + out[row] = static_cast(attributes[attribute_index][row]); }, [&](const auto id) { @@ -287,6 +428,11 @@ const SSDCacheDictionary::Attribute & SSDCacheDictionary::getAttribute(const std return attributes[getAttributeIndex(attr_name)]; } +const SSDCacheDictionary::Attributes & SSDCacheDictionary::getAttributes() const +{ + return attributes; +} + template SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value) { diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 04777bd2587..6ece329ba87 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -22,12 +22,8 @@ namespace DB { -constexpr size_t OFFSET_MASK = ~0xffff000000000000; -constexpr size_t FILE_ID_SIZE = 16; -constexpr size_t FILE_OFFSET_SIZE = sizeof(size_t) * 8 - FILE_ID_SIZE; - - class SSDCacheDictionary; +class CacheStorage; class CachePartition { @@ -35,7 +31,7 @@ public: using Offset = size_t; using Offsets = std::vector; - CachePartition(const std::string & file_name, const Block & header = {}, size_t buffer_size = 4 * 1024 * 1024); + CachePartition(CacheStorage & storage, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); void appendBlock(const Block & block); @@ -48,34 +44,46 @@ public: // TODO:: getString + /// 0 -- 
not found + /// 1 -- good + /// 2 -- expired + void has(const PaddedPODArray & ids, ResultArrayType & out) const; + private: void flush(); - std::string file_name; + + CacheStorage & storage; + + size_t file_id; + size_t max_size; size_t buffer_size; - WriteBufferFromFile out_file; // 4MB - - /// Block structure: Key, (Default + TTL), Attr1, Attr2, ... - Block header; + //mutable std::shared_mutex rw_lock; + int index_fd; + int data_fd; std::unordered_map key_to_file_offset; MutableColumns buffer; + + mutable std::atomic element_count{0}; }; +using CachePartitionPtr = std::unique_ptr; + class CacheStorage { - CacheStorage(const std::string & path_, size_t partition_max_size_) - : path(path_) - , partition_max_size(partition_max_size_) - { - partition = std::make_unique(path); - } + using Key = IDictionary::Key; - void appendBlock(const Block& block) + CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) + : dictionary(dictionary_) + , path(path_) + , partition_max_size(partition_max_size_) + , log(&Poco::Logger::get("CacheStorage")) { - partition->appendBlock(block); + for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) + partitions.emplace_back(std::make_unique(partition_id, partition_max_size)); } template @@ -85,17 +93,42 @@ class CacheStorage void getValue(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - partition->getValue(attribute_name, ids, out, not_found); + partitions[0]->getValue(attribute_name, ids, out, not_found); } // getString(); + template + std::exception_ptr update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, + PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found); + //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; + std::exception_ptr getLastException() const { return last_update_exception; } + private: + SSDCacheDictionary & dictionary; + + /// Block structure: Key, (Default + TTL), Attr1, Attr2, ... 
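+    /// Cloned in update() to materialize null_value rows for ids the source
+    /// did not return; those rows are then appended like any other block.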
+ const Block header; const std::string path; const size_t partition_max_size; - std::unique_ptr partition; + std::vector partitions; + + Logger * const log; + + mutable pcg64 rnd_engine; + + mutable std::shared_mutex rw_lock; + + mutable std::exception_ptr last_update_exception; + mutable size_t update_error_count = 0; + mutable std::chrono::system_clock::time_point backoff_end_time; + + // stats + mutable std::atomic element_count{0}; + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; }; @@ -125,13 +158,13 @@ public: size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / max_size; } // TODO: fix + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / partition_max_size; } // TODO: fix bool supportUpdates() const override { return true; } std::shared_ptr clone() const override { - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, max_size); + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, partition_max_size); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -149,7 +182,7 @@ public: void toParent(const PaddedPODArray & /* ids */, PaddedPODArray & /* out */ ) const override {} - std::exception_ptr getLastException() const override { return last_exception; } + std::exception_ptr getLastException() const override { return storage.getLastException(); } template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -222,9 +255,16 @@ public: void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; - BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override // TODO + { + UNUSED(column_names); + UNUSED(max_block_size); + return nullptr; + } private: + friend class CacheStorage; + struct Attribute { AttributeUnderlyingType type; @@ -250,6 +290,7 @@ private: size_t getAttributeIndex(const std::string & attr_name) const; Attribute & getAttribute(const std::string & attr_name); const Attribute & getAttribute(const std::string & attr_name) const; + const Attributes & getAttributes() const; template Attribute createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value); @@ -262,28 +303,20 @@ private: template void getItemsString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; - - template - void update(const std::vector & requested_ids, PresentIdHandler && on_updated, - AbsentIdHandler && on_id_not_found) const; const std::string name; const DictionaryStructure dict_struct; mutable DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; - CacheStorage storage; + const std::string path; + const size_t partition_max_size; + mutable CacheStorage storage; Logger * const log; - mutable std::shared_mutex rw_lock; - std::map attribute_index_by_name; Attributes attributes; - mutable std::exception_ptr last_exception; - mutable size_t error_count = 0; - mutable std::chrono::system_clock::time_point backoff_end_time; - mutable size_t bytes_allocated = 0; mutable std::atomic element_count{0}; mutable std::atomic hit_count{0}; From b55d8dd348c26406965dd318c47e37ec705ab847 
Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 3 Jan 2020 22:52:07 +0300 Subject: [PATCH 0009/2229] update, changed block -> attrs --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 261 ++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 78 ++++-- .../src/Dictionaries/SSDCacheDictionary.inc.h | 4 +- 3 files changed, 272 insertions(+), 71 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 61f3933f178..4d0cbc3f075 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -1,5 +1,6 @@ #include "SSDCacheDictionary.h" +#include #include #include #include @@ -41,35 +42,66 @@ namespace ErrorCodes extern const int TOO_SMALL_BUFFER_SIZE; } -CachePartition::CachePartition(CacheStorage & storage_, const size_t file_id_, const size_t max_size_, const size_t buffer_size_) - : storage(storage_), file_id(file_id_), max_size(max_size_), buffer_size(buffer_size_) +namespace { + constexpr size_t INMEMORY = (1ULL << 63ULL); + const std::string BIN_FILE_EXT = ".bin"; + const std::string IND_FILE_EXT = ".idx"; } -void CachePartition::appendBlock(const Block & block) +CachePartition::CachePartition(const std::vector & structure, const std::string & dir_path, + const size_t file_id_, const size_t max_size_, const size_t buffer_size_) + : file_id(file_id_), max_size(max_size_), buffer_size(buffer_size_), path(dir_path + "/" + std::to_string(file_id)) { - size_t bytes = 0; - const auto new_columns = block.getColumns(); - if (new_columns.size() != header.columns()) + for (const auto & type : structure) { - throw Exception("Wrong size of block in BlockFile::appendBlock(). It's a bug.", ErrorCodes::TYPE_MISMATCH); - } + switch (type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + buffer.emplace_back(type, std::vector()); \ + break; - const auto id_column = typeid_cast(new_columns.front().get()); - if (!id_column) - throw Exception{"id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } +} + +void CachePartition::appendBlock(const Attributes & new_columns) +{ + if (new_columns.size() != buffer.size()) + throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; + + const auto & id_column = std::get>(new_columns.front().values); size_t start_size = buffer.front()->size(); - for (size_t i = 0; i < header.columns(); ++i) + for (size_t i = 0; i < buffer.size(); ++i) { - buffer[i]->insertRangeFrom(*new_columns[i], 0, new_columns[i]->size()); + appendValuesToBufferAttribute(buffer[i], new_columns[i]); bytes += buffer[i]->byteSize(); } - const auto & ids = id_column->getData(); - for (size_t i = 0; i < new_columns.size(); ++i) + for (size_t i = 0; i < id_column.size(); ++i) { - key_to_file_offset[ids[i]] = start_size + i; + key_to_file_offset[id_column[i]] = (start_size + i) | INMEMORY; } if (bytes >= buffer_size) @@ -78,14 +110,57 @@ void CachePartition::appendBlock(const Block & block) } } +void CachePartition::appendValuesToBufferAttribute(Attribute & to, const Attribute & from) +{ + switch (to.type) + { +#define DISPATCH(TYPE) \ + case 
AttributeUnderlyingType::ut##TYPE: \ + { \ + auto &to_values = std::get>(to.values); \ + auto &from_values = std::get>(from.values); \ + size_t prev_size = to_values.size(); \ + to_values.resize(to_values.size() + from_values.size()); \ + memcpy(to_values.data() + prev_size * sizeof(TYPE), from_values.data(), from_values.size() * sizeof(TYPE)); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } +} + void CachePartition::flush() { + if (!write_data_buffer) + { + write_data_buffer = std::make_unique(path + BIN_FILE_EXT, buffer_size, O_RDWR | O_CREAT | O_TRUNC); + // TODO: не перетирать + seek в конец файла + } + const auto id_column = typeid_cast(buffer.front().get()); if (!id_column) throw Exception{"id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; const auto & ids = id_column->getData(); - key_to_file_offset[ids[0]] = out_file.getPositionInFile() + (1ULL << FILE_OFFSET_SIZE); + key_to_file_offset[ids[0]] = write_data_buffer->getPositionInFile(); size_t prev_size = 0; for (size_t row = 0; row < buffer.front()->size(); ++row) { @@ -95,7 +170,7 @@ void CachePartition::flush() { const auto & column = buffer[col]; const auto & type = header.getByPosition(col).type; - type->serializeBinary(*column, row, out_file); + type->serializeBinary(*column, row, *write_data_buffer); if (type->getTypeId() != TypeIndex::String) { prev_size += column->sizeOfValueIfFixed(); } else { @@ -104,13 +179,27 @@ void CachePartition::flush() } } - if (out_file.hasPendingData()) { - out_file.sync(); - } + write_data_buffer->sync(); buffer = header.cloneEmptyColumns(); } +template +void CachePartition::getValue(const std::string & attribute_name, const PaddedPODArray & ids, + ResultArrayType & out, std::unordered_map> & not_found) const +{ + UNUSED(attribute_name); + UNUSED(ids); + UNUSED(out); + UNUSED(not_found); +} + +void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const +{ + UNUSED(ids); + UNUSED(out); +} + template std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found) @@ -143,28 +232,18 @@ std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const while (const auto block = stream->read()) { - const auto id_column = typeid_cast(block.safeGetByPosition(0).column.get()); - if (!id_column) - throw Exception{"Id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; - - const auto & ids = id_column->getData(); - - /// cache column pointers - const auto column_ptrs = ext::map( - ext::range(0, dictionary.getAttributes().size()), - [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); }); + const auto new_attributes = createAttributesFromBlock(block); + const auto & ids = std::get>(new_attributes.front().values); for (const auto i : ext::range(0, ids.size())) { - const auto id = ids[i]; - - on_updated(id, i, column_ptrs); /// mark corresponding id as found - remaining_ids[id] = 1; + on_updated(ids[i], i, new_attributes); + remaining_ids[ids[i]] = 1; } /// TODO: Add TTL to block - partitions[0]->appendBlock(block); + partitions[0]->appendBlock(new_attributes); } 
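+            /// At this point every fetched block has been converted once into
+            /// plain value vectors (createAttributesFromBlock) and appended to
+            /// partition 0; on_updated saw the same vectors, so found rows
+            /// were filled without a second read.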
stream->readSuffix(); @@ -189,7 +268,40 @@ std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const size_t not_found_num = 0, found_num = 0; /// Check which ids have not been found and require setting null_value - auto mutable_columns = header.cloneEmptyColumns(); + CachePartition::Attributes new_attributes; + { + /// TODO: create attributes from structure + for (const auto & attribute : dictionary.getAttributes()) + { + switch (attribute.type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + new_attributes.emplace_back(attribute.type, std::vector()); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } + } for (const auto & id_found_pair : remaining_ids) { if (id_found_pair.second) @@ -216,31 +328,81 @@ std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const for (size_t i = 0; i < attributes.size(); ++i) { const auto & attribute = attributes[i]; - mutable_columns[i].insert(attribute.null_value); + // TODO : append null + (attribute.null_value); } /// inform caller that the cell has not been found on_id_not_found(id); } - partitions[0]->appendBlock(header.cloneWithColumns(std::move(mutable_columns))); + partitions[0]->appendBlock(new_attributes); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); ProfileEvents::increment(ProfileEvents::DictCacheRequests); } +CachePartition::Attributes CacheStorage::createAttributesFromBlock(const Block & block) +{ + CachePartition::Attributes attributes; + + const auto & structure = dictionary.getAttributes(); + const auto columns = block.getColumns(); + for (size_t i = 0; i < structure.size(); ++i) + { + const auto & column = columns[i]; + switch (structure[i].type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + std::vector values(column->size()); \ + const auto raw_data = column->getRawData(); \ + memcpy(values.data(), raw_data.data, raw_data.size); \ + attributes.emplace_back(structure[i].type, std::move(values)); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } + + return attributes; +} + SSDCacheDictionary::SSDCacheDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - const std::string & path, - const size_t partition_max_size) + const std::string & path_, + const size_t partition_max_size_) : name(name_) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) + , path(path_) + , partition_max_size(partition_max_size_) , storage(*this, path, 1, partition_max_size) + , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with 
CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -347,10 +509,10 @@ template void SSDCacheDictionary::getItemsNumberImpl( const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { - const auto attribute_index = getAttributeIndex(attribute_index); + const auto attribute_index = getAttributeIndex(attribute_name); std::unordered_map> not_found_ids; - storage.getValue(attribute_name, ids, out, not_found_ids); + storage.getValue(attribute_name, ids, out, not_found_ids); if (not_found_ids.empty()) return; @@ -360,10 +522,15 @@ void SSDCacheDictionary::getItemsNumberImpl( storage.update( source_ptr, required_ids, - [&](const auto id, const auto row, const auto & attributes) + [&](const auto id, const auto row, const auto & new_attributes) { - for (const size_t row : not_found_ids[id]) - out[row] = static_cast(attributes[attribute_index][row]); + Field field; + for (const size_t out_row : not_found_ids[id]) + { + new_attributes[attribute_index] + ->get(row, field); + out[out_row] = field.get(); + } }, [&](const auto id) { @@ -488,7 +655,7 @@ void SSDCacheDictionary::createAttributes() const auto & attribute = dict_struct.attributes[i]; attribute_index_by_name.emplace(attribute.name, i); - attributes.push_back(createAttributeWithType(attribute.type, attribute.null_value)); + attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 6ece329ba87..ff6bfd10d8c 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -15,9 +15,7 @@ #include "DictionaryStructure.h" #include "IDictionary.h" #include "IDictionarySource.h" -#include -#include - +#include namespace DB { @@ -31,9 +29,8 @@ public: using Offset = size_t; using Offsets = std::vector; - CachePartition(CacheStorage & storage, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); - - void appendBlock(const Block & block); + CachePartition(const std::vector & structure, const std::string & dir_path, + const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -49,22 +46,55 @@ public: /// 2 -- expired void has(const PaddedPODArray & ids, ResultArrayType & out) const; + struct Attribute + { + template + using Container = std::vector; + + AttributeUnderlyingType type; + std::variant< + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container> values; + }; + using Attributes = std::vector; + + + // Key, (Metadata), attributes + void appendBlock(const Attributes & new_columns); + private: void flush(); - - CacheStorage & storage; + void appendValuesToBufferAttribute(Attribute & to, const Attribute & from); size_t file_id; size_t max_size; size_t buffer_size; + std::string path; //mutable std::shared_mutex rw_lock; - int index_fd; + //int index_fd; int data_fd; + std::unique_ptr write_data_buffer; std::unordered_map key_to_file_offset; - MutableColumns buffer; + + Attributes buffer; + //MutableColumns buffer; + size_t bytes = 0; mutable std::atomic element_count{0}; }; 
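The Attribute scheme above, one runtime type tag plus a std::variant of plain value vectors per column, recurs throughout these patches, with the DISPATCH X-macro expanding one switch case per supported numeric type. A minimal, self-contained sketch of the same idiom follows; all names in it are illustrative, not the dictionary's actual API:

    #include <cstdint>
    #include <iostream>
    #include <variant>
    #include <vector>

    enum class UnderlyingType { UInt64, Float64 };

    /// One buffered column: a type tag plus the matching vector,
    /// mirroring CachePartition::Attribute in the patch above.
    struct Column
    {
        UnderlyingType type;
        std::variant<std::vector<uint64_t>, std::vector<double>> values;
    };

    /// One switch case per type, in the spirit of the patch's DISPATCH macro.
    #define DISPATCH(CPP_TYPE, ENUM_TAG) \
        case UnderlyingType::ENUM_TAG: \
        { \
            auto & dst = std::get<std::vector<CPP_TYPE>>(to.values); \
            const auto & src = std::get<std::vector<CPP_TYPE>>(from.values); \
            dst.insert(dst.end(), src.begin(), src.end()); \
            break; \
        }

    /// Append `from` onto `to`, dispatching on the stored type tag.
    void appendColumn(Column & to, const Column & from)
    {
        switch (to.type)
        {
            DISPATCH(uint64_t, UInt64)
            DISPATCH(double, Float64)
        }
    }
    #undef DISPATCH

    int main()
    {
        Column a{UnderlyingType::UInt64, std::vector<uint64_t>{1, 2}};
        Column b{UnderlyingType::UInt64, std::vector<uint64_t>{3, 4}};
        appendColumn(a, b);
        std::cout << std::get<std::vector<uint64_t>>(a.values).size() << "\n"; // prints 4
    }

Appending via insert() also sidesteps a pitfall visible in the patch's memcpy: to_values.data() + prev_size already points at element prev_size, so scaling the offset by sizeof(TYPE) again overshoots for any multi-byte type. Strings do not fit this fixed-width vector model, which is why every utString case in the patches is still a TODO.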
@@ -74,6 +104,7 @@ using CachePartitionPtr = std::unique_ptr; class CacheStorage { +public: using Key = IDictionary::Key; CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) @@ -83,17 +114,17 @@ class CacheStorage , log(&Poco::Logger::get("CacheStorage")) { for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) - partitions.emplace_back(std::make_unique(partition_id, partition_max_size)); + partitions.emplace_back(std::make_unique(*this, partition_id, partition_max_size, path_)); } template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - template + template void getValue(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - partitions[0]->getValue(attribute_name, ids, out, not_found); + partitions[0]->getValue(attribute_name, ids, out, not_found); } // getString(); @@ -106,11 +137,15 @@ class CacheStorage std::exception_ptr getLastException() const { return last_update_exception; } + const std::string & getPath() const { return path; } + private: + CachePartition::Attributes createAttributesFromBlock(const Block & block); + SSDCacheDictionary & dictionary; - /// Block structure: Key, (Default + TTL), Attr1, Attr2, ... - const Block header; + // Block structure: Key, (Default + TTL), Attr1, Attr2, ... + // const Block header; const std::string path; const size_t partition_max_size; std::vector partitions; @@ -164,7 +199,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, partition_max_size); + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, partition_max_size); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -253,7 +288,7 @@ public: void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; - void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; + void has(const PaddedPODArray & /* ids */, PaddedPODArray & /* out */) const override {} // TODO BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override // TODO { @@ -262,9 +297,6 @@ public: return nullptr; } -private: - friend class CacheStorage; - struct Attribute { AttributeUnderlyingType type; @@ -287,10 +319,12 @@ private: }; using Attributes = std::vector; + const Attributes & getAttributes() const; + +private: size_t getAttributeIndex(const std::string & attr_name) const; Attribute & getAttribute(const std::string & attr_name); const Attribute & getAttribute(const std::string & attr_name) const; - const Attributes & getAttributes() const; template Attribute createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value); @@ -315,7 +349,7 @@ private: Logger * const log; std::map attribute_index_by_name; - Attributes attributes; + Attributes attributes; // TODO: move to storage mutable size_t bytes_allocated = 0; mutable std::atomic element_count{0}; diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.inc.h b/dbms/src/Dictionaries/SSDCacheDictionary.inc.h index 73e94cb5b6d..c3d11303509 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.inc.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.inc.h @@ -1,7 +1,7 @@ #pragma once namespace DB { - +/* template void BlockFile::getValue(size_t column, 
const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const { @@ -41,5 +41,5 @@ void BlockFile::getValue(size_t column, const PaddedPODArray & ids, Resu out[index] = DB::get(field); } } - +*/ } \ No newline at end of file From 5dccab304f456435e4a340811118f710feeca41e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 4 Jan 2020 18:04:16 +0300 Subject: [PATCH 0010/2229] compl --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 166 ++++++++++++++----- dbms/src/Dictionaries/SSDCacheDictionary.h | 35 ++-- 2 files changed, 147 insertions(+), 54 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 4d0cbc3f075..afa473c0bec 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -59,7 +60,9 @@ CachePartition::CachePartition(const std::vector & stru { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - buffer.emplace_back(type, std::vector()); \ + buffer.emplace_back(); \ + buffer.back().type = type; \ + buffer.back().values = std::vector(); \ break; DISPATCH(UInt8) @@ -90,21 +93,21 @@ void CachePartition::appendBlock(const Attributes & new_columns) if (new_columns.size() != buffer.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; - const auto & id_column = std::get>(new_columns.front().values); + const auto & ids = std::get>(new_columns.front().values); - size_t start_size = buffer.front()->size(); + size_t start_size = ids.size(); for (size_t i = 0; i < buffer.size(); ++i) { appendValuesToBufferAttribute(buffer[i], new_columns[i]); - bytes += buffer[i]->byteSize(); + //bytes += buffer[i]->byteSize(); } - for (size_t i = 0; i < id_column.size(); ++i) + for (size_t i = 0; i < ids.size(); ++i) { - key_to_file_offset[id_column[i]] = (start_size + i) | INMEMORY; + key_to_file_offset[ids[i]] = (start_size + i) | INMEMORY; } - if (bytes >= buffer_size) + //if (bytes >= buffer_size) { flush(); } @@ -155,33 +158,67 @@ void CachePartition::flush() // TODO: не перетирать + seek в конец файла } - const auto id_column = typeid_cast(buffer.front().get()); - if (!id_column) - throw Exception{"id column has type different from UInt64.", ErrorCodes::TYPE_MISMATCH}; - const auto & ids = id_column->getData(); + const auto & ids = std::get>(buffer.front().values); + + std::vector offsets; - key_to_file_offset[ids[0]] = write_data_buffer->getPositionInFile(); size_t prev_size = 0; - for (size_t row = 0; row < buffer.front()->size(); ++row) + for (size_t row = 0; row < ids.size(); ++row) { - key_to_file_offset[ids[row]] = key_to_file_offset[ids[row ? row - 1 : 0]] + prev_size; + offsets.push_back((offsets.empty() ? 
write_data_buffer->getPositionInFile() : offsets.back()) + prev_size); prev_size = 0; - for (size_t col = 0; col < header.columns(); ++col) + + for (size_t col = 0; col < buffer.size(); ++col) { - const auto & column = buffer[col]; - const auto & type = header.getByPosition(col).type; - type->serializeBinary(*column, row, *write_data_buffer); - if (type->getTypeId() != TypeIndex::String) { - prev_size += column->sizeOfValueIfFixed(); - } else { - prev_size += column->getDataAt(row).size + sizeof(UInt64); + const auto & attribute = buffer[col]; + + switch (attribute.type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + const auto & values = std::get>(attribute.values); \ + writeBinary(values[row], *static_cast(write_data_buffer.get())); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; } } } - write_data_buffer->sync(); - buffer = header.cloneEmptyColumns(); + /// commit changes in index + for (size_t row = 0; row < ids.size(); ++row) + { + key_to_file_offset[ids[row]] = offsets[row]; + } + + /// clear buffer + for (auto & attribute : buffer) + { + std::visit([](auto & attr) { + attr.clear(); + }, attribute.values); + } } template @@ -200,8 +237,23 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayType structure; + for (const auto & item : dictionary.getStructure().attributes) + { + structure.push_back(item.underlying_type); + } + for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) + partitions.emplace_back(std::make_unique(structure, path_, partition_id, partition_max_size)); +} + template -std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, +void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found) { CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; @@ -277,7 +329,9 @@ std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - new_attributes.emplace_back(attribute.type, std::vector()); \ + new_attributes.emplace_back(); \ + new_attributes.back().type = attribute.type; \ + new_attributes.back().values = std::vector(); \ break; DISPATCH(UInt8) @@ -328,8 +382,38 @@ std::exception_ptr CacheStorage::update(DictionarySourcePtr & source_ptr, const for (size_t i = 0; i < attributes.size(); ++i) { const auto & attribute = attributes[i]; - // TODO : append null - (attribute.null_value); + // append null + switch (attribute.type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + auto & to_values = std::get>(new_attributes[i].values); \ + auto & null_value = std::get(attribute.null_value); \ + to_values.push_back(null_value); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: 
string support + break; + } } /// inform caller that the cell has not been found @@ -359,7 +443,9 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock(const Block & std::vector values(column->size()); \ const auto raw_data = column->getRawData(); \ memcpy(values.data(), raw_data.data, raw_data.size); \ - attributes.emplace_back(structure[i].type, std::move(values)); \ + attributes.emplace_back(); \ + attributes.back().type = structure[i].type; \ + attributes.back().values = std::move(values); \ } \ break; @@ -416,14 +502,12 @@ SSDCacheDictionary::SSDCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ -\ const auto null_value = std::get(attributes[index].null_value); \ -\ getItemsNumberImpl( \ - attribute_name, \ - ids, \ - out, \ - [&](const size_t) { return null_value; }); \ + attribute_name, \ + ids, \ + out, \ + [&](const size_t) { return null_value; }); \ } DECLARE(UInt8) DECLARE(UInt16) @@ -450,7 +534,6 @@ SSDCacheDictionary::SSDCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ -\ getItemsNumberImpl( \ attribute_name, \ ids, \ @@ -522,15 +605,9 @@ void SSDCacheDictionary::getItemsNumberImpl( storage.update( source_ptr, required_ids, - [&](const auto id, const auto row, const auto & new_attributes) - { - Field field; + [&](const auto id, const auto row, const auto & new_attributes) { for (const size_t out_row : not_found_ids[id]) - { - new_attributes[attribute_index] - ->get(row, field); - out[out_row] = field.get(); - } + out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const auto id) { @@ -645,6 +722,7 @@ case AttributeUnderlyingType::ut##TYPE: \ DISPATCH(String) #undef DISPATCH } + throw Exception{"Unknown attribute type: " + std::to_string(static_cast(type)), ErrorCodes::TYPE_MISMATCH}; } void SSDCacheDictionary::createAttributes() diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index ff6bfd10d8c..b5a2712dd05 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -23,6 +23,27 @@ namespace DB class SSDCacheDictionary; class CacheStorage; +/*class SimpleSerializer +{ +public: + bool block() const { return false; } + + template + size_t estimateSizeNumber(T number) const; + + size_t estimateSizeString(const String & str) const; + + template + ssize_t writeNumber(T number, WriteBuffer & buffer); + + ssize_t writeString(const String & str, WriteBuffer & buffer); + + template + ssize_t readNumber(T number, WriteBuffer & buffer); + + ssize_t readString(const String & str, WriteBuffer & buffer); +};*/ + class CachePartition { public: @@ -107,15 +128,8 @@ class CacheStorage public: using Key = IDictionary::Key; - CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) - : dictionary(dictionary_) - , path(path_) - , partition_max_size(partition_max_size_) - , log(&Poco::Logger::get("CacheStorage")) - { - for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) - partitions.emplace_back(std::make_unique(*this, partition_id, partition_max_size, path_)); - } + CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, + const 
size_t partitions_count_, const size_t partition_max_size_); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -130,7 +144,7 @@ public: // getString(); template - std::exception_ptr update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, + void update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found); //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; @@ -319,6 +333,7 @@ public: }; using Attributes = std::vector; + /// TODO: move const Attributes & getAttributes() const; private: From 55125cd5ac9819b0255bafb611befb0551a15c7f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 5 Jan 2020 16:59:49 +0300 Subject: [PATCH 0011/2229] create + refactoring --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 177 +++++++++++------- dbms/src/Dictionaries/SSDCacheDictionary.h | 22 ++- .../getDictionaryConfigurationFromAST.cpp | 26 ++- .../src/Dictionaries/registerDictionaries.cpp | 1 + dbms/src/Dictionaries/registerDictionaries.h | 1 + .../Functions/FunctionsExternalDictionaries.h | 3 + dbms/src/Parsers/ASTDictionary.cpp | 18 +- dbms/src/Parsers/ASTDictionary.h | 2 +- dbms/src/Parsers/ParserDictionary.cpp | 14 +- 9 files changed, 163 insertions(+), 101 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index afa473c0bec..90c8057034c 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "DictionaryFactory.h" #include #include #include @@ -50,35 +51,38 @@ namespace const std::string IND_FILE_EXT = ".idx"; } -CachePartition::CachePartition(const std::vector & structure, const std::string & dir_path, - const size_t file_id_, const size_t max_size_, const size_t buffer_size_) +CachePartition::CachePartition( + const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure, + const std::string & dir_path, const size_t file_id_, const size_t max_size_, const size_t buffer_size_) : file_id(file_id_), max_size(max_size_), buffer_size(buffer_size_), path(dir_path + "/" + std::to_string(file_id)) { - for (const auto & type : structure) + keys_buffer.type = AttributeUnderlyingType::utUInt64; + keys_buffer.values = std::vector(); + for (const auto & type : attributes_structure) { switch (type) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - buffer.emplace_back(); \ - buffer.back().type = type; \ - buffer.back().values = std::vector(); \ + attributes_buffer.emplace_back(); \ + attributes_buffer.back().type = type; \ + attributes_buffer.back().values = std::vector(); \ break; - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) #undef DISPATCH case AttributeUnderlyingType::utString: @@ -88,17 +92,19 @@ CachePartition::CachePartition(const std::vector & stru } } -void CachePartition::appendBlock(const
Attributes & new_columns) +void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes) { - if (new_columns.size() != buffer.size()) + if (new_attributes.size() != attributes_buffer.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; - const auto & ids = std::get>(new_columns.front().values); + const auto & ids = std::get>(new_keys.values); size_t start_size = ids.size(); + + appendValuesToBufferAttribute(keys_buffer, new_keys); + for (size_t i = 0; i < attributes_buffer.size(); ++i) { - appendValuesToBufferAttribute(buffer[i], new_columns[i]); + appendValuesToBufferAttribute(attributes_buffer[i], new_attributes[i]); //bytes += buffer[i]->byteSize(); } @@ -158,7 +164,7 @@ void CachePartition::flush() // TODO: do not overwrite the file; seek to its end instead } - const auto & ids = std::get>(buffer.front().values); + const auto & ids = std::get>(keys_buffer.values); std::vector offsets; @@ -168,9 +174,9 @@ void CachePartition::flush() offsets.push_back((offsets.empty() ? write_data_buffer->getPositionInFile() : offsets.back()) + prev_size); prev_size = 0; - for (size_t col = 0; col < buffer.size(); ++col) + for (size_t col = 0; col < attributes_buffer.size(); ++col) { - const auto & attribute = buffer[col]; + const auto & attribute = attributes_buffer[col]; switch (attribute.type) { @@ -208,24 +214,19 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) - { key_to_file_offset[ids[row]] = offsets[row]; - } /// clear buffer - for (auto & attribute : buffer) - { - std::visit([](auto & attr) { - attr.clear(); - }, attribute.values); - } + std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); + for (auto & attribute : attributes_buffer) + std::visit([](auto & attr) { attr.clear(); }, attribute.values); } template -void CachePartition::getValue(const std::string & attribute_name, const PaddedPODArray & ids, +void CachePartition::getValue(size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - UNUSED(attribute_name); + UNUSED(attribute_index); UNUSED(ids); UNUSED(out); UNUSED(not_found); @@ -249,7 +250,7 @@ CacheStorage::CacheStorage(SSDCacheDictionary & dictionary_, const std::string & structure.push_back(item.underlying_type); } for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) - partitions.emplace_back(std::make_unique(structure, path_, partition_id, partition_max_size)); + partitions.emplace_back(std::make_unique(AttributeUnderlyingType::utUInt64, structure, path_, partition_id, partition_max_size)); } template @@ -284,8 +285,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorread()) { - const auto new_attributes = createAttributesFromBlock(block); - const auto & ids = std::get>(new_attributes.front().values); + const auto new_keys = createAttributesFromBlock(block, 0, 1).front(); + const auto new_attributes = createAttributesFromBlock(block, 1); + const auto & ids = std::get>(new_keys.values); for (const auto i : ext::range(0, ids.size())) { @@ -295,7 +297,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_attributes); + partitions[0]->appendBlock(new_keys, new_attributes); } stream->readSuffix(); @@ -320,6 +322,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); CachePartition::Attributes new_attributes; { /// TODO: create
attributes from structure @@ -377,6 +382,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); + /// Set null_value for each attribute const auto & attributes = dictionary.getAttributes(); for (size_t i = 0; i < attributes.size(); ++i) @@ -419,20 +427,23 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_attributes); + partitions[0]->appendBlock(new_keys, new_attributes); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); ProfileEvents::increment(ProfileEvents::DictCacheRequests); } -CachePartition::Attributes CacheStorage::createAttributesFromBlock(const Block & block) +CachePartition::Attributes CacheStorage::createAttributesFromBlock(const Block & block, const size_t begin, size_t end) { CachePartition::Attributes attributes; const auto & structure = dictionary.getAttributes(); + if (end == static_cast(-1)) + end = structure.size(); + const auto columns = block.getColumns(); - for (size_t i = 0; i < structure.size(); ++i) + for (size_t i = begin; i < end; ++i) { const auto & column = columns[i]; switch (structure[i].type) @@ -504,11 +515,12 @@ SSDCacheDictionary::SSDCacheDictionary( checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ const auto null_value = std::get(attributes[index].null_value); \ getItemsNumberImpl( \ - attribute_name, \ + index, \ ids, \ out, \ [&](const size_t) { return null_value; }); \ } + DECLARE(UInt8) DECLARE(UInt16) DECLARE(UInt32) @@ -535,7 +547,7 @@ SSDCacheDictionary::SSDCacheDictionary( const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ getItemsNumberImpl( \ - attribute_name, \ + index, \ ids, \ out, \ [&](const size_t row) { return def[row]; }); \ @@ -565,9 +577,8 @@ SSDCacheDictionary::SSDCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ -\ getItemsNumberImpl( \ - attribute_name, \ + index, \ ids, \ out, \ [&](const size_t) { return def; }); \ @@ -590,17 +601,15 @@ SSDCacheDictionary::SSDCacheDictionary( template void SSDCacheDictionary::getItemsNumberImpl( - const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const + const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { - const auto attribute_index = getAttributeIndex(attribute_name); - std::unordered_map> not_found_ids; - storage.getValue(attribute_name, ids, out, not_found_ids); + storage.getValue(attribute_index, ids, out, not_found_ids); if (not_found_ids.empty()) return; std::vector required_ids(not_found_ids.size()); - std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](auto & pair) { return pair.first; }); + std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; }); storage.update( source_ptr, @@ -609,7 +618,7 @@ void SSDCacheDictionary::getItemsNumberImpl( for (const size_t out_row : not_found_ids[id]) out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, - 
[&](const auto id) + [&](const size_t id) { for (const size_t row : not_found_ids[id]) out[row] = get_default(row); @@ -618,37 +627,37 @@ void SSDCacheDictionary::getItemsNumberImpl( void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); - const auto null_value = StringRef{std::get(attribute.null_value)}; + const auto null_value = StringRef{std::get(attributes[index].null_value)}; - getItemsString(attribute_name, ids, out, [&](const size_t) { return null_value; }); + getItemsString(index, ids, out, [&](const size_t) { return null_value; }); } void SSDCacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); - getItemsString(attribute_name, ids, out, [&](const size_t row) { return def->getDataAt(row); }); + getItemsString(index, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } void SSDCacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const { - auto & attribute = getAttribute(attribute_name); - checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString); + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); - getItemsString(attribute_name, ids, out, [&](const size_t) { return StringRef{def}; }); + getItemsString(index, ids, out, [&](const size_t) { return StringRef{def}; }); } template -void SSDCacheDictionary::getItemsString(const std::string & attribute_name, const PaddedPODArray & ids, +void SSDCacheDictionary::getItemsString(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const { - UNUSED(attribute_name); + UNUSED(attribute_index); UNUSED(ids); UNUSED(out); UNUSED(get_default); @@ -727,7 +736,7 @@ case AttributeUnderlyingType::ut##TYPE: \ void SSDCacheDictionary::createAttributes() { - attributes.resize(dict_struct.attributes.size()); + attributes.reserve(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { const auto & attribute = dict_struct.attributes[i]; @@ -741,4 +750,36 @@ void SSDCacheDictionary::createAttributes() } } +void registerDictionarySSDCache(DictionaryFactory & factory) +{ + auto create_layout = [=](const std::string & name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr) -> DictionaryPtr + { + if (dict_struct.key) + throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD}; + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception{name + + ": elements .structure.range_min and 
.structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + ErrorCodes::BAD_ARGUMENTS}; + const auto & layout_prefix = config_prefix + ".layout"; + const auto max_partition_size = config.getInt(layout_prefix + ".ssd.max_partition_size"); + if (max_partition_size == 0) + throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + const auto path = config.getString(layout_prefix + ".ssd.path"); + if (path.empty()) + throw Exception{name + ": dictionary of layout 'cache' cannot have empty path", + ErrorCodes::BAD_ARGUMENTS}; + + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, path, max_partition_size); + }; + factory.registerLayout("ssd", create_layout, false); +} + } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index b5a2712dd05..9df0b2597e2 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -50,14 +50,15 @@ public: using Offset = size_t; using Offsets = std::vector; - CachePartition(const std::vector & structure, const std::string & dir_path, - const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); + CachePartition( + const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, + const std::string & dir_path, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; template - void getValue(const std::string & attribute_name, const PaddedPODArray & ids, + void getValue(size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const; // TODO:: getString @@ -94,7 +95,7 @@ public: // Key, (Metadata), attributes - void appendBlock(const Attributes & new_columns); + void appendBlock(const Attribute & new_keys, const Attributes & new_attributes); private: void flush(); @@ -113,7 +114,8 @@ private: std::unique_ptr write_data_buffer; std::unordered_map key_to_file_offset; - Attributes buffer; + Attribute keys_buffer; + Attributes attributes_buffer; //MutableColumns buffer; size_t bytes = 0; @@ -135,10 +137,10 @@ public: using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; template - void getValue(const std::string & attribute_name, const PaddedPODArray & ids, + void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - partitions[0]->getValue(attribute_name, ids, out, not_found); + partitions[0]->getValue(attribute_index, ids, out, not_found); } // getString(); @@ -154,7 +156,7 @@ public: const std::string & getPath() const { return path; } private: - CachePartition::Attributes createAttributesFromBlock(const Block & block); + CachePartition::Attributes createAttributesFromBlock(const Block & block, const size_t begin = 0, size_t end = -1); SSDCacheDictionary & dictionary; @@ -348,9 +350,9 @@ private: template void getItemsNumberImpl( - const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; + const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template - void getItemsString(const std::string & attribute_name, 
const PaddedPODArray & ids, + void getItemsString(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; const std::string name; diff --git a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index f0e49bcc4ac..fcecc8f2bbb 100644 --- a/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/dbms/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB { @@ -95,13 +96,22 @@ void buildLayoutConfiguration( root->appendChild(layout_element); AutoPtr layout_type_element(doc->createElement(layout->layout_type)); layout_element->appendChild(layout_type_element); - if (layout->parameter.has_value()) + for (const auto & param : layout->parameters) { - const auto & param = layout->parameter; - AutoPtr layout_type_parameter_element(doc->createElement(param->first)); - const ASTLiteral & literal = param->second->as(); - AutoPtr value(doc->createTextNode(toString(literal.value.get()))); - layout_type_parameter_element->appendChild(value); + AutoPtr layout_type_parameter_element(doc->createElement(param.first)); + const ASTLiteral & literal = param.second->as(); + Field::dispatch([&](auto & value) + { + if constexpr (std::is_same_v, UInt64> || std::is_same_v, String>) + { + AutoPtr value_to_append(doc->createTextNode(toString(value))); + layout_type_parameter_element->appendChild(value_to_append); + } + else + { + throw DB::Exception{"Wrong type of layout argument.", ErrorCodes::BAD_ARGUMENTS}; + } + }, literal.value); layout_type_element->appendChild(layout_type_parameter_element); } } @@ -458,6 +468,10 @@ DictionaryConfigurationPtr getDictionaryConfigurationFromAST(const ASTCreateQuer buildRangeConfiguration(xml_document, structure_element, query.dictionary->range, all_attr_names_and_types); conf->load(xml_document); + + std::ostringstream ss; + conf->save(ss); + Poco::Logger::get("xml config:").information(ss.str()); return conf; } diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp index 4ebaae04116..47e755a3c3d 100644 --- a/dbms/src/Dictionaries/registerDictionaries.cpp +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -29,6 +29,7 @@ void registerDictionaries() registerDictionaryFlat(factory); registerDictionaryHashed(factory); registerDictionaryCache(factory); + registerDictionarySSDCache(factory); } } diff --git a/dbms/src/Dictionaries/registerDictionaries.h b/dbms/src/Dictionaries/registerDictionaries.h index 3f2e730b5e3..5fa4a734ec3 100644 --- a/dbms/src/Dictionaries/registerDictionaries.h +++ b/dbms/src/Dictionaries/registerDictionaries.h @@ -24,6 +24,7 @@ void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); +void registerDictionarySSDCache(DictionaryFactory & factory); void registerDictionaries(); } diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 33cb05e2e7b..8542bc00f93 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -839,6 +840,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && 
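/* Editor's note (not part of the original patch): these functions resolve the concrete dictionary type at runtime by trying each executeDispatch overload in turn until one accepts the dictionary pointer, so supporting the new layout only requires threading SSDCacheDictionary into the two dispatch chains touched in this file. */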
!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && @@ -1103,6 +1105,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) diff --git a/dbms/src/Parsers/ASTDictionary.cpp b/dbms/src/Parsers/ASTDictionary.cpp index ec750acff31..37e73166a33 100644 --- a/dbms/src/Parsers/ASTDictionary.cpp +++ b/dbms/src/Parsers/ASTDictionary.cpp @@ -66,10 +66,10 @@ ASTPtr ASTDictionaryLayout::clone() const auto res = std::make_shared(*this); res->children.clear(); res->layout_type = layout_type; - if (parameter.has_value()) + for (const auto & parameter : parameters) { - res->parameter.emplace(parameter->first, nullptr); - res->set(res->parameter->second, parameter->second->clone()); + res->parameters.emplace_back(parameter.first, nullptr); + res->set(res->parameters.back().second, parameter.second->clone()); } return res; } @@ -88,14 +88,18 @@ void ASTDictionaryLayout::formatImpl(const FormatSettings & settings, << (settings.hilite ? hilite_none : ""); settings.ostr << "("; - if (parameter) + bool first = true; + for (const auto & parameter : parameters) { - settings.ostr << (settings.hilite ? hilite_keyword : "") - << Poco::toUpper(parameter->first) + settings.ostr << (first ? "" : " ") + << (settings.hilite ? hilite_keyword : "") + << Poco::toUpper(parameter.first) << (settings.hilite ? hilite_none : "") << " "; - parameter->second->formatImpl(settings, state, expected); + parameter.second->formatImpl(settings, state, expected); + + first = false; } settings.ostr << ")"; settings.ostr << ")"; diff --git a/dbms/src/Parsers/ASTDictionary.h b/dbms/src/Parsers/ASTDictionary.h index e146162cbdf..0113afe5d00 100644 --- a/dbms/src/Parsers/ASTDictionary.h +++ b/dbms/src/Parsers/ASTDictionary.h @@ -32,7 +32,7 @@ public: /// flat, cache, hashed, etc. 
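/// (Editor's illustrative sketch, not from the original patch: with multi-parameter layouts, an SSD dictionary definition might read LAYOUT(SSD(MAX_PARTITION_SIZE 16777216 PATH '/var/lib/clickhouse/ssd_cache')). The parameter names here are assumptions inferred from the ssd.max_partition_size and ssd.path config keys read in registerDictionarySSDCache; the parser change in this patch only guarantees that each value is a UInt64 or String literal.)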
String layout_type; /// optional parameter (size_in_cells) - std::optional parameter; + std::vector parameters; String getID(char) const override { return "Dictionary layout"; } diff --git a/dbms/src/Parsers/ParserDictionary.cpp b/dbms/src/Parsers/ParserDictionary.cpp index ca9c2ad031a..f79df2d31b1 100644 --- a/dbms/src/Parsers/ParserDictionary.cpp +++ b/dbms/src/Parsers/ParserDictionary.cpp @@ -123,21 +123,17 @@ bool ParserDictionaryLayout::parseImpl(Pos & pos, ASTPtr & node, Expected & expe res->layout_type = func.name; const ASTExpressionList & type_expr_list = func.elements->as(); - /// there are no layout with more than 1 parameter - if (type_expr_list.children.size() > 1) - return false; - - if (type_expr_list.children.size() == 1) + for (const auto & child : type_expr_list.children) { - const ASTPair * pair = dynamic_cast(type_expr_list.children.at(0).get()); + const ASTPair * pair = dynamic_cast(child.get()); if (pair == nullptr) return false; const ASTLiteral * literal = dynamic_cast(pair->second.get()); - if (literal == nullptr || literal->value.getType() != Field::Types::UInt64) + if (literal == nullptr || (literal->value.getType() != Field::Types::UInt64 && literal->value.getType() != Field::Types::String)) return false; - res->parameter.emplace(pair->first, nullptr); - res->set(res->parameter->second, literal->clone()); + res->parameters.emplace_back(pair->first, nullptr); + res->set(res->parameters.back().second, literal->clone()); } node = res; From 57d9e3820f24e47eb0d15496d918e99376b336ce Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 5 Jan 2020 20:05:49 +0300 Subject: [PATCH 0012/2229] fixed update --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 27 ++++++++++---------- dbms/src/Dictionaries/SSDCacheDictionary.h | 3 ++- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 90c8057034c..c65668e8dc5 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -115,7 +115,7 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & //if (bytes >= buffer_size) { - flush(); + //flush(); } } @@ -227,9 +227,11 @@ void CachePartition::getValue(size_t attribute_index, const PaddedPODArray & out, std::unordered_map> & not_found) const { UNUSED(attribute_index); - UNUSED(ids); UNUSED(out); - UNUSED(not_found); + for (size_t i = 0; i < ids.size(); ++i) + { + not_found[ids[i]].push_back(i); + } } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const @@ -285,8 +287,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorread()) { - const auto new_keys = createAttributesFromBlock(block, 0, 1).front(); - const auto new_attributes = createAttributesFromBlock(block, 1); + const auto new_keys = createAttributesFromBlock(block, { AttributeUnderlyingType::utUInt64 }).front(); + const auto new_attributes = createAttributesFromBlock( + block, ext::map(dictionary.getAttributes(), [](const auto & attribute) { return attribute.type; })); const auto & ids = std::get>(new_keys.values); for (const auto i : ext::range(0, ids.size())) @@ -434,19 +437,16 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & structure) { CachePartition::Attributes attributes; - const auto & structure = dictionary.getAttributes(); - if (end == static_cast(-1)) - end = structure.size(); - const auto columns = block.getColumns(); - for (size_t i = 
begin; i < end; ++i) + for (size_t i = 0; i < structure.size(); ++i) { const auto & column = columns[i]; - switch (structure[i].type) + switch (structure[i]) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ @@ -455,7 +455,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock(const Block & const auto raw_data = column->getRawData(); \ memcpy(values.data(), raw_data.data, raw_data.size); \ attributes.emplace_back(); \ - attributes.back().type = structure[i].type; \ + attributes.back().type = structure[i]; \ attributes.back().values = std::move(values); \ } \ break; @@ -615,6 +615,7 @@ void SSDCacheDictionary::getItemsNumberImpl( source_ptr, required_ids, [&](const auto id, const auto row, const auto & new_attributes) { + Poco::Logger::get("update:").information(std::to_string(id) + " " + std::to_string(row)); for (const size_t out_row : not_found_ids[id]) out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 9df0b2597e2..e7fb12af15f 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -156,7 +156,8 @@ public: const std::string & getPath() const { return path; } private: - CachePartition::Attributes createAttributesFromBlock(const Block & block, const size_t begin = 0, size_t end = -1); + CachePartition::Attributes createAttributesFromBlock( + const Block & block, const std::vector & structure); SSDCacheDictionary & dictionary; From f3b00e6c8c6ec7f03beddd8a085067fc93f8fdf3 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 5 Jan 2020 23:31:25 +0300 Subject: [PATCH 0013/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 64 ++++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 14 ++++- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index c65668e8dc5..3fc0db30899 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -46,7 +46,8 @@ namespace ErrorCodes namespace { - constexpr size_t INMEMORY = (1ULL << 63ULL); + constexpr size_t IN_MEMORY = (1ULL << 63ULL); + constexpr size_t NOT_FOUND = -1; const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; } @@ -99,7 +100,7 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & const auto & ids = std::get>(new_keys.values); - size_t start_size = ids.size(); + const size_t start_size = std::visit([](const auto & values) { return values.size(); }, keys_buffer.values); appendValuesToBufferAttribute(keys_buffer, new_keys); for (size_t i = 0; i < attributes_buffer.size(); ++i) @@ -109,14 +110,10 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & } for (size_t i = 0; i < ids.size(); ++i) - { - key_to_file_offset[ids[i]] = (start_size + i) | INMEMORY; - } + key_to_file_offset[ids[i]] = (start_size + i) | IN_MEMORY; //if (bytes >= buffer_size) - { //flush(); - } } void CachePartition::appendValuesToBufferAttribute(Attribute & to, const Attribute & from) @@ -223,21 +220,61 @@ void CachePartition::flush() } template -void CachePartition::getValue(size_t attribute_index, const PaddedPODArray & ids, +void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - UNUSED(attribute_index); 
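/* Editor's note, a sketch of the encoding that replaces this stub (inferred from the IN_MEMORY and NOT_FOUND constants above): an entry of key_to_file_offset is either a plain file offset or, with bit 63 set, a position in the not-yet-flushed in-memory buffer, while NOT_FOUND is the all-ones sentinel. Roughly: key_to_file_offset[id] = position | IN_MEMORY marks a buffered row (as appendBlock does above), (encoded & IN_MEMORY) tests the flag, and (encoded ^ IN_MEMORY) strips it to recover the buffer position, as getValueFromMemory does below. */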
- UNUSED(out); + PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { - not_found[ids[i]].push_back(i); + auto it = key_to_file_offset.find(ids[i]); + if (it != std::end(key_to_file_offset)) + { + Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i])); + indices[i] = it->second; + } + else + { + Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i])); + indices[i] = NOT_FOUND; + not_found[ids[i]].push_back(i); + } + + + getValueFromMemory(attribute_index, indices, out); + getValueFromStorage(attribute_index, indices, out); } } +template +void CachePartition::getValueFromMemory( + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const +{ + const auto & attribute = std::get>(attributes_buffer[attribute_index].values); + for (size_t i = 0; i < indices.size(); ++i) + { + const auto & index = indices[i]; + if (index != NOT_FOUND && (index & IN_MEMORY)) + { + out[i] = attribute[index ^ IN_MEMORY]; + if constexpr (std::is_same_v) + Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(out[i]) + " --- " + std::to_string(index ^ IN_MEMORY)); + } + } +} + +template +void CachePartition::getValueFromStorage( + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const +{ + UNUSED(attribute_index); + UNUSED(indices); + UNUSED(out); +} + void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const { - UNUSED(ids); - UNUSED(out); + for (size_t i = 0; i < ids.size(); ++i) + out[i] = static_cast(key_to_file_offset.find(ids[i]) != std::end(key_to_file_offset)); } CacheStorage::CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) @@ -259,6 +296,7 @@ template void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found) { + Poco::Logger::get("cachestorage").information("update"); CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index e7fb12af15f..a02562a0480 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -54,11 +54,15 @@ public: const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, const std::string & dir_path, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); + ~CachePartition() { + Poco::Logger::get("cachepartition").information("DESTROY"); + } + template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; template - void getValue(size_t attribute_index, const PaddedPODArray & ids, + void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const; // TODO:: getString @@ -102,6 +106,14 @@ private: void appendValuesToBufferAttribute(Attribute & to, const Attribute & from); + template + void getValueFromMemory( + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + + template + void getValueFromStorage( + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + size_t file_id; size_t max_size; size_t buffer_size; From 
2c521628daf27f4bfed16cae21832ca9b47e0cfe Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 6 Jan 2020 23:38:32 +0300 Subject: [PATCH 0014/2229] some fixes --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 319 +++++++++++++++++-- dbms/src/Dictionaries/SSDCacheDictionary.h | 59 +++- 2 files changed, 337 insertions(+), 41 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 3fc0db30899..c98082f327a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -7,11 +7,14 @@ #include #include #include "DictionaryFactory.h" +#include +#include #include #include #include #include #include +#include namespace ProfileEvents { @@ -25,6 +28,7 @@ namespace ProfileEvents extern const Event DictCacheRequests; extern const Event DictCacheLockWriteNs; extern const Event DictCacheLockReadNs; + extern const Event FileOpen; } namespace CurrentMetrics @@ -42,16 +46,93 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; extern const int TOO_SMALL_BUFFER_SIZE; + extern const int FILE_DOESNT_EXIST; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_IO_GETEVENTS; } namespace { - constexpr size_t IN_MEMORY = (1ULL << 63ULL); + constexpr size_t MAX_KEYS_TO_READ_ONCE = 128; + constexpr size_t SSD_BLOCK_SIZE = 4096; + constexpr size_t READ_BUFFER_ALIGNMENT = 0; + constexpr size_t MAX_ATTRIBUTES_SIZE = 1024; + + static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); + static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; + + constexpr size_t KEY_IN_MEMORY_BIT = 63; + constexpr size_t KEY_IN_MEMORY = (1ULL << KEY_IN_MEMORY_BIT); + constexpr size_t BLOCK_INDEX_BITS = 32; + constexpr size_t INDEX_IN_BLOCK_BITS = 16; + constexpr size_t INDEX_IN_BLOCK_MASK = (1ULL << INDEX_IN_BLOCK_BITS) - 1; + constexpr size_t BLOCK_INDEX_MASK = ((1ULL << (BLOCK_INDEX_BITS + INDEX_IN_BLOCK_BITS)) - 1) ^ INDEX_IN_BLOCK_MASK; + constexpr size_t NOT_FOUND = -1; + const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; } +CachePartition::KeyMetadata::time_point_t CachePartition::KeyMetadata::expiresAt() const +{ + return ext::safe_bit_cast(data & KEY_METADATA_EXPIRES_AT_MASK); +} +void CachePartition::KeyMetadata::setExpiresAt(const time_point_t & t) +{ + data = ext::safe_bit_cast(t); +} + +bool CachePartition::KeyMetadata::isDefault() const +{ + return (data & KEY_METADATA_IS_DEFAULT_MASK) == KEY_METADATA_IS_DEFAULT_MASK; +} +void CachePartition::KeyMetadata::setDefault() +{ + data |= KEY_METADATA_IS_DEFAULT_MASK; +} + +bool CachePartition::Index::inMemory() const +{ + return (index & KEY_IN_MEMORY) == KEY_IN_MEMORY; +} + +bool CachePartition::Index::exists() const +{ + return index != NOT_FOUND; +} + +void CachePartition::Index::setNotExists() +{ + index = NOT_FOUND; +} + +void CachePartition::Index::setInMemory(const bool in_memory) +{ + index = (index & ~KEY_IN_MEMORY) | (static_cast(in_memory) << KEY_IN_MEMORY_BIT); +} + +size_t CachePartition::Index::getAddressInBlock() const +{ + return index & INDEX_IN_BLOCK_MASK; +} + +void CachePartition::Index::setAddressInBlock(const size_t address_in_block) +{ + index = (index & ~INDEX_IN_BLOCK_MASK) | address_in_block; +} + +size_t CachePartition::Index::getBlockId() const +{ + return (index & BLOCK_INDEX_MASK) >> INDEX_IN_BLOCK_BITS; +} + +void CachePartition::Index::setBlockId(const size_t 
block_id) +{ + index = (index & ~BLOCK_INDEX_MASK) | (block_id << INDEX_IN_BLOCK_BITS); +} + CachePartition::CachePartition( const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure, const std::string & dir_path, const size_t file_id_, const size_t max_size_, const size_t buffer_size_) @@ -91,6 +172,22 @@ CachePartition::CachePartition( break; } } + + { + ProfileEvents::increment(ProfileEvents::FileOpen); + + const std::string filename = path + BIN_FILE_EXT; + read_fd = ::open(filename.c_str(), O_RDONLY | O_DIRECT); + if (read_fd == -1) + { + auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; + throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); + } + } +} + +CachePartition::~CachePartition() { + ::close(read_fd); } void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes) @@ -110,10 +207,12 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & } for (size_t i = 0; i < ids.size(); ++i) - key_to_file_offset[ids[i]] = (start_size + i) | IN_MEMORY; - + { + key_to_metadata[ids[i]].index.setInMemory(true); + key_to_metadata[ids[i]].index.setAddressInBlock(start_size + i); + } //if (bytes >= buffer_size) - //flush(); + //flush(); } void CachePartition::appendValuesToBufferAttribute(Attribute & to, const Attribute & from) @@ -127,7 +226,7 @@ void CachePartition::appendValuesToBufferAttribute(Attribute & to, const Attribu auto &from_values = std::get>(from.values); \ size_t prev_size = to_values.size(); \ to_values.resize(to_values.size() + from_values.size()); \ - memcpy(to_values.data() + prev_size * sizeof(TYPE), from_values.data(), from_values.size() * sizeof(TYPE)); \ + memcpy(&to_values[prev_size], &from_values[0], from_values.size() * sizeof(TYPE)); \ } \ break; @@ -162,6 +261,10 @@ void CachePartition::flush() } const auto & ids = std::get>(keys_buffer.values); + if (ids.empty()) + return; + + Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!!"); std::vector offsets; @@ -211,7 +314,12 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) - key_to_file_offset[ids[row]] = offsets[row]; + { + key_to_metadata[ids[row]].index.setInMemory(false); + key_to_metadata[ids[row]].index.setBlockId(current_block_id); + key_to_metadata[ids[row]].index.setAddressInBlock(offsets[row]); + Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_metadata[ids[row]].index.index)); + } /// clear buffer std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); @@ -223,20 +331,20 @@ template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { - PaddedPODArray indices(ids.size()); + PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { - auto it = key_to_file_offset.find(ids[i]); - if (it != std::end(key_to_file_offset)) + auto it = key_to_metadata.find(ids[i]); + if (it == std::end(key_to_metadata)) // TODO: check expired { - Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i])); - indices[i] = it->second; + Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i])); + indices[i].setNotExists(); + not_found[ids[i]].push_back(i); } else { - Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i])); - indices[i] = NOT_FOUND; 
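/* Editor's note, a worked example of the packed Index layout (not from the original patch, derived from the constants above): with INDEX_IN_BLOCK_BITS = 16 and BLOCK_INDEX_BITS = 32, bits 0..15 hold the address in the block (INDEX_IN_BLOCK_MASK), bits 16..47 hold the block id (BLOCK_INDEX_MASK), and bit 63 is the in-memory flag (KEY_IN_MEMORY). So setBlockId(5) followed by setAddressInBlock(42) stores (5 << 16) | 42 = 327722, and setInMemory(true) additionally raises bit 63. */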
- not_found[ids[i]].push_back(i); + Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i])); + indices[i] = it->second.index; } @@ -247,34 +355,184 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray template void CachePartition::getValueFromMemory( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const { const auto & attribute = std::get>(attributes_buffer[attribute_index].values); for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; - if (index != NOT_FOUND && (index & IN_MEMORY)) + if (index.exists() && index.inMemory()) { - out[i] = attribute[index ^ IN_MEMORY]; + out[i] = attribute[index.getAddressInBlock()]; if constexpr (std::is_same_v) - Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(out[i]) + " --- " + std::to_string(index ^ IN_MEMORY)); + Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(out[i]) + " --- " + std::to_string(index.getAddressInBlock())); } } } template void CachePartition::getValueFromStorage( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const { - UNUSED(attribute_index); - UNUSED(indices); - UNUSED(out); + std::vector> index_to_out; + for (size_t i = 0; i < indices.size(); ++i) + { + const auto & index = indices[i]; + if (index.exists() && !index.inMemory()) + index_to_out.emplace_back(index.getAddressInBlock(), i); + } + if (index_to_out.empty()) + return; + + std::sort(std::begin(index_to_out), std::end(index_to_out)); + + DB::Memory read_buffer(MAX_ATTRIBUTES_SIZE * index_to_out.size(), READ_BUFFER_ALIGNMENT); + + std::vector requests(index_to_out.size()); + memset(requests.data(), 0, requests.size() * sizeof(requests.front())); + std::vector pointers(index_to_out.size()); + for (size_t i = 0; i < index_to_out.size(); ++i) + { +#if defined(__FreeBSD__) + request.aio.aio_lio_opcode = LIO_READ; + request.aio.aio_fildes = read_fd; + request.aio.aio_buf = reinterpret_cast(read_buffer.data() + i * MAX_ATTRIBUTES_SIZE); + request.aio.aio_nbytes = MAX_ATTRIBUTES_SIZE; + request.aio.aio_offset = index_to_out[i].first; + request.aio_data = i; +#else + requests[i].aio_lio_opcode = IOCB_CMD_PREAD; + requests[i].aio_fildes = read_fd; + requests[i].aio_buf = reinterpret_cast(read_buffer.data()) + i * MAX_ATTRIBUTES_SIZE; + requests[i].aio_nbytes = MAX_ATTRIBUTES_SIZE; + requests[i].aio_offset = index_to_out[i].first; + requests[i].aio_data = i; +#endif + + Poco::Logger::get("requests:").information(); + pointers[i] = &requests[i]; + } + Poco::Logger::get("requests:").information(std::to_string(requests.size())); + + //const auto pointers = ext::map( + // std::begin(requests), std::end(requests), [](const iocb & request) { return &request; }); + + AIOContext context(MAX_KEYS_TO_READ_ONCE); + + std::vector events(index_to_out.size()); + + for (size_t i = 0; i < index_to_out.size(); i += MAX_KEYS_TO_READ_ONCE) + { + size_t to_push = std::min(MAX_KEYS_TO_READ_ONCE, index_to_out.size() - i); + size_t push_index = i; + int pushed = 0; + while (to_push > 0 && (pushed = io_submit(context.ctx, to_push, pointers.data() + push_index)) < 0) + { + if (errno != EINTR) + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + to_push -= pushed; + 
push_index += pushed; + pushed = 0; + } + + size_t to_get = std::min(MAX_KEYS_TO_READ_ONCE, index_to_out.size() - i); + size_t got_index = i; + int got = 0; + while (to_get > 0 && (got = io_getevents(context.ctx, to_get, to_get, events.data() + got_index, NULL)) < 0) + { + if (errno != EINTR) + throwFromErrno("io_getevents: Failed to get an event from asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + to_get -= got; + got_index += got; + got = 0; + } + } + + //std::sort(std::begin(events), std::end(events), [](const auto & lhs, const auto & rhs) { return lhs.data < rhs.data; }); + for (const auto & event : events) + { + Poco::Logger::get("Read:").information("ito: f:" + std::to_string(index_to_out[event.data].first) + " s:" + std::to_string(index_to_out[event.data].second)); + Poco::Logger::get("Read:").information("data: " + std::to_string(event.data) + " res: " + std::to_string(event.res)); + DB::ReadBufferFromMemory buf(read_buffer.data() + event.data * MAX_ATTRIBUTES_SIZE, event.res); + + for (size_t i = 0; i < attribute_index; ++i) + { + switch (attributes_buffer[i].type) + { + #define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + TYPE tmp; \ + readBinary(tmp, buf); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) + #undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } + + switch (attributes_buffer[attribute_index].type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + readBinary(out[index_to_out[event.data].second], buf); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const { for (size_t i = 0; i < ids.size(); ++i) - out[i] = static_cast(key_to_file_offset.find(ids[i]) != std::end(key_to_file_offset)); + { + auto it = key_to_metadata.find(ids[i]); + if (it == std::end(key_to_metadata)) + { + out[i] = 0; + } + else + { + out[i] = it->second.isDefault(); + } + } } CacheStorage::CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) @@ -325,9 +583,10 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorread()) { - const auto new_keys = createAttributesFromBlock(block, { AttributeUnderlyingType::utUInt64 }).front(); + const auto new_keys = createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front(); const auto new_attributes = createAttributesFromBlock( - block, ext::map(dictionary.getAttributes(), [](const auto & attribute) { return attribute.type; })); + block, 1, ext::map(dictionary.getAttributes(), [](const auto & attribute) { return attribute.type; })); + const auto & ids = std::get>(new_keys.values); for (const auto i : ext::range(0, ids.size())) @@ -468,7 +727,8 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes); + if 
(not_found_num) + partitions[0]->appendBlock(new_keys, new_attributes); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); @@ -476,14 +736,14 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & structure) + const Block & block, const size_t begin_column, const std::vector & structure) { CachePartition::Attributes attributes; const auto columns = block.getColumns(); for (size_t i = 0; i < structure.size(); ++i) { - const auto & column = columns[i]; + const auto & column = columns[i + begin_column]; switch (structure[i]) { #define DISPATCH(TYPE) \ @@ -491,7 +751,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( { \ std::vector values(column->size()); \ const auto raw_data = column->getRawData(); \ - memcpy(values.data(), raw_data.data, raw_data.size); \ + memcpy(&values[0], raw_data.data, raw_data.size * sizeof(TYPE)); \ attributes.emplace_back(); \ attributes.back().type = structure[i]; \ attributes.back().values = std::move(values); \ @@ -653,7 +913,6 @@ void SSDCacheDictionary::getItemsNumberImpl( source_ptr, required_ids, [&](const auto id, const auto row, const auto & new_attributes) { - Poco::Logger::get("update:").information(std::to_string(id) + " " + std::to_string(row)); for (const size_t out_row : not_found_ids[id]) out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index a02562a0480..03baf91d7e1 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -52,11 +52,9 @@ public: CachePartition( const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, - const std::string & dir_path, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024 * 1024); + const std::string & dir_path, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024); - ~CachePartition() { - Poco::Logger::get("cachepartition").information("DESTROY"); - } + ~CachePartition(); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -97,22 +95,41 @@ public: }; using Attributes = std::vector; - // Key, (Metadata), attributes void appendBlock(const Attribute & new_keys, const Attributes & new_attributes); private: + struct Index final + { + bool inMemory() const; + void setInMemory(const bool in_memory); + + bool exists() const; + void setNotExists(); + + size_t getAddressInBlock() const; + void setAddressInBlock(const size_t address_in_block); + + size_t getBlockId() const; + void setBlockId(const size_t block_id); + + bool operator< (const Index & rhs) const { return index < rhs.index; } + + /// Stores `is_in_memory` flag, block id, address in uncompressed block + size_t index = 0; + }; + void flush(); void appendValuesToBufferAttribute(Attribute & to, const Attribute & from); template void getValueFromMemory( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; template void getValueFromStorage( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; size_t file_id; size_t max_size; @@ -121,15 +138,35 
@@ private: //mutable std::shared_mutex rw_lock; //int index_fd; - int data_fd; + mutable int read_fd = -1; std::unique_ptr write_data_buffer; - std::unordered_map key_to_file_offset; + + struct KeyMetadata final + { + using time_point_t = std::chrono::system_clock::time_point; + using time_point_rep_t = time_point_t::rep; + using time_point_urep_t = std::make_unsigned_t; + + time_point_t expiresAt() const; + void setExpiresAt(const time_point_t & t); + + bool isDefault() const; + void setDefault(); + + Index index{}; + /// Stores both expiration time and `is_default` flag in the most significant bit + time_point_urep_t data = 0; + }; + + std::unordered_map key_to_metadata; Attribute keys_buffer; Attributes attributes_buffer; //MutableColumns buffer; size_t bytes = 0; + size_t current_block_id = 0; + size_t current_address_in_block = 0; mutable std::atomic element_count{0}; }; @@ -169,7 +206,7 @@ public: private: CachePartition::Attributes createAttributesFromBlock( - const Block & block, const std::vector & structure); + const Block & block, const size_t begin_column, const std::vector & structure); SSDCacheDictionary & dictionary; @@ -224,7 +261,7 @@ public: double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / partition_max_size; } // TODO: fix - bool supportUpdates() const override { return true; } + bool supportUpdates() const override { return false; } std::shared_ptr clone() const override { From b62ac3aa8052444d32bd41d9af2afc0388109fbe Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 7 Jan 2020 14:26:52 +0300 Subject: [PATCH 0015/2229] change buffer --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 172 ++++++++++++------- dbms/src/Dictionaries/SSDCacheDictionary.h | 18 +- 2 files changed, 123 insertions(+), 67 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index c98082f327a..7b881fc6415 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -55,8 +55,8 @@ namespace ErrorCodes namespace { constexpr size_t MAX_KEYS_TO_READ_ONCE = 128; - constexpr size_t SSD_BLOCK_SIZE = 4096; - constexpr size_t READ_BUFFER_ALIGNMENT = 0; + constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t MAX_ATTRIBUTES_SIZE = 1024; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); @@ -135,8 +135,8 @@ void CachePartition::Index::setBlockId(const size_t block_id) CachePartition::CachePartition( const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure, - const std::string & dir_path, const size_t file_id_, const size_t max_size_, const size_t buffer_size_) - : file_id(file_id_), max_size(max_size_), buffer_size(buffer_size_), path(dir_path + "/" + std::to_string(file_id)) + const std::string & dir_path, const size_t file_id_, const size_t max_size_) + : file_id(file_id_), max_size(max_size_), path(dir_path + "/" + std::to_string(file_id)), memory(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT) { keys_buffer.type = AttributeUnderlyingType::utUInt64; keys_buffer.values = std::vector(); @@ -186,7 +186,8 @@ CachePartition::CachePartition( } } -CachePartition::~CachePartition() { +CachePartition::~CachePartition() +{ ::close(read_fd); } @@ -197,25 +198,64 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & const auto & ids = std::get>(new_keys.values); - 
const size_t start_size = std::visit([](const auto & values) { return values.size(); }, keys_buffer.values); + appendValuesToAttribute(keys_buffer, new_keys); - appendValuesToBufferAttribute(keys_buffer, new_keys); - for (size_t i = 0; i < attributes_buffer.size(); ++i) - { - appendValuesToBufferAttribute(attributes_buffer[i], new_attributes[i]); - //bytes += buffer[i]->byteSize(); - } + if (!write_buffer) + write_buffer.emplace(memory.data(), memory.size()); - for (size_t i = 0; i < ids.size(); ++i) + for (size_t index = 0; index < ids.size();) { - key_to_metadata[ids[i]].index.setInMemory(true); - key_to_metadata[ids[i]].index.setAddressInBlock(start_size + i); + auto & key_index = key_to_metadata[ids[index]].index; + key_index.setInMemory(true); + key_index.setBlockId(current_memory_block_id); + key_index.setAddressInBlock(write_buffer->offset()); + + for (const auto & attribute : new_attributes) + { + // TODO: rework via columns + getDataAt + switch (attribute.type) { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + if (sizeof(TYPE) > write_buffer->available()) \ + { \ + flush(); \ + continue; \ + } \ + else \ + { \ + const auto & values = std::get>(attribute.values); \ + writeBinary(values[index], *write_buffer); \ + } \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } + } + ++index; } - //if (bytes >= buffer_size) - //flush(); } -void CachePartition::appendValuesToBufferAttribute(Attribute & to, const Attribute & from) +size_t CachePartition::appendValuesToAttribute(Attribute & to, const Attribute & from) { switch (to.type) { @@ -227,6 +267,7 @@ void CachePartition::appendValuesToBufferAttribu size_t prev_size = to_values.size(); \ to_values.resize(to_values.size() + from_values.size()); \ memcpy(&to_values[prev_size], &from_values[0], from_values.size() * sizeof(TYPE)); \ + return from_values.size() * sizeof(TYPE); \ } \ break; @@ -250,13 +291,14 @@ void CachePartition::appendValuesToBufferAttribu // TODO: string support break; } + throw Exception{"Unknown attribute type: " + std::to_string(static_cast(to.type)), ErrorCodes::TYPE_MISMATCH}; } void CachePartition::flush() { if (!write_data_buffer) { - write_data_buffer = std::make_unique(path + BIN_FILE_EXT, buffer_size, O_RDWR | O_CREAT | O_TRUNC); + //write_data_buffer = std::make_unique(path + BIN_FILE_EXT, buffer_size, O_RDWR | O_CREAT | O_TRUNC); // TODO: do not overwrite + seek to the end of the file } @@ -316,7 +358,7 @@ void CachePartition::flush() for (size_t row = 0; row < ids.size(); ++row) { key_to_metadata[ids[row]].index.setInMemory(false); - key_to_metadata[ids[row]].index.setBlockId(current_block_id); + key_to_metadata[ids[row]].index.setBlockId(current_file_block_id); key_to_metadata[ids[row]].index.setAddressInBlock(offsets[row]); Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_metadata[ids[row]].index.index)); } @@ -357,15 +399,18 @@ template void CachePartition::getValueFromMemory( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const { - const auto & attribute = 
std::get>(attributes_buffer[attribute_index].values); + //const auto & attribute = std::get>(attributes_buffer[attribute_index].values); for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; if (index.exists() && index.inMemory()) { - out[i] = attribute[index.getAddressInBlock()]; - if constexpr (std::is_same_v) - Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(out[i]) + " --- " + std::to_string(index.getAddressInBlock())); + const size_t offset = index.getAddressInBlock(); + + Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(i) + " --- " + std::to_string(offset)); + + ReadBufferFromMemory read_buffer(memory.data() + offset, memory.size() - offset); + readValueFromBuffer(attribute_index, out[i], read_buffer); } } } @@ -386,7 +431,7 @@ void CachePartition::getValueFromStorage( std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(MAX_ATTRIBUTES_SIZE * index_to_out.size(), READ_BUFFER_ALIGNMENT); + DB::Memory read_buffer(MAX_ATTRIBUTES_SIZE * index_to_out.size(), BUFFER_ALIGNMENT); std::vector requests(index_to_out.size()); memset(requests.data(), 0, requests.size() * sizeof(requests.front())); @@ -453,47 +498,24 @@ void CachePartition::getValueFromStorage( { Poco::Logger::get("Read:").information("ito: f:" + std::to_string(index_to_out[event.data].first) + " s:" + std::to_string(index_to_out[event.data].second)); Poco::Logger::get("Read:").information("data: " + std::to_string(event.data) + " res: " + std::to_string(event.res)); + DB::ReadBufferFromMemory buf(read_buffer.data() + event.data * MAX_ATTRIBUTES_SIZE, event.res); + readValueFromBuffer(attribute_index, out[index_to_out[event.data].second], buf); + } +} - for (size_t i = 0; i < attribute_index; ++i) - { - switch (attributes_buffer[i].type) - { - #define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - TYPE tmp; \ - readBinary(tmp, buf); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) - #undef DISPATCH - - case AttributeUnderlyingType::utString: - // TODO: string support - break; - } - } - - switch (attributes_buffer[attribute_index].type) +template +void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const +{ + for (size_t i = 0; i < attribute_index; ++i) + { + switch (attributes_buffer[i].type) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - readBinary(out[index_to_out[event.data].second], buf); \ + { \ + buf.ignore(sizeof(TYPE)); \ + } \ break; DISPATCH(UInt8) @@ -517,6 +539,34 @@ void CachePartition::getValueFromStorage( break; } } + + switch (attributes_buffer[attribute_index].type) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + readBinary(dst, buf); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + // TODO: string support + break; + } } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const diff --git 
a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 03baf91d7e1..61185774ad2 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -52,7 +52,7 @@ public: CachePartition( const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, - const std::string & dir_path, const size_t file_id, const size_t max_size, const size_t buffer_size = 4 * 1024); + const std::string & dir_path, const size_t file_id, const size_t max_size); ~CachePartition(); @@ -121,7 +121,7 @@ private: void flush(); - void appendValuesToBufferAttribute(Attribute & to, const Attribute & from); + size_t appendValuesToAttribute(Attribute & to, const Attribute & from); template void getValueFromMemory( @@ -131,9 +131,12 @@ private: void getValueFromStorage( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + template + void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; + size_t file_id; size_t max_size; - size_t buffer_size; + //size_t buffer_size; std::string path; //mutable std::shared_mutex rw_lock; @@ -164,9 +167,12 @@ private: Attribute keys_buffer; Attributes attributes_buffer; //MutableColumns buffer; - size_t bytes = 0; - size_t current_block_id = 0; - size_t current_address_in_block = 0; + + DB::Memory<> memory; + std::optional write_buffer; + + size_t current_memory_block_id = 0; + size_t current_file_block_id = 0; mutable std::atomic element_count{0}; }; From dbb565f34a8fff82c6249727092062893db9fd4d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 7 Jan 2020 17:59:03 +0300 Subject: [PATCH 0016/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 134 +++++++++++-------- dbms/src/Dictionaries/SSDCacheDictionary.h | 2 - 2 files changed, 76 insertions(+), 60 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 7b881fc6415..af958207b57 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "DictionaryFactory.h" #include @@ -29,11 +30,14 @@ namespace ProfileEvents extern const Event DictCacheLockWriteNs; extern const Event DictCacheLockReadNs; extern const Event FileOpen; + extern const Event WriteBufferAIOWrite; + extern const Event WriteBufferAIOWriteBytes; } namespace CurrentMetrics { extern const Metric DictCacheRequests; + extern const Metric Write; } namespace DB @@ -50,6 +54,8 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int CANNOT_IO_SUBMIT; extern const int CANNOT_IO_GETEVENTS; + extern const int AIO_WRITE_ERROR; + extern const int CANNOT_FSYNC; } namespace @@ -57,7 +63,7 @@ namespace constexpr size_t MAX_KEYS_TO_READ_ONCE = 128; constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; - constexpr size_t MAX_ATTRIBUTES_SIZE = 1024; + //constexpr size_t MAX_ATTRIBUTES_SIZE = 1024; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -177,7 +183,7 @@ CachePartition::CachePartition( ProfileEvents::increment(ProfileEvents::FileOpen); const std::string filename = path + BIN_FILE_EXT; - read_fd = ::open(filename.c_str(), O_RDONLY | O_DIRECT); + read_fd = ::open(filename.c_str(), O_RDWR | O_CREAT 
| O_TRUNC | O_DIRECT, 0666); if (read_fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; @@ -201,7 +207,7 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & appendValuesToAttribute(keys_buffer, new_keys); if (!write_buffer) - write_buffer.emplace(memory.data(), memory.size()); + write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); for (size_t index = 0; index < ids.size();) { @@ -213,13 +219,15 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & for (const auto & attribute : new_attributes) { // TODO: rework via columns + getDataAt - switch (attribute.type) { + switch (attribute.type) + { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ { \ if (sizeof(TYPE) > write_buffer->available()) \ { \ flush(); \ + write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); \ continue; \ } \ else \ @@ -251,6 +259,9 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & break; } } + + flush(); + write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); ++index; } } @@ -296,73 +307,80 @@ size_t CachePartition::appendValuesToAttribute(Attribute & to, const Attribute & void CachePartition::flush() { - if (!write_data_buffer) - { - //write_data_buffer = std::make_unique(path + BIN_FILE_EXT, buffer_size, O_RDWR | O_CREAT | O_TRUNC); - // TODO: do not overwrite + seek to the end of the file - } - + write_buffer.reset(); const auto & ids = std::get>(keys_buffer.values); if (ids.empty()) return; Poco::Logger::get("partition").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!!"); - std::vector offsets; + AIOContext aio_context{1}; - size_t prev_size = 0; - for (size_t row = 0; row < ids.size(); ++row) + iocb write_request; + memset(&write_request, 0, sizeof(write_request)); + iocb * write_request_ptr{&write_request}; + +#if defined(__FreeBSD__) + write_request.aio.aio_lio_opcode = LIO_WRITE; + write_request.aio.aio_fildes = fd; + write_request.aio.aio_buf = reinterpret_cast(buffer_begin); + write_request.aio.aio_nbytes = region_aligned_size; + write_request.aio.aio_offset = region_aligned_begin; +#else + write_request.aio_lio_opcode = IOCB_CMD_PWRITE; + write_request.aio_fildes = read_fd; + write_request.aio_buf = reinterpret_cast(memory.data()); + write_request.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; + write_request.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE * current_file_block_id; +#endif + + Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); + + while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0) { - offsets.push_back((offsets.empty() ? 
write_data_buffer->getPositionInFile() : offsets.back()) + prev_size); - prev_size = 0; - - for (size_t col = 0; col < attributes_buffer.size(); ++col) - { - const auto & attribute = attributes_buffer[col]; - - switch (attribute.type) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - const auto & values = std::get>(attribute.values); \ - writeBinary(values[row], *static_cast(write_data_buffer.get())); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - // TODO: string support - break; - } - } + if (errno != EINTR) + throw Exception("Cannot submit request for asynchronous IO on file " + path + BIN_FILE_EXT, ErrorCodes::CANNOT_IO_SUBMIT); } - write_data_buffer->sync(); + + CurrentMetrics::Increment metric_increment_write{CurrentMetrics::Write}; + + io_event event; + while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0) + { + if (errno != EINTR) + throw Exception("Failed to wait for asynchronous IO completion on file " + path + BIN_FILE_EXT, ErrorCodes::CANNOT_IO_GETEVENTS); + } + + // Unpoison the memory returned from an uninstrumented system function. + __msan_unpoison(&event, sizeof(event)); + + ssize_t bytes_written; +#if defined(__FreeBSD__) + bytes_written = aio_return(reinterpret_cast(event.udata)); +#else + bytes_written = event.res; +#endif + + ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); + ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); + + if (bytes_written != static_cast(write_request.aio_nbytes)) + throw Exception("Not all data was written for asynchronous IO on file " + path + BIN_FILE_EXT + ". 
returned: " + std::to_string(bytes_written), ErrorCodes::AIO_WRITE_ERROR); + + int res = ::fsync(read_fd); + if (res == -1) + throwFromErrnoWithPath("Cannot fsync " + path, path, ErrorCodes::CANNOT_FSYNC); /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) { key_to_metadata[ids[row]].index.setInMemory(false); key_to_metadata[ids[row]].index.setBlockId(current_file_block_id); - key_to_metadata[ids[row]].index.setAddressInBlock(offsets[row]); Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_metadata[ids[row]].index.index)); } + ++current_file_block_id; + /// clear buffer std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); for (auto & attribute : attributes_buffer) @@ -409,7 +427,7 @@ void CachePartition::getValueFromMemory( Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(i) + " --- " + std::to_string(offset)); - ReadBufferFromMemory read_buffer(memory.data() + offset, memory.size() - offset); + ReadBufferFromMemory read_buffer(memory.data() + offset, SSD_BLOCK_SIZE - offset); readValueFromBuffer(attribute_index, out[i], read_buffer); } } @@ -431,7 +449,7 @@ void CachePartition::getValueFromStorage( std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(MAX_ATTRIBUTES_SIZE * index_to_out.size(), BUFFER_ALIGNMENT); + DB::Memory read_buffer(SSD_BLOCK_SIZE * index_to_out.size(), BUFFER_ALIGNMENT); std::vector requests(index_to_out.size()); memset(requests.data(), 0, requests.size() * sizeof(requests.front())); @@ -448,8 +466,8 @@ void CachePartition::getValueFromStorage( #else requests[i].aio_lio_opcode = IOCB_CMD_PREAD; requests[i].aio_fildes = read_fd; - requests[i].aio_buf = reinterpret_cast(read_buffer.data()) + i * MAX_ATTRIBUTES_SIZE; - requests[i].aio_nbytes = MAX_ATTRIBUTES_SIZE; + requests[i].aio_buf = reinterpret_cast(read_buffer.data()) + i * SSD_BLOCK_SIZE; + requests[i].aio_nbytes = SSD_BLOCK_SIZE; requests[i].aio_offset = index_to_out[i].first; requests[i].aio_data = i; #endif @@ -499,7 +517,7 @@ void CachePartition::getValueFromStorage( Poco::Logger::get("Read:").information("ito: f:" + std::to_string(index_to_out[event.data].first) + " s:" + std::to_string(index_to_out[event.data].second)); Poco::Logger::get("Read:").information("data: " + std::to_string(event.data) + " res: " + std::to_string(event.res)); - DB::ReadBufferFromMemory buf(read_buffer.data() + event.data * MAX_ATTRIBUTES_SIZE, event.res); + DB::ReadBufferFromMemory buf(read_buffer.data() + event.data * SSD_BLOCK_SIZE, event.res); readValueFromBuffer(attribute_index, out[index_to_out[event.data].second], buf); } } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 61185774ad2..c851905b821 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -143,8 +143,6 @@ private: //int index_fd; mutable int read_fd = -1; - std::unique_ptr write_data_buffer; - struct KeyMetadata final { using time_point_t = std::chrono::system_clock::time_point; From 2e10fe5878aff2774055b5db9cb4f90180d10c38 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 7 Jan 2020 20:55:32 +0300 Subject: [PATCH 0017/2229] aio read --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 151 +++++++++++-------- dbms/src/Dictionaries/SSDCacheDictionary.h | 4 +- 2 files changed, 87 insertions(+), 68 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp 
b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index af958207b57..857b93fbc0f 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -60,10 +61,10 @@ namespace ErrorCodes namespace { - constexpr size_t MAX_KEYS_TO_READ_ONCE = 128; constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; - //constexpr size_t MAX_ATTRIBUTES_SIZE = 1024; + + constexpr size_t MAX_BLOCKS_TO_KEEP_IN_MEMORY = 16; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -183,8 +184,8 @@ CachePartition::CachePartition( ProfileEvents::increment(ProfileEvents::FileOpen); const std::string filename = path + BIN_FILE_EXT; - read_fd = ::open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_DIRECT, 0666); - if (read_fd == -1) + fd = ::open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_DIRECT, 0666); + if (fd == -1) { auto error_code = (errno == ENOENT) ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); @@ -194,7 +195,7 @@ CachePartition::CachePartition( CachePartition::~CachePartition() { - ::close(read_fd); + ::close(fd); } void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes) @@ -260,8 +261,6 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & } } - flush(); - write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); ++index; } } @@ -316,19 +315,18 @@ void CachePartition::flush() AIOContext aio_context{1}; - iocb write_request; - memset(&write_request, 0, sizeof(write_request)); + iocb write_request{}; iocb * write_request_ptr{&write_request}; #if defined(__FreeBSD__) write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; - write_request.aio.aio_buf = reinterpret_cast(buffer_begin); - write_request.aio.aio_nbytes = region_aligned_size; - write_request.aio.aio_offset = region_aligned_begin; + write_request.aio.aio_buf = reinterpret_cast(memory.data()); + write_request.aio.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; + write_request.aio.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; - write_request.aio_fildes = read_fd; + write_request.aio_fildes = fd; write_request.aio_buf = reinterpret_cast(memory.data()); write_request.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; write_request.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE * current_file_block_id; @@ -367,9 +365,8 @@ void CachePartition::flush() if (bytes_written != static_cast(write_request.aio_nbytes)) throw Exception("Not all data was written for asynchronous IO on file " + path + BIN_FILE_EXT + ". 
returned: " + std::to_string(bytes_written), ErrorCodes::AIO_WRITE_ERROR); - int res = ::fsync(read_fd); - if (res == -1) - throwFromErrnoWithPath("Cannot fsync " + path, path, ErrorCodes::CANNOT_FSYNC); + if (::fsync(fd) < 0) + throwFromErrnoWithPath("Cannot fsync " + path + BIN_FILE_EXT, path + BIN_FILE_EXT, ErrorCodes::CANNOT_FSYNC); /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) @@ -417,7 +414,6 @@ template void CachePartition::getValueFromMemory( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const { - //const auto & attribute = std::get>(attributes_buffer[attribute_index].values); for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; @@ -437,88 +433,113 @@ template void CachePartition::getValueFromStorage( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const { - std::vector> index_to_out; + std::vector> index_to_out; for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; if (index.exists() && !index.inMemory()) - index_to_out.emplace_back(index.getAddressInBlock(), i); + index_to_out.emplace_back(index, i); } if (index_to_out.empty()) return; + /// sort by (block_id, offset_in_block) std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(SSD_BLOCK_SIZE * index_to_out.size(), BUFFER_ALIGNMENT); + DB::Memory read_buffer(SSD_BLOCK_SIZE * MAX_BLOCKS_TO_KEEP_IN_MEMORY, BUFFER_ALIGNMENT); - std::vector requests(index_to_out.size()); - memset(requests.data(), 0, requests.size() * sizeof(requests.front())); - std::vector pointers(index_to_out.size()); + std::vector requests; + std::vector pointers; + std::vector> blocks_to_indices; + requests.reserve(index_to_out.size()); + pointers.reserve(index_to_out.size()); + blocks_to_indices.reserve(index_to_out.size()); for (size_t i = 0; i < index_to_out.size(); ++i) { + if (!requests.empty() && + static_cast(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE) + { + blocks_to_indices.back().push_back(i); + continue; + } + + iocb request{}; #if defined(__FreeBSD__) request.aio.aio_lio_opcode = LIO_READ; - request.aio.aio_fildes = read_fd; - request.aio.aio_buf = reinterpret_cast(read_buffer.data() + i * MAX_ATTRIBUTES_SIZE); - request.aio.aio_nbytes = MAX_ATTRIBUTES_SIZE; + request.aio.aio_fildes = fd; + request.aio.aio_buf = reinterpret_cast( + reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY)); + request.aio.aio_nbytes = SSD_BLOCK_SIZE; request.aio.aio_offset = index_to_out[i].first; request.aio_data = i; #else - requests[i].aio_lio_opcode = IOCB_CMD_PREAD; - requests[i].aio_fildes = read_fd; - requests[i].aio_buf = reinterpret_cast(read_buffer.data()) + i * SSD_BLOCK_SIZE; - requests[i].aio_nbytes = SSD_BLOCK_SIZE; - requests[i].aio_offset = index_to_out[i].first; - requests[i].aio_data = i; + request.aio_lio_opcode = IOCB_CMD_PREAD; + request.aio_fildes = fd; + request.aio_buf = reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY); + request.aio_nbytes = SSD_BLOCK_SIZE; + request.aio_offset = index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE; + request.aio_data = i; #endif + requests.push_back(request); + pointers.push_back(&requests.back()); - Poco::Logger::get("requests:").information(); - pointers[i] = &requests[i]; + blocks_to_indices.emplace_back(); + blocks_to_indices.back().push_back(i); } + 
Poco::Logger::get("requests:").information(std::to_string(requests.size())); - //const auto pointers = ext::map( - // std::begin(requests), std::end(requests), [](const iocb & request) { return &request; }); + AIOContext aio_context(MAX_BLOCKS_TO_KEEP_IN_MEMORY); - AIOContext context(MAX_KEYS_TO_READ_ONCE); + std::vector processed(requests.size(), false); + std::vector events(requests.size()); - std::vector events(index_to_out.size()); - - for (size_t i = 0; i < index_to_out.size(); i += MAX_KEYS_TO_READ_ONCE) + size_t to_push = 0; + size_t to_pop = 0; + while (to_pop < requests.size()) { - size_t to_push = std::min(MAX_KEYS_TO_READ_ONCE, index_to_out.size() - i); - size_t push_index = i; - int pushed = 0; - while (to_push > 0 && (pushed = io_submit(context.ctx, to_push, pointers.data() + push_index)) < 0) + /// get io tasks from previous iteration + size_t popped = 0; + while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) < 0) { if (errno != EINTR) throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); - to_push -= pushed; - push_index += pushed; - pushed = 0; } - size_t to_get = std::min(MAX_KEYS_TO_READ_ONCE, index_to_out.size() - i); - size_t got_index = i; - int got = 0; - while (to_get > 0 && (got = io_getevents(context.ctx, to_get, to_get, events.data() + got_index, NULL)) < 0) + for (size_t i = to_pop; i < to_pop + popped; ++i) + { + const auto request_id = events[i].data; + const auto & request = requests[request_id]; + if (events[i].res != static_cast(request.aio_nbytes)) + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". returned: " + std::to_string(events[i].res), ErrorCodes::AIO_WRITE_ERROR); + + for (const size_t idx : blocks_to_indices[request_id]) + { + const auto & [file_index, out_index] = index_to_out[idx]; + DB::ReadBufferFromMemory buf( + reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), + SSD_BLOCK_SIZE - file_index.getAddressInBlock()); + readValueFromBuffer(attribute_index, out[out_index], buf); + + Poco::Logger::get("kek").information(std::to_string(file_index.getAddressInBlock()) + " " + std::to_string(file_index.getBlockId())); + } + + processed[request_id] = true; + } + + while (to_pop < requests.size() && processed[to_pop]) + ++to_pop; + + /// add new io tasks + const size_t new_tasks_count = std::min(MAX_BLOCKS_TO_KEEP_IN_MEMORY - (to_push - to_pop), requests.size() - to_push); + + size_t pushed = 0; + while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) { if (errno != EINTR) - throwFromErrno("io_getevents: Failed to get an event from asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); - to_get -= got; - got_index += got; - got = 0; + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); } - } - - //std::sort(std::begin(events), std::end(events), [](const auto & lhs, const auto & rhs) { return lhs.data < rhs.data; }); - for (const auto & event : events) - { - Poco::Logger::get("Read:").information("ito: f:" + std::to_string(index_to_out[event.data].first) + " s:" + std::to_string(index_to_out[event.data].second)); - Poco::Logger::get("Read:").information("data: " + std::to_string(event.data) + " res: " + std::to_string(event.res)); - - DB::ReadBufferFromMemory buf(read_buffer.data() + event.data * SSD_BLOCK_SIZE, event.res); - readValueFromBuffer(attribute_index, 
out[index_to_out[event.data].second], buf); + to_push += pushed; } } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index c851905b821..5f8017158e6 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -140,8 +140,7 @@ private: std::string path; //mutable std::shared_mutex rw_lock; - //int index_fd; - mutable int read_fd = -1; + int fd = -1; struct KeyMetadata final { @@ -164,7 +163,6 @@ private: Attribute keys_buffer; Attributes attributes_buffer; - //MutableColumns buffer; DB::Memory<> memory; std::optional write_buffer; From 297b8aa7ab2778d4e48578070810059e4d0e784f Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 7 Jan 2020 22:18:24 +0300 Subject: [PATCH 0018/2229] fix read --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 31 +++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 857b93fbc0f..a237a438b3b 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -204,8 +204,9 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; const auto & ids = std::get>(new_keys.values); + auto & ids_buffer = std::get>(keys_buffer.values); - appendValuesToAttribute(keys_buffer, new_keys); + //appendValuesToAttribute(keys_buffer, new_keys); if (!write_buffer) write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); @@ -217,6 +218,8 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & key_index.setBlockId(current_memory_block_id); key_index.setAddressInBlock(write_buffer->offset()); + bool flushed = false; + for (const auto & attribute : new_attributes) { // TODO: rework via columns + getDataAt @@ -228,7 +231,7 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & if (sizeof(TYPE) > write_buffer->available()) \ { \ flush(); \ - write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); \ + flushed = true; \ continue; \ } \ else \ @@ -261,7 +264,13 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & } } - ++index; + if (!flushed) + { + ids_buffer.push_back(ids[index]); + ++index; + } + else + write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); } } @@ -388,26 +397,26 @@ template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found) const { + Poco::Logger::get("IDS:").information(std::to_string(ids.size())); PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { auto it = key_to_metadata.find(ids[i]); if (it == std::end(key_to_metadata)) // TODO: check expired { - Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i])); indices[i].setNotExists(); not_found[ids[i]].push_back(i); + Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i]) + " " + std::to_string(indices[i].index)); } else { - Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i])); indices[i] = it->second.index; + Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i]) + " " + std::to_string(indices[i].index)); } - - - getValueFromMemory(attribute_index, indices, out); - getValueFromStorage(attribute_index, indices, out); } + + getValueFromMemory(attribute_index, indices, out); 
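+    /// Two-tier lookup: indices still flagged in-memory were served from the RAM write buffer above; the remaining ones are fetched from the partition file via AIO below.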
+ getValueFromStorage(attribute_index, indices, out); } template @@ -442,6 +451,8 @@ void CachePartition::getValueFromStorage( } if (index_to_out.empty()) return; + for (const auto & [index1, index2] : index_to_out) + Poco::Logger::get("FROM STORAGE:").information(std::to_string(index2) + " ## " + std::to_string(index1.getBlockId()) + " " + std::to_string(index1.getAddressInBlock())); /// sort by (block_id, offset_in_block) std::sort(std::begin(index_to_out), std::end(index_to_out)); @@ -478,7 +489,7 @@ void CachePartition::getValueFromStorage( request.aio_buf = reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY); request.aio_nbytes = SSD_BLOCK_SIZE; request.aio_offset = index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE; - request.aio_data = i; + request.aio_data = requests.size(); #endif requests.push_back(request); pointers.push_back(&requests.back()); From 05622f2bee73b269e20f469809edb36d1c91da2e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 15:40:29 +0300 Subject: [PATCH 0019/2229] has and ttl --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 121 +++++++++++++----- dbms/src/Dictionaries/SSDCacheDictionary.h | 62 +++++---- .../Functions/FunctionsExternalDictionaries.h | 1 + 3 files changed, 132 insertions(+), 52 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index a237a438b3b..6ca85ff23e7 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -82,20 +82,20 @@ namespace const std::string IND_FILE_EXT = ".idx"; } -CachePartition::KeyMetadata::time_point_t CachePartition::KeyMetadata::expiresAt() const +CachePartition::Metadata::time_point_t CachePartition::Metadata::expiresAt() const { return ext::safe_bit_cast(data & KEY_METADATA_EXPIRES_AT_MASK); } -void CachePartition::KeyMetadata::setExpiresAt(const time_point_t & t) +void CachePartition::Metadata::setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } -bool CachePartition::KeyMetadata::isDefault() const +bool CachePartition::Metadata::isDefault() const { return (data & KEY_METADATA_IS_DEFAULT_MASK) == KEY_METADATA_IS_DEFAULT_MASK; } -void CachePartition::KeyMetadata::setDefault() +void CachePartition::Metadata::setDefault() { data |= KEY_METADATA_IS_DEFAULT_MASK; } @@ -198,7 +198,7 @@ CachePartition::~CachePartition() ::close(fd); } -void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes) +void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata) { if (new_attributes.size() != attributes_buffer.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; @@ -206,17 +206,16 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & const auto & ids = std::get>(new_keys.values); auto & ids_buffer = std::get>(keys_buffer.values); - //appendValuesToAttribute(keys_buffer, new_keys); - if (!write_buffer) write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); for (size_t index = 0; index < ids.size();) { - auto & key_index = key_to_metadata[ids[index]].index; - key_index.setInMemory(true); - key_index.setBlockId(current_memory_block_id); - key_index.setAddressInBlock(write_buffer->offset()); + auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; + index_and_metadata.index.setInMemory(true); + index_and_metadata.index.setBlockId(current_memory_block_id); + 
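+        /// The packed index now points at the current in-memory block; the write-buffer offset recorded next is where this row's values will be serialized.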
index_and_metadata.index.setAddressInBlock(write_buffer->offset()); + index_and_metadata.metadata = metadata[index]; bool flushed = false; @@ -332,7 +331,7 @@ void CachePartition::flush() write_request.aio.aio_fildes = fd; write_request.aio.aio_buf = reinterpret_cast(memory.data()); write_request.aio.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; - write_request.aio.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE; + write_request.aio.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE * current_file_block_id; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; @@ -380,9 +379,9 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) { - key_to_metadata[ids[row]].index.setInMemory(false); - key_to_metadata[ids[row]].index.setBlockId(current_file_block_id); - Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_metadata[ids[row]].index.index)); + key_to_index_and_metadata[ids[row]].index.setInMemory(false); + key_to_index_and_metadata[ids[row]].index.setBlockId(current_file_block_id); + Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_index_and_metadata[ids[row]].index.index)); } ++current_file_block_id; @@ -395,23 +394,28 @@ void CachePartition::flush() template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::unordered_map> & not_found) const + ResultArrayType & out, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const { Poco::Logger::get("IDS:").information(std::to_string(ids.size())); PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { - auto it = key_to_metadata.find(ids[i]); - if (it == std::end(key_to_metadata)) // TODO: check expired + auto it = key_to_index_and_metadata.find(ids[i]); + if (it == std::end(key_to_index_and_metadata)) { indices[i].setNotExists(); not_found[ids[i]].push_back(i); - Poco::Logger::get("part:").information("NOT FOUND " + std::to_string(ids[i]) + " " + std::to_string(indices[i].index)); + } + else if (it->second.metadata.expiresAt() <= now) + { + indices[i].setNotExists(); + not_found[ids[i]].push_back(i); + markExpired(it); } else { indices[i] = it->second.index; - Poco::Logger::get("part:").information("HIT " + std::to_string(ids[i]) + " " + std::to_string(indices[i].index)); } } @@ -482,7 +486,7 @@ void CachePartition::getValueFromStorage( reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY)); request.aio.aio_nbytes = SSD_BLOCK_SIZE; request.aio.aio_offset = index_to_out[i].first; - request.aio_data = i; + request.aio_data = requests.size(); #else request.aio_lio_opcode = IOCB_CMD_PREAD; request.aio_fildes = fd; @@ -619,22 +623,38 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst } } -void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out) const +template +void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, + std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { for (size_t i = 0; i < ids.size(); ++i) { - auto it = key_to_metadata.find(ids[i]); - if (it == std::end(key_to_metadata)) + auto it = key_to_index_and_metadata.find(ids[i]); + + if (it == std::end(key_to_index_and_metadata)) { - out[i] = 0; + not_found[ids[i]].push_back(i); + } + else if (it->second.metadata.expiresAt() <= now) + { + 
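+            /// TTL has elapsed: report the key as missing so update() refetches it, and evict the stale entry below.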
not_found[ids[i]].push_back(i); + markExpired(it); } else { - out[i] = it->second.isDefault(); + Poco::Logger::get("not expired").information("expires at " + std::to_string(std::chrono::system_clock::to_time_t(it->second.metadata.expiresAt())) + " now: " + std::to_string(std::chrono::system_clock::to_time_t(now))); + out[i] = !it->second.metadata.isDefault(); } } } +template +void CachePartition::markExpired(const Iterator & it) const +{ + Poco::Logger::get("markExpired").information("expired: " + std::to_string(it->first)); + key_to_index_and_metadata.erase(it); +} + CacheStorage::CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) : dictionary(dictionary_) , path(path_) @@ -689,15 +709,20 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values); + std::vector metadata(ids.size()); + const auto & dict_lifetime = dictionary.getLifetime(); + for (const auto i : ext::range(0, ids.size())) { + std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; + metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); /// mark corresponding id as found on_updated(ids[i], i, new_attributes); remaining_ids[ids[i]] = 1; } /// TODO: Add TTL to block - partitions[0]->appendBlock(new_keys, new_attributes); + partitions[0]->appendBlock(new_keys, new_attributes, metadata); } stream->readSuffix(); @@ -761,6 +786,10 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector metadata; + const auto & dict_lifetime = dictionary.getLifetime(); + for (const auto & id_found_pair : remaining_ids) { if (id_found_pair.second) @@ -785,6 +814,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); + std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; + metadata.emplace_back(); + metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); + metadata.back().setDefault(); + /// Set null_value for each attribute const auto & attributes = dictionary.getAttributes(); for (size_t i = 0; i < attributes.size(); ++i) @@ -827,8 +861,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes); + partitions[0]->appendBlock(new_keys, new_attributes, metadata); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); @@ -1001,8 +1036,10 @@ template void SSDCacheDictionary::getItemsNumberImpl( const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const { + const auto now = std::chrono::system_clock::now(); + std::unordered_map> not_found_ids; - storage.getValue(attribute_index, ids, out, not_found_ids); + storage.getValue(attribute_index, ids, out, not_found_ids, now); if (not_found_ids.empty()) return; @@ -1061,6 +1098,32 @@ void SSDCacheDictionary::getItemsString(const size_t attribute_index, const Padd UNUSED(get_default); } +void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray & out) const +{ + const auto now = std::chrono::system_clock::now(); + + std::unordered_map> not_found_ids; + storage.has(ids, out, not_found_ids, now); + if (not_found_ids.empty()) + return; + + std::vector required_ids(not_found_ids.size()); + std::transform(std::begin(not_found_ids), 
std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; }); + + storage.update( + source_ptr, + required_ids, + [&](const auto id, const auto, const auto &) { + for (const size_t out_row : not_found_ids[id]) + out[out_row] = true; + }, + [&](const size_t id) + { + for (const size_t row : not_found_ids[id]) + out[row] = false; + }); +} + size_t SSDCacheDictionary::getAttributeIndex(const std::string & attr_name) const { auto it = attribute_index_by_name.find(attr_name); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 5f8017158e6..f77171b2098 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -47,6 +47,22 @@ public: class CachePartition { public: + struct Metadata final + { + using time_point_t = std::chrono::system_clock::time_point; + using time_point_rep_t = time_point_t::rep; + using time_point_urep_t = std::make_unsigned_t; + + time_point_t expiresAt() const; + void setExpiresAt(const time_point_t & t); + + bool isDefault() const; + void setDefault(); + + /// Stores both expiration time and `is_default` flag in the most significant bit + time_point_urep_t data = 0; + }; + using Offset = size_t; using Offsets = std::vector; @@ -61,14 +77,14 @@ public: template void getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::unordered_map> & not_found) const; + ResultArrayType & out, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const; // TODO:: getString - /// 0 -- not found - /// 1 -- good - /// 2 -- expired - void has(const PaddedPODArray & ids, ResultArrayType & out) const; + template + void has(const PaddedPODArray & ids, ResultArrayType & out, + std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; struct Attribute { @@ -96,7 +112,7 @@ public: using Attributes = std::vector; // Key, (Metadata), attributes - void appendBlock(const Attribute & new_keys, const Attributes & new_attributes); + void appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata); private: struct Index final @@ -134,6 +150,9 @@ private: template void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; + template + void markExpired(const Iterator & it) const; + size_t file_id; size_t max_size; //size_t buffer_size; @@ -142,24 +161,13 @@ private: //mutable std::shared_mutex rw_lock; int fd = -1; - struct KeyMetadata final + struct IndexAndMetadata final { - using time_point_t = std::chrono::system_clock::time_point; - using time_point_rep_t = time_point_t::rep; - using time_point_urep_t = std::make_unsigned_t; - - time_point_t expiresAt() const; - void setExpiresAt(const time_point_t & t); - - bool isDefault() const; - void setDefault(); - Index index{}; - /// Stores both expiration time and `is_default` flag in the most significant bit - time_point_urep_t data = 0; + Metadata metadata{}; }; - std::unordered_map key_to_metadata; + mutable std::unordered_map key_to_index_and_metadata; Attribute keys_buffer; Attributes attributes_buffer; @@ -189,13 +197,21 @@ public: template void getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::unordered_map> & not_found) const + ResultArrayType & out, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const { - partitions[0]->getValue(attribute_index, ids, out, not_found); + 
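+        /// Single-partition layout for now: every lookup is routed to partition 0.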
partitions[0]->getValue(attribute_index, ids, out, not_found, now); } // getString(); + template + void has(const PaddedPODArray & ids, ResultArrayType & out, + std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const + { + partitions[0]->has(ids, out, not_found, now); + } + template void update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found); @@ -356,7 +372,7 @@ public: void getString(const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const; - void has(const PaddedPODArray & /* ids */, PaddedPODArray & /* out */) const override {} // TODO + void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override // TODO { diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 8542bc00f93..746b894e154 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -137,6 +137,7 @@ private: if (!executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchSimple(block, arguments, result, dict_ptr) && + !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) From ddaf23d4e3885e3466467834d7a26542527e72fe Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 17:09:56 +0300 Subject: [PATCH 0020/2229] test --- .../01053_ssd_dictionary.reference | 17 ++++ .../0_stateless/01053_ssd_dictionary.sql | 78 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference create mode 100644 dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference new file mode 100644 index 00000000000..b85ff6488b3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -0,0 +1,17 @@ +UPDATE DICTIONARY +118 +VALUE FROM DISK +-100 +VALUE FROM RAM BUFFER +8 +VALUES FROM DISK AND RAM BUFFER +118 +VALUES NOT FROM TABLE +0 -1 +DUPLICATE KEYS +1 -100 +2 4 +3 -1 +3 -1 +2 4 +1 -100 diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql new file mode 100644 index 00000000000..f14ce5bdd91 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -0,0 +1,78 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +CREATE TABLE database_for_dict.table_for_dict +( + id UInt64, + a UInt64, + b Int32 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100), (2, 3, 4), (5, 6, 7), (10, 9, 8); + +DROP TABLE IF EXISTS database_for_dict.keys_table; + +CREATE TABLE database_for_dict.keys_table +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO database_for_dict.keys_table VALUES (1); +INSERT INTO database_for_dict.keys_table SELECT 
intHash64(number) FROM system.numbers LIMIT 370; +INSERT INTO database_for_dict.keys_table VALUES (2); +INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370, 370; +INSERT INTO database_for_dict.keys_table VALUES (5); +INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 700, 370; +INSERT INTO database_for_dict.keys_table VALUES (10); + +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +CREATE DICTIONARY database_for_dict.ssd_dict +( + id UInt64, + a UInt64 DEFAULT 0, + b Int32 DEFAULT -1 +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1000 MAX 2000) +LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')); + +SELECT 'UPDATE DICTIONARY'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; + +SELECT 'VALUE FROM DISK'; +-- -100 +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); + +SELECT 'VALUE FROM RAM BUFFER'; +-- 8 +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(10)); + +SELECT 'VALUES FROM DISK AND RAM BUFFER'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; + +SELECT 'VALUES NOT FROM TABLE'; +-- 0 -1 +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)); + + +SELECT 'DUPLICATE KEYS'; +SELECT arrayJoin([1, 2, 3, 3, 2, 1]) AS id, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(id)); + +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +DROP DATABASE IF EXISTS database_for_dict; From 81c9d66c7be81faf1a24b68ee0b28d88952de309 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 17:14:19 +0300 Subject: [PATCH 0021/2229] fix --- dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index f14ce5bdd91..232f17691a4 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -23,8 +23,7 @@ CREATE TABLE database_for_dict.keys_table ( id UInt64 ) -ENGINE = MergeTree() -ORDER BY id; +ENGINE = StripeLog(); INSERT INTO database_for_dict.keys_table VALUES (1); INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370; From 371c3f819e5aaba9673a34f41af5086e53998481 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 17:21:18 +0300 Subject: [PATCH 0022/2229] read with one thread --- .../01053_ssd_dictionary.reference | 4 ++++ .../0_stateless/01053_ssd_dictionary.sql | 22 ++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index b85ff6488b3..d7a2b13a8ce 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -15,3 +15,7 @@ DUPLICATE KEYS 3 -1 2 4 1 -100 +UPDATE DICTIONARY (max_threads=1) +118 +VALUES FROM DISK AND RAM BUFFER (max_threads=1) +118 diff --git 
a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 232f17691a4..520c1181776 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -66,12 +66,32 @@ SELECT 'VALUES NOT FROM TABLE'; -- 0 -1 SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)); - SELECT 'DUPLICATE KEYS'; SELECT arrayJoin([1, 2, 3, 3, 2, 1]) AS id, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(id)); DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; +CREATE DICTIONARY database_for_dict.ssd_dict +( + id UInt64, + a UInt64 DEFAULT 0, + b Int32 DEFAULT -1 +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1000 MAX 2000) +LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')); + +SELECT 'UPDATE DICTIONARY (max_threads=1)'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table SETTINGS max_threads=1; + +SELECT 'VALUES FROM DISK AND RAM BUFFER (max_threads=1)'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table SETTINGS max_threads=1; + +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + DROP TABLE IF EXISTS database_for_dict.table_for_dict; DROP DATABASE IF EXISTS database_for_dict; From 4a65b1b345136045987aca6af4948042822ebe56 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 17:25:58 +0300 Subject: [PATCH 0023/2229] test for dict with MT --- .../01053_ssd_dictionary.reference | 4 +-- .../0_stateless/01053_ssd_dictionary.sql | 28 ++++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index d7a2b13a8ce..cda8dc267c5 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -15,7 +15,7 @@ DUPLICATE KEYS 3 -1 2 4 1 -100 -UPDATE DICTIONARY (max_threads=1) +UPDATE DICTIONARY (MT) 118 -VALUES FROM DISK AND RAM BUFFER (max_threads=1) +VALUES FROM DISK AND RAM BUFFER (MT) 118 diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 520c1181776..ce33187c5b0 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -71,6 +71,26 @@ SELECT arrayJoin([1, 2, 3, 3, 2, 1]) AS id, dictGetInt32('database_for_dict.ssd_ DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; +DROP TABLE IF EXISTS database_for_dict.keys_table; + +CREATE TABLE database_for_dict.keys_table +( + id UInt64 +) +ENGINE = MergeTree() +ORDER BY id; + +INSERT INTO database_for_dict.keys_table VALUES (1); +INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370; +INSERT INTO database_for_dict.keys_table VALUES (2); +INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370, 370; +INSERT INTO database_for_dict.keys_table VALUES (5); +INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 700, 370; +INSERT INTO database_for_dict.keys_table VALUES (10); + +-- one 
block +OPTIMIZE TABLE database_for_dict.keys_table; + CREATE DICTIONARY database_for_dict.ssd_dict ( id UInt64, @@ -82,13 +102,13 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dic LIFETIME(MIN 1000 MAX 2000) LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')); -SELECT 'UPDATE DICTIONARY (max_threads=1)'; +SELECT 'UPDATE DICTIONARY (MT)'; -- 118 -SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table SETTINGS max_threads=1; +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; -SELECT 'VALUES FROM DISK AND RAM BUFFER (max_threads=1)'; +SELECT 'VALUES FROM DISK AND RAM BUFFER (MT)'; -- 118 -SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table SETTINGS max_threads=1; +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; From d968bf66b7b86024101780c59c7f1ed044b9e02e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 20:10:37 +0300 Subject: [PATCH 0024/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 28 +++++++------------ .../0_stateless/01053_ssd_dictionary.sql | 3 -- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 6ca85ff23e7..c598d03c96a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -64,7 +64,7 @@ namespace constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; - constexpr size_t MAX_BLOCKS_TO_KEEP_IN_MEMORY = 16; + constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -330,14 +330,14 @@ void CachePartition::flush() write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; write_request.aio.aio_buf = reinterpret_cast(memory.data()); - write_request.aio.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; - write_request.aio.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE * current_file_block_id; + write_request.aio.aio_nbytes = SSD_BLOCK_SIZE; + write_request.aio.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; write_request.aio_buf = reinterpret_cast(memory.data()); - write_request.aio_nbytes = DEFAULT_AIO_FILE_BLOCK_SIZE; - write_request.aio_offset = DEFAULT_AIO_FILE_BLOCK_SIZE * current_file_block_id; + write_request.aio_nbytes = SSD_BLOCK_SIZE; + write_request.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; #endif Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); @@ -381,7 +381,6 @@ void CachePartition::flush() { key_to_index_and_metadata[ids[row]].index.setInMemory(false); key_to_index_and_metadata[ids[row]].index.setBlockId(current_file_block_id); - Poco::Logger::get("INDEX:").information("NEW MAP: " + std::to_string(ids[row]) + " -> " + std::to_string(key_to_index_and_metadata[ids[row]].index.index)); } ++current_file_block_id; @@ -434,8 +433,6 @@ void CachePartition::getValueFromMemory( { const size_t offset = 
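// An aside on the packed index (a sketch based on the constants declared in this
// file): Index stores the "in memory" flag, the block id and the byte address
// inside one uncompressed SSD_BLOCK_SIZE block in a single size_t, so decoding is
// plain masking and shifting:
//   address  = index & INDEX_IN_BLOCK_MASK;                         // low bits
//   block_id = (index & BLOCK_INDEX_MASK) >> INDEX_IN_BLOCK_BITS;   // next bits
// The getAddressInBlock() call used immediately below wraps this arithmetic.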
index.getAddressInBlock(); - Poco::Logger::get("part:").information("GET FROM MEMORY " + std::to_string(i) + " --- " + std::to_string(offset)); - ReadBufferFromMemory read_buffer(memory.data() + offset, SSD_BLOCK_SIZE - offset); readValueFromBuffer(attribute_index, out[i], read_buffer); } @@ -455,13 +452,11 @@ void CachePartition::getValueFromStorage( } if (index_to_out.empty()) return; - for (const auto & [index1, index2] : index_to_out) - Poco::Logger::get("FROM STORAGE:").information(std::to_string(index2) + " ## " + std::to_string(index1.getBlockId()) + " " + std::to_string(index1.getAddressInBlock())); /// sort by (block_id, offset_in_block) std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(SSD_BLOCK_SIZE * MAX_BLOCKS_TO_KEEP_IN_MEMORY, BUFFER_ALIGNMENT); + DB::Memory read_buffer(SSD_BLOCK_SIZE * READ_BUFFER_SIZE_BLOCKS, BUFFER_ALIGNMENT); std::vector requests; std::vector pointers; @@ -483,14 +478,14 @@ void CachePartition::getValueFromStorage( request.aio.aio_lio_opcode = LIO_READ; request.aio.aio_fildes = fd; request.aio.aio_buf = reinterpret_cast( - reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY)); + reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (requests.size() % READ_BUFFER_SIZE_BLOCKS)); request.aio.aio_nbytes = SSD_BLOCK_SIZE; request.aio.aio_offset = index_to_out[i].first; request.aio_data = requests.size(); #else request.aio_lio_opcode = IOCB_CMD_PREAD; request.aio_fildes = fd; - request.aio_buf = reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (i % MAX_BLOCKS_TO_KEEP_IN_MEMORY); + request.aio_buf = reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (requests.size() % READ_BUFFER_SIZE_BLOCKS); request.aio_nbytes = SSD_BLOCK_SIZE; request.aio_offset = index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE; request.aio_data = requests.size(); @@ -504,7 +499,7 @@ void CachePartition::getValueFromStorage( Poco::Logger::get("requests:").information(std::to_string(requests.size())); - AIOContext aio_context(MAX_BLOCKS_TO_KEEP_IN_MEMORY); + AIOContext aio_context(READ_BUFFER_SIZE_BLOCKS); std::vector processed(requests.size(), false); std::vector events(requests.size()); @@ -535,8 +530,6 @@ void CachePartition::getValueFromStorage( reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), SSD_BLOCK_SIZE - file_index.getAddressInBlock()); readValueFromBuffer(attribute_index, out[out_index], buf); - - Poco::Logger::get("kek").information(std::to_string(file_index.getAddressInBlock()) + " " + std::to_string(file_index.getBlockId())); } processed[request_id] = true; @@ -546,7 +539,7 @@ void CachePartition::getValueFromStorage( ++to_pop; /// add new io tasks - const size_t new_tasks_count = std::min(MAX_BLOCKS_TO_KEEP_IN_MEMORY - (to_push - to_pop), requests.size() - to_push); + const size_t new_tasks_count = std::min(READ_BUFFER_SIZE_BLOCKS - (to_push - to_pop), requests.size() - to_push); size_t pushed = 0; while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) @@ -642,7 +635,6 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayTypesecond.metadata.expiresAt())) + " now: " + std::to_string(std::chrono::system_clock::to_time_t(now))); out[i] = !it->second.metadata.isDefault(); } } diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index ce33187c5b0..e526df908a5 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ 
b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -88,9 +88,6 @@ INSERT INTO database_for_dict.keys_table VALUES (5); INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 700, 370; INSERT INTO database_for_dict.keys_table VALUES (10); --- one block -OPTIMIZE TABLE database_for_dict.keys_table; - CREATE DICTIONARY database_for_dict.ssd_dict ( id UInt64, From ce29a3cc000aaba6c6e14514afcc3b5dbf983b1e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 20:52:13 +0300 Subject: [PATCH 0025/2229] remove unused attributes --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 58 +++++--------------- dbms/src/Dictionaries/SSDCacheDictionary.h | 5 +- 2 files changed, 15 insertions(+), 48 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index c598d03c96a..12314929731 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -61,10 +61,11 @@ namespace ErrorCodes namespace { - constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; - constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры + constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры - constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; + constexpr size_t AIO_MAX_SIMULTANIOUS_REQUESTS = 32; + constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; // TODO: в параметры static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -76,7 +77,7 @@ namespace constexpr size_t INDEX_IN_BLOCK_MASK = (1ULL << INDEX_IN_BLOCK_BITS) - 1; constexpr size_t BLOCK_INDEX_MASK = ((1ULL << (BLOCK_INDEX_BITS + INDEX_IN_BLOCK_BITS)) - 1) ^ INDEX_IN_BLOCK_MASK; - constexpr size_t NOT_FOUND = -1; + constexpr size_t NOT_EXISTS = -1; const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; @@ -107,12 +108,12 @@ bool CachePartition::Index::inMemory() const bool CachePartition::Index::exists() const { - return index != NOT_FOUND; + return index != NOT_EXISTS; } void CachePartition::Index::setNotExists() { - index = NOT_FOUND; + index = NOT_EXISTS; } void CachePartition::Index::setInMemory(const bool in_memory) @@ -141,44 +142,13 @@ void CachePartition::Index::setBlockId(const size_t block_id) } CachePartition::CachePartition( - const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure, + const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure_, const std::string & dir_path, const size_t file_id_, const size_t max_size_) - : file_id(file_id_), max_size(max_size_), path(dir_path + "/" + std::to_string(file_id)), memory(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT) + : file_id(file_id_), max_size(max_size_), path(dir_path + "/" + std::to_string(file_id)) + , attributes_structure(attributes_structure_), memory(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT) { keys_buffer.type = AttributeUnderlyingType::utUInt64; keys_buffer.values = std::vector(); - for (const auto & type : attributes_structure) - { - switch (type) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - attributes_buffer.emplace_back(); \ - attributes_buffer.back().type = type; \ - attributes_buffer.back().values = std::vector(); \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - 
DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - // TODO: string support - break; - } - } { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -200,7 +170,7 @@ CachePartition::~CachePartition() void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata) { - if (new_attributes.size() != attributes_buffer.size()) + if (new_attributes.size() != attributes_structure.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; const auto & ids = std::get>(new_keys.values); @@ -387,8 +357,6 @@ void CachePartition::flush() /// clear buffer std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); - for (auto & attribute : attributes_buffer) - std::visit([](auto & attr) { attr.clear(); }, attribute.values); } template @@ -556,7 +524,7 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst { for (size_t i = 0; i < attribute_index; ++i) { - switch (attributes_buffer[i].type) + switch (attributes_structure[i]) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ @@ -587,7 +555,7 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst } } - switch (attributes_buffer[attribute_index].type) + switch (attributes_structure[attribute_index]) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index f77171b2098..87d5564d338 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -155,7 +155,6 @@ private: size_t file_id; size_t max_size; - //size_t buffer_size; std::string path; //mutable std::shared_mutex rw_lock; @@ -170,7 +169,7 @@ private: mutable std::unordered_map key_to_index_and_metadata; Attribute keys_buffer; - Attributes attributes_buffer; + const std::vector attributes_structure; DB::Memory<> memory; std::optional write_buffer; @@ -178,7 +177,7 @@ private: size_t current_memory_block_id = 0; size_t current_file_block_id = 0; - mutable std::atomic element_count{0}; + // mutable std::atomic element_count{0}; }; using CachePartitionPtr = std::unique_ptr; From fc94ffe84e7c15f16db093dd9335cdbfd09efbad Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 8 Jan 2020 22:41:05 +0300 Subject: [PATCH 0026/2229] some refactoring --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 108 ++++++++----------- dbms/src/Dictionaries/SSDCacheDictionary.h | 68 ++++++------ 2 files changed, 76 insertions(+), 100 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 12314929731..dcc8c9fdf34 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -364,7 +364,6 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { - Poco::Logger::get("IDS:").information(std::to_string(ids.size())); PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { @@ -465,8 +464,6 @@ void CachePartition::getValueFromStorage( blocks_to_indices.back().push_back(i); } - 
Poco::Logger::get("requests:").information(std::to_string(requests.size())); - AIOContext aio_context(READ_BUFFER_SIZE_BLOCKS); std::vector processed(requests.size(), false); @@ -611,30 +608,27 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayType void CachePartition::markExpired(const Iterator & it) const { - Poco::Logger::get("markExpired").information("expired: " + std::to_string(it->first)); key_to_index_and_metadata.erase(it); } -CacheStorage::CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) - : dictionary(dictionary_) +CacheStorage::CacheStorage( + const Attributes & attributes_structure_, const std::string & path_, + const size_t partitions_count_, const size_t partition_max_size_) + : attributes_structure(attributes_structure_) , path(path_) , partition_max_size(partition_max_size_) , log(&Poco::Logger::get("CacheStorage")) { - std::vector structure; - for (const auto & item : dictionary.getStructure().attributes) - { - structure.push_back(item.underlying_type); - } for (size_t partition_id = 0; partition_id < partitions_count_; ++partition_id) - partitions.emplace_back(std::make_unique(AttributeUnderlyingType::utUInt64, structure, path_, partition_id, partition_max_size)); + partitions.emplace_back(std::make_unique(AttributeUnderlyingType::utUInt64, + attributes_structure, path_, partition_id, partition_max_size)); } template void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, - PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found) + PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, + const DictionaryLifetime lifetime, const std::vector & null_values) { - Poco::Logger::get("cachestorage").information("update"); CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); @@ -664,17 +658,15 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorread()) { const auto new_keys = createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front(); - const auto new_attributes = createAttributesFromBlock( - block, 1, ext::map(dictionary.getAttributes(), [](const auto & attribute) { return attribute.type; })); + const auto new_attributes = createAttributesFromBlock(block, 1, attributes_structure); const auto & ids = std::get>(new_keys.values); std::vector metadata(ids.size()); - const auto & dict_lifetime = dictionary.getLifetime(); for (const auto i : ext::range(0, ids.size())) { - std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); /// mark corresponding id as found on_updated(ids[i], i, new_attributes); @@ -699,8 +691,8 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); \ break; @@ -748,7 +740,6 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector metadata; - const auto & dict_lifetime = dictionary.getLifetime(); for (const auto & id_found_pair : remaining_ids) { @@ -774,24 +765,23 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); - std::uniform_int_distribution distribution{dict_lifetime.min_sec, dict_lifetime.max_sec}; + 
std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata.emplace_back(); metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); metadata.back().setDefault(); /// Set null_value for each attribute - const auto & attributes = dictionary.getAttributes(); - for (size_t i = 0; i < attributes.size(); ++i) + for (size_t i = 0; i < attributes_structure.size(); ++i) { - const auto & attribute = attributes[i]; + const auto & attribute = attributes_structure[i]; // append null - switch (attribute.type) + switch (attribute) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ { \ auto & to_values = std::get>(new_attributes[i].values); \ - auto & null_value = std::get(attribute.null_value); \ + auto & null_value = std::get(null_values[i]); \ to_values.push_back(null_value); \ } \ break; @@ -891,7 +881,8 @@ SSDCacheDictionary::SSDCacheDictionary( , dict_lifetime(dict_lifetime_) , path(path_) , partition_max_size(partition_max_size_) - , storage(*this, path, 1, partition_max_size) + , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), + path, 1, partition_max_size) , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) @@ -906,7 +897,7 @@ SSDCacheDictionary::SSDCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ - const auto null_value = std::get(attributes[index].null_value); \ + const auto null_value = std::get(null_values[index]); \ getItemsNumberImpl( \ index, \ ids, \ @@ -1017,15 +1008,17 @@ void SSDCacheDictionary::getItemsNumberImpl( { for (const size_t row : not_found_ids[id]) out[row] = get_default(row); - }); + }, + getLifetime(), + null_values); } void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const { const auto index = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); - const auto null_value = StringRef{std::get(attributes[index].null_value)}; + const auto null_value = StringRef{std::get(null_values[index])}; getItemsString(index, ids, out, [&](const size_t) { return null_value; }); } @@ -1034,7 +1027,7 @@ void SSDCacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const ColumnString * const def, ColumnString * const out) const { const auto index = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); getItemsString(index, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } @@ -1043,7 +1036,7 @@ void SSDCacheDictionary::getString( const std::string & attribute_name, const PaddedPODArray & ids, const String & def, ColumnString * const out) const { const auto index = getAttributeIndex(attribute_name); - checkAttributeType(name, attribute_name, attributes[index].type, AttributeUnderlyingType::utString); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); 
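// The three getString overloads differ only in where the default for a missing
// key comes from: the attribute's configured null_value, a per-row column of
// defaults, or (in this overload) a single constant string. Each choice is passed
// into getItemsString as a lambda, so the lookup code itself stays agnostic;
// presumably these correspond to dictGetString and the two call forms of
// dictGetStringOrDefault.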
getItemsString(index, ids, out, [&](const size_t) { return StringRef{def}; }); } @@ -1081,7 +1074,9 @@ void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArraysecond; } -SSDCacheDictionary::Attribute & SSDCacheDictionary::getAttribute(const std::string & attr_name) -{ - return attributes[getAttributeIndex(attr_name)]; -} - -const SSDCacheDictionary::Attribute & SSDCacheDictionary::getAttribute(const std::string & attr_name) const -{ - return attributes[getAttributeIndex(attr_name)]; -} - -const SSDCacheDictionary::Attributes & SSDCacheDictionary::getAttributes() const -{ - return attributes; -} - template -SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value) +AttributeValueVariant SSDCacheDictionary::createAttributeNullValueWithTypeImpl(const Field & null_value) { - Attribute attr{type, {}}; - attr.null_value = static_cast(null_value.get>()); + AttributeValueVariant var_null_value = static_cast(null_value.get>()); bytes_allocated += sizeof(T); - return attr; + return var_null_value; } template <> -SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value) +AttributeValueVariant SSDCacheDictionary::createAttributeNullValueWithTypeImpl(const Field & null_value) { - Attribute attr{type, {}}; - attr.null_value = null_value.get(); + AttributeValueVariant var_null_value = null_value.get(); bytes_allocated += sizeof(StringRef); //if (!string_arena) // string_arena = std::make_unique(); - return attr; + return var_null_value; } -SSDCacheDictionary::Attribute SSDCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) +AttributeValueVariant SSDCacheDictionary::createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value) { switch (type) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - return createAttributeWithTypeImpl(type, null_value); + return createAttributeNullValueWithTypeImpl(null_value); DISPATCH(UInt8) DISPATCH(UInt16) @@ -1157,13 +1135,13 @@ case AttributeUnderlyingType::ut##TYPE: \ void SSDCacheDictionary::createAttributes() { - attributes.reserve(dict_struct.attributes.size()); + null_values.reserve(dict_struct.attributes.size()); for (size_t i = 0; i < dict_struct.attributes.size(); ++i) { const auto & attribute = dict_struct.attributes[i]; attribute_index_by_name.emplace(attribute.name, i); - attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); + null_values.push_back(createAttributeNullValueWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 87d5564d338..07f5f511b6c 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -44,6 +44,23 @@ public: ssize_t readString(const String & str, WriteBuffer & buffer); };*/ +using AttributeValueVariant = std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String>; + class CachePartition { public: @@ -186,9 +203,10 @@ using CachePartitionPtr = std::unique_ptr; class CacheStorage { public: + using Attributes = std::vector; 
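// This refactoring removes CacheStorage's back-reference to SSDCacheDictionary:
// the storage now receives only the attribute type layout in its constructor,
// and update() takes the lifetime and null_values explicitly, breaking the
// circular dependency between the two classes. A minimal sketch of the new
// wiring, mirroring the constructor call later in this patch:
//   CacheStorage storage(
//       ext::map(dict_struct.attributes, [](const auto & a) { return a.underlying_type; }),
//       path, /* partitions_count */ 1, partition_max_size);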
using Key = IDictionary::Key; - CacheStorage(SSDCacheDictionary & dictionary_, const std::string & path_, + CacheStorage(const Attributes & attributes_structure_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_); template @@ -213,7 +231,8 @@ public: template void update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, - PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found); + PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, + const DictionaryLifetime lifetime, const std::vector & null_values); //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; @@ -225,10 +244,8 @@ private: CachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); - SSDCacheDictionary & dictionary; + const Attributes attributes_structure; - // Block structure: Key, (Default + TTL), Attr1, Attr2, ... - // const Block header; const std::string path; const size_t partition_max_size; std::vector partitions; @@ -244,6 +261,8 @@ private: mutable std::chrono::system_clock::time_point backoff_end_time; // stats + mutable size_t bytes_allocated = 0; + mutable std::atomic element_count{0}; mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; @@ -380,39 +399,18 @@ public: return nullptr; } +private: + size_t getAttributeIndex(const std::string & attr_name) const; + struct Attribute { AttributeUnderlyingType type; - std::variant< - UInt8, - UInt16, - UInt32, - UInt64, - UInt128, - Int8, - Int16, - Int32, - Int64, - Decimal32, - Decimal64, - Decimal128, - Float32, - Float64, - String> null_value; + AttributeValueVariant null_value; }; - using Attributes = std::vector; - - /// переместить - const Attributes & getAttributes() const; - -private: - size_t getAttributeIndex(const std::string & attr_name) const; - Attribute & getAttribute(const std::string & attr_name); - const Attribute & getAttribute(const std::string & attr_name) const; template - Attribute createAttributeWithTypeImpl(const AttributeUnderlyingType type, const Field & null_value); - Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + AttributeValueVariant createAttributeNullValueWithTypeImpl(const Field & null_value); + AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value); void createAttributes(); template @@ -429,11 +427,11 @@ private: const std::string path; const size_t partition_max_size; - mutable CacheStorage storage; - Logger * const log; std::map attribute_index_by_name; - Attributes attributes; // TODO: move to storage + std::vector null_values; + mutable CacheStorage storage; + Logger * const log; mutable size_t bytes_allocated = 0; mutable std::atomic element_count{0}; From 07ffe96932c04e33e77b912d6d28d6e4543ede7b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 9 Jan 2020 22:34:03 +0300 Subject: [PATCH 0027/2229] locks --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 16 +++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 37 ++++---------------- 2 files changed, 11 insertions(+), 42 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index dcc8c9fdf34..9aba99157a3 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -165,11 +165,13 @@ CachePartition::CachePartition( 
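// The locking scheme this patch introduces, in brief: mutating paths take an
// exclusive lock on the partition's shared_mutex, while read paths take a shared
// one, so many lookups can run concurrently but never observe a half-written
// buffer:
//   std::unique_lock lock(rw_lock);   // writers: appendBlock, ~CachePartition
//   std::shared_lock lock(rw_lock);   // readers: getValue, has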
CachePartition::~CachePartition() { + std::unique_lock lock(rw_lock); ::close(fd); } void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata) { + std::unique_lock lock(rw_lock); if (new_attributes.size() != attributes_structure.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; @@ -364,6 +366,7 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { + std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { @@ -377,7 +380,6 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray { indices[i].setNotExists(); not_found[ids[i]].push_back(i); - markExpired(it); } else { @@ -459,7 +461,6 @@ void CachePartition::getValueFromStorage( #endif requests.push_back(request); pointers.push_back(&requests.back()); - blocks_to_indices.emplace_back(); blocks_to_indices.back().push_back(i); } @@ -585,6 +586,7 @@ template void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { + std::shared_lock lock(rw_lock); for (size_t i = 0; i < ids.size(); ++i) { auto it = key_to_index_and_metadata.find(ids[i]); @@ -596,7 +598,6 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayTypesecond.metadata.expiresAt() <= now) { not_found[ids[i]].push_back(i); - markExpired(it); } else { @@ -605,12 +606,6 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayType -void CachePartition::markExpired(const Iterator & it) const -{ - key_to_index_and_metadata.erase(it); -} - CacheStorage::CacheStorage( const Attributes & attributes_structure_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_) @@ -673,7 +668,6 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes, metadata); } @@ -760,8 +754,6 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 07f5f511b6c..af61f7be9cb 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -23,27 +23,6 @@ namespace DB class SSDCacheDictionary; class CacheStorage; -/*class SimpleSerializer -{ -public: - bool block() const { return false; } - - template - size_t estimateSizeNumber(T number) const; - - size_t estimateSizeString(const String & str) const; - - template - ssize_t writeNumber(T number, WriteBuffer & buffer); - - ssize_t writeString(const String & str, WriteBuffer & buffer); - - template - ssize_t readNumber(T number, WriteBuffer & buffer); - - ssize_t readString(const String & str, WriteBuffer & buffer); -};*/ - using AttributeValueVariant = std::variant< UInt8, UInt16, @@ -131,6 +110,7 @@ public: // Key, (Metadata), attributes void appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata); + void flush(); private: struct Index final { @@ -152,8 +132,6 @@ private: size_t index = 0; }; - void flush(); - size_t appendValuesToAttribute(Attribute & to, const Attribute & from); template @@ -167,14 +145,12 @@ private: template void readValueFromBuffer(const size_t attribute_index, Out & 
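// A related behavioural change in the same patch: markExpired() is removed.
// Reads now hold only a shared lock, so an expired entry can no longer be erased
// during a lookup; it is reported as missing and left for the next update(),
// which takes the write path, to refresh. A sketch of the new read-path check,
// with names as in getValue/has above:
//   if (it == std::end(key_to_index_and_metadata) || it->second.metadata.expiresAt() <= now)
//       not_found[ids[i]].push_back(i);   // refreshed later by update()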
dst, ReadBuffer & buf) const; - template - void markExpired(const Iterator & it) const; - size_t file_id; size_t max_size; std::string path; - //mutable std::shared_mutex rw_lock; + mutable std::shared_mutex rw_lock; + int fd = -1; struct IndexAndMetadata final @@ -197,7 +173,7 @@ private: // mutable std::atomic element_count{0}; }; -using CachePartitionPtr = std::unique_ptr; +using CachePartitionPtr = std::shared_ptr; class CacheStorage @@ -226,6 +202,7 @@ public: void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { + //for (auto & partition : partitions) partitions[0]->has(ids, out, not_found, now); } @@ -248,14 +225,14 @@ private: const std::string path; const size_t partition_max_size; + + mutable std::shared_mutex rw_lock; std::vector partitions; Logger * const log; mutable pcg64 rnd_engine; - mutable std::shared_mutex rw_lock; - mutable std::exception_ptr last_update_exception; mutable size_t update_error_count = 0; mutable std::chrono::system_clock::time_point backoff_end_time; From 75f750833b7ff13be61a382218cbbdcdc1b97a9b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 10 Jan 2020 21:01:23 +0300 Subject: [PATCH 0028/2229] opt table --- dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index e526df908a5..045b55b73a3 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -88,6 +88,8 @@ INSERT INTO database_for_dict.keys_table VALUES (5); INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 700, 370; INSERT INTO database_for_dict.keys_table VALUES (10); +OPTIMIZE TABLE database_for_dict.keys_table; + CREATE DICTIONARY database_for_dict.ssd_dict ( id UInt64, From 2e10d93cab95b028470bed5be1df09706861620a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 10 Jan 2020 22:19:03 +0300 Subject: [PATCH 0029/2229] some refactoring --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 61 ++++---------------- dbms/src/Dictionaries/SSDCacheDictionary.h | 12 +--- 2 files changed, 13 insertions(+), 60 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 9aba99157a3..52c9db98bbb 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -148,7 +148,7 @@ CachePartition::CachePartition( , attributes_structure(attributes_structure_), memory(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT) { keys_buffer.type = AttributeUnderlyingType::utUInt64; - keys_buffer.values = std::vector(); + keys_buffer.values = PaddedPODArray(); { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -169,7 +169,7 @@ CachePartition::~CachePartition() ::close(fd); } -void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata) +void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata) { std::unique_lock lock(rw_lock); if (new_attributes.size() != attributes_structure.size()) @@ -245,45 +245,6 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & } } -size_t CachePartition::appendValuesToAttribute(Attribute & to, const Attribute & from) -{ - switch (to.type) - { -#define 
DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - auto &to_values = std::get>(to.values); \ - auto &from_values = std::get>(from.values); \ - size_t prev_size = to_values.size(); \ - to_values.resize(to_values.size() + from_values.size()); \ - memcpy(&to_values[prev_size], &from_values[0], from_values.size() * sizeof(TYPE)); \ - return from_values.size() * sizeof(TYPE); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - // TODO: string support - break; - } - throw Exception{"Unknown attribute type: " + std::to_string(static_cast(to.type)), ErrorCodes::TYPE_MISMATCH}; -} - void CachePartition::flush() { write_buffer.reset(); @@ -652,12 +613,12 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorread()) { - const auto new_keys = createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front(); + const auto new_keys = std::move(createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front()); const auto new_attributes = createAttributesFromBlock(block, 1, attributes_structure); const auto & ids = std::get>(new_keys.values); - std::vector metadata(ids.size()); + PaddedPODArray metadata(ids.size()); for (const auto i : ext::range(0, ids.size())) { @@ -695,7 +656,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); + new_keys.values = PaddedPODArray(); CachePartition::Attributes new_attributes; { /// TODO: create attributes from structure @@ -707,7 +668,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); \ + new_attributes.back().values = PaddedPODArray(); \ break; DISPATCH(UInt8) @@ -733,7 +694,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector metadata; + PaddedPODArray metadata; for (const auto & id_found_pair : remaining_ids) { @@ -755,7 +716,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); + std::get>(new_keys.values).push_back(id); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata.emplace_back(); @@ -772,7 +733,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_attributes[i].values); \ + auto & to_values = std::get>(new_attributes[i].values); \ auto & null_value = std::get(null_values[i]); \ to_values.push_back(null_value); \ } \ @@ -826,7 +787,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ { \ - std::vector values(column->size()); \ + PaddedPODArray values(column->size()); \ const auto raw_data = column->getRawData(); \ memcpy(&values[0], raw_data.data, raw_data.size * sizeof(TYPE)); \ attributes.emplace_back(); \ @@ -994,7 +955,7 @@ void SSDCacheDictionary::getItemsNumberImpl( required_ids, [&](const auto id, const auto row, const auto & new_attributes) { for (const size_t out_row : not_found_ids[id]) - out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; + out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const size_t id) { diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 
af61f7be9cb..53b2ef8624a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -85,7 +85,7 @@ public: struct Attribute { template - using Container = std::vector; + using Container = PaddedPODArray; AttributeUnderlyingType type; std::variant< @@ -108,7 +108,7 @@ public: using Attributes = std::vector; // Key, (Metadata), attributes - void appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const std::vector & metadata); + void appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata); void flush(); private: @@ -132,8 +132,6 @@ private: size_t index = 0; }; - size_t appendValuesToAttribute(Attribute & to, const Attribute & from); - template void getValueFromMemory( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; @@ -379,12 +377,6 @@ public: private: size_t getAttributeIndex(const std::string & attr_name) const; - struct Attribute - { - AttributeUnderlyingType type; - AttributeValueVariant null_value; - }; - template AttributeValueVariant createAttributeNullValueWithTypeImpl(const Field & null_value); AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value); From 15afa5240a4404ef7ed66b4be05fbbf4b1af42d7 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 10 Jan 2020 22:21:48 +0300 Subject: [PATCH 0030/2229] rm unused file --- .../src/Dictionaries/SSDCacheDictionary.inc.h | 45 ------------------- 1 file changed, 45 deletions(-) delete mode 100644 dbms/src/Dictionaries/SSDCacheDictionary.inc.h diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.inc.h b/dbms/src/Dictionaries/SSDCacheDictionary.inc.h deleted file mode 100644 index c3d11303509..00000000000 --- a/dbms/src/Dictionaries/SSDCacheDictionary.inc.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -namespace DB { -/* -template -void BlockFile::getValue(size_t column, const PaddedPODArray & ids, ResultArrayType & out, PaddedPODArray & not_found) const -{ - std::vector> offsets; - offsets.reserve(ids.size()); - - for (size_t i = 0; i < ids.size(); ++i) - { - auto it = key_to_file_offset.find(ids[i]); - if (it != std::end(key_to_file_offset)) - { - offsets.emplace_back(it->second, i); - } - else - { - not_found.push_back(i); - } - } - std::sort(std::begin(offsets), std::end(offsets)); - - Field field; - for (const auto & [offset, index] : offsets) - { - if (offset & OFFSET_MASK) - { - in_file.seek(offset && !OFFSET_MASK); - for (size_t col = 0; col < column; ++col) - { - const auto & type = header.getByPosition(column).type; - type->deserializeBinary(field, in_file); - } - } - else - { - buffer[column]->get(offset, field); - } - out[index] = DB::get(field); - } -} -*/ -} \ No newline at end of file From 50a68c41a89af89d8c3f158823e2102ad4f267c5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Fri, 10 Jan 2020 23:08:57 +0300 Subject: [PATCH 0031/2229] fixes --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 2 +- dbms/src/Dictionaries/SSDCacheDictionary.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 52c9db98bbb..e9b77c325c5 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -64,7 +64,7 @@ namespace constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры constexpr size_t BUFFER_ALIGNMENT = 
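// (In this hunk the Russian comment "в параметры" means "move into parameters":
// the block size, buffer alignment and the read/write buffer sizes are meant to
// become user-facing layout settings rather than compile-time constants. A
// hypothetical dictionary definition once that lands, with syntax that is
// illustrative only and not implemented by this patch:
//   LAYOUT(SSD(BLOCK_SIZE 4096 READ_BUFFER_SIZE 16 MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')) )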
DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры - constexpr size_t AIO_MAX_SIMULTANIOUS_REQUESTS = 32; + constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; // TODO: в параметры static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 53b2ef8624a..faea7f7a350 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -409,6 +409,3 @@ private: }; } - - -#include "SSDCacheDictionary.inc.h" \ No newline at end of file From 0e2080c7de456379884d2f9f25591543ac00c701 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 10:20:48 +0300 Subject: [PATCH 0032/2229] preallocate --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index e9b77c325c5..b673c13fe4f 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -64,6 +64,9 @@ namespace constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры + constexpr size_t FILE_SIZE_TO_PREALLOCATE = 1 * 1024 * 1024 * 1024; + constexpr size_t FILE_SIZE_IN_BLOCKS = FILE_SIZE_TO_PREALLOCATE / SSD_BLOCK_SIZE; + constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; // TODO: в параметры @@ -81,6 +84,15 @@ namespace const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; + + int preallocateDiskSpace(int fd, size_t len) + { + #if defined(__FreeBSD__) + return posix_fallocate(fd, 0, len); + #else + return fallocate(fd, 0, 0, len); + #endif + } } CachePartition::Metadata::time_point_t CachePartition::Metadata::expiresAt() const @@ -160,6 +172,11 @@ CachePartition::CachePartition( auto error_code = (errno == ENOENT) ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); } + + if (preallocateDiskSpace(fd, FILE_SIZE_TO_PREALLOCATE) < 0) + { + throwFromErrnoWithPath("Cannot preallocate space for the file " + filename, filename, ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } } } From ee1e8cbdb35cc9192ff9b58044ff7a42c24acd39 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 14:19:12 +0300 Subject: [PATCH 0033/2229] read multipartition --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 43 ++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 72 +++++++++++--------- 2 files changed, 62 insertions(+), 53 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index b673c13fe4f..8619051ee48 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -64,8 +64,8 @@ namespace constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры - constexpr size_t FILE_SIZE_TO_PREALLOCATE = 1 * 1024 * 1024 * 1024; - constexpr size_t FILE_SIZE_IN_BLOCKS = FILE_SIZE_TO_PREALLOCATE / SSD_BLOCK_SIZE; + constexpr size_t FILE_SIZE_IN_BLOCKS = 16; + constexpr size_t FILE_SIZE_TO_PREALLOCATE = FILE_SIZE_IN_BLOCKS * SSD_BLOCK_SIZE; constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; // TODO: в параметры @@ -186,9 +186,12 @@ CachePartition::~CachePartition() ::close(fd); } -void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata) +size_t CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata) { std::unique_lock lock(rw_lock); + if (current_file_block_id >= FILE_SIZE_IN_BLOCKS) + return 0; + if (new_attributes.size() != attributes_structure.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; @@ -257,9 +260,12 @@ void CachePartition::appendBlock(const Attribute & new_keys, const Attributes & ids_buffer.push_back(ids[index]); ++index; } - else + else if (current_file_block_id < FILE_SIZE_IN_BLOCKS) write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); + else + return index; // End of current file. 
} + return ids.size(); } void CachePartition::flush() @@ -339,29 +345,28 @@ void CachePartition::flush() std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); } -template +template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::unordered_map> & not_found, + ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { - auto it = key_to_index_and_metadata.find(ids[i]); - if (it == std::end(key_to_index_and_metadata)) + if (found[i]) { indices[i].setNotExists(); - not_found[ids[i]].push_back(i); } - else if (it->second.metadata.expiresAt() <= now) + else if (auto it = key_to_index_and_metadata.find(ids[i]); + it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) { - indices[i].setNotExists(); - not_found[ids[i]].push_back(i); + indices[i] = it->second.index; + found[i] = true; } else { - indices[i] = it->second.index; + indices[i].setNotExists(); } } @@ -560,22 +565,16 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst } } -template -void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, - std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const +void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); for (size_t i = 0; i < ids.size(); ++i) { auto it = key_to_index_and_metadata.find(ids[i]); - if (it == std::end(key_to_index_and_metadata)) + if (it == std::end(key_to_index_and_metadata) || it->second.metadata.expiresAt() <= now) { - not_found[ids[i]].push_back(i); - } - else if (it->second.metadata.expiresAt() <= now) - { - not_found[ids[i]].push_back(i); + out[i] = static_cast(-1); } else { diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index faea7f7a350..c6f2a26f15e 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -43,6 +43,26 @@ using AttributeValueVariant = std::variant< class CachePartition { public: + struct Index final + { + bool inMemory() const; + void setInMemory(const bool in_memory); + + bool exists() const; + void setNotExists(); + + size_t getAddressInBlock() const; + void setAddressInBlock(const size_t address_in_block); + + size_t getBlockId() const; + void setBlockId(const size_t block_id); + + bool operator< (const Index & rhs) const { return index < rhs.index; } + + /// Stores `is_in_memory` flag, block id, address in uncompressed block + size_t index = 0; + }; + struct Metadata final { using time_point_t = std::chrono::system_clock::time_point; @@ -61,6 +81,7 @@ public: using Offset = size_t; using Offsets = std::vector; + using Key = IDictionary::Key; CachePartition( const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, @@ -71,16 +92,14 @@ public: template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - template + template void getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::unordered_map> & not_found, + ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const; // TODO:: getString - template - void has(const PaddedPODArray & ids, ResultArrayType & out, - std::unordered_map> 
& not_found, std::chrono::system_clock::time_point now) const; + void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; struct Attribute { @@ -107,31 +126,12 @@ public: }; using Attributes = std::vector; - // Key, (Metadata), attributes - void appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata); + /// Returns false if there are no allocated space to append block. + size_t appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata); void flush(); + private: - struct Index final - { - bool inMemory() const; - void setInMemory(const bool in_memory); - - bool exists() const; - void setNotExists(); - - size_t getAddressInBlock() const; - void setAddressInBlock(const size_t address_in_block); - - size_t getBlockId() const; - void setBlockId(const size_t block_id); - - bool operator< (const Index & rhs) const { return index < rhs.index; } - - /// Stores `is_in_memory` flag, block id, address in uncompressed block - size_t index = 0; - }; - template void getValueFromMemory( const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; @@ -178,7 +178,7 @@ class CacheStorage { public: using Attributes = std::vector; - using Key = IDictionary::Key; + using Key = CachePartition::Key; CacheStorage(const Attributes & attributes_structure_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_); @@ -191,7 +191,13 @@ public: ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { - partitions[0]->getValue(attribute_index, ids, out, not_found, now); + std::vector found(ids.size(), false); + for (auto & partition : partitions) + partition->getValue(attribute_index, ids, out, found, now); + + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); } // getString(); @@ -200,8 +206,12 @@ public: void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { - //for (auto & partition : partitions) - partitions[0]->has(ids, out, not_found, now); + for (auto & partition : partitions) + partition->has(ids, out, now); + + for (size_t i = 0; i < ids.size(); ++i) + if (out[i] == static_cast(-1)) + not_found[ids[i]].push_back(i); } template From 8489f2fef5dd4081c64136064ef5446f218f0853 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 19:38:43 +0300 Subject: [PATCH 0034/2229] many files in cache --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 48 ++++++++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 19 +++++--- 2 files changed, 47 insertions(+), 20 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 8619051ee48..a5df0c67498 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -64,7 +64,7 @@ namespace constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры - constexpr size_t FILE_SIZE_IN_BLOCKS = 16; + constexpr size_t FILE_SIZE_IN_BLOCKS = 2; constexpr size_t FILE_SIZE_TO_PREALLOCATE = FILE_SIZE_IN_BLOCKS * SSD_BLOCK_SIZE; constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры @@ -186,7 +186,8 @@ CachePartition::~CachePartition() ::close(fd); } 
-size_t CachePartition::appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata) +size_t CachePartition::appendBlock( + const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); if (current_file_block_id >= FILE_SIZE_IN_BLOCKS) @@ -201,9 +202,9 @@ size_t CachePartition::appendBlock(const Attribute & new_keys, const Attributes if (!write_buffer) write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); - for (size_t index = 0; index < ids.size();) + for (size_t index = begin; index < ids.size();) { - auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; + IndexAndMetadata index_and_metadata; index_and_metadata.index.setInMemory(true); index_and_metadata.index.setBlockId(current_memory_block_id); index_and_metadata.index.setAddressInBlock(write_buffer->offset()); @@ -257,15 +258,16 @@ size_t CachePartition::appendBlock(const Attribute & new_keys, const Attributes if (!flushed) { + key_to_index_and_metadata[ids[index]] = index_and_metadata; ids_buffer.push_back(ids[index]); ++index; } else if (current_file_block_id < FILE_SIZE_IN_BLOCKS) write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); else - return index; // End of current file. + return index - begin; // End of current file. } - return ids.size(); + return ids.size() - begin; } void CachePartition::flush() @@ -471,7 +473,6 @@ void CachePartition::getValueFromStorage( const auto & request = requests[request_id]; if (events[i].res != static_cast(request.aio_nbytes)) throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". returned: " + std::to_string(events[i].res), ErrorCodes::AIO_WRITE_ERROR); - for (const size_t idx : blocks_to_indices[request_id]) { const auto & [file_index, out_index] = index_to_out[idx]; @@ -583,17 +584,19 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayType(AttributeUnderlyingType::utUInt64, - attributes_structure, path_, partition_id, partition_max_size)); } template @@ -601,6 +604,23 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & null_values) { + auto append_block = [this](const CachePartition::Attribute & new_keys, + const CachePartition::Attributes & new_attributes, const PaddedPODArray & metadata) + { + size_t inserted = 0; + while (inserted < metadata.size()) + { + if (!partitions.empty()) + inserted += partitions.front()->appendBlock(new_keys, new_attributes, metadata, inserted); + if (inserted < metadata.size()) + { + partitions.emplace_front(std::make_unique( + AttributeUnderlyingType::utUInt64, attributes_structure, path, + (partitions.empty() ? 
0 : partitions.back()->getId() + 1), partition_max_size)); + } + } + }; + CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, requested_ids.size()); @@ -645,7 +665,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes, metadata); + append_block(new_keys, new_attributes, metadata); } stream->readSuffix(); @@ -782,7 +802,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes, metadata); + append_block(new_keys, new_attributes, metadata); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index c6f2a26f15e..9aa9b7f91aa 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -126,11 +126,13 @@ public: }; using Attributes = std::vector; - /// Returns false if there are no allocated space to append block. - size_t appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata); + size_t appendBlock(const Attribute & new_keys, const Attributes & new_attributes, + const PaddedPODArray & metadata, const size_t begin); void flush(); + size_t getId() const; + private: template void getValueFromMemory( @@ -177,10 +179,10 @@ using CachePartitionPtr = std::shared_ptr; class CacheStorage { public: - using Attributes = std::vector; + using AttributeTypes = std::vector; using Key = CachePartition::Key; - CacheStorage(const Attributes & attributes_structure_, const std::string & path_, + CacheStorage(const AttributeTypes & attributes_structure_, const std::string & path_, const size_t partitions_count_, const size_t partition_max_size_); template @@ -192,6 +194,8 @@ public: std::chrono::system_clock::time_point now) const { std::vector found(ids.size(), false); + + std::shared_lock lock(rw_lock); for (auto & partition : partitions) partition->getValue(attribute_index, ids, out, found, now); @@ -206,6 +210,7 @@ public: void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { + std::shared_lock lock(rw_lock); for (auto & partition : partitions) partition->has(ids, out, now); @@ -229,13 +234,15 @@ private: CachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); - const Attributes attributes_structure; + void collectGarbage() {} + + const AttributeTypes attributes_structure; const std::string path; const size_t partition_max_size; mutable std::shared_mutex rw_lock; - std::vector partitions; + std::list partitions; Logger * const log; From a77ef1ed074929335cdac5af6eb26c5a239be65e Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 23:23:51 +0300 Subject: [PATCH 0035/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 40 ++++++++++++++++++-- dbms/src/Dictionaries/SSDCacheDictionary.h | 26 ++----------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index a5df0c67498..198687f5b0e 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -82,6 
+82,8 @@ namespace constexpr size_t NOT_EXISTS = -1; + constexpr UInt8 HAS_NOT_FOUND = 2; + const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; @@ -277,7 +279,7 @@ void CachePartition::flush() if (ids.empty()) return; - Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!!"); + Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); AIOContext aio_context{1}; @@ -358,16 +360,19 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray { if (found[i]) { + Poco::Logger::get("kek").information("FOUND BEFORE:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); indices[i].setNotExists(); } else if (auto it = key_to_index_and_metadata.find(ids[i]); it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) { + Poco::Logger::get("kek").information(std::to_string(file_id) + " FOUND BEFORE: inmemory: " + std::to_string(it->second.index.inMemory()) + " " + std::to_string(it->second.index.getBlockId()) + " " + std::to_string(it->second.index.getAddressInBlock())); indices[i] = it->second.index; found[i] = true; } else { + Poco::Logger::get("kek").information("NF:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); indices[i].setNotExists(); } } @@ -575,7 +580,8 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayTypesecond.metadata.expiresAt() <= now) { - out[i] = static_cast(-1); + Poco::Logger::get("kek").information("NF:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); + out[i] = HAS_NOT_FOUND; } else { @@ -599,6 +605,34 @@ CacheStorage::CacheStorage( { } +template +void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, + ResultArrayType & out, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const +{ + std::vector found(ids.size(), false); + + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->getValue(attribute_index, ids, out, found, now); + + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); +} + +void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, + std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const +{ + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->has(ids, out, now); + + for (size_t i = 0; i < ids.size(); ++i) + if (out[i] == HAS_NOT_FOUND) + not_found[ids[i]].push_back(i); +} + template void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, @@ -616,7 +650,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector( AttributeUnderlyingType::utUInt64, attributes_structure, path, - (partitions.empty() ? 0 : partitions.back()->getId() + 1), partition_max_size)); + (partitions.empty() ? 
0 : partitions.front()->getId() + 1), partition_max_size)); } } }; diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 9aa9b7f91aa..847aa4d4c45 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -191,33 +191,13 @@ public: template void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, - std::chrono::system_clock::time_point now) const - { - std::vector found(ids.size(), false); - - std::shared_lock lock(rw_lock); - for (auto & partition : partitions) - partition->getValue(attribute_index, ids, out, found, now); - - for (size_t i = 0; i < ids.size(); ++i) - if (!found[i]) - not_found[ids[i]].push_back(i); - } + std::chrono::system_clock::time_point now) const; // getString(); - template - void has(const PaddedPODArray & ids, ResultArrayType & out, - std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const - { - std::shared_lock lock(rw_lock); - for (auto & partition : partitions) - partition->has(ids, out, now); - for (size_t i = 0; i < ids.size(); ++i) - if (out[i] == static_cast(-1)) - not_found[ids[i]].push_back(i); - } + void has(const PaddedPODArray & ids, ResultArrayType & out, + std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; template void update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, From 590ed9759f04d1ea122a4fe9a2abcaf99865b47d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 23:27:50 +0300 Subject: [PATCH 0036/2229] test --- .../queries/0_stateless/01053_ssd_dictionary.reference | 5 +++++ dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index cda8dc267c5..dda210b162b 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -6,6 +6,11 @@ VALUE FROM RAM BUFFER 8 VALUES FROM DISK AND RAM BUFFER 118 +HAS +1 +2 +5 +10 VALUES NOT FROM TABLE 0 -1 DUPLICATE KEYS diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 045b55b73a3..64e95c868da 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -62,13 +62,17 @@ SELECT 'VALUES FROM DISK AND RAM BUFFER'; -- 118 SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; +SELECT 'HAS'; +-- 1 2 5 10 +SELECT id FROM database_for_dict.keys_table WHERE dictHas('database_for_dict.ssd_dict', toUInt64(id)) ORDER BY id; + SELECT 'VALUES NOT FROM TABLE'; -- 0 -1 SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)); SELECT 'DUPLICATE KEYS'; SELECT arrayJoin([1, 2, 3, 3, 2, 1]) AS id, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(id)); - +--SELECT DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; DROP TABLE IF EXISTS database_for_dict.keys_table; @@ -99,7 +103,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 
2000) -LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')); +LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/2')); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 From 52a101516d7736d56deee483f2c4add296cf56aa Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 11 Jan 2020 23:56:27 +0300 Subject: [PATCH 0037/2229] improve locks --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 101 ++++++++++--------- 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 198687f5b0e..7c6ae384233 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -664,60 +664,62 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector backoff_end_time) { - try + const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + + if (now > backoff_end_time) { - if (update_error_count) + try { - /// Recover after error: we have to clone the source here because - /// it could keep connections which should be reset after error. - source_ptr = source_ptr->clone(); - } - - Stopwatch watch; - auto stream = source_ptr->loadIds(requested_ids); - stream->readPrefix(); - - while (const auto block = stream->read()) - { - const auto new_keys = std::move(createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front()); - const auto new_attributes = createAttributesFromBlock(block, 1, attributes_structure); - - const auto & ids = std::get>(new_keys.values); - - PaddedPODArray metadata(ids.size()); - - for (const auto i : ext::range(0, ids.size())) + if (update_error_count) { - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); - /// mark corresponding id as found - on_updated(ids[i], i, new_attributes); - remaining_ids[ids[i]] = 1; + /// Recover after error: we have to clone the source here because + /// it could keep connections which should be reset after error. + source_ptr = source_ptr->clone(); } - append_block(new_keys, new_attributes, metadata); + Stopwatch watch; + auto stream = source_ptr->loadIds(requested_ids); + stream->readPrefix(); + + while (const auto block = stream->read()) + { + const auto new_keys = std::move(createAttributesFromBlock(block, 0, { AttributeUnderlyingType::utUInt64 }).front()); + const auto new_attributes = createAttributesFromBlock(block, 1, attributes_structure); + + const auto & ids = std::get>(new_keys.values); + + PaddedPODArray metadata(ids.size()); + + for (const auto i : ext::range(0, ids.size())) + { + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); + /// mark corresponding id as found + on_updated(ids[i], i, new_attributes); + remaining_ids[ids[i]] = 1; + } + + append_block(new_keys, new_attributes, metadata); + } + + stream->readSuffix(); + + update_error_count = 0; + last_update_exception = std::exception_ptr{}; + backoff_end_time = std::chrono::system_clock::time_point{}; + + ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); } + catch (...) 
+ { + ++update_error_count; + last_update_exception = std::current_exception(); + backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, update_error_count)); - stream->readSuffix(); - - update_error_count = 0; - last_update_exception = std::exception_ptr{}; - backoff_end_time = std::chrono::system_clock::time_point{}; - - ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed()); - } - catch (...) - { - ++update_error_count; - last_update_exception = std::current_exception(); - backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, update_error_count)); - - tryLogException(last_update_exception, log, - "Could not update ssd cache dictionary, next update is scheduled at " + ext::to_string(backoff_end_time)); + tryLogException(last_update_exception, log, + "Could not update ssd cache dictionary, next update is scheduled at " + ext::to_string(backoff_end_time)); + } } } @@ -835,8 +837,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector Date: Sun, 12 Jan 2020 14:32:43 +0300 Subject: [PATCH 0038/2229] fifo compaction --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 50 ++++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 10 ++-- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 7c6ae384233..2b3fe9d8751 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -159,7 +160,7 @@ CachePartition::CachePartition( const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure_, const std::string & dir_path, const size_t file_id_, const size_t max_size_) : file_id(file_id_), max_size(max_size_), path(dir_path + "/" + std::to_string(file_id)) - , attributes_structure(attributes_structure_), memory(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT) + , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; keys_buffer.values = PaddedPODArray(); @@ -201,8 +202,10 @@ size_t CachePartition::appendBlock( const auto & ids = std::get>(new_keys.values); auto & ids_buffer = std::get>(keys_buffer.values); + if (!memory) + memory.emplace(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT); if (!write_buffer) - write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); + write_buffer.emplace(memory->data(), SSD_BLOCK_SIZE); for (size_t index = begin; index < ids.size();) { @@ -265,9 +268,14 @@ size_t CachePartition::appendBlock( ++index; } else if (current_file_block_id < FILE_SIZE_IN_BLOCKS) - write_buffer.emplace(memory.data(), SSD_BLOCK_SIZE); + { + write_buffer.emplace(memory->data(), SSD_BLOCK_SIZE); + } else + { + memory.reset(); return index - begin; // End of current file. 
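The return value here is what lets the caller resume after a full file: appendBlock reports how many rows it consumed starting at `begin`, and the append_block lambda in CacheStorage::update keeps creating fresh partitions at the front of the list until every row is written. A toy model of that contract, with simplified assumed types (PartitionSketch, appendAll are illustrative, not the patch's classes):

#include <algorithm>
#include <cstddef>
#include <list>
#include <memory>

// Toy partition: accepts rows until a fixed per-file capacity is exhausted.
struct PartitionSketch
{
    static constexpr size_t capacity = 4; // rows per file, arbitrary for the sketch
    size_t used = 0;

    size_t append(size_t rows)
    {
        const size_t taken = std::min(rows, capacity - used);
        used += taken;
        return taken; // may be less than `rows`; the caller must retry
    }
};

// Mirrors the append_block loop: the newest partition lives at the front,
// and a new one is created whenever the current file runs out of blocks.
void appendAll(std::list<std::unique_ptr<PartitionSketch>> & partitions, size_t total_rows)
{
    size_t inserted = 0;
    while (inserted < total_rows)
    {
        if (!partitions.empty())
            inserted += partitions.front()->append(total_rows - inserted);
        if (inserted < total_rows)
            partitions.emplace_front(std::make_unique<PartitionSketch>());
    }
}

This front-insertion order is also what makes the later FIFO compaction cheap: the oldest data is always at the back of the list, so evicting a partition is a splice off the tail.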
+ } } return ids.size() - begin; } @@ -289,13 +297,13 @@ void CachePartition::flush() #if defined(__FreeBSD__) write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; - write_request.aio.aio_buf = reinterpret_cast(memory.data()); + write_request.aio.aio_buf = reinterpret_cast(memory->data()); write_request.aio.aio_nbytes = SSD_BLOCK_SIZE; write_request.aio.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; - write_request.aio_buf = reinterpret_cast(memory.data()); + write_request.aio_buf = reinterpret_cast(memory->data()); write_request.aio_nbytes = SSD_BLOCK_SIZE; write_request.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; #endif @@ -392,7 +400,7 @@ void CachePartition::getValueFromMemory( { const size_t offset = index.getAddressInBlock(); - ReadBufferFromMemory read_buffer(memory.data() + offset, SSD_BLOCK_SIZE - offset); + ReadBufferFromMemory read_buffer(memory->data() + offset, SSD_BLOCK_SIZE - offset); readValueFromBuffer(attribute_index, out[i], read_buffer); } } @@ -595,12 +603,19 @@ size_t CachePartition::getId() const return file_id; } +void CachePartition::remove() +{ + std::unique_lock lock(rw_lock); + std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); +} + CacheStorage::CacheStorage( const AttributeTypes & attributes_structure_, const std::string & path_, - const size_t /* partitions_count_ */, const size_t partition_max_size_) + const size_t max_partitions_count_, const size_t partition_max_size_) : attributes_structure(attributes_structure_) , path(path_) , partition_max_size(partition_max_size_) + , max_partitions_count(max_partitions_count_) , log(&Poco::Logger::get("CacheStorage")) { } @@ -653,6 +668,8 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorgetId() + 1), partition_max_size)); } } + + collectGarbage(); }; CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; @@ -848,6 +865,23 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector max_partitions_count) + { + partition_delete_queue.push_back(partitions.back()); + partitions.pop_back(); + } + + // drop unused partitions + while (!partition_delete_queue.empty() && partition_delete_queue.front().use_count() == 1) + { + partition_delete_queue.front()->remove(); + partition_delete_queue.pop_front(); + } +} + CachePartition::Attributes CacheStorage::createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure) { @@ -910,7 +944,7 @@ SSDCacheDictionary::SSDCacheDictionary( , path(path_) , partition_max_size(partition_max_size_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, 1, partition_max_size) + path, 2, partition_max_size) , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 847aa4d4c45..7a876a3ed95 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -131,6 +131,8 @@ public: void flush(); + void remove(); + size_t getId() const; private: @@ -164,7 +166,7 @@ private: Attribute keys_buffer; const std::vector attributes_structure; - DB::Memory<> memory; + std::optional> memory; std::optional write_buffer; size_t current_memory_block_id = 0; @@ -183,7 +185,7 @@ public: using Key = 
CachePartition::Key; CacheStorage(const AttributeTypes & attributes_structure_, const std::string & path_, - const size_t partitions_count_, const size_t partition_max_size_); + const size_t max_partitions_count_, const size_t partition_max_size_); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -214,15 +216,17 @@ private: CachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); - void collectGarbage() {} + void collectGarbage(); const AttributeTypes attributes_structure; const std::string path; const size_t partition_max_size; + const size_t max_partitions_count; mutable std::shared_mutex rw_lock; std::list partitions; + std::list partition_delete_queue; Logger * const log; From e18fa890a12febead234f16631ed35225012be7a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 12 Jan 2020 15:29:42 +0300 Subject: [PATCH 0039/2229] remove and create files --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 20 ++++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 2b3fe9d8751..9e9d166dcae 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "DictionaryFactory.h" #include #include @@ -165,6 +166,10 @@ CachePartition::CachePartition( keys_buffer.type = AttributeUnderlyingType::utUInt64; keys_buffer.values = PaddedPODArray(); + Poco::File directory(dir_path); + if (!directory.exists()) + directory.createDirectory(); + { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -606,7 +611,8 @@ size_t CachePartition::getId() const void CachePartition::remove() { std::unique_lock lock(rw_lock); - std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); + Poco::File(path + BIN_FILE_EXT).remove(); + //std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); } CacheStorage::CacheStorage( @@ -620,6 +626,13 @@ CacheStorage::CacheStorage( { } +CacheStorage::~CacheStorage() +{ + std::unique_lock lock(rw_lock); + partition_delete_queue.splice(std::end(partition_delete_queue), partitions); + collectGarbage(); +} + template void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, @@ -868,10 +881,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector max_partitions_count) + while (partitions.size() > max_partitions_count) { - partition_delete_queue.push_back(partitions.back()); - partitions.pop_back(); + partition_delete_queue.splice(std::end(partition_delete_queue), partitions, std::prev(std::end(partitions))); } // drop unused partitions diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 7a876a3ed95..31732f25c1a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -187,6 +187,8 @@ public: CacheStorage(const AttributeTypes & attributes_structure_, const std::string & path_, const size_t max_partitions_count_, const size_t partition_max_size_); + ~CacheStorage(); + template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; From 3ccf10f7b23e6720b5a8bffccb1fde25918fe040 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 
12 Jan 2020 17:23:32 +0300 Subject: [PATCH 0040/2229] params --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 119 +++++++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 45 +++++-- 2 files changed, 113 insertions(+), 51 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 9e9d166dcae..6a10dc471aa 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -63,14 +63,14 @@ namespace ErrorCodes namespace { - constexpr size_t SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры - constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; // TODO: в параметры + constexpr size_t DEFAULT_SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t DEFAULT_FILE_SIZE = 4 * 1024 * 1024 * 1024ULL; + constexpr size_t DEFAULT_PARTITIONS_COUNT = 16; + constexpr size_t DEFAULT_READ_BUFFER_SIZE = 16 * DEFAULT_SSD_BLOCK_SIZE; - constexpr size_t FILE_SIZE_IN_BLOCKS = 2; - constexpr size_t FILE_SIZE_TO_PREALLOCATE = FILE_SIZE_IN_BLOCKS * SSD_BLOCK_SIZE; + constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры - constexpr size_t READ_BUFFER_SIZE_BLOCKS = 16; // TODO: в параметры static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -158,9 +158,18 @@ void CachePartition::Index::setBlockId(const size_t block_id) } CachePartition::CachePartition( - const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure_, - const std::string & dir_path, const size_t file_id_, const size_t max_size_) - : file_id(file_id_), max_size(max_size_), path(dir_path + "/" + std::to_string(file_id)) + const AttributeUnderlyingType & /* key_structure */, + const std::vector & attributes_structure_, + const std::string & dir_path, + const size_t file_id_, + const size_t max_size_, + const size_t block_size_, + const size_t read_buffer_size_) + : file_id(file_id_) + , max_size(max_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) + , path(dir_path + "/" + std::to_string(file_id)) , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; @@ -181,7 +190,7 @@ CachePartition::CachePartition( throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); } - if (preallocateDiskSpace(fd, FILE_SIZE_TO_PREALLOCATE) < 0) + if (preallocateDiskSpace(fd, max_size * block_size) < 0) { throwFromErrnoWithPath("Cannot preallocate space for the file " + filename, filename, ErrorCodes::CANNOT_ALLOCATE_MEMORY); } @@ -198,7 +207,7 @@ size_t CachePartition::appendBlock( const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); - if (current_file_block_id >= FILE_SIZE_IN_BLOCKS) + if (current_file_block_id >= max_size) return 0; if (new_attributes.size() != attributes_structure.size()) @@ -208,9 +217,9 @@ size_t CachePartition::appendBlock( auto & ids_buffer = std::get>(keys_buffer.values); if (!memory) - memory.emplace(SSD_BLOCK_SIZE, BUFFER_ALIGNMENT); + memory.emplace(block_size, BUFFER_ALIGNMENT); if (!write_buffer) - write_buffer.emplace(memory->data(), SSD_BLOCK_SIZE); + write_buffer.emplace(memory->data(), block_size); for (size_t index = begin; index < ids.size();) { @@ -272,9 +281,9 @@ size_t 
CachePartition::appendBlock( ids_buffer.push_back(ids[index]); ++index; } - else if (current_file_block_id < FILE_SIZE_IN_BLOCKS) + else if (current_file_block_id < max_size) { - write_buffer.emplace(memory->data(), SSD_BLOCK_SIZE); + write_buffer.emplace(memory->data(), block_size); } else { @@ -303,14 +312,14 @@ void CachePartition::flush() write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; write_request.aio.aio_buf = reinterpret_cast(memory->data()); - write_request.aio.aio_nbytes = SSD_BLOCK_SIZE; - write_request.aio.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; + write_request.aio.aio_nbytes = block_size; + write_request.aio.aio_offset = block_size * current_file_block_id; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; write_request.aio_buf = reinterpret_cast(memory->data()); - write_request.aio_nbytes = SSD_BLOCK_SIZE; - write_request.aio_offset = SSD_BLOCK_SIZE * current_file_block_id; + write_request.aio_nbytes = block_size; + write_request.aio_offset = block_size * current_file_block_id; #endif Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); @@ -405,7 +414,7 @@ void CachePartition::getValueFromMemory( { const size_t offset = index.getAddressInBlock(); - ReadBufferFromMemory read_buffer(memory->data() + offset, SSD_BLOCK_SIZE - offset); + ReadBufferFromMemory read_buffer(memory->data() + offset, block_size - offset); readValueFromBuffer(attribute_index, out[i], read_buffer); } } @@ -428,7 +437,7 @@ void CachePartition::getValueFromStorage( /// sort by (block_id, offset_in_block) std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(SSD_BLOCK_SIZE * READ_BUFFER_SIZE_BLOCKS, BUFFER_ALIGNMENT); + DB::Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT); std::vector requests; std::vector pointers; @@ -439,7 +448,7 @@ void CachePartition::getValueFromStorage( for (size_t i = 0; i < index_to_out.size(); ++i) { if (!requests.empty() && - static_cast(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE) + static_cast(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * block_size) { blocks_to_indices.back().push_back(i); continue; @@ -457,9 +466,9 @@ void CachePartition::getValueFromStorage( #else request.aio_lio_opcode = IOCB_CMD_PREAD; request.aio_fildes = fd; - request.aio_buf = reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (requests.size() % READ_BUFFER_SIZE_BLOCKS); - request.aio_nbytes = SSD_BLOCK_SIZE; - request.aio_offset = index_to_out[i].first.getBlockId() * SSD_BLOCK_SIZE; + request.aio_buf = reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size); + request.aio_nbytes = block_size; + request.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #endif requests.push_back(request); @@ -468,7 +477,7 @@ void CachePartition::getValueFromStorage( blocks_to_indices.back().push_back(i); } - AIOContext aio_context(READ_BUFFER_SIZE_BLOCKS); + AIOContext aio_context(read_buffer_size); std::vector processed(requests.size(), false); std::vector events(requests.size()); @@ -496,7 +505,7 @@ void CachePartition::getValueFromStorage( const auto & [file_index, out_index] = index_to_out[idx]; DB::ReadBufferFromMemory buf( reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), - SSD_BLOCK_SIZE - file_index.getAddressInBlock()); + 
block_size - file_index.getAddressInBlock()); readValueFromBuffer(attribute_index, out[out_index], buf); } @@ -507,7 +516,7 @@ void CachePartition::getValueFromStorage( ++to_pop; /// add new io tasks - const size_t new_tasks_count = std::min(READ_BUFFER_SIZE_BLOCKS - (to_push - to_pop), requests.size() - to_push); + const size_t new_tasks_count = std::min(read_buffer_size - (to_push - to_pop), requests.size() - to_push); size_t pushed = 0; while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) @@ -616,12 +625,18 @@ void CachePartition::remove() } CacheStorage::CacheStorage( - const AttributeTypes & attributes_structure_, const std::string & path_, - const size_t max_partitions_count_, const size_t partition_max_size_) + const AttributeTypes & attributes_structure_, + const std::string & path_, + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_) : attributes_structure(attributes_structure_) , path(path_) - , partition_max_size(partition_max_size_) , max_partitions_count(max_partitions_count_) + , partition_size(partition_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) , log(&Poco::Logger::get("CacheStorage")) { } @@ -678,7 +693,8 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector( AttributeUnderlyingType::utUInt64, attributes_structure, path, - (partitions.empty() ? 0 : partitions.front()->getId() + 1), partition_max_size)); + (partitions.empty() ? 0 : partitions.front()->getId() + 1), + partition_size, block_size, read_buffer_size)); } } @@ -948,15 +964,21 @@ SSDCacheDictionary::SSDCacheDictionary( DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, const std::string & path_, - const size_t partition_max_size_) + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_) : name(name_) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) , dict_lifetime(dict_lifetime_) , path(path_) - , partition_max_size(partition_max_size_) + , max_partitions_count(max_partitions_count_) + , partition_size(partition_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, 2, partition_max_size) + path, max_partitions_count, partition_size, block_size, read_buffer_size) , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) @@ -1240,17 +1262,36 @@ void registerDictionarySSDCache(DictionaryFactory & factory) "for a dictionary of layout 'range_hashed'", ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; - const auto max_partition_size = config.getInt(layout_prefix + ".ssd.max_partition_size"); - if (max_partition_size == 0) - throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + const auto max_partitions_count = config.getInt(layout_prefix + ".ssd.max_partitions_count", DEFAULT_PARTITIONS_COUNT); + if (max_partitions_count <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; + + const auto block_size = config.getInt(layout_prefix + ".ssd.block_size", DEFAULT_SSD_BLOCK_SIZE); + if (block_size <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' 
cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; + + const auto partition_size = config.getInt64(layout_prefix + ".ssd.partition_size", DEFAULT_FILE_SIZE); + if (partition_size <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; + if (partition_size % block_size != 0) + throw Exception{name + ": partition_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; + + const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); + if (read_buffer_size <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + if (read_buffer_size % block_size != 0) + throw Exception{name + ": read_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; const auto path = config.getString(layout_prefix + ".ssd.path"); if (path.empty()) - throw Exception{name + ": dictionary of layout 'cache' cannot have empty path", + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have empty path", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, path, max_partition_size); + return std::make_unique( + name, dict_struct, std::move(source_ptr), dict_lifetime, path, + max_partitions_count, partition_size / block_size, block_size, read_buffer_size / block_size); }; factory.registerLayout("ssd", create_layout, false); } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 31732f25c1a..44b11421da4 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -84,8 +84,13 @@ public: using Key = IDictionary::Key; CachePartition( - const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, - const std::string & dir_path, const size_t file_id, const size_t max_size); + const AttributeUnderlyingType & key_structure, + const std::vector & attributes_structure, + const std::string & dir_path, + const size_t file_id, + const size_t max_size, + const size_t block_size, + const size_t read_buffer_size); ~CachePartition(); @@ -147,9 +152,11 @@ private: template void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; - size_t file_id; - size_t max_size; - std::string path; + const size_t file_id; + const size_t max_size; + const size_t block_size; + const size_t read_buffer_size; + const std::string path; mutable std::shared_mutex rw_lock; @@ -184,8 +191,13 @@ public: using AttributeTypes = std::vector; using Key = CachePartition::Key; - CacheStorage(const AttributeTypes & attributes_structure_, const std::string & path_, - const size_t max_partitions_count_, const size_t partition_max_size_); + CacheStorage( + const AttributeTypes & attributes_structure_, + const std::string & path_, + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_); ~CacheStorage(); @@ -223,8 +235,10 @@ private: const AttributeTypes attributes_structure; const std::string path; - const size_t partition_max_size; const size_t max_partitions_count; + const size_t partition_size; + const size_t block_size; + const size_t read_buffer_size; mutable std::shared_mutex rw_lock; std::list partitions; @@ -256,7 +270,10 @@ public: 
DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, const std::string & path, - const size_t partition_max_size_); + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_); std::string getName() const override { return name; } @@ -273,13 +290,14 @@ public: size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } - double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / partition_max_size; } // TODO: fix + double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / partition_size; } // TODO: fix bool supportUpdates() const override { return false; } std::shared_ptr clone() const override { - return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, partition_max_size); + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, + max_partitions_count, partition_size, block_size, read_buffer_size); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -398,7 +416,10 @@ private: const DictionaryLifetime dict_lifetime; const std::string path; - const size_t partition_max_size; + const size_t max_partitions_count; + const size_t partition_size; + const size_t block_size; + const size_t read_buffer_size; std::map attribute_index_by_name; std::vector null_values; From 1ee46c0690ade29b13ac1b4f83d83ac67c523cf2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 18 Jan 2020 14:47:58 +0300 Subject: [PATCH 0041/2229] select * from ssd_dictionary --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 42 ++++++++++++++++--- dbms/src/Dictionaries/SSDCacheDictionary.h | 11 +++-- .../01053_ssd_dictionary.reference | 7 ++++ .../0_stateless/01053_ssd_dictionary.sql | 26 +++++++++++- 4 files changed, 73 insertions(+), 13 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 6a10dc471aa..38d0d22be75 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" #include #include @@ -382,19 +383,16 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray { if (found[i]) { - Poco::Logger::get("kek").information("FOUND BEFORE:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); indices[i].setNotExists(); } else if (auto it = key_to_index_and_metadata.find(ids[i]); it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) { - Poco::Logger::get("kek").information(std::to_string(file_id) + " FOUND BEFORE: inmemory: " + std::to_string(it->second.index.inMemory()) + " " + std::to_string(it->second.index.getBlockId()) + " " + std::to_string(it->second.index.getAddressInBlock())); indices[i] = it->second.index; found[i] = true; } else { - Poco::Logger::get("kek").information("NF:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); indices[i].setNotExists(); } } @@ -499,7 +497,9 @@ void CachePartition::getValueFromStorage( const auto request_id = events[i].data; const auto & request = requests[request_id]; if (events[i].res != static_cast(request.aio_nbytes)) - throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". 
returned: " + std::to_string(events[i].res), ErrorCodes::AIO_WRITE_ERROR); + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + + "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + + "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_WRITE_ERROR); for (const size_t idx : blocks_to_indices[request_id]) { const auto & [file_index, out_index] = index_to_out[idx]; @@ -602,7 +602,6 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayTypesecond.metadata.expiresAt() <= now) { - Poco::Logger::get("kek").information("NF:: Key :" + std::to_string(ids[i]) + " i: " + std::to_string(i)); out[i] = HAS_NOT_FOUND; } else @@ -617,6 +616,17 @@ size_t CachePartition::getId() const return file_id; } +PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point now) const +{ + const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; + + PaddedPODArray array; + for (const auto & [key, index_and_metadata] : key_to_index_and_metadata) + if (!index_and_metadata.metadata.isDefault() && index_and_metadata.metadata.expiresAt() > now) + array.push_back(key); + return array; +} + void CachePartition::remove() { std::unique_lock lock(rw_lock); @@ -894,6 +904,22 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector CacheStorage::getCachedIds() const +{ + PaddedPODArray array; + + const auto now = std::chrono::system_clock::now(); + + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + { + const auto cached_in_partition = partition->getCachedIds(now); + array.insert(std::begin(cached_in_partition), std::end(cached_in_partition)); + } + + return array; +} + void CacheStorage::collectGarbage() { // add partitions to queue @@ -1175,6 +1201,12 @@ void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray; + return std::make_shared(shared_from_this(), max_block_size, storage.getCachedIds(), column_names); +} + size_t SSDCacheDictionary::getAttributeIndex(const std::string & attr_name) const { auto it = attribute_index_by_name.find(attr_name); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 44b11421da4..789cf4efd5d 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -140,6 +140,8 @@ public: size_t getId() const; + PaddedPODArray getCachedIds(const std::chrono::system_clock::time_point now) const; + private: template void getValueFromMemory( @@ -220,6 +222,8 @@ public: PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, const DictionaryLifetime lifetime, const std::vector & null_values); + PaddedPODArray getCachedIds() const; + //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; std::exception_ptr getLastException() const { return last_update_exception; } @@ -388,12 +392,7 @@ public: void has(const PaddedPODArray & ids, PaddedPODArray & out) const override; - BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override // TODO - { - UNUSED(column_names); - UNUSED(max_block_size); - return nullptr; - } + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; private: size_t getAttributeIndex(const std::string & attr_name) const; diff --git 
a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index dda210b162b..88b6db86f94 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -1,3 +1,10 @@ +TEST_SMALL +-100 +-1 +6 +0 +5 6 7 +1 100 -100 UPDATE DICTIONARY 118 VALUE FROM DISK diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 64e95c868da..da32cee0009 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -17,6 +17,28 @@ ORDER BY id; INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100), (2, 3, 4), (5, 6, 7), (10, 9, 8); +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +CREATE DICTIONARY database_for_dict.ssd_dict +( + id UInt64, + a UInt64 DEFAULT 0, + b Int32 DEFAULT -1 +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1000 MAX 2000) +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d')); + +SELECT 'TEST_SMALL'; +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(4)); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(5)); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(6)); + +SELECT * FROM database_for_dict.ssd_dict; +DROP DICTIONARY database_for_dict.ssd_dict; + DROP TABLE IF EXISTS database_for_dict.keys_table; CREATE TABLE database_for_dict.keys_table @@ -44,7 +66,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/1')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d')); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -103,7 +125,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(MAX_PARTITION_SIZE 1000 PATH '/mnt/disk4/clickhouse_dicts/2')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/2d')); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 From cac20e3c9f0e73355ae1fd2217674a39857ad27b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 18 Jan 2020 16:21:07 +0300 Subject: [PATCH 0042/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 4 ++-- dbms/src/Dictionaries/SSDCacheDictionary.h | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 38d0d22be75..bdb8c430a74 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -435,7 +435,7 @@ void CachePartition::getValueFromStorage( /// sort by (block_id, offset_in_block) std::sort(std::begin(index_to_out), std::end(index_to_out)); - DB::Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT); + Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT); std::vector requests; std::vector pointers; @@ -503,7 +503,7 @@ void CachePartition::getValueFromStorage( for (const size_t idx : 
blocks_to_indices[request_id]) { const auto & [file_index, out_index] = index_to_out[idx]; - DB::ReadBufferFromMemory buf( + ReadBufferFromMemory buf( reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), block_size - file_index.getAddressInBlock()); readValueFromBuffer(attribute_index, out[out_index], buf); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 789cf4efd5d..8f6a6c363b9 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -175,8 +175,8 @@ private: Attribute keys_buffer; const std::vector attributes_structure; - std::optional> memory; - std::optional write_buffer; + std::optional> memory; + std::optional write_buffer; size_t current_memory_block_id = 0; size_t current_file_block_id = 0; @@ -213,7 +213,6 @@ public: // getString(); - void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; From af1161a8e17256e0f36aaf5e0f7c1192f612b04c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 18 Jan 2020 20:46:00 +0300 Subject: [PATCH 0043/2229] counters --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 67 ++++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 31 ++++++--- 2 files changed, 76 insertions(+), 22 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index bdb8c430a74..982af2283fc 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -220,7 +220,12 @@ size_t CachePartition::appendBlock( if (!memory) memory.emplace(block_size, BUFFER_ALIGNMENT); if (!write_buffer) + { write_buffer.emplace(memory->data(), block_size); + // codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); + // compressed_buffer.emplace(*write_buffer, codec); + // hashing_buffer.emplace(*compressed_buffer); + } for (size_t index = begin; index < ids.size();) { @@ -616,6 +621,18 @@ size_t CachePartition::getId() const return file_id; } +double CachePartition::getLoadFactor() const +{ + std::shared_lock lock(rw_lock); + return static_cast(current_file_block_id) / max_size; +} + +size_t CachePartition::getElementCount() const +{ + std::shared_lock lock(rw_lock); + return key_to_index_and_metadata.size(); +} + PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point now) const { const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; @@ -665,25 +682,33 @@ void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray found(ids.size(), false); - std::shared_lock lock(rw_lock); - for (auto & partition : partitions) - partition->getValue(attribute_index, ids, out, found, now); + { + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->getValue(attribute_index, ids, out, found, now); - for (size_t i = 0; i < ids.size(); ++i) - if (!found[i]) - not_found[ids[i]].push_back(i); + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); + } + query_count.fetch_add(ids.size(), std::memory_order_relaxed); + hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { - std::shared_lock lock(rw_lock); - for (auto & partition : partitions) - 
partition->has(ids, out, now); + { + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->has(ids, out, now); - for (size_t i = 0; i < ids.size(); ++i) - if (out[i] == HAS_NOT_FOUND) - not_found[ids[i]].push_back(i); + for (size_t i = 0; i < ids.size(); ++i) + if (out[i] == HAS_NOT_FOUND) + not_found[ids[i]].push_back(i); + } + query_count.fetch_add(ids.size(), std::memory_order_relaxed); + hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } template @@ -920,6 +945,24 @@ PaddedPODArray CacheStorage::getCachedIds() const return array; } +double CacheStorage::getLoadFactor() const +{ + double result = 0; + std::shared_lock lock(rw_lock); + for (const auto & partition : partitions) + result += partition->getLoadFactor(); + return result / partitions.size(); +} + +size_t CacheStorage::getElementCount() const +{ + size_t result = 0; + std::shared_lock lock(rw_lock); + for (const auto & partition : partitions) + result += partition->getElementCount(); + return result; +} + void CacheStorage::collectGarbage() { // add partitions to queue diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 8f6a6c363b9..667c2528265 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -16,6 +16,8 @@ #include "IDictionary.h" #include "IDictionarySource.h" #include +#include +#include namespace DB { @@ -142,6 +144,10 @@ public: PaddedPODArray getCachedIds(const std::chrono::system_clock::time_point now) const; + double getLoadFactor() const; + + size_t getElementCount() const; + private: template void getValueFromMemory( @@ -177,11 +183,12 @@ private: std::optional> memory; std::optional write_buffer; + // std::optional compressed_buffer; + // std::optional hashing_buffer; + // CompressionCodecPtr codec; size_t current_memory_block_id = 0; size_t current_file_block_id = 0; - - // mutable std::atomic element_count{0}; }; using CachePartitionPtr = std::shared_ptr; @@ -229,6 +236,14 @@ public: const std::string & getPath() const { return path; } + size_t getQueryCount() const { return query_count.load(std::memory_order_relaxed); } + + size_t getHitCount() const { return hit_count.load(std::memory_order_acquire); } + + size_t getElementCount() const; + + double getLoadFactor() const; + private: CachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); @@ -258,7 +273,6 @@ private: // stats mutable size_t bytes_allocated = 0; - mutable std::atomic element_count{0}; mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; }; @@ -284,16 +298,16 @@ public: size_t getBytesAllocated() const override { return 0; } // TODO: ? 
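The counter pair introduced here uses a deliberate ordering: query_count is bumped with relaxed ordering, hit_count is published with a release store, and getHitCount pairs it with an acquire load. The apparent intent is that any reader observing an updated hit_count also observes the query_count increment that preceded it, so the hit rate's numerator never outruns its denominator. A compact sketch of the same pattern (struct and method names are illustrative):

#include <atomic>
#include <cstddef>

struct HitStatsSketch
{
    std::atomic<size_t> query_count{0};
    std::atomic<size_t> hit_count{0};

    void account(size_t queried, size_t missed)
    {
        // relaxed increment first, then publish hits with release
        query_count.fetch_add(queried, std::memory_order_relaxed);
        hit_count.fetch_add(queried - missed, std::memory_order_release);
    }

    double hitRate() const
    {
        // acquire pairs with the release above
        const auto hits = hit_count.load(std::memory_order_acquire);
        const auto queries = query_count.load(std::memory_order_relaxed);
        return queries ? static_cast<double>(hits) / queries : 0.0;
    }
};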
- size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } + size_t getQueryCount() const override { return storage.getQueryCount(); } double getHitRate() const override { - return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); + return static_cast(storage.getHitCount()) / storage.getQueryCount(); } - size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } + size_t getElementCount() const override { return storage.getElementCount(); } - double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / partition_size; } // TODO: fix + double getLoadFactor() const override { return storage.getLoadFactor(); } bool supportUpdates() const override { return false; } @@ -425,9 +439,6 @@ private: Logger * const log; mutable size_t bytes_allocated = 0; - mutable std::atomic element_count{0}; - mutable std::atomic hit_count{0}; - mutable std::atomic query_count{0}; }; } From ac7b8400f37314c05e08f5d1b0b7d7d686c0f1ae Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 19 Jan 2020 11:49:40 +0300 Subject: [PATCH 0044/2229] write buffer config --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 62 ++++++++++++------- dbms/src/Dictionaries/SSDCacheDictionary.h | 24 ++++--- .../0_stateless/01053_ssd_dictionary.sql | 4 +- 3 files changed, 57 insertions(+), 33 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 982af2283fc..8088025bd35 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -68,11 +68,10 @@ namespace constexpr size_t DEFAULT_FILE_SIZE = 4 * 1024 * 1024 * 1024ULL; constexpr size_t DEFAULT_PARTITIONS_COUNT = 16; constexpr size_t DEFAULT_READ_BUFFER_SIZE = 16 * DEFAULT_SSD_BLOCK_SIZE; + constexpr size_t DEFAULT_WRITE_BUFFER_SIZE = DEFAULT_SSD_BLOCK_SIZE; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; - constexpr size_t WRITE_BUFFER_SIZE_BLOCKS = 1; // TODO: в параметры - static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -165,11 +164,13 @@ CachePartition::CachePartition( const size_t file_id_, const size_t max_size_, const size_t block_size_, - const size_t read_buffer_size_) + const size_t read_buffer_size_, + const size_t write_buffer_size_) : file_id(file_id_) , max_size(max_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) , path(dir_path + "/" + std::to_string(file_id)) , attributes_structure(attributes_structure_) { @@ -221,7 +222,7 @@ size_t CachePartition::appendBlock( memory.emplace(block_size, BUFFER_ALIGNMENT); if (!write_buffer) { - write_buffer.emplace(memory->data(), block_size); + write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); // codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); // compressed_buffer.emplace(*write_buffer, codec); // hashing_buffer.emplace(*compressed_buffer); @@ -247,7 +248,9 @@ size_t CachePartition::appendBlock( { \ if (sizeof(TYPE) > write_buffer->available()) \ { \ - flush(); \ + write_buffer.reset(); \ + if (++current_memory_block_id == write_buffer_size) \ + flush(); \ flushed = true; \ continue; \ } \ @@ -287,14 +290,14 @@ size_t CachePartition::appendBlock( ids_buffer.push_back(ids[index]); 
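The write-buffer-config commit above turns the single staging block into a window of write_buffer_size blocks inside one allocation: the WriteBuffer is re-pointed at slot current_memory_block_id, and a flush happens only once every slot is full. A self-contained sketch of that block arithmetic, assuming rows never exceed one block:

#include <cassert>
#include <cstddef>
#include <cstring>
#include <vector>

class BlockWindowSketch
{
public:
    BlockWindowSketch(size_t block_size_, size_t blocks_in_buffer_)
        : block_size(block_size_)
        , blocks_in_buffer(blocks_in_buffer_)
        , memory(block_size * blocks_in_buffer) /// stand-in for the aligned Memory region
    {
        pos = memory.data();
        end = pos + block_size; /// write window over block 0
    }

    size_t available() const { return end - pos; }

    /// Append a value into the current block, moving to the next slot when it
    /// does not fit. Returns false once all slots are used: the caller must
    /// flush to disk and retry, mirroring appendBlock's contract.
    bool append(const void * data, size_t size)
    {
        assert(size <= block_size);
        if (size > available())
        {
            if (++current_memory_block_id == blocks_in_buffer)
                return false;
            pos = memory.data() + current_memory_block_id * block_size;
            end = pos + block_size;
        }
        std::memcpy(pos, data, size);
        pos += size;
        return true;
    }

    /// Called after the caller has written all blocks_in_buffer blocks to disk.
    void markFlushed()
    {
        current_memory_block_id = 0;
        pos = memory.data();
        end = pos + block_size;
    }

private:
    const size_t block_size;
    const size_t blocks_in_buffer;
    std::vector<char> memory;
    size_t current_memory_block_id = 0;
    char * pos = nullptr;
    char * end = nullptr;
};

Keeping the flush unit at write_buffer_size blocks lets the partition issue one large aligned write request instead of one per block, which is exactly what the flush() hunk below does with aio_nbytes = block_size * write_buffer_size.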
++index; } - else if (current_file_block_id < max_size) + else if (current_file_block_id < max_size) // next block in write buffer or flushed to ssd { - write_buffer.emplace(memory->data(), block_size); + write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); } - else + else // flushed to ssd, end of current file { memory.reset(); - return index - begin; // End of current file. + return index - begin; } } return ids.size() - begin; @@ -302,7 +305,6 @@ size_t CachePartition::appendBlock( void CachePartition::flush() { - write_buffer.reset(); const auto & ids = std::get>(keys_buffer.values); if (ids.empty()) return; @@ -324,7 +326,7 @@ void CachePartition::flush() write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; write_request.aio_buf = reinterpret_cast(memory->data()); - write_request.aio_nbytes = block_size; + write_request.aio_nbytes = block_size * write_buffer_size; write_request.aio_offset = block_size * current_file_block_id; #endif @@ -367,11 +369,16 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) { - key_to_index_and_metadata[ids[row]].index.setInMemory(false); - key_to_index_and_metadata[ids[row]].index.setBlockId(current_file_block_id); + auto & index = key_to_index_and_metadata[ids[row]].index; + if (index.getInMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. + { + index.setInMemory(false); + index.setBlockId(current_file_block_id + index.getBlockId()); + } } - ++current_file_block_id; + current_file_block_id += write_buffer_size; + current_memory_block_id = 0; /// clear buffer std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); @@ -415,9 +422,9 @@ void CachePartition::getValueFromMemory( const auto & index = indices[i]; if (index.exists() && index.inMemory()) { - const size_t offset = index.getAddressInBlock(); + const size_t offset = index.getBlockId() * block_size + index.getAddressInBlock(); - ReadBufferFromMemory read_buffer(memory->data() + offset, block_size - offset); + ReadBufferFromMemory read_buffer(memory->data() + offset, block_size * write_buffer_size - offset); readValueFromBuffer(attribute_index, out[i], read_buffer); } } @@ -647,7 +654,7 @@ PaddedPODArray CachePartition::getCachedIds(const std::chro void CachePartition::remove() { std::unique_lock lock(rw_lock); - Poco::File(path + BIN_FILE_EXT).remove(); + //Poco::File(path + BIN_FILE_EXT).remove(); //std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); } @@ -657,13 +664,15 @@ CacheStorage::CacheStorage( const size_t max_partitions_count_, const size_t partition_size_, const size_t block_size_, - const size_t read_buffer_size_) + const size_t read_buffer_size_, + const size_t write_buffer_size_) : attributes_structure(attributes_structure_) , path(path_) , max_partitions_count(max_partitions_count_) , partition_size(partition_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) , log(&Poco::Logger::get("CacheStorage")) { } @@ -729,7 +738,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 
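The index commit in flush() above relies on one subtlety: the same key may sit in the buffer twice, and the map already points at its latest copy, so the inMemory flag guarantees each key is rebased exactly once. A short sketch of that rebasing, with a simplified index type:

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

struct IndexSketch
{
    size_t block_id = 0;   /// while in memory: slot inside the write buffer
    bool in_memory = true;
};

/// After the buffer is written out, turn buffer-relative block ids into
/// absolute file block ids. `buffered_ids` may repeat an id; only the first
/// visit (still marked in_memory) performs the rebase.
void commitFlush(
    const std::vector<uint64_t> & buffered_ids,
    std::unordered_map<uint64_t, IndexSketch> & key_to_index,
    size_t & current_file_block_id,
    size_t blocks_flushed)
{
    for (const uint64_t id : buffered_ids)
    {
        auto & index = key_to_index[id];
        if (index.in_memory)
        {
            index.in_memory = false;
            index.block_id += current_file_block_id;
        }
    }
    current_file_block_id += blocks_flushed;
}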
0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size)); + partition_size, block_size, read_buffer_size, write_buffer_size)); } } @@ -1036,7 +1045,8 @@ SSDCacheDictionary::SSDCacheDictionary( const size_t max_partitions_count_, const size_t partition_size_, const size_t block_size_, - const size_t read_buffer_size_) + const size_t read_buffer_size_, + const size_t write_buffer_size_) : name(name_) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) @@ -1046,8 +1056,9 @@ SSDCacheDictionary::SSDCacheDictionary( , partition_size(partition_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, max_partitions_count, partition_size, block_size, read_buffer_size) + path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size) , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) @@ -1358,6 +1369,12 @@ void registerDictionarySSDCache(DictionaryFactory & factory) if (read_buffer_size % block_size != 0) throw Exception{name + ": read_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; + const auto write_buffer_size = config.getInt64(layout_prefix + ".ssd.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE); + if (write_buffer_size <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + if (write_buffer_size % block_size != 0) + throw Exception{name + ": write_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; + const auto path = config.getString(layout_prefix + ".ssd.path"); if (path.empty()) throw Exception{name + ": dictionary of layout 'ssdcache' cannot have empty path", @@ -1366,7 +1383,8 @@ void registerDictionarySSDCache(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( name, dict_struct, std::move(source_ptr), dict_lifetime, path, - max_partitions_count, partition_size / block_size, block_size, read_buffer_size / block_size); + max_partitions_count, partition_size / block_size, block_size, + read_buffer_size / block_size, write_buffer_size / block_size); }; factory.registerLayout("ssd", create_layout, false); } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 667c2528265..a0ebf818090 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -92,7 +92,8 @@ public: const size_t file_id, const size_t max_size, const size_t block_size, - const size_t read_buffer_size); + const size_t read_buffer_size, + const size_t write_buffer_size); ~CachePartition(); @@ -164,6 +165,7 @@ private: const size_t max_size; const size_t block_size; const size_t read_buffer_size; + const size_t write_buffer_size; const std::string path; mutable std::shared_mutex rw_lock; @@ -201,12 +203,13 @@ public: using Key = CachePartition::Key; CacheStorage( - const AttributeTypes & attributes_structure_, - const std::string & path_, - const size_t max_partitions_count_, - const size_t partition_size_, - const size_t block_size_, - const size_t read_buffer_size_); + const AttributeTypes & attributes_structure, + const std::string & path, + const size_t max_partitions_count, + const size_t partition_size, + const size_t block_size, 
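registerDictionarySSDCache above validates the byte-denominated settings and hands block counts to the dictionary. A hedged sketch of the same arithmetic with plain integers in place of the Poco configuration (the exception type here is illustrative):

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>

struct SSDLayoutSketch
{
    size_t block_size;          /// bytes
    size_t partition_blocks;    /// partition_size / block_size
    size_t read_buffer_blocks;  /// read_buffer_size / block_size
    size_t write_buffer_blocks; /// write_buffer_size / block_size
};

SSDLayoutSketch parseLayout(int64_t block_size, int64_t partition_size,
                            int64_t read_buffer_size, int64_t write_buffer_size)
{
    if (block_size <= 0)
        throw std::invalid_argument("block_size must be positive");

    const auto check = [&](int64_t value, const std::string & name)
    {
        if (value <= 0)
            throw std::invalid_argument(name + " cannot be 0 (or less)");
        if (value % block_size != 0)
            throw std::invalid_argument(name + " must be a multiple of block_size");
    };
    check(read_buffer_size, "read_buffer_size");
    check(write_buffer_size, "write_buffer_size");

    /// Everything downstream works in whole blocks.
    return {static_cast<size_t>(block_size),
            static_cast<size_t>(partition_size / block_size),
            static_cast<size_t>(read_buffer_size / block_size),
            static_cast<size_t>(write_buffer_size / block_size)};
}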
+ const size_t read_buffer_size, + const size_t write_buffer_size); ~CacheStorage(); @@ -257,6 +260,7 @@ private: const size_t partition_size; const size_t block_size; const size_t read_buffer_size; + const size_t write_buffer_size; mutable std::shared_mutex rw_lock; std::list partitions; @@ -290,7 +294,8 @@ public: const size_t max_partitions_count_, const size_t partition_size_, const size_t block_size_, - const size_t read_buffer_size_); + const size_t read_buffer_size_, + const size_t write_buffer_size_); std::string getName() const override { return name; } @@ -314,7 +319,7 @@ public: std::shared_ptr clone() const override { return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, - max_partitions_count, partition_size, block_size, read_buffer_size); + max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -432,6 +437,7 @@ private: const size_t partition_size; const size_t block_size; const size_t read_buffer_size; + const size_t write_buffer_size; std::map attribute_index_by_name; std::vector null_values; diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index da32cee0009..27036c24630 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -66,7 +66,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096)); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -125,7 +125,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/2d')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024)); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 From 9bf80448735455aeb978d4d41d7e58648681bb23 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 19 Jan 2020 12:51:19 +0300 Subject: [PATCH 0045/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index a0ebf818090..f25fa0f99a3 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -22,9 +22,6 @@ namespace DB { -class SSDCacheDictionary; -class CacheStorage; - using AttributeValueVariant = std::variant< UInt8, UInt16, From de2b39b5875db2b18a8265a2146f9139f2a9d7a1 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 19 Jan 2020 13:54:36 +0300 Subject: [PATCH 0046/2229] debug --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 8088025bd35..793a6db169c 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -370,7 +370,7 @@ void CachePartition::flush() 
for (size_t row = 0; row < ids.size(); ++row) { auto & index = key_to_index_and_metadata[ids[row]].index; - if (index.getInMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. + if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. { index.setInMemory(false); index.setBlockId(current_file_block_id + index.getBlockId()); @@ -491,6 +491,8 @@ void CachePartition::getValueFromStorage( std::vector processed(requests.size(), false); std::vector events(requests.size()); + for (auto & event : events) + event.res = -1; // TODO: remove size_t to_push = 0; size_t to_pop = 0; @@ -501,7 +503,7 @@ void CachePartition::getValueFromStorage( while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) < 0) { if (errno != EINTR) - throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); } for (size_t i = to_pop; i < to_pop + popped; ++i) From c5b1e3c56f9333e36173a9c57552876f41fe00fe Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 20 Jan 2020 20:29:06 +0300 Subject: [PATCH 0047/2229] rocksdb --- .gitmodules | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitmodules b/.gitmodules index 6075b7e9243..ac7d7944616 100644 --- a/.gitmodules +++ b/.gitmodules @@ -134,3 +134,6 @@ [submodule "contrib/libc-headers"] path = contrib/libc-headers url = https://github.com/ClickHouse-Extras/libc-headers.git +[submodule "contrib/rocksdb"] + path = contrib/rocksdb + url = https://github.com/facebook/rocksdb.git From 94b41450e59f135539ebc98b7ca2ae2012dd1552 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 25 Jan 2020 22:23:27 +0300 Subject: [PATCH 0048/2229] fix --- dbms/src/Dictionaries/SSDCacheDictionary.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index f25fa0f99a3..9a69c3ff2f7 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -294,7 +294,9 @@ public: const size_t read_buffer_size_, const size_t write_buffer_size_); - std::string getName() const override { return name; } + const std::string & getDatabase() const override { return name; } + const std::string & getName() const override { return name; } + const std::string & getFullName() const override { return getName(); } std::string getTypeName() const override { return "SSDCache"; } From bbcba746b2e67c1553f488d5a0eb82db5ce58694 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 26 Jan 2020 20:35:39 +0300 Subject: [PATCH 0049/2229] strings basic --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 215 ++++++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 30 +-- .../Functions/FunctionsExternalDictionaries.h | 2 + .../01053_ssd_dictionary.reference | 9 +- .../0_stateless/01053_ssd_dictionary.sql | 16 +- 5 files changed, 184 insertions(+), 88 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 793a6db169c..88fdbd95fff 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -175,7 +175,7 @@ CachePartition::CachePartition( , attributes_structure(attributes_structure_) { keys_buffer.type = 
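The debug commit above fixes the wait loop in getValueFromStorage: the error path now names io_getevents (not io_submit) and uses the matching error code. A Linux-only sketch of that EINTR-retrying wait, going through the raw syscall since glibc does not wrap the kernel AIO ABI:

#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cerrno>
#include <stdexcept>
#include <vector>

long ioGetevents(aio_context_t ctx, long min_nr, long nr, io_event * events)
{
    return syscall(SYS_io_getevents, ctx, min_nr, nr, events, nullptr);
}

/// Block until all requests in [to_pop, to_push) have completed, retrying the
/// syscall when it is interrupted by a signal.
void waitForRequests(aio_context_t ctx, size_t to_pop, size_t to_push, std::vector<io_event> & events)
{
    long popped = 0;
    while (to_pop < to_push
        && (popped = ioGetevents(ctx, to_push - to_pop, to_push - to_pop, &events[to_pop])) < 0)
    {
        if (errno != EINTR)
            throw std::runtime_error("io_getevents: failed to wait for asynchronous IO");
    }
}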
AttributeUnderlyingType::utUInt64; - keys_buffer.values = PaddedPODArray(); + keys_buffer.values = CachePartition::Attribute::Container(); Poco::File directory(dir_path); if (!directory.exists()) @@ -278,9 +278,24 @@ size_t CachePartition::appendBlock( DISPATCH(Float64) #undef DISPATCH - case AttributeUnderlyingType::utString: - // TODO: string support - break; + case AttributeUnderlyingType::utString: + /*{ + LOG_DEBUG(&Poco::Logger::get("kek"), "string write"); + const auto & value = std::get>(attribute.values)[index]; + if (sizeof(UInt64) + value.size() > write_buffer->available()) + { + write_buffer.reset(); + if (++current_memory_block_id == write_buffer_size) + flush(); + flushed = true; + continue; + } + else + { + writeStringBinary(value, *write_buffer); + } + }*/ + break; } } @@ -386,8 +401,8 @@ void CachePartition::flush() template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::vector & found, - std::chrono::system_clock::time_point now) const + ResultArrayType & out, std::vector & found, + std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); @@ -409,13 +424,17 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } } - getValueFromMemory(attribute_index, indices, out); - getValueFromStorage(attribute_index, indices, out); + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + readValueFromBuffer(attribute_index, out, index, buf); + }; + + getValueFromMemory(indices, set_value); + getValueFromStorage(indices, set_value); } -template -void CachePartition::getValueFromMemory( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const +template +void CachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc set) const { for (size_t i = 0; i < indices.size(); ++i) { @@ -425,14 +444,13 @@ void CachePartition::getValueFromMemory( const size_t offset = index.getBlockId() * block_size + index.getAddressInBlock(); ReadBufferFromMemory read_buffer(memory->data() + offset, block_size * write_buffer_size - offset); - readValueFromBuffer(attribute_index, out[i], read_buffer); + set(i, read_buffer); } } } -template -void CachePartition::getValueFromStorage( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const +template +void CachePartition::getValueFromStorage(const PaddedPODArray & indices, SetFunc set) const { std::vector> index_to_out; for (size_t i = 0; i < indices.size(); ++i) @@ -520,7 +538,7 @@ void CachePartition::getValueFromStorage( ReadBufferFromMemory buf( reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), block_size - file_index.getAddressInBlock()); - readValueFromBuffer(attribute_index, out[out_index], buf); + set(i, buf); } processed[request_id] = true; @@ -543,18 +561,18 @@ void CachePartition::getValueFromStorage( } template -void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const +void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const { for (size_t i = 0; i < attribute_index; ++i) { switch (attributes_structure[i]) { #define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - buf.ignore(sizeof(TYPE)); \ - } \ - break; + case AttributeUnderlyingType::ut##TYPE: \ + { \ + buf.ignore(sizeof(TYPE)); \ + } \ + break; DISPATCH(UInt8) DISPATCH(UInt16) @@ -572,39 
+590,26 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst DISPATCH(Float64) #undef DISPATCH - case AttributeUnderlyingType::utString: - // TODO: string support - break; + case AttributeUnderlyingType::utString: + /*{ + size_t size = 0; + readVarUInt(size, buf); + buf.ignore(size); + }*/ + break; } } - switch (attributes_structure[attribute_index]) + //if constexpr (!std::is_same_v) + readBinary(dst[index], buf); + /*else { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - readBinary(dst, buf); \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - // TODO: string support - break; - } + LOG_DEBUG(&Poco::Logger::get("kek"), "string READ"); + UNUSED(index); + size_t size = 0; + readVarUInt(size, buf); + dst.insertData(buf.position(), size); + }*/ } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const @@ -820,7 +825,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); + new_keys.values = CachePartition::Attribute::Container(); CachePartition::Attributes new_attributes; { /// TODO: create attributes from structure @@ -832,7 +837,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); \ + new_attributes.back().values = CachePartition::Attribute::Container(); \ break; DISPATCH(UInt8) @@ -852,7 +857,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); + }*/ break; } } @@ -880,7 +889,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); + std::get>(new_keys.values).push_back(id); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata.emplace_back(); @@ -897,7 +906,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_attributes[i].values); \ + auto & to_values = std::get>(new_attributes[i].values); \ auto & null_value = std::get(null_values[i]); \ to_values.push_back(null_value); \ } \ @@ -920,7 +929,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_attributes[i].values); + auto & null_value = std::get(null_values[i]); + to_values.push_back(null_value); + }*/ break; } } @@ -1004,7 +1017,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ { \ - PaddedPODArray values(column->size()); \ + CachePartition::Attribute::Container values(column->size()); \ const auto raw_data = column->getRawData(); \ memcpy(&values[0], raw_data.data, raw_data.size * sizeof(TYPE)); \ attributes.emplace_back(); \ @@ -1030,7 +1043,18 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( #undef DISPATCH case AttributeUnderlyingType::utString: - // TODO: string support + /*{ + attributes.emplace_back(); + CachePartition::Attribute::Container values(column->size()); + for (size_t j = 0; j < column->size(); ++j) + { + const auto ref = column->getDataAt(j); + values[j].resize(ref.size); + memcpy(values[j].data(), ref.data, ref.size); + } + attributes.back().type = structure[i]; + attributes.back().values = std::move(values); + }*/ break; } } @@ -1180,7 
+1204,7 @@ void SSDCacheDictionary::getItemsNumberImpl( required_ids, [&](const auto id, const auto row, const auto & new_attributes) { for (const size_t out_row : not_found_ids[id]) - out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; + out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const size_t id) { @@ -1198,7 +1222,7 @@ void SSDCacheDictionary::getString(const std::string & attribute_name, const Pad const auto null_value = StringRef{std::get(null_values[index])}; - getItemsString(index, ids, out, [&](const size_t) { return null_value; }); + getItemsStringImpl(index, ids, out, [&](const size_t) { return null_value; }); } void SSDCacheDictionary::getString( @@ -1207,7 +1231,7 @@ void SSDCacheDictionary::getString( const auto index = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); - getItemsString(index, ids, out, [&](const size_t row) { return def->getDataAt(row); }); + getItemsStringImpl(index, ids, out, [&](const size_t row) { return def->getDataAt(row); }); } void SSDCacheDictionary::getString( @@ -1216,17 +1240,78 @@ void SSDCacheDictionary::getString( const auto index = getAttributeIndex(attribute_name); checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); - getItemsString(index, ids, out, [&](const size_t) { return StringRef{def}; }); + getItemsStringImpl(index, ids, out, [&](const size_t) { return StringRef{def}; }); } template -void SSDCacheDictionary::getItemsString(const size_t attribute_index, const PaddedPODArray & ids, +void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const { UNUSED(attribute_index); UNUSED(ids); UNUSED(out); + const auto now = std::chrono::system_clock::now(); + UNUSED(now); UNUSED(get_default); + + return; +/* + std::unordered_map> not_found_ids; + + auto from_cache = ColumnString::create(); + //storage.template getValue(attribute_index, ids, *from_cache, not_found_ids, now); + if (not_found_ids.empty()) + { + out->getChars().resize(from_cache->getChars().size()); + memcpy(out->getChars().data(), from_cache->getChars().data(), from_cache->getChars().size() * sizeof(from_cache->getChars()[0])); + out->getOffsets().resize(from_cache->getOffsets().size()); + memcpy(out->getOffsets().data(), from_cache->getOffsets().data(), from_cache->getOffsets().size() * sizeof(from_cache->getOffsets()[0])); + return; + } + + std::vector required_ids(not_found_ids.size()); + std::transform(std::begin(not_found_ids), std::end(not_found_ids), std::begin(required_ids), [](const auto & pair) { return pair.first; }); + + std::unordered_map update_result; + + storage.update( + source_ptr, + required_ids, + [&](const auto id, const auto row, const auto & new_attributes) + { + update_result[id] = std::get>(new_attributes[attribute_index].values)[row]; + }, + [&](const size_t) {}, + getLifetime(), + null_values); + + LOG_DEBUG(&Poco::Logger::get("log"), "fill data"); + size_t from_cache_counter = 0; + for (size_t row = 0; row < ids.size(); ++row) + { + const auto & id = ids[row]; + auto it = not_found_ids.find(id); + if (it == std::end(not_found_ids)) + { + LOG_DEBUG(&Poco::Logger::get("log"), "fill found " << row << " " << id); + out->insertFrom(*from_cache, from_cache_counter++); + } + else + { + auto it_update = update_result.find(id); + if 
(it_update != std::end(update_result)) + { + LOG_DEBUG(&Poco::Logger::get("log"), "fill update " << row << " " << id); + out->insertData(it_update->second.data(), it_update->second.size()); + } + else + { + LOG_DEBUG(&Poco::Logger::get("log"), "fill default " << row << " " << id); + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); + } + } + }*/ } void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray & out) const @@ -1284,8 +1369,6 @@ AttributeValueVariant SSDCacheDictionary::createAttributeNullValueWithTypeImpl(); bytes_allocated += sizeof(StringRef); - //if (!string_arena) - // string_arena = std::make_unique(); return var_null_value; } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 9a69c3ff2f7..4c8e48ca7b5 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -102,14 +102,12 @@ public: ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const; - // TODO:: getString - void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; struct Attribute { template - using Container = PaddedPODArray; + using Container = std::vector; AttributeUnderlyingType type; std::variant< @@ -134,6 +132,8 @@ public: size_t appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin); + //size_t appendDefaults(); + void flush(); void remove(); @@ -147,16 +147,14 @@ public: size_t getElementCount() const; private: - template - void getValueFromMemory( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; + template + void getValueFromMemory(const PaddedPODArray & indices, SetFunc set) const; + + template + void getValueFromStorage(const PaddedPODArray & indices, SetFunc set) const; template - void getValueFromStorage( - const size_t attribute_index, const PaddedPODArray & indices, ResultArrayType & out) const; - - template - void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; + void readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const; const size_t file_id; const size_t max_size; @@ -211,15 +209,13 @@ public: ~CacheStorage(); template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + using ResultArrayType = CachePartition::ResultArrayType; template void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; - // getString(); - void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; @@ -230,8 +226,6 @@ public: PaddedPODArray getCachedIds() const; - //BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const; - std::exception_ptr getLastException() const { return last_update_exception; } const std::string & getPath() const { return path; } @@ -339,7 +333,7 @@ public: std::exception_ptr getLastException() const override { return storage.getLastException(); } template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + using ResultArrayType = CacheStorage::ResultArrayType; #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const 
PaddedPODArray & ids, ResultArrayType & out) const; @@ -423,7 +417,7 @@ private: void getItemsNumberImpl( const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; template - void getItemsString(const size_t attribute_index, const PaddedPODArray & ids, + void getItemsStringImpl(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; const std::string name; diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 080247170fa..56e2f65c5da 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -313,6 +313,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && + //!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && @@ -499,6 +500,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && + //!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index 88b6db86f94..2e0e18bd97c 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -3,14 +3,19 @@ TEST_SMALL -1 6 0 -5 6 7 -1 100 -100 +database +none +1 100 -100 clickhouse +2 3 4 database +5 6 7 columns UPDATE DICTIONARY 118 VALUE FROM DISK -100 +clickhouse VALUE FROM RAM BUFFER 8 + VALUES FROM DISK AND RAM BUFFER 118 HAS diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 27036c24630..b684a7acc8e 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -11,10 +11,12 @@ CREATE TABLE database_for_dict.table_for_dict id UInt64, a UInt64, b Int32 + --c String ) ENGINE = MergeTree() ORDER BY id; +--INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100, 'clickhouse'), (2, 3, 4, 'database'), (5, 6, 7, 'columns'), (10, 9, 8, ''); INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100), (2, 3, 4), (5, 6, 7), (10, 9, 8); DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; @@ -24,19 +26,22 @@ CREATE DICTIONARY database_for_dict.ssd_dict id UInt64, a UInt64 DEFAULT 0, b Int32 DEFAULT -1 + --c String DEFAULT 'none' ) PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(4)); 
SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(5)); SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(6)); +--SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(2)); +--SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(3)); -SELECT * FROM database_for_dict.ssd_dict; +SELECT * FROM database_for_dict.ssd_dict ORDER BY id; DROP DICTIONARY database_for_dict.ssd_dict; DROP TABLE IF EXISTS database_for_dict.keys_table; @@ -62,6 +67,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict id UInt64, a UInt64 DEFAULT 0, b Int32 DEFAULT -1 + --c String DEFAULT 'none' ) PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) @@ -76,10 +82,16 @@ SELECT 'VALUE FROM DISK'; -- -100 SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); +-- 'clickhouse' +--SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(1)); + SELECT 'VALUE FROM RAM BUFFER'; -- 8 SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(10)); +-- '' +--SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(10)); + SELECT 'VALUES FROM DISK AND RAM BUFFER'; -- 118 SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; From 6803196d9347ff050d75e8fcc55dec0a61d42745 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 27 Jan 2020 22:29:23 +0300 Subject: [PATCH 0050/2229] impr --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 24 +++++++++----------- dbms/src/Dictionaries/SSDCacheDictionary.h | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 88fdbd95fff..a3114c26595 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -424,13 +424,14 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } } - auto set_value = [&](const size_t index, ReadBuffer & buf) + getValueFromMemory(indices, [&](const size_t index, ReadBuffer & buf) { - readValueFromBuffer(attribute_index, out, index, buf); - }; - - getValueFromMemory(indices, set_value); - getValueFromStorage(indices, set_value); + readValueFromBuffer(attribute_index, out[index], buf); + }); + getValueFromStorage(indices, [&](const size_t index, ReadBuffer & buf) + { + readValueFromBuffer(attribute_index, out[index], buf); + }); } template @@ -561,7 +562,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } template -void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const +void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const { for (size_t i = 0; i < attribute_index; ++i) { @@ -569,9 +570,7 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - { \ - buf.ignore(sizeof(TYPE)); \ - } \ + buf.ignore(sizeof(TYPE)); \ break; DISPATCH(UInt8) @@ -601,7 +600,7 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst } //if constexpr (!std::is_same_v) - readBinary(dst[index], buf); + readBinary(dst, buf); /*else { LOG_DEBUG(&Poco::Logger::get("kek"), "string READ"); @@ -1018,8 +1017,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( case AttributeUnderlyingType::ut##TYPE: \ { \ 
CachePartition::Attribute::Container values(column->size()); \ - const auto raw_data = column->getRawData(); \ - memcpy(&values[0], raw_data.data, raw_data.size * sizeof(TYPE)); \ + memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ attributes.emplace_back(); \ attributes.back().type = structure[i]; \ attributes.back().values = std::move(values); \ diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 4c8e48ca7b5..08d0b69c3e3 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -154,7 +154,7 @@ private: void getValueFromStorage(const PaddedPODArray & indices, SetFunc set) const; template - void readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const; + void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; const size_t file_id; const size_t max_size; From 468b7237b242b9b709a7680897c9a39bbb86ef98 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 28 Jan 2020 23:32:41 +0300 Subject: [PATCH 0051/2229] strings support --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 61 ++++++++----------- dbms/src/Dictionaries/SSDCacheDictionary.h | 9 +-- .../Functions/FunctionsExternalDictionaries.h | 4 +- .../0_stateless/01053_ssd_dictionary.sql | 23 ++++--- 4 files changed, 44 insertions(+), 53 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index a3114c26595..ca3ae43bd07 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -279,7 +279,7 @@ size_t CachePartition::appendBlock( #undef DISPATCH case AttributeUnderlyingType::utString: - /*{ + { LOG_DEBUG(&Poco::Logger::get("kek"), "string write"); const auto & value = std::get>(attribute.values)[index]; if (sizeof(UInt64) + value.size() > write_buffer->available()) @@ -294,7 +294,7 @@ size_t CachePartition::appendBlock( { writeStringBinary(value, *write_buffer); } - }*/ + } break; } } @@ -424,18 +424,17 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } } - getValueFromMemory(indices, [&](const size_t index, ReadBuffer & buf) + auto set_value = [&](const size_t index, ReadBuffer & buf) { - readValueFromBuffer(attribute_index, out[index], buf); - }); - getValueFromStorage(indices, [&](const size_t index, ReadBuffer & buf) - { - readValueFromBuffer(attribute_index, out[index], buf); - }); + readValueFromBuffer(attribute_index, out, index, buf); + }; + + getValueFromMemory(indices, set_value); + getValueFromStorage(indices, set_value); } template -void CachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc set) const +void CachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const { for (size_t i = 0; i < indices.size(); ++i) { @@ -451,7 +450,7 @@ void CachePartition::getValueFromMemory(const PaddedPODArray & indices, S } template -void CachePartition::getValueFromStorage(const PaddedPODArray & indices, SetFunc set) const +void CachePartition::getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const { std::vector> index_to_out; for (size_t i = 0; i < indices.size(); ++i) @@ -539,7 +538,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, ReadBufferFromMemory buf( reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), block_size - file_index.getAddressInBlock()); - set(i, buf); 
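The string branch above never splits a value across blocks: if the length prefix plus payload exceeds what is left in the current block, the row restarts in the next one. A standalone sketch of that append path; sizeof(uint64_t) is the same conservative upper bound on the varint prefix that the patch uses:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>

/// Append one length-prefixed string into a fixed block; returns false when
/// the value does not fit and the caller must move on to the next block.
bool appendString(char * block, size_t block_size, size_t & offset, const std::string & value)
{
    if (sizeof(uint64_t) + value.size() > block_size - offset)
        return false;

    /// Encode the length as LEB128, the format writeStringBinary produces.
    uint64_t n = value.size();
    do
    {
        uint8_t byte = n & 0x7f;
        n >>= 7;
        if (n)
            byte |= 0x80;
        block[offset++] = static_cast<char>(byte);
    } while (n);

    std::memcpy(block + offset, value.data(), value.size());
    offset += value.size();
    return true;
}

Refusing to split a row keeps every row readable from a single block, which is what the per-block read requests in getValueFromStorage rely on.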
+ set(out_index, buf); } processed[request_id] = true; @@ -562,7 +561,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } template -void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const +void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const { for (size_t i = 0; i < attribute_index; ++i) { @@ -590,25 +589,24 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst #undef DISPATCH case AttributeUnderlyingType::utString: - /*{ + { size_t size = 0; readVarUInt(size, buf); buf.ignore(size); - }*/ + } break; } } - //if constexpr (!std::is_same_v) - readBinary(dst, buf); - /*else + if constexpr (!std::is_same_v) + readBinary(dst[index], buf); + else { - LOG_DEBUG(&Poco::Logger::get("kek"), "string READ"); UNUSED(index); size_t size = 0; readVarUInt(size, buf); dst.insertData(buf.position(), size); - }*/ + } } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const @@ -856,11 +854,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); - }*/ + } break; } } @@ -928,11 +926,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_attributes[i].values); auto & null_value = std::get(null_values[i]); to_values.push_back(null_value); - }*/ + } break; } } @@ -1041,7 +1039,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( #undef DISPATCH case AttributeUnderlyingType::utString: - /*{ + { attributes.emplace_back(); CachePartition::Attribute::Container values(column->size()); for (size_t j = 0; j < column->size(); ++j) @@ -1052,7 +1050,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( } attributes.back().type = structure[i]; attributes.back().values = std::move(values); - }*/ + } break; } } @@ -1245,19 +1243,12 @@ template void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const { - UNUSED(attribute_index); - UNUSED(ids); - UNUSED(out); const auto now = std::chrono::system_clock::now(); - UNUSED(now); - UNUSED(get_default); - return; -/* std::unordered_map> not_found_ids; auto from_cache = ColumnString::create(); - //storage.template getValue(attribute_index, ids, *from_cache, not_found_ids, now); + storage.getValue(attribute_index, ids, *from_cache, not_found_ids, now); if (not_found_ids.empty()) { out->getChars().resize(from_cache->getChars().size()); @@ -1309,7 +1300,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const out->insertData(to_insert.data, to_insert.size); } } - }*/ + } } void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray & out) const diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 08d0b69c3e3..bd120d65c7a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -95,7 +95,8 @@ public: ~CachePartition(); template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, + std::conditional_t, ColumnString, PaddedPODArray>>; template void getValue(const size_t attribute_index, const PaddedPODArray & ids, @@ -148,13 +149,13 @@ public: private: template - void getValueFromMemory(const 
PaddedPODArray & indices, SetFunc set) const; + void getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const; template - void getValueFromStorage(const PaddedPODArray & indices, SetFunc set) const; + void getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const; template - void readValueFromBuffer(const size_t attribute_index, Out & dst, ReadBuffer & buf) const; + void readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const; const size_t file_id; const size_t max_size; diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index 56e2f65c5da..33e9859bf33 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -313,7 +313,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && - //!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && @@ -500,7 +500,7 @@ private: if (!executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatch(block, arguments, result, dict_ptr) && - //!executeDispatch(block, arguments, result, dict_ptr) && + !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr)) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index b684a7acc8e..273501b280b 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -10,14 +10,13 @@ CREATE TABLE database_for_dict.table_for_dict ( id UInt64, a UInt64, - b Int32 - --c String + b Int32, + c String ) ENGINE = MergeTree() ORDER BY id; ---INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100, 'clickhouse'), (2, 3, 4, 'database'), (5, 6, 7, 'columns'), (10, 9, 8, ''); -INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100), (2, 3, 4), (5, 6, 7), (10, 9, 8); +INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100, 'clickhouse'), (2, 3, 4, 'database'), (5, 6, 7, 'columns'), (10, 9, 8, ''); DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; @@ -25,8 +24,8 @@ CREATE DICTIONARY database_for_dict.ssd_dict ( id UInt64, a UInt64 DEFAULT 0, - b Int32 DEFAULT -1 - --c String DEFAULT 'none' + b Int32 DEFAULT -1, + c String DEFAULT 'none' ) PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) @@ -38,8 +37,8 @@ SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(4)); SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(5)); SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(6)); ---SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(2)); ---SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(3)); +SELECT 
dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(2)); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(3)); SELECT * FROM database_for_dict.ssd_dict ORDER BY id; DROP DICTIONARY database_for_dict.ssd_dict; @@ -66,8 +65,8 @@ CREATE DICTIONARY database_for_dict.ssd_dict ( id UInt64, a UInt64 DEFAULT 0, - b Int32 DEFAULT -1 - --c String DEFAULT 'none' + b Int32 DEFAULT -1, + c String DEFAULT 'none' ) PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) @@ -83,14 +82,14 @@ SELECT 'VALUE FROM DISK'; SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); -- 'clickhouse' ---SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(1)); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(1)); SELECT 'VALUE FROM RAM BUFFER'; -- 8 SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(10)); -- '' ---SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(10)); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(10)); SELECT 'VALUES FROM DISK AND RAM BUFFER'; -- 118 From 80acedb8be98d279c57620b64976dacb0441d3c8 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 29 Jan 2020 21:51:09 +0300 Subject: [PATCH 0052/2229] getString --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 70 ++++++++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 18 +++-- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index ca3ae43bd07..52f72ee8d3a 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -403,6 +403,33 @@ template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + ignoreFromBufferToIndex(attribute_index, buf); + readBinary(out[index], buf); + }; + + getImpl(ids, set_value, found, now); +} + +void CachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, + ColumnString * out, std::vector & found, std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t, ReadBuffer & buf) + { + ignoreFromBufferToIndex(attribute_index, buf); + size_t size = 0; + readVarUInt(size, buf); + out->insertData(buf.position(), size); + }; + + getImpl(ids, set_value, found, now); +} + +template +void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found, + std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); @@ -424,13 +451,8 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } } - auto set_value = [&](const size_t index, ReadBuffer & buf) - { - readValueFromBuffer(attribute_index, out, index, buf); - }; - - getValueFromMemory(indices, set_value); - getValueFromStorage(indices, set_value); + getValueFromMemory(indices, set); + getValueFromStorage(indices, set); } template @@ -560,8 +582,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } } -template -void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const +void CachePartition::ignoreFromBufferToIndex(const size_t attribute_index, ReadBuffer & buf) const { for (size_t 
i = 0; i < attribute_index; ++i) { @@ -597,16 +618,6 @@ void CachePartition::readValueFromBuffer(const size_t attribute_index, Out & dst break; } } - - if constexpr (!std::is_same_v) - readBinary(dst[index], buf); - else - { - UNUSED(index); - size_t size = 0; - readVarUInt(size, buf); - dst.insertData(buf.position(), size); - } } void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const @@ -708,6 +719,25 @@ void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, + ColumnString * out, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const +{ + std::vector found(ids.size(), false); + + { + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->getString(attribute_index, ids, out, found, now); + + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); + } + query_count.fetch_add(ids.size(), std::memory_order_relaxed); + hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); +} + void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { @@ -1248,7 +1278,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const std::unordered_map> not_found_ids; auto from_cache = ColumnString::create(); - storage.getValue(attribute_index, ids, *from_cache, not_found_ids, now); + storage.getString(attribute_index, ids, from_cache.get(), not_found_ids, now); if (not_found_ids.empty()) { out->getChars().resize(from_cache->getChars().size()); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index bd120d65c7a..f2bcd73aeea 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -95,14 +95,17 @@ public: ~CachePartition(); template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, - std::conditional_t, ColumnString, PaddedPODArray>>; + using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; template void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const; + void getString(const size_t attribute_index, const PaddedPODArray & ids, + ColumnString * out, std::vector & found, + std::chrono::system_clock::time_point now) const; + void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; struct Attribute @@ -148,14 +151,17 @@ public: size_t getElementCount() const; private: + template + void getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found, + std::chrono::system_clock::time_point now) const; + template void getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const; template void getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const; - template - void readValueFromBuffer(const size_t attribute_index, Out & dst, const size_t index, ReadBuffer & buf) const; + void ignoreFromBufferToIndex(const size_t attribute_index, ReadBuffer & buf) const; const size_t file_id; const size_t max_size; @@ -217,6 +223,10 @@ public: ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; + void getString(const size_t attribute_index, const PaddedPODArray & ids, + ColumnString * out, std::unordered_map> 
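ignoreFromBufferToIndex above works because a row stores its attributes back to back: reading attribute N means skipping N values first, fixed-width ones by sizeof and strings by their varint length prefix. A self-contained sketch of that skip over a raw byte span, with the LEB128 decoding (the format readVarUInt reads) inlined:

#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

enum class AttrType { UInt64, Int32, String }; /// tiny stand-in for AttributeUnderlyingType

/// Decode a LEB128 varint, advancing `pos`.
static uint64_t readVarint(const uint8_t *& pos, const uint8_t * end)
{
    uint64_t value = 0;
    for (int shift = 0; shift < 64; shift += 7)
    {
        if (pos == end)
            throw std::runtime_error("unexpected end of row");
        const uint8_t byte = *pos++;
        value |= static_cast<uint64_t>(byte & 0x7f) << shift;
        if (!(byte & 0x80))
            return value;
    }
    throw std::runtime_error("varint too long");
}

/// Skip everything before `attribute_index`, mirroring ignoreFromBufferToIndex.
const uint8_t * skipToAttribute(
    const uint8_t * pos, const uint8_t * end,
    const std::vector<AttrType> & structure, size_t attribute_index)
{
    for (size_t i = 0; i < attribute_index; ++i)
    {
        switch (structure[i])
        {
            case AttrType::UInt64: pos += sizeof(uint64_t); break;
            case AttrType::Int32:  pos += sizeof(int32_t);  break;
            case AttrType::String: pos += readVarint(pos, end); break; /// length-prefixed bytes
        }
        if (pos > end)
            throw std::runtime_error("row truncated");
    }
    return pos;
}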
& not_found, + std::chrono::system_clock::time_point now) const; + void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; From 98e54aaae0b20c51665873650cbc0b6b9a798745 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 1 Feb 2020 13:12:35 +0300 Subject: [PATCH 0053/2229] string support fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 55 +++++++++++++------- dbms/src/Dictionaries/SSDCacheDictionary.h | 6 +-- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 52f72ee8d3a..fbede378413 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -205,8 +205,24 @@ CachePartition::~CachePartition() ::close(fd); } +size_t CachePartition::appendDefaults( + const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin) +{ + std::unique_lock lock(rw_lock); + + const auto & ids = std::get>(new_keys.values); + for (size_t index = begin; index < ids.size(); ++index) + { + auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; + index_and_metadata.metadata = metadata[index]; + index_and_metadata.metadata.setDefault(); + } + + return ids.size() - begin; +} + size_t CachePartition::appendBlock( - const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) + const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); if (current_file_block_id >= max_size) @@ -401,8 +417,8 @@ void CachePartition::flush() template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::vector & found, - std::chrono::system_clock::time_point now) const + ResultArrayType & out, std::vector & found, + std::chrono::system_clock::time_point now) const { auto set_value = [&](const size_t index, ReadBuffer & buf) { @@ -414,14 +430,17 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } void CachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, - ColumnString * out, std::vector & found, std::chrono::system_clock::time_point now) const + StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::chrono::system_clock::time_point now) const { - auto set_value = [&](const size_t, ReadBuffer & buf) + auto set_value = [&](const size_t index, ReadBuffer & buf) { ignoreFromBufferToIndex(attribute_index, buf); size_t size = 0; readVarUInt(size, buf); - out->insertData(buf.position(), size); + char * string_ptr = arena.alloc(size); + memcpy(string_ptr, buf.position(), size); + refs[index].data = string_ptr; + refs[index].size = size; }; getImpl(ids, set_value, found, now); @@ -429,7 +448,7 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra template void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found, - std::chrono::system_clock::time_point now) const + std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); @@ -440,7 +459,7 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, indices[i].setNotExists(); } else if (auto it = key_to_index_and_metadata.find(ids[i]); - it != std::end(key_to_index_and_metadata) && 
it->second.metadata.expiresAt() > now) + it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) { indices[i] = it->second.index; found[i] = true; @@ -720,15 +739,15 @@ void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ColumnString * out, std::unordered_map> & not_found, - std::chrono::system_clock::time_point now) const + StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, + std::chrono::system_clock::time_point now) const { std::vector found(ids.size(), false); { std::shared_lock lock(rw_lock); for (auto & partition : partitions) - partition->getString(attribute_index, ids, out, found, now); + partition->getString(attribute_index, ids, refs, arena, found, now); for (size_t i = 0; i < ids.size(); ++i) if (!found[i]) @@ -1277,14 +1296,13 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const std::unordered_map> not_found_ids; - auto from_cache = ColumnString::create(); - storage.getString(attribute_index, ids, from_cache.get(), not_found_ids, now); + StringRefs refs(ids.size()); + ArenaWithFreeLists string_arena; + storage.getString(attribute_index, ids, refs, string_arena, not_found_ids, now); if (not_found_ids.empty()) { - out->getChars().resize(from_cache->getChars().size()); - memcpy(out->getChars().data(), from_cache->getChars().data(), from_cache->getChars().size() * sizeof(from_cache->getChars()[0])); - out->getOffsets().resize(from_cache->getOffsets().size()); - memcpy(out->getOffsets().data(), from_cache->getOffsets().data(), from_cache->getOffsets().size() * sizeof(from_cache->getOffsets()[0])); + for (size_t row = 0; row < ids.size(); ++row) + out->insertData(refs[row].data, refs[row].size); return; } @@ -1305,7 +1323,6 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const null_values); LOG_DEBUG(&Poco::Logger::get("log"), "fill data"); - size_t from_cache_counter = 0; for (size_t row = 0; row < ids.size(); ++row) { const auto & id = ids[row]; @@ -1313,7 +1330,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const if (it == std::end(not_found_ids)) { LOG_DEBUG(&Poco::Logger::get("log"), "fill found " << row << " " << id); - out->insertFrom(*from_cache, from_cache_counter++); + out->insertData(refs[row].data, refs[row].size); } else { diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index f2bcd73aeea..6a04d574bea 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -103,7 +103,7 @@ public: std::chrono::system_clock::time_point now) const; void getString(const size_t attribute_index, const PaddedPODArray & ids, - ColumnString * out, std::vector & found, + StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::chrono::system_clock::time_point now) const; void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; @@ -136,7 +136,7 @@ public: size_t appendBlock(const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin); - //size_t appendDefaults(); + size_t appendDefaults(const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin); void flush(); @@ -224,7 +224,7 @@ public: std::chrono::system_clock::time_point now) const; void getString(const size_t attribute_index, const PaddedPODArray & ids, - ColumnString * out, std::unordered_map> & not_found, + 
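// The patch above swaps the ColumnString out-parameter for StringRefs plus an arena.
// A standalone sketch of that pattern (std::deque<std::string> stands in for
// ArenaWithFreeLists; the only point is that the bytes are copied out of the transient
// read buffer into storage that outlives it, leaving behind cheap (ptr, size) refs):
#include <cstring>
#include <deque>
#include <string>
#include <string_view>
#include <vector>

struct ArenaSketch
{
    std::deque<std::string> chunks; // deque growth never moves already-allocated chunks
    char * alloc(size_t size) { return chunks.emplace_back(size, '\0').data(); }
};

// Called once per found row; `payload` is only valid for the duration of the call,
// exactly like ReadBuffer::position() in the code above.
void setStringValue(size_t index, std::string_view payload,
                    ArenaSketch & arena, std::vector<std::string_view> & refs)
{
    char * copy = arena.alloc(payload.size());
    std::memcpy(copy, payload.data(), payload.size());
    refs[index] = std::string_view(copy, payload.size());
}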
StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; void has(const PaddedPODArray & ids, ResultArrayType & out, From 21577c3395ed4238adcf676497cc52eb1d8be121 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 1 Feb 2020 21:13:43 +0300 Subject: [PATCH 0054/2229] default --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 227 ++++++++---------- dbms/src/Dictionaries/SSDCacheDictionary.h | 20 +- .../01053_ssd_dictionary.reference | 3 +- .../0_stateless/01053_ssd_dictionary.sql | 5 +- 4 files changed, 110 insertions(+), 145 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index fbede378413..468f9860be4 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -415,9 +415,9 @@ void CachePartition::flush() std::visit([](auto & attr) { attr.clear(); }, keys_buffer.values); } -template +template void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::vector & found, + ResultArrayType & out, std::vector & found, GetDefault & get_default, std::chrono::system_clock::time_point now) const { auto set_value = [&](const size_t index, ReadBuffer & buf) @@ -426,11 +426,17 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray readBinary(out[index], buf); }; - getImpl(ids, set_value, found, now); + auto set_default = [&](const size_t index) + { + out[index] = get_default(index); + }; + + getImpl(ids, set_value, set_default, found, now); } void CachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, - StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::chrono::system_clock::time_point now) const + StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::vector & default_ids, + std::chrono::system_clock::time_point now) const { auto set_value = [&](const size_t index, ReadBuffer & buf) { @@ -443,12 +449,17 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra refs[index].size = size; }; - getImpl(ids, set_value, found, now); + auto set_default = [&](const size_t index) + { + default_ids.push_back(index); + }; + + getImpl(ids, set_value, set_default, found, now); } -template -void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found, - std::chrono::system_clock::time_point now) const +template +void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, SetDefault & set_default, + std::vector & found, std::chrono::system_clock::time_point now) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); @@ -461,7 +472,13 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, else if (auto it = key_to_index_and_metadata.find(ids[i]); it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) { - indices[i] = it->second.index; + if (unlikely(it->second.metadata.isDefault())) + { + indices[i].setNotExists(); + set_default(i); + } + else + indices[i] = it->second.index; found[i] = true; } else @@ -718,41 +735,43 @@ CacheStorage::~CacheStorage() collectGarbage(); } -template +template void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, - std::chrono::system_clock::time_point now) const + GetDefault & get_default, 
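// Patch 0054 above threads a second callback through getImpl: an index entry can now be
// a real value, a cached "default" marker (a negative hit that needs no disk read), or
// absent/expired. A standalone sketch of that three-way dispatch per row (simplified
// entry type, not the packed Index/Metadata of the real code):
#include <chrono>
#include <cstdint>
#include <unordered_map>
#include <vector>

using Clock = std::chrono::system_clock;

struct EntrySketch
{
    uint64_t block_index = 0;
    Clock::time_point expires_at;
    bool is_default = false;
};

template <typename OnValue, typename OnDefault>
void dispatchRow(const std::unordered_map<uint64_t, EntrySketch> & index,
                 uint64_t id, size_t row, Clock::time_point now,
                 OnValue && on_value, OnDefault && on_default, std::vector<char> & found)
{
    auto it = index.find(id);
    if (it == index.end() || it->second.expires_at <= now)
        return;                                    // miss: the caller reports it as not found
    if (it->second.is_default)
        on_default(row);                           // negative cache hit: skip the storage read
    else
        on_value(row, it->second.block_index);     // schedule the block read
    found[row] = 1;
}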
std::chrono::system_clock::time_point now) const { std::vector found(ids.size(), false); { std::shared_lock lock(rw_lock); for (auto & partition : partitions) - partition->getValue(attribute_index, ids, out, found, now); - - for (size_t i = 0; i < ids.size(); ++i) - if (!found[i]) - not_found[ids[i]].push_back(i); + partition->getValue(attribute_index, ids, out, found, get_default, now); } + + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); + query_count.fetch_add(ids.size(), std::memory_order_relaxed); hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } void CacheStorage::getString(const size_t attribute_index, const PaddedPODArray & ids, StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, - std::chrono::system_clock::time_point now) const + std::vector & default_ids, std::chrono::system_clock::time_point now) const { std::vector found(ids.size(), false); { std::shared_lock lock(rw_lock); for (auto & partition : partitions) - partition->getString(attribute_index, ids, refs, arena, found, now); - - for (size_t i = 0; i < ids.size(); ++i) - if (!found[i]) - not_found[ids[i]].push_back(i); + partition->getString(attribute_index, ids, refs, arena, found, default_ids, now); } + + for (size_t i = 0; i < ids.size(); ++i) + if (!found[i]) + not_found[ids[i]].push_back(i); + query_count.fetch_add(ids.size(), std::memory_order_relaxed); hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } @@ -776,7 +795,7 @@ void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, - const DictionaryLifetime lifetime, const std::vector & null_values) + const DictionaryLifetime lifetime) { auto append_block = [this](const CachePartition::Attribute & new_keys, const CachePartition::Attributes & new_attributes, const PaddedPODArray & metadata) @@ -866,52 +885,30 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & metadata) + { + size_t inserted = 0; + while (inserted < metadata.size()) + { + if (!partitions.empty()) + inserted += partitions.front()->appendDefaults(new_keys, metadata, inserted); + if (inserted < metadata.size()) + { + partitions.emplace_front(std::make_unique( + AttributeUnderlyingType::utUInt64, attributes_structure, path, + (partitions.empty() ? 
0 : partitions.front()->getId() + 1), + partition_size, block_size, read_buffer_size, write_buffer_size)); + } + } + collectGarbage(); + }; + + size_t not_found_num = 0, found_num = 0; /// Check which ids have not been found and require setting null_value CachePartition::Attribute new_keys; new_keys.type = AttributeUnderlyingType::utUInt64; new_keys.values = CachePartition::Attribute::Container(); - CachePartition::Attributes new_attributes; - { - /// TODO: create attributes from structure - for (const auto & attribute_type : attributes_structure) - { - switch (attribute_type) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - new_attributes.emplace_back(); \ - new_attributes.back().type = attribute_type; \ - new_attributes.back().values = CachePartition::Attribute::Container(); \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - { - new_attributes.emplace_back(); - new_attributes.back().type = attribute_type; - new_attributes.back().values = CachePartition::Attribute::Container(); - } - break; - } - } - } PaddedPODArray metadata; @@ -942,48 +939,6 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_attributes[i].values); \ - auto & null_value = std::get(null_values[i]); \ - to_values.push_back(null_value); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - { - auto & to_values = std::get>(new_attributes[i].values); - auto & null_value = std::get(null_values[i]); - to_values.push_back(null_value); - } - break; - } - } - /// inform caller that the cell has not been found on_id_not_found(id); } @@ -991,7 +946,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector> not_found_ids; - storage.getValue(attribute_index, ids, out, not_found_ids, now); + storage.getValue(attribute_index, ids, out, not_found_ids, get_default, now); if (not_found_ids.empty()) return; @@ -1256,8 +1211,7 @@ void SSDCacheDictionary::getItemsNumberImpl( for (const size_t row : not_found_ids[id]) out[row] = get_default(row); }, - getLifetime(), - null_values); + getLifetime()); } void SSDCacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray & ids, ColumnString * out) const @@ -1298,11 +1252,24 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const StringRefs refs(ids.size()); ArenaWithFreeLists string_arena; - storage.getString(attribute_index, ids, refs, string_arena, not_found_ids, now); + std::vector default_rows; + storage.getString(attribute_index, ids, refs, string_arena, not_found_ids, default_rows, now); + std::sort(std::begin(default_rows), std::end(default_rows)); + if (not_found_ids.empty()) { + size_t default_index = 0; for (size_t row = 0; row < ids.size(); ++row) - out->insertData(refs[row].data, refs[row].size); + { + if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row)) + { + auto to_insert = get_default(row); + 
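// The string path above records default rows in callback order, sorts them once, and
// then consumes them with a single cursor while rows are emitted in order. A runnable
// standalone sketch of that merge (hypothetical data, not the dictionary code itself):
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> cached = {"alpha", "", "gamma", "", "epsilon"};
    std::vector<size_t> default_rows = {3, 1}; // filled in callback order, not row order
    std::sort(default_rows.begin(), default_rows.end());

    size_t default_index = 0;
    for (size_t row = 0; row < cached.size(); ++row)
    {
        // O(1) test per row: the next pending default is always default_rows[default_index]
        if (default_index != default_rows.size() && default_rows[default_index] == row)
        {
            std::printf("row %zu: <default>\n", row);
            ++default_index;
        }
        else
            std::printf("row %zu: %s\n", row, cached[row].c_str());
    }
    return 0;
}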
out->insertData(to_insert.data, to_insert.size); + ++default_index; + } + else + out->insertData(refs[row].data, refs[row].size); + } return; } @@ -1319,33 +1286,30 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const update_result[id] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const size_t) {}, - getLifetime(), - null_values); + getLifetime()); - LOG_DEBUG(&Poco::Logger::get("log"), "fill data"); + size_t default_index = 0; for (size_t row = 0; row < ids.size(); ++row) { const auto & id = ids[row]; - auto it = not_found_ids.find(id); - if (it == std::end(not_found_ids)) + if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row)) + { + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); + ++default_index; + } + else if (auto it = not_found_ids.find(id); it == std::end(not_found_ids)) { - LOG_DEBUG(&Poco::Logger::get("log"), "fill found " << row << " " << id); out->insertData(refs[row].data, refs[row].size); } + else if (auto it_update = update_result.find(id); it_update != std::end(update_result)) + { + out->insertData(it_update->second.data(), it_update->second.size()); + } else { - auto it_update = update_result.find(id); - if (it_update != std::end(update_result)) - { - LOG_DEBUG(&Poco::Logger::get("log"), "fill update " << row << " " << id); - out->insertData(it_update->second.data(), it_update->second.size()); - } - else - { - LOG_DEBUG(&Poco::Logger::get("log"), "fill default " << row << " " << id); - auto to_insert = get_default(row); - out->insertData(to_insert.data, to_insert.size); - } + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); } } } @@ -1374,8 +1338,7 @@ void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; - template + template void getValue(const size_t attribute_index, const PaddedPODArray & ids, - ResultArrayType & out, std::vector & found, + ResultArrayType & out, std::vector & found, GetDefault & get_default, std::chrono::system_clock::time_point now) const; void getString(const size_t attribute_index, const PaddedPODArray & ids, StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, - std::chrono::system_clock::time_point now) const; + std::vector & default_ids, std::chrono::system_clock::time_point now) const; void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; @@ -151,9 +151,9 @@ public: size_t getElementCount() const; private: - template - void getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found, - std::chrono::system_clock::time_point now) const; + template + void getImpl(const PaddedPODArray & ids, SetFunc & set, SetDefault & set_default, + std::vector & found, std::chrono::system_clock::time_point now) const; template void getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const; @@ -218,14 +218,14 @@ public: template using ResultArrayType = CachePartition::ResultArrayType; - template + template void getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, - std::chrono::system_clock::time_point now) const; + GetDefault & get_default, std::chrono::system_clock::time_point now) const; void getString(const size_t attribute_index, const PaddedPODArray & ids, StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, - 
std::chrono::system_clock::time_point now) const; + std::vector & default_ids, std::chrono::system_clock::time_point now) const; void has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const; @@ -233,7 +233,7 @@ public: template void update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, - const DictionaryLifetime lifetime, const std::vector & null_values); + const DictionaryLifetime lifetime); PaddedPODArray getCachedIds() const; diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index 2e0e18bd97c..8035fc015f1 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -24,7 +24,8 @@ HAS 5 10 VALUES NOT FROM TABLE -0 -1 +0 -1 none +0 -1 none DUPLICATE KEYS 1 -100 2 4 diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 273501b280b..7311d389470 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -100,8 +100,9 @@ SELECT 'HAS'; SELECT id FROM database_for_dict.keys_table WHERE dictHas('database_for_dict.ssd_dict', toUInt64(id)) ORDER BY id; SELECT 'VALUES NOT FROM TABLE'; --- 0 -1 -SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)); +-- 0 -1 none +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)), dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(1000000)); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(1000000)), dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1000000)), dictGetString('database_for_dict.ssd_dict', 'c', toUInt64(1000000)); SELECT 'DUPLICATE KEYS'; SELECT arrayJoin([1, 2, 3, 3, 2, 1]) AS id, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(id)); From d0cc94f0156776a73cc9ce990da84b986260664a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 1 Feb 2020 21:19:39 +0300 Subject: [PATCH 0055/2229] upd test --- .../tests/queries/0_stateless/01053_ssd_dictionary.reference | 5 ++++- dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index 8035fc015f1..3fd425ceebb 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -4,10 +4,13 @@ TEST_SMALL 6 0 database -none +a 1 100 -100 clickhouse 2 3 4 database +3 0 -1 a +4 0 -1 a 5 6 7 columns +6 0 -1 a UPDATE DICTIONARY 118 VALUE FROM DISK diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 7311d389470..9f027cd2039 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -17,6 +17,9 @@ ENGINE = MergeTree() ORDER BY id; INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100, 'clickhouse'), (2, 3, 4, 'database'), (5, 6, 7, 'columns'), (10, 9, 8, ''); +INSERT INTO 
database_for_dict.table_for_dict SELECT number, 0, -1, 'a' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 370; +INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'b' FROM system.numbers LIMIT 370, 370; +INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'c' FROM system.numbers LIMIT 700, 370; DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; From 0fb0239383945049ebe332c5ee96ab241d982099 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 22 Mar 2020 16:23:13 +0300 Subject: [PATCH 0056/2229] store keys --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 21 ++++++++++++++++--- dbms/src/Dictionaries/SSDCacheDictionary.h | 2 +- dbms/src/Parsers/ASTDictionary.cpp | 2 +- .../0_stateless/01053_ssd_dictionary.sql | 6 +++--- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 468f9860be4..7dfad530f6c 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -254,8 +254,22 @@ size_t CachePartition::appendBlock( bool flushed = false; + if (sizeof(UInt64) > write_buffer->available()) + { + write_buffer.reset(); + if (++current_memory_block_id == write_buffer_size) + flush(); + flushed = true; + } + else + { + writeBinary(ids[index], *write_buffer); + } + for (const auto & attribute : new_attributes) { + if (flushed) + break; // TODO: rework this via columns + getDataAt switch (attribute.type) { @@ -422,7 +436,7 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray { auto set_value = [&](const size_t index, ReadBuffer & buf) { - ignoreFromBufferToIndex(attribute_index, buf); + ignoreFromBufferToAttributeIndex(attribute_index, buf); readBinary(out[index], buf); }; @@ -440,7 +454,7 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra { auto set_value = [&](const size_t index, ReadBuffer & buf) { - ignoreFromBufferToIndex(attribute_index, buf); + ignoreFromBufferToAttributeIndex(attribute_index, buf); size_t size = 0; readVarUInt(size, buf); char * string_ptr = arena.alloc(size); @@ -618,8 +632,9 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } } -void CachePartition::ignoreFromBufferToIndex(const size_t attribute_index, ReadBuffer & buf) const +void CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const { + buf.ignore(sizeof(UInt64)); for (size_t i = 0; i < attribute_index; ++i) { switch (attributes_structure[i]) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 7082339a400..849c64daf64 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -161,7 +161,7 @@ private: template void getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const; - void ignoreFromBufferToIndex(const size_t attribute_index, ReadBuffer & buf) const; + void ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const; const size_t file_id; const size_t max_size; diff --git a/dbms/src/Parsers/ASTDictionary.cpp b/dbms/src/Parsers/ASTDictionary.cpp index 2ac4cb84aca..a6b22e21640 100644 --- a/dbms/src/Parsers/ASTDictionary.cpp +++ b/dbms/src/Parsers/ASTDictionary.cpp @@ -96,7 +96,7 @@ void ASTDictionaryLayout::formatImpl(const FormatSettings & settings, << (settings.hilite ?
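// After patch 0056 every serialized row starts with its own UInt64 key, so a reader
// seeking attribute N skips the key first, then attributes 0..N-1. A standalone sketch
// of that walk over a fixed/variable-size mix (the real code uses ReadBuffer and a
// varint length prefix for strings; both are simplified to plain 8-byte fields here):
#include <cstddef>
#include <cstdint>
#include <cstring>

struct BufSketch
{
    const char * pos;
    void ignore(size_t n) { pos += n; }
    uint64_t readUInt64() { uint64_t v; std::memcpy(&v, pos, sizeof(v)); pos += sizeof(v); return v; }
};

enum class TypeSketch { FixedUInt64, String };

void skipToAttribute(BufSketch & buf, const TypeSketch * types, size_t attribute_index)
{
    buf.ignore(sizeof(uint64_t)); // the key stored in front of the row since this patch
    for (size_t i = 0; i < attribute_index; ++i)
    {
        if (types[i] == TypeSketch::FixedUInt64)
            buf.ignore(sizeof(uint64_t));
        else
            buf.ignore(buf.readUInt64()); // read the length prefix, then skip the bytes
    }
}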
hilite_none : "") << " "; - parameter->second->formatImpl(settings, state, frame); + parameter.second->formatImpl(settings, state, frame); first = false; } settings.ostr << ")"; diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 9f027cd2039..13033eb2189 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -33,7 +33,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/0d')); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); @@ -74,7 +74,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096)); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096)); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -140,7 +140,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/mnt/disk4/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024)); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024)); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 From bcea5b26d7947803d3930c9ae6fa2d9dec2aaf0d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 22 Mar 2020 17:17:42 +0300 Subject: [PATCH 0057/2229] small fix --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 7dfad530f6c..e8b24de72ef 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -235,7 +235,7 @@ size_t CachePartition::appendBlock( auto & ids_buffer = std::get>(keys_buffer.values); if (!memory) - memory.emplace(block_size, BUFFER_ALIGNMENT); + memory.emplace(block_size * write_buffer_size, BUFFER_ALIGNMENT); if (!write_buffer) { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); From 9e61702b950e9f922b9c33f770dde22065445f17 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 30 Mar 2020 10:12:12 +0300 Subject: [PATCH 0058/2229] one file --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 167 ++++++++++++++++-- dbms/src/Dictionaries/SSDCacheDictionary.h | 3 + .../01053_ssd_dictionary.reference | 5 +- .../0_stateless/01053_ssd_dictionary.sql | 14 +- 4 files changed, 163 insertions(+), 26 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index e8b24de72ef..87431b3d728 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -49,17 +49,18 @@ namespace DB namespace ErrorCodes { - extern const int 
TYPE_MISMATCH; + extern const int AIO_READ_ERROR; + extern const int AIO_WRITE_ERROR; extern const int BAD_ARGUMENTS; - extern const int UNSUPPORTED_METHOD; + extern const int CANNOT_FSYNC; + extern const int CANNOT_IO_GETEVENTS; + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_OPEN_FILE; + extern const int FILE_DOESNT_EXIST; extern const int LOGICAL_ERROR; extern const int TOO_SMALL_BUFFER_SIZE; - extern const int FILE_DOESNT_EXIST; - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_IO_SUBMIT; - extern const int CANNOT_IO_GETEVENTS; - extern const int AIO_WRITE_ERROR; - extern const int CANNOT_FSYNC; + extern const int TYPE_MISMATCH; + extern const int UNSUPPORTED_METHOD; } namespace @@ -71,6 +72,7 @@ namespace constexpr size_t DEFAULT_WRITE_BUFFER_SIZE = DEFAULT_SSD_BLOCK_SIZE; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; @@ -225,8 +227,8 @@ size_t CachePartition::appendBlock( const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); - if (current_file_block_id >= max_size) - return 0; + //if (current_file_block_id >= max_size) + // return 0; if (new_attributes.size() != attributes_structure.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; @@ -239,6 +241,9 @@ size_t CachePartition::appendBlock( if (!write_buffer) { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); + uint32_t tmp = 0; + write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); + keys_in_block = 0; // codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); // compressed_buffer.emplace(*write_buffer, codec); // hashing_buffer.emplace(*compressed_buffer); @@ -279,6 +284,7 @@ size_t CachePartition::appendBlock( if (sizeof(TYPE) > write_buffer->available()) \ { \ write_buffer.reset(); \ + std::memcpy(memory->data() + block_size * current_memory_block_id, &keys_in_block, sizeof(keys_in_block)); /* set count */ \ if (++current_memory_block_id == write_buffer_size) \ flush(); \ flushed = true; \ @@ -310,11 +316,12 @@ size_t CachePartition::appendBlock( case AttributeUnderlyingType::utString: { - LOG_DEBUG(&Poco::Logger::get("kek"), "string write"); + //LOG_DEBUG(&Poco::Logger::get("kek"), "string write"); const auto & value = std::get>(attribute.values)[index]; if (sizeof(UInt64) + value.size() > write_buffer->available()) { write_buffer.reset(); + std::memcpy(memory->data() + block_size * current_memory_block_id, &keys_in_block, sizeof(keys_in_block)); // set count if (++current_memory_block_id == write_buffer_size) flush(); flushed = true; @@ -334,22 +341,33 @@ size_t CachePartition::appendBlock( key_to_index_and_metadata[ids[index]] = index_and_metadata; ids_buffer.push_back(ids[index]); ++index; + ++keys_in_block; } - else if (current_file_block_id < max_size) // next block in write buffer or flushed to ssd + else //if (current_file_block_id < max_size) // next block in write buffer or flushed to ssd { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); + uint32_t tmp = 0; + write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); + keys_in_block = 0; } - else // flushed to ssd, end of current file + /*else // flushed to 
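// Patch 0058 above frames every block with a 4-byte key counter: a zero placeholder is
// written when the block is opened and the real count is memcpy'd over it when the block
// is sealed, so a later scan can walk a block without any external index. A standalone
// sketch of that framing (raw byte buffer instead of the real WriteBuffer):
#include <cstdint>
#include <cstring>
#include <vector>

struct BlockSketch
{
    std::vector<char> bytes;
    uint32_t keys_in_block = 0;
    size_t write_pos = sizeof(uint32_t); // skip the count placeholder, as the code above does

    explicit BlockSketch(size_t block_size) : bytes(block_size, 0) {}

    bool append(const char * row, size_t size)
    {
        if (size > bytes.size() - write_pos)
            return false; // caller seals this block and opens the next one
        std::memcpy(bytes.data() + write_pos, row, size);
        write_pos += size;
        ++keys_in_block;
        return true;
    }

    void seal() { std::memcpy(bytes.data(), &keys_in_block, sizeof(keys_in_block)); }
};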
ssd, end of current file { + //write_buffer.emplace(memory->data() + current_memory_block_id * block_size + BLOCK_SPECIAL_FIELDS_SIZE, block_size - BLOCK_SPECIAL_FIELDS_SIZE); + keys_in_block = 0; + //clearOldestBlocks(); memory.reset(); return index - begin; - } + }*/ } return ids.size() - begin; } void CachePartition::flush() { + if (current_file_block_id >= max_size) { + clearOldestBlocks(); + } + const auto & ids = std::get>(keys_buffer.values); if (ids.empty()) return; @@ -539,6 +557,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT); + // TODO: merge requests std::vector requests; std::vector pointers; std::vector> blocks_to_indices; @@ -603,7 +622,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, if (events[i].res != static_cast(request.aio_nbytes)) throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + - "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_WRITE_ERROR); + "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_READ_ERROR); for (const size_t idx : blocks_to_indices[request_id]) { const auto & [file_index, out_index] = index_to_out[idx]; @@ -632,6 +651,124 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } } +void CachePartition::clearOldestBlocks() +{ + Poco::Logger::get("GC").information("GC clear -----------------"); + // write_buffer_size, because we need to erase the whole buffer. + Memory read_buffer_memory(block_size * write_buffer_size, BUFFER_ALIGNMENT); + + iocb request{}; +#if defined(__FreeBSD__) + request.aio.aio_lio_opcode = LIO_READ; + request.aio.aio_fildes = fd; + request.aio.aio_buf = reinterpret_cast(reinterpret_cast(read_buffer_memory.data())); + request.aio.aio_nbytes = block_size * write_buffer_size; + request.aio.aio_offset = (current_file_block_id % max_size) * block_size; + request.aio_data = 0; +#else + request.aio_lio_opcode = IOCB_CMD_PREAD; + request.aio_fildes = fd; + request.aio_buf = reinterpret_cast(read_buffer_memory.data()); + request.aio_nbytes = block_size * write_buffer_size; + request.aio_offset = (current_file_block_id % max_size) * block_size; + request.aio_data = 0; +#endif + + { + iocb* request_ptr = &request; + io_event event{}; + AIOContext aio_context(1); + + if (io_submit(aio_context.ctx, 1, &request_ptr) != 1) + { + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + } + + if (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) != 1) + { + throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + } + + if (event.res != static_cast(request.aio_nbytes)) + { + throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ". 
" + + "aio_nbytes=" + std::to_string(request.aio_nbytes) + + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR); + } + } + + std::vector keys; + keys.reserve(write_buffer_size); + + // TODO: писать кол-во значений + for (size_t i = 0; i < write_buffer_size; ++i) + { + ReadBufferFromMemory read_buffer(read_buffer_memory.data() + i * block_size, block_size); + uint32_t keys_in_current_block = 0; + readBinary(keys_in_current_block, read_buffer); + Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); + + for (uint32_t j = 0; j < keys_in_current_block; ++j) + { + //Poco::Logger::get("GC").information(std::to_string(j) + " " + std::to_string(read_buffer.offset())); + keys.emplace_back(); + readBinary(keys.back(), read_buffer); + + for (size_t attr = 0; attr < attributes_structure.size(); ++attr) + { + + switch (attributes_structure[attr]) + { + #define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + read_buffer.ignore(sizeof(TYPE)); \ + //Poco::Logger::get("GC").information("read TYPE");\ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) + #undef DISPATCH + + case AttributeUnderlyingType::utString: + { + //Poco::Logger::get("GC").information("read string"); + size_t size = 0; + readVarUInt(size, read_buffer); + //Poco::Logger::get("GC").information("read string " + std::to_string(size)); + read_buffer.ignore(size); + } + break; + } + } + } + } + + const size_t start_block = current_file_block_id % max_size; + const size_t finish_block = start_block + block_size * write_buffer_size; + for (const auto& key : keys) + { + auto it = key_to_index_and_metadata.find(key); + if (it != std::end(key_to_index_and_metadata)) { + size_t block_id = it->second.index.getBlockId(); + if (start_block <= block_id && block_id < finish_block) { + key_to_index_and_metadata.erase(it); + } + } + } +} + void CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const { buf.ignore(sizeof(UInt64)); diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 7082339a400..849c64daf64 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -138,6 +138,8 @@ public: size_t appendDefaults(const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin); + void clearOldestBlocks(); + void flush(); void remove(); @@ -187,6 +189,7 @@ private: std::optional> memory; std::optional write_buffer; + uint32_t keys_in_block = 0; // std::optional compressed_buffer; // std::optional hashing_buffer; // CompressionCodecPtr codec; diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference index 3fd425ceebb..d78ab31f8d9 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.reference @@ -22,10 +22,7 @@ VALUE FROM RAM BUFFER VALUES FROM DISK AND RAM BUFFER 118 HAS -1 -2 -5 -10 +1006 VALUES NOT FROM TABLE 0 -1 none 0 -1 none diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index 
13033eb2189..b8dd9158b50 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -18,8 +18,8 @@ ORDER BY id; INSERT INTO database_for_dict.table_for_dict VALUES (1, 100, -100, 'clickhouse'), (2, 3, 4, 'database'), (5, 6, 7, 'columns'), (10, 9, 8, ''); INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'a' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 370; -INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'b' FROM system.numbers LIMIT 370, 370; -INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'c' FROM system.numbers LIMIT 700, 370; +INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'b' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 370, 370; +INSERT INTO database_for_dict.table_for_dict SELECT number, 0, -1, 'c' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 700, 370; DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; @@ -55,11 +55,11 @@ CREATE TABLE database_for_dict.keys_table ENGINE = StripeLog(); INSERT INTO database_for_dict.keys_table VALUES (1); -INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370; +INSERT INTO database_for_dict.keys_table SELECT 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 370; INSERT INTO database_for_dict.keys_table VALUES (2); -INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 370, 370; +INSERT INTO database_for_dict.keys_table SELECT 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 370, 370; INSERT INTO database_for_dict.keys_table VALUES (5); -INSERT INTO database_for_dict.keys_table SELECT intHash64(number) FROM system.numbers LIMIT 700, 370; +INSERT INTO database_for_dict.keys_table SELECT 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 700, 370; INSERT INTO database_for_dict.keys_table VALUES (10); DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; @@ -99,8 +99,8 @@ SELECT 'VALUES FROM DISK AND RAM BUFFER'; SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', toUInt64(id))) FROM database_for_dict.keys_table; SELECT 'HAS'; --- 1 2 5 10 -SELECT id FROM database_for_dict.keys_table WHERE dictHas('database_for_dict.ssd_dict', toUInt64(id)) ORDER BY id; +-- 1006 +SELECT count() FROM database_for_dict.keys_table WHERE dictHas('database_for_dict.ssd_dict', toUInt64(id)); SELECT 'VALUES NOT FROM TABLE'; -- 0 -1 none From c7a8063e751568fc3b688c0bff1cb7826827da66 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 4 Apr 2020 15:44:16 +0300 Subject: [PATCH 0059/2229] lru --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 50 +++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 67 +++++++++++++++++++- 2 files changed, 94 insertions(+), 23 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 87431b3d728..735f9a806f8 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -174,6 +174,7 @@ CachePartition::CachePartition( , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) , path(dir_path + "/" + std::to_string(file_id)) + , key_to_index_and_metadata(100000) , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; @@ -215,9 +216,11 @@ size_t CachePartition::appendDefaults( const auto & ids = std::get>(new_keys.values); for (size_t index = begin; index < 
ids.size(); ++index) { - auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; + //auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; + IndexAndMetadata index_and_metadata; index_and_metadata.metadata = metadata[index]; index_and_metadata.metadata.setDefault(); + key_to_index_and_metadata.set(ids[index], index_and_metadata); } return ids.size() - begin; @@ -338,7 +341,8 @@ size_t CachePartition::appendBlock( if (!flushed) { - key_to_index_and_metadata[ids[index]] = index_and_metadata; + //key_to_index_and_metadata[ids[index]] = index_and_metadata; + key_to_index_and_metadata.set(ids[index], index_and_metadata); ids_buffer.push_back(ids[index]); ++index; ++keys_in_block; @@ -432,11 +436,17 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) { - auto & index = key_to_index_and_metadata[ids[row]].index; - if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. - { - index.setInMemory(false); - index.setBlockId(current_file_block_id + index.getBlockId()); + IndexAndMetadata index_and_metadata; + if (key_to_index_and_metadata.get(ids[row], index_and_metadata)) { + auto & index = index_and_metadata.index; + if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. + { + index.setInMemory(false); + index.setBlockId(current_file_block_id + index.getBlockId()); + } + key_to_index_and_metadata.set(ids[row], index_and_metadata); + } else { + // Key was evicted from cache. } } @@ -497,20 +507,20 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { + IndexAndMetadata index_and_metadata; if (found[i]) { indices[i].setNotExists(); } - else if (auto it = key_to_index_and_metadata.find(ids[i]); - it != std::end(key_to_index_and_metadata) && it->second.metadata.expiresAt() > now) + else if (key_to_index_and_metadata.get(ids[i], index_and_metadata) && index_and_metadata.metadata.expiresAt() > now) { - if (unlikely(it->second.metadata.isDefault())) + if (unlikely(index_and_metadata.metadata.isDefault())) { indices[i].setNotExists(); set_default(i); } else - indices[i] = it->second.index; + indices[i] = index_and_metadata.index; found[i] = true; } else @@ -722,7 +732,6 @@ void CachePartition::clearOldestBlocks() #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ read_buffer.ignore(sizeof(TYPE)); \ - //Poco::Logger::get("GC").information("read TYPE");\ break; DISPATCH(UInt8) @@ -759,11 +768,11 @@ void CachePartition::clearOldestBlocks() const size_t finish_block = start_block + block_size * write_buffer_size; for (const auto& key : keys) { - auto it = key_to_index_and_metadata.find(key); - if (it != std::end(key_to_index_and_metadata)) { - size_t block_id = it->second.index.getBlockId(); + IndexAndMetadata index_and_metadata; + if (key_to_index_and_metadata.get(key, index_and_metadata)) { + size_t block_id = index_and_metadata.index.getBlockId(); if (start_block <= block_id && block_id < finish_block) { - key_to_index_and_metadata.erase(it); + key_to_index_and_metadata.erase(key); } } } @@ -813,15 +822,14 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayTypesecond.metadata.expiresAt() <= now) + IndexAndMetadata index_and_metadata; + if (!key_to_index_and_metadata.get(ids[i], index_and_metadata) || index_and_metadata.metadata.expiresAt() <= now) { out[i] = HAS_NOT_FOUND; } else { - out[i] 
= !it->second.metadata.isDefault(); + out[i] = !index_and_metadata.metadata.isDefault(); } } } @@ -849,7 +857,7 @@ PaddedPODArray CachePartition::getCachedIds(const std::chro PaddedPODArray array; for (const auto & [key, index_and_metadata] : key_to_index_and_metadata) - if (!index_and_metadata.metadata.isDefault() && index_and_metadata.metadata.expiresAt() > now) + if (!index_and_metadata.second.metadata.isDefault() && index_and_metadata.second.metadata.expiresAt() > now) array.push_back(key); return array; } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 849c64daf64..03553e6b50d 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,67 @@ namespace DB { +template +class CLRUCache +{ + using Iter = std::list::iterator; +public: + CLRUCache(size_t max_size_) : max_size(max_size_) { + } + + void set(K key, V val) { + auto it = cache.find(key); + if (it == std::end(cache)) { + auto & item = cache[key]; + item.first = queue.insert(std::end(queue), key); + item.second = val; + if (queue.size() > max_size) { + //Poco::Logger::get("Evict").fatal("eviction"); + cache.erase(queue.front()); + queue.pop_front(); + } + } else { + queue.erase(it->second.first); + it->second.first = queue.insert(std::end(queue), key); + it->second.second = val; + } + } + + bool get(K key, V & val) { + auto it = cache.find(key); + if (it == std::end(cache)) { + return false; + } + val = it->second.second; + queue.erase(it->second.first); + it->second.first = queue.insert(std::end(queue), key); + return true; + } + + void erase(K key) { + auto it = cache.find(key); + queue.erase(it->second.first); + cache.erase(it); + } + + size_t size() const { + return cache.size(); + } + + auto begin() { + return std::begin(cache); + } + + auto end() { + return std::end(cache); + } + +private: + std::unordered_map> cache; + std::list queue; + size_t max_size; +}; + using AttributeValueVariant = std::variant< UInt8, UInt16, @@ -59,7 +121,7 @@ public: bool operator< (const Index & rhs) const { return index < rhs.index; } /// Stores `is_in_memory` flag, block id, address in uncompressed block - size_t index = 0; + uint64_t index = 0; }; struct Metadata final @@ -182,7 +244,8 @@ private: Metadata metadata{}; }; - mutable std::unordered_map key_to_index_and_metadata; + //mutable std::unordered_map key_to_index_and_metadata; + mutable CLRUCache key_to_index_and_metadata; Attribute keys_buffer; const std::vector attributes_structure; From 0a52fc80f8cbc136844ca42c1dbbfa405bb798f2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 4 Apr 2020 16:24:35 +0300 Subject: [PATCH 0060/2229] lru settings --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 31 +++++++++++++------ dbms/src/Dictionaries/SSDCacheDictionary.h | 15 ++++++--- .../0_stateless/01053_ssd_dictionary.sql | 4 +-- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 735f9a806f8..3b6fea18567 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -71,6 +71,8 @@ namespace constexpr size_t DEFAULT_READ_BUFFER_SIZE = 16 * DEFAULT_SSD_BLOCK_SIZE; constexpr size_t DEFAULT_WRITE_BUFFER_SIZE = DEFAULT_SSD_BLOCK_SIZE; + constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000; + constexpr size_t BUFFER_ALIGNMENT = 
DEFAULT_AIO_FILE_BLOCK_SIZE; constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; @@ -167,14 +169,16 @@ CachePartition::CachePartition( const size_t max_size_, const size_t block_size_, const size_t read_buffer_size_, - const size_t write_buffer_size_) + const size_t write_buffer_size_, + const size_t max_stored_keys_) : file_id(file_id_) , max_size(max_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) , path(dir_path + "/" + std::to_string(file_id)) - , key_to_index_and_metadata(100000) + , key_to_index_and_metadata(max_stored_keys) , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; @@ -866,7 +870,7 @@ void CachePartition::remove() { std::unique_lock lock(rw_lock); //Poco::File(path + BIN_FILE_EXT).remove(); - //std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); + std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); } CacheStorage::CacheStorage( @@ -876,7 +880,8 @@ CacheStorage::CacheStorage( const size_t partition_size_, const size_t block_size_, const size_t read_buffer_size_, - const size_t write_buffer_size_) + const size_t write_buffer_size_, + const size_t max_stored_keys_) : attributes_structure(attributes_structure_) , path(path_) , max_partitions_count(max_partitions_count_) @@ -884,6 +889,7 @@ CacheStorage::CacheStorage( , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) , log(&Poco::Logger::get("CacheStorage")) { } @@ -970,7 +976,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size)); + partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1057,7 +1063,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 
0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size)); + partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1232,7 +1238,8 @@ SSDCacheDictionary::SSDCacheDictionary( const size_t partition_size_, const size_t block_size_, const size_t read_buffer_size_, - const size_t write_buffer_size_) + const size_t write_buffer_size_, + const size_t max_stored_keys_) : name(name_) , dict_struct(dict_struct_) , source_ptr(std::move(source_ptr_)) @@ -1243,8 +1250,9 @@ SSDCacheDictionary::SSDCacheDictionary( , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size) + path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) , log(&Poco::Logger::get("SSDCacheDictionary")) { if (!this->source_ptr->supportsSelectiveLoad()) @@ -1623,12 +1631,17 @@ void registerDictionarySSDCache(DictionaryFactory & factory) if (path.empty()) throw Exception{name + ": dictionary of layout 'ssdcache' cannot have empty path", ErrorCodes::BAD_ARGUMENTS}; + + const auto max_stored_keys = config.getInt64(layout_prefix + ".ssd.max_stored_keys", DEFAULT_MAX_STORED_KEYS); + if (max_stored_keys <= 0) + throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( name, dict_struct, std::move(source_ptr), dict_lifetime, path, max_partitions_count, partition_size / block_size, block_size, - read_buffer_size / block_size, write_buffer_size / block_size); + read_buffer_size / block_size, write_buffer_size / block_size, + max_stored_keys); }; factory.registerLayout("ssd", create_layout, false); } diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 03553e6b50d..305418ed7b7 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -38,7 +38,6 @@ public: item.first = queue.insert(std::end(queue), key); item.second = val; if (queue.size() > max_size) { - //Poco::Logger::get("Evict").fatal("eviction"); cache.erase(queue.front()); queue.pop_front(); } @@ -152,7 +151,8 @@ public: const size_t max_size, const size_t block_size, const size_t read_buffer_size, - const size_t write_buffer_size); + const size_t write_buffer_size, + const size_t max_stored_keys); ~CachePartition(); @@ -232,6 +232,7 @@ private: const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; + const size_t max_stored_keys; const std::string path; mutable std::shared_mutex rw_lock; @@ -277,7 +278,8 @@ public: const size_t partition_size, const size_t block_size, const size_t read_buffer_size, - const size_t write_buffer_size); + const size_t write_buffer_size, + const size_t max_stored_keys); ~CacheStorage(); @@ -329,6 +331,7 @@ private: const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; + const size_t max_stored_keys; mutable std::shared_mutex rw_lock; std::list partitions; @@ -363,7 +366,8 @@ public: const size_t partition_size_, const size_t block_size_, const size_t read_buffer_size_, - const size_t 
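// Patch 0060 above wires MAX_STORED_KEYS through the factory the same way as the other
// layout knobs: read with a compile-time default, reject non-positive values, then pass
// the result down the constructor chain. A standalone sketch of that pattern (the config
// reader is stubbed here; names mirror the patch):
#include <cstdint>
#include <stdexcept>
#include <string>

constexpr int64_t DEFAULT_MAX_STORED_KEYS_SKETCH = 100000;

// Stand-in for config.getInt64(layout_prefix + ".ssd.max_stored_keys", default)
int64_t getInt64Stub(const std::string & /*key*/, int64_t default_value) { return default_value; }

int64_t readMaxStoredKeys(const std::string & dict_name)
{
    const int64_t max_stored_keys = getInt64Stub("ssd.max_stored_keys", DEFAULT_MAX_STORED_KEYS_SKETCH);
    if (max_stored_keys <= 0)
        throw std::invalid_argument(
            dict_name + ": dictionary of layout 'ssd' cannot have 0 (or less) max_stored_keys");
    return max_stored_keys;
}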
write_buffer_size_); + const size_t write_buffer_size_, + const size_t max_stored_keys_); const std::string & getDatabase() const override { return name; } const std::string & getName() const override { return name; } @@ -389,7 +393,7 @@ public: std::shared_ptr clone() const override { return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, - max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size); + max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -508,6 +512,7 @@ private: const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; + const size_t max_stored_keys; std::map attribute_index_by_name; std::vector null_values; diff --git a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql index b8dd9158b50..18a79223f8c 100644 --- a/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/dbms/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -74,7 +74,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096)); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -140,7 +140,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024)); +LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10)); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 From 9a2855fae7eee1eb150116f19af56453cc59ffc5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 Apr 2020 04:26:34 +0300 Subject: [PATCH 0061/2229] in-memory parts: preparation --- .../MergeTree/MergeTreeDataPartCompact.cpp | 2 +- .../MergeTreeDataPartWriterCompact.cpp | 8 +-- .../MergeTreeDataPartWriterCompact.h | 4 +- ....cpp => MergeTreeDataPartWriterOnDisk.cpp} | 49 +++++++-------- ...iter.h => MergeTreeDataPartWriterOnDisk.h} | 59 ++++--------------- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- .../MergeTree/MergeTreeDataPartWriterWide.h | 4 +- .../MergeTree/MergedBlockOutputStream.cpp | 31 ++++++---- .../MergeTree/MergedBlockOutputStream.h | 5 ++ .../MergedColumnOnlyOutputStream.cpp | 14 +++-- 10 files changed, 74 insertions(+), 104 deletions(-) rename src/Storages/MergeTree/{IMergeTreeDataPartWriter.cpp => MergeTreeDataPartWriterOnDisk.cpp} (90%) rename src/Storages/MergeTree/{IMergeTreeDataPartWriter.h => MergeTreeDataPartWriterOnDisk.h} (68%) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 134b2fc1ef0..24d65622fe6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -54,7 +54,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( 
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( const NamesAndTypesList & columns_list, - const std::vector & indices_to_recalc, + const MergeTreeIndices & indices_to_recalc, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index e33d4a97cac..22282754d99 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -15,10 +15,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) -: IMergeTreeDataPartWriter(disk_, part_path_, - storage_, columns_list_, - indices_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_, true) + : MergeTreeDataPartWriterOnDisk(disk_, + part_path_, storage_, columns_list_, + indices_to_recalc_, marks_file_extension_, + default_codec_, settings_, index_granularity_, true) { using DataPart = MergeTreeDataPartCompact; String data_file_name = DataPart::DATA_FILE_NAME; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 0aff55588aa..c0aced61b95 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -1,10 +1,10 @@ -#include +#include namespace DB { /// Writes data part in compact format. -class MergeTreeDataPartWriterCompact : public IMergeTreeDataPartWriter +class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk { public: MergeTreeDataPartWriterCompact( diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp similarity index 90% rename from src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp rename to src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 8187799f4be..cdd7e592513 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -14,20 +14,20 @@ namespace constexpr auto INDEX_FILE_EXTENSION = ".idx"; } -void IMergeTreeDataPartWriter::Stream::finalize() +void MergeTreeDataPartWriterOnDisk::Stream::finalize() { compressed.next(); plain_file->next(); marks.next(); } -void IMergeTreeDataPartWriter::Stream::sync() +void MergeTreeDataPartWriterOnDisk::Stream::sync() { plain_file->sync(); marks_file->sync(); } -IMergeTreeDataPartWriter::Stream::Stream( +MergeTreeDataPartWriterOnDisk::Stream::Stream( const String & escaped_column_name_, DiskPtr disk_, const String & data_path_, @@ -47,7 +47,7 @@ IMergeTreeDataPartWriter::Stream::Stream( { } -void IMergeTreeDataPartWriter::Stream::addToChecksums(MergeTreeData::DataPart::Checksums & checksums) +void MergeTreeDataPartWriterOnDisk::Stream::addToChecksums(MergeTreeData::DataPart::Checksums & checksums) { String name = escaped_column_name; @@ -62,7 +62,7 @@ void IMergeTreeDataPartWriter::Stream::addToChecksums(MergeTreeData::DataPart::C } -IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( +MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( DiskPtr disk_, const String & part_path_, const MergeTreeData & storage_, @@ -73,15 
+73,13 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_, bool need_finish_last_granule_) - : disk(std::move(disk_)) + : IMergeTreeDataPartWriter(storage_, + columns_list_, indices_to_recalc_, + index_granularity_, settings_) + , disk(std::move(disk_)) , part_path(part_path_) - , storage(storage_) - , columns_list(columns_list_) , marks_file_extension(marks_file_extension_) - , index_granularity(index_granularity_) , default_codec(default_codec_) - , skip_indices(indices_to_recalc_) - , settings(settings_) , compute_granularity(index_granularity.empty()) , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity) , need_finish_last_granule(need_finish_last_granule_) @@ -93,8 +91,6 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( disk->createDirectories(part_path); } -IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; - static void fillIndexGranularityImpl( const Block & block, size_t index_granularity_bytes, @@ -155,7 +151,7 @@ static void fillIndexGranularityImpl( } } -void IMergeTreeDataPartWriter::fillIndexGranularity(const Block & block) +void MergeTreeDataPartWriterOnDisk::fillIndexGranularity(const Block & block) { const auto storage_settings = storage.getSettings(); fillIndexGranularityImpl( @@ -169,7 +165,7 @@ void IMergeTreeDataPartWriter::fillIndexGranularity(const Block & block) need_finish_last_granule); } -void IMergeTreeDataPartWriter::initPrimaryIndex() +void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() { if (storage.hasPrimaryKey()) { @@ -180,13 +176,13 @@ void IMergeTreeDataPartWriter::initPrimaryIndex() primary_index_initialized = true; } -void IMergeTreeDataPartWriter::initSkipIndices() +void MergeTreeDataPartWriterOnDisk::initSkipIndices() { for (const auto & index : skip_indices) { String stream_name = index->getFileName(); skip_indices_streams.emplace_back( - std::make_unique( + std::make_unique( stream_name, disk, part_path + stream_name, INDEX_FILE_EXTENSION, @@ -200,8 +196,9 @@ void IMergeTreeDataPartWriter::initSkipIndices() skip_indices_initialized = true; } -void IMergeTreeDataPartWriter::calculateAndSerializePrimaryIndex(const Block & primary_index_block, size_t rows) +void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block) { + size_t rows = primary_index_block.rows(); if (!primary_index_initialized) throw Exception("Primary index is not initialized", ErrorCodes::LOGICAL_ERROR); @@ -250,9 +247,9 @@ void IMergeTreeDataPartWriter::calculateAndSerializePrimaryIndex(const Block & p } } -void IMergeTreeDataPartWriter::calculateAndSerializeSkipIndices( - const Block & skip_indexes_block, size_t rows) +void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block & skip_indexes_block) { + size_t rows = skip_indexes_block.rows(); if (!skip_indices_initialized) throw Exception("Skip indices are not initialized", ErrorCodes::LOGICAL_ERROR); @@ -314,7 +311,7 @@ void IMergeTreeDataPartWriter::calculateAndSerializeSkipIndices( skip_index_data_mark = skip_index_current_data_mark; } -void IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) +void MergeTreeDataPartWriterOnDisk::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) { bool write_final_mark = (with_final_mark && data_written); if (write_final_mark && compute_granularity) @@ -340,7 +337,7 @@ void 
IMergeTreeDataPartWriter::finishPrimaryIndexSerialization(MergeTreeData::Da } } -void IMergeTreeDataPartWriter::finishSkipIndicesSerialization( +void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization( MergeTreeData::DataPart::Checksums & checksums) { for (size_t i = 0; i < skip_indices.size(); ++i) @@ -361,10 +358,4 @@ void IMergeTreeDataPartWriter::finishSkipIndicesSerialization( skip_index_filling.clear(); } -void IMergeTreeDataPartWriter::next() -{ - current_mark = next_mark; - index_offset = next_index_offset; -} - } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h similarity index 68% rename from src/Storages/MergeTree/IMergeTreeDataPartWriter.h rename to src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 3e3496c88da..a5df9d4a389 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -16,7 +17,7 @@ namespace DB /// Writes data part to disk in different formats. /// Calculates and serializes primary and skip indices if needed. -class IMergeTreeDataPartWriter : private boost::noncopyable +class MergeTreeDataPartWriterOnDisk : public IMergeTreeDataPartWriter { public: using WrittenOffsetColumns = std::set; @@ -60,7 +61,7 @@ public: using StreamPtr = std::unique_ptr; - IMergeTreeDataPartWriter( + MergeTreeDataPartWriterOnDisk( DiskPtr disk, const String & part_path, const MergeTreeData & storage, @@ -72,75 +73,36 @@ public: const MergeTreeIndexGranularity & index_granularity, bool need_finish_last_granule); - virtual ~IMergeTreeDataPartWriter(); - - virtual void write( - const Block & block, const IColumn::Permutation * permutation = nullptr, - /* Blocks with already sorted index columns */ - const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) = 0; - - void calculateAndSerializePrimaryIndex(const Block & primary_index_block, size_t rows); - void calculateAndSerializeSkipIndices(const Block & skip_indexes_block, size_t rows); - - /// Shift mark and offset to prepare read next mark. - /// You must call it after calling write method and optionally - /// calling calculations of primary and skip indices. 
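    // A minimal sketch of the call protocol these declarations imply, assuming a
    // writer obtained from IMergeTreeDataPart::getWriter() and driven the way
    // MergedBlockOutputStream::writeImpl() does later in this series (the exact
    // finish order is an assumption based on writeSuffixAndFinalizePart):
    //
    //     writer->initPrimaryIndex();
    //     writer->initSkipIndices();
    //     writer->write(block, permutation, primary_key_block, skip_indexes_block);
    //     writer->calculateAndSerializeSkipIndices(skip_indexes_block);
    //     writer->calculateAndSerializePrimaryIndex(primary_key_block);
    //     writer->next();  // shift current_mark / index_offset before the next write()
    //     ...
    //     writer->finishDataSerialization(checksums);
    //     writer->finishPrimaryIndexSerialization(checksums);
    //     writer->finishSkipIndicesSerialization(checksums);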
-    void next();
+    void calculateAndSerializePrimaryIndex(const Block & primary_index_block) final;
+    void calculateAndSerializeSkipIndices(const Block & skip_indexes_block) final;

     /// Count index_granularity for block and store in `index_granularity`
-    void fillIndexGranularity(const Block & block);
+    void fillIndexGranularity(const Block & block) final;

-    const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; }
+    void initSkipIndices() final;
+    void initPrimaryIndex() final;

-    Columns releaseIndexColumns()
-    {
-        return Columns(std::make_move_iterator(index_columns.begin()), std::make_move_iterator(index_columns.end()));
-    }
+    virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) final;
+    virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums) final;

     void setWrittenOffsetColumns(WrittenOffsetColumns * written_offset_columns_)
     {
         written_offset_columns = written_offset_columns_;
     }

-    const MergeTreeIndices & getSkipIndices() { return skip_indices; }
-
-    void initSkipIndices();
-    void initPrimaryIndex();
-
-    virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) = 0;
-    void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums);
-    void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums);
-
 protected:
     using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
     using SerializationStates = std::unordered_map<String, SerializationState>;

     DiskPtr disk;
     String part_path;
-    const MergeTreeData & storage;
-    NamesAndTypesList columns_list;
     const String marks_file_extension;
-
-    MergeTreeIndexGranularity index_granularity;
-
     CompressionCodecPtr default_codec;
-    MergeTreeIndices skip_indices;
-
-    MergeTreeWriterSettings settings;
-
     bool compute_granularity;
     bool with_final_mark;
     bool need_finish_last_granule;

-    size_t current_mark = 0;
-
-    /// The offset to the first row of the block for which you want to write the index.
-    size_t index_offset = 0;
-
-    size_t next_mark = 0;
-    size_t next_index_offset = 0;
-
     /// Number of marks in data from which skip indices have to start
     /// aggregation. I.e. it's data mark number, not skip indices mark.
size_t skip_index_data_mark = 0; @@ -151,7 +113,6 @@ protected: std::unique_ptr index_file_stream; std::unique_ptr index_stream; - MutableColumns index_columns; DataTypes index_types; /// Index columns values from the last row from the last block /// It's written to index file in the `writeSuffixAndFinalizePart` method diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 1e5640b4e23..b953bc9d31e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -22,7 +22,7 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(disk_, part_path_, + : MergeTreeDataPartWriterOnDisk(disk_, part_path_, storage_, columns_list_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_, false) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index 4e4f4806d53..a9cdf4ce8e9 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -1,10 +1,10 @@ -#include +#include namespace DB { /// Writes data part in wide format. -class MergeTreeDataPartWriterWide : public IMergeTreeDataPartWriter +class MergeTreeDataPartWriterWide : public MergeTreeDataPartWriterOnDisk { public: diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 2b482ac7c29..f10c1b8d533 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -97,6 +97,24 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( else part_columns = *total_columns_list; + if (new_part->isStoredOnDisk()) + finalizePartOnDisk(new_part, part_columns, checksums); + + new_part->setColumns(part_columns); + new_part->rows_count = rows_count; + new_part->modification_time = time(nullptr); + new_part->index = writer->releaseIndexColumns(); + new_part->checksums = checksums; + new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); + new_part->index_granularity = writer->getIndexGranularity(); + new_part->calculateColumnsSizesOnDisk(); +} + +void MergedBlockOutputStream::finalizePartOnDisk( + const MergeTreeData::MutableDataPartPtr & new_part, + NamesAndTypesList & part_columns, + MergeTreeData::DataPart::Checksums & checksums) +{ if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || isCompactPart(new_part)) { new_part->partition.store(storage, disk, part_path, checksums); @@ -137,15 +155,6 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( auto out = disk->writeFile(part_path + "checksums.txt", 4096); checksums.write(*out); } - - new_part->setColumns(part_columns); - new_part->rows_count = rows_count; - new_part->modification_time = time(nullptr); - new_part->index = writer->releaseIndexColumns(); - new_part->checksums = checksums; - new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); - new_part->index_granularity = writer->getIndexGranularity(); - new_part->calculateColumnsSizesOnDisk(); } void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Permutation * permutation) @@ -165,8 +174,8 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm 
Block skip_indexes_block = getBlockAndPermute(block, skip_indexes_column_names, permutation); writer->write(block, permutation, primary_key_block, skip_indexes_block); - writer->calculateAndSerializeSkipIndices(skip_indexes_block, rows); - writer->calculateAndSerializePrimaryIndex(primary_key_block, rows); + writer->calculateAndSerializeSkipIndices(skip_indexes_block); + writer->calculateAndSerializePrimaryIndex(primary_key_block); writer->next(); rows_count += rows; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 5a92977640e..8abc02e55e0 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -53,6 +53,11 @@ private: */ void writeImpl(const Block & block, const IColumn::Permutation * permutation); + void finalizePartOnDisk( + const MergeTreeData::MutableDataPartPtr & new_part, + NamesAndTypesList & part_columns, + MergeTreeData::DataPart::Checksums & checksums); + private: NamesAndTypesList columns_list; diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 892b4eccfbc..f37bde33083 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -30,8 +31,12 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( std::move(writer_settings), index_granularity); - writer->setWrittenOffsetColumns(offset_columns_); - writer->initSkipIndices(); + auto * writer_on_disk = dynamic_cast(writer.get()); + if (!writer_on_disk) + throw Exception("MergedColumnOnlyOutputStream supports only parts stored on disk", ErrorCodes::NOT_IMPLEMENTED); + + writer_on_disk->setWrittenOffsetColumns(offset_columns_); + writer_on_disk->initSkipIndices(); } void MergedColumnOnlyOutputStream::write(const Block & block) @@ -44,12 +49,11 @@ void MergedColumnOnlyOutputStream::write(const Block & block) Block skip_indexes_block = getBlockAndPermute(block, skip_indexes_column_names, nullptr); - size_t rows = block.rows(); - if (!rows) + if (!block.rows()) return; writer->write(block); - writer->calculateAndSerializeSkipIndices(skip_indexes_block, rows); + writer->calculateAndSerializeSkipIndices(skip_indexes_block); writer->next(); } From ee4eb97c3fc3425e060853425d5008b937985a23 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 Apr 2020 04:27:27 +0300 Subject: [PATCH 0062/2229] in-memory parts: preparation --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 38 +++++++++ .../MergeTree/IMergeTreeDataPartWriter.h | 78 +++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp create mode 100644 src/Storages/MergeTree/IMergeTreeDataPartWriter.h diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp new file mode 100644 index 00000000000..c5e20a8a95a --- /dev/null +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -0,0 +1,38 @@ +#include + +namespace DB +{ + +IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( + const MergeTreeData & storage_, + const MergeTreeWriterSettings & settings_) + : storage(storage_), settings(settings_) {} + +IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( + const MergeTreeData & storage_, + const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices_, + const MergeTreeIndexGranularity & 
index_granularity_, + const MergeTreeWriterSettings & settings_) + : storage(storage_) + , columns_list(columns_list_) + , skip_indices(skip_indices_) + , index_granularity(index_granularity_) + , settings(settings_) {} + +Columns IMergeTreeDataPartWriter::releaseIndexColumns() +{ + return Columns( + std::make_move_iterator(index_columns.begin()), + std::make_move_iterator(index_columns.end())); +} + +void IMergeTreeDataPartWriter::next() +{ + current_mark = next_mark; + index_offset = next_index_offset; +} + +IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default; + +} diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h new file mode 100644 index 00000000000..1cab6ae4b60 --- /dev/null +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -0,0 +1,78 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + + +/// Writes data part to disk in different formats. +/// Calculates and serializes primary and skip indices if needed. +class IMergeTreeDataPartWriter : private boost::noncopyable +{ +public: + IMergeTreeDataPartWriter(const MergeTreeData & storage_, + const MergeTreeWriterSettings & settings_); + + IMergeTreeDataPartWriter(const MergeTreeData & storage_, + const NamesAndTypesList & columns_list_, + const MergeTreeIndices & skip_indices_, + const MergeTreeIndexGranularity & index_granularity_, + const MergeTreeWriterSettings & settings_); + + virtual ~IMergeTreeDataPartWriter(); + + virtual void write( + const Block & block, const IColumn::Permutation * permutation = nullptr, + /* Blocks with already sorted index columns */ + const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) = 0; + + virtual void calculateAndSerializePrimaryIndex(const Block & /* primary_index_block */) {} + virtual void calculateAndSerializeSkipIndices(const Block & /* skip_indexes_block */) {} + + /// Shift mark and offset to prepare read next mark. + /// You must call it after calling write method and optionally + /// calling calculations of primary and skip indices. + void next(); + + /// Count index_granularity for block and store in `index_granularity` + virtual void fillIndexGranularity(const Block & /* block */) {} + + virtual void initSkipIndices() {} + virtual void initPrimaryIndex() {} + + virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync = false) = 0; + virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */) {} + virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & /* checksums */) {} + + Columns releaseIndexColumns(); + const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } + const MergeTreeIndices & getSkipIndices() { return skip_indices; } + +protected: + const MergeTreeData & storage; + NamesAndTypesList columns_list; + MergeTreeIndices skip_indices; + MergeTreeIndexGranularity index_granularity; + MergeTreeWriterSettings settings; + + size_t current_mark = 0; + + /// The offset to the first row of the block for which you want to write the index. 
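    // Worked example (a sketch, assuming the wide writer with a fixed index
    // granularity of 8192 rows and a fresh writer state): write() receives a
    // block of 20000 rows, so marks are emitted at rows 0, 8192 and 16384, and
    // the last granule ends up only 20000 - 16384 = 3616 rows full. The writer
    // then sets next_mark = 3 and next_index_offset = 8192 - 3616 = 4576, and
    // next() promotes these into current_mark / index_offset before the
    // following write() call.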
+ size_t index_offset = 0; + + size_t next_mark = 0; + size_t next_index_offset = 0; + + MutableColumns index_columns; +}; + +} From 391f7c34be97d56aab30ca7c7b6ecf882c93c015 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 14 Apr 2020 22:47:19 +0300 Subject: [PATCH 0063/2229] in memory parts: basic read/write --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- .../MergeTree/IMergeTreeDataPartWriter.cpp | 5 +- .../MergeTree/IMergeTreeDataPartWriter.h | 6 +- src/Storages/MergeTree/IMergeTreeReader.cpp | 8 ++ src/Storages/MergeTree/IMergeTreeReader.h | 2 + .../MergeTree/IMergedBlockOutputStream.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 73 ++++++++++-- src/Storages/MergeTree/MergeTreeData.h | 9 ++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 14 +-- .../MergeTree/MergeTreeDataPartCompact.cpp | 1 - .../MergeTree/MergeTreeDataPartInMemory.cpp | 72 ++++++++++++ .../MergeTree/MergeTreeDataPartInMemory.h | 62 ++++++++++ .../MergeTreeDataPartWriterInMemory.cpp | 108 ++++++++++++++++++ .../MergeTreeDataPartWriterInMemory.h | 29 +++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 19 +-- src/Storages/MergeTree/MergeTreeIOSettings.h | 4 + .../MergeTreeIndexGranularityInfo.cpp | 4 + .../MergeTree/MergeTreeReaderCompact.cpp | 1 + .../MergeTree/MergeTreeReaderInMemory.cpp | 74 ++++++++++++ .../MergeTree/MergeTreeReaderInMemory.h | 34 ++++++ .../MergeTree/MergeTreeReaderWide.cpp | 7 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 3 +- .../MergeTree/MergeTreeSelectProcessor.h | 1 - src/Storages/MergeTree/MergeTreeSettings.h | 3 + ...rgeTreeThreadSelectBlockInputProcessor.cpp | 6 +- ...MergeTreeThreadSelectBlockInputProcessor.h | 2 +- .../MergeTree/MergeTreeWriteAheadLog.cpp | 104 +++++++++++++++++ .../MergeTree/MergeTreeWriteAheadLog.h | 56 +++++++++ .../MergeTree/MergedBlockOutputStream.cpp | 14 ++- .../MergeTree/MergedBlockOutputStream.h | 2 +- 32 files changed, 683 insertions(+), 50 deletions(-) create mode 100644 src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp create mode 100644 src/Storages/MergeTree/MergeTreeDataPartInMemory.h create mode 100644 src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp create mode 100644 src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h create mode 100644 src/Storages/MergeTree/MergeTreeReaderInMemory.cpp create mode 100644 src/Storages/MergeTree/MergeTreeReaderInMemory.h create mode 100644 src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp create mode 100644 src/Storages/MergeTree/MergeTreeWriteAheadLog.h diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 5d799d257bc..01dd6d5da47 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -380,7 +380,7 @@ String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const String IMergeTreeDataPart::getFullPath() const { - assertOnDisk(); + // assertOnDisk(); //TODO if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR); @@ -390,7 +390,7 @@ String IMergeTreeDataPart::getFullPath() const String IMergeTreeDataPart::getFullRelativePath() const { - assertOnDisk(); + // assertOnDisk(); //TODO if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. 
It's bug.", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 784a3ff047b..32c5e6737b0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -284,7 +284,7 @@ public: size_t getFileSizeOrZero(const String & file_name) const; String getFullRelativePath() const; String getFullPath() const; - void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; + virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; void renameToDetached(const String & prefix) const; void makeCloneInDetached(const String & prefix) const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index c5e20a8a95a..2d35b9ff723 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -5,8 +5,11 @@ namespace DB IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeData & storage_, + const NamesAndTypesList & columns_list_, const MergeTreeWriterSettings & settings_) - : storage(storage_), settings(settings_) {} + : storage(storage_) + , columns_list(columns_list_) + , settings(settings_) {} IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeData & storage_, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 1cab6ae4b60..f745c47d5b4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -19,7 +19,9 @@ namespace DB class IMergeTreeDataPartWriter : private boost::noncopyable { public: - IMergeTreeDataPartWriter(const MergeTreeData & storage_, + IMergeTreeDataPartWriter( + const MergeTreeData & storage_, + const NamesAndTypesList & columns_list_, const MergeTreeWriterSettings & settings_); IMergeTreeDataPartWriter(const MergeTreeData & storage_, @@ -49,7 +51,7 @@ public: virtual void initSkipIndices() {} virtual void initPrimaryIndex() {} - virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync = false) = 0; + virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) = 0; virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */) {} virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & /* checksums */) {} diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 8243983d837..a2984421c2a 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -248,4 +248,12 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) } } +void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) +{ + if (num_columns_to_read != columns.size()) + throw Exception("invalid number of columns passed to MergeTreeReader::readRows. " + "Expected " + toString(columns.size()) + ", " + "got " + toString(num_columns_to_read), ErrorCodes::LOGICAL_ERROR); +} + } diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 02d8f67f9d0..79f7860d1cc 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -61,6 +61,8 @@ protected: /// Returns actual column type in part, which can differ from table metadata. 
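    // Hypothetical example of why this matters: after ALTER MODIFY COLUMN
    // changes `x` from UInt32 to UInt64, old parts still store `x` as UInt32.
    // getColumnFromPart() then yields {x, UInt32}, readRows() deserializes with
    // the on-disk type, and performRequiredConversions() above casts the result
    // to the table's UInt64 afterwards.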
NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const; + void checkNumberOfColumns(size_t columns_num_to_read); + /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. ValueSizeMap avg_value_size_hints; /// Stores states for IDataType::deserializeBinaryBulk diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index c016ec325da..95db9010d3f 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -10,7 +10,7 @@ IMergedBlockOutputStream::IMergedBlockOutputStream( const MergeTreeDataPartPtr & data_part) : storage(data_part->storage) , disk(data_part->disk) - , part_path(data_part->getFullRelativePath()) + , part_path(data_part->isStoredOnDisk() ? data_part->getFullRelativePath() : "") { } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index aaacea6f1e7..fa1097c125f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,12 @@ MergeTreeData::MergeTreeData( String reason; if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) LOG_WARNING(log, reason + " Settings 'min_bytes_for_wide_part' and 'min_bytes_for_wide_part' will be ignored."); + + if (settings->in_memory_parts_enable_wal) + { + auto disk = reserveSpace(0)->getDisk(); + write_ahead_log = std::make_shared(*this, disk); + } } @@ -894,17 +901,21 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) continue; part_names_with_disks.emplace_back(it->name(), disk_ptr); + + if (startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + loadDataPartsFromWAL(disk_ptr, it->name()); } } auto part_lock = lockParts(); - data_parts_indexes.clear(); + // TODO: fix. + // data_parts_indexes.clear(); - if (part_names_with_disks.empty()) - { - LOG_DEBUG(log, "There is no data parts"); - return; - } + // if (part_names_with_disks.empty()) + // { + // LOG_DEBUG(log, "There is no data parts"); + // return; + // } /// Parallel loading of data parts. size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_names_with_disks.size()); @@ -1106,6 +1117,21 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) LOG_DEBUG(log, "Loaded data parts (" << data_parts_indexes.size() << " items)"); } +void MergeTreeData::loadDataPartsFromWAL(const DiskPtr & disk, const String & file_name) +{ + MergeTreeWriteAheadLog wal(*this, disk, file_name); + auto parts = wal.restore(); + for (auto & part : parts) + { + part->modification_time = time(nullptr); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->state = DataPartState::Committed; + + if (!data_parts_indexes.insert(part).second) + throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); + } +} + /// Is the part directory old. /// True if its modification time and the modification time of all files inside it is less then threshold. 
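// A note on WAL discovery in loadDataParts() above: the write-ahead log lives
// next to the part directories on the same disk, so it is found by file-name
// prefix. A minimal sketch of the names involved, assuming the constants
// declared in MergeTreeWriteAheadLog.h later in this series:
//
//     String current = String(MergeTreeWriteAheadLog::WAL_FILE_NAME)
//         + MergeTreeWriteAheadLog::WAL_FILE_EXTENSION;                  // "wal.bin"
//     String rotated = String(MergeTreeWriteAheadLog::WAL_FILE_NAME) + "_"
//         + toString(min_block) + "_" + toString(max_block)
//         + MergeTreeWriteAheadLog::WAL_FILE_EXTENSION;                  // e.g. "wal_1_4.bin"
//
// Both match startsWith(name, WAL_FILE_NAME), so rotated logs are restored too.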
@@ -1544,6 +1570,21 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S } MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, size_t rows_count) const +{ + if (!canUseAdaptiveGranularity()) + return MergeTreeDataPartType::WIDE; + + const auto settings = getSettings(); + if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part) + return MergeTreeDataPartType::IN_MEMORY; + + if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) + return MergeTreeDataPartType::COMPACT; + + return MergeTreeDataPartType::WIDE; +} + +MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const { if (!canUseAdaptiveGranularity()) return MergeTreeDataPartType::WIDE; @@ -1564,8 +1605,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, return std::make_shared(*this, name, part_info, disk, relative_path); else if (type == MergeTreeDataPartType::WIDE) return std::make_shared(*this, name, part_info, disk, relative_path); + else if (type == MergeTreeDataPartType::IN_MEMORY) + return std::make_shared(*this, name, part_info, disk, relative_path); else - throw Exception("Unknown type in part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception("Unknown type of part " + relative_path, ErrorCodes::UNKNOWN_PART_TYPE); } static MergeTreeDataPartType getPartTypeFromMarkExtension(const String & mrk_ext) @@ -1876,6 +1919,13 @@ void MergeTreeData::renameTempPartAndReplace( addPartContributionToColumnSizes(part); } + auto * part_in_memory = dynamic_cast(part.get()); + if (part_in_memory && getSettings()->in_memory_parts_enable_wal) + { + auto wal = getWriteAheadLog(); + wal->write(part_in_memory->block, part_in_memory->name); + } + if (out_covered_parts) { for (DataPartPtr & covered_part : covered_parts) @@ -2699,6 +2749,8 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS } } + LOG_DEBUG(log, "MergeTreeData::getDataPartsVector: " << res.size()); + return res; } @@ -3616,4 +3668,11 @@ MergeTreeData::AlterConversions MergeTreeData::getAlterConversionsForPart(const return result; } + +MergeTreeData::WriteAheadLogPtr MergeTreeData::getWriteAheadLog() const +{ + // std::lock_guard lock(wal_mutex); + return write_ahead_log; +} + } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d299d39726e..243156dab94 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -190,6 +191,7 @@ public: DataPartsLock lockParts() const { return DataPartsLock(data_parts_mutex); } MergeTreeDataPartType choosePartType(size_t bytes_uncompressed, size_t rows_count) const; + MergeTreeDataPartType choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const; /// After this method setColumns must be called MutableDataPartPtr createPart(const String & name, @@ -389,6 +391,7 @@ public: /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks); + void loadDataPartsFromWAL(const DiskPtr & disk, const String & file_name); String getLogName() const { return log_name; } @@ -659,6 +662,9 @@ public: /// Return alter conversions for part which must be applied on fly. 
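    // Illustrative decision table for choosePartType() above, assuming adaptive
    // granularity is enabled (otherwise both helpers always return WIDE) and
    // only the row thresholds are set, say min_rows_for_compact_part = 1000 and
    // min_rows_for_wide_part = 100000 (the byte thresholds combine with OR):
    //
    //     rows < 1000             -> MergeTreeDataPartType::IN_MEMORY
    //     1000 <= rows < 100000   -> MergeTreeDataPartType::COMPACT
    //     rows >= 100000          -> MergeTreeDataPartType::WIDE
    //
    // choosePartTypeOnDisk() skips the IN_MEMORY branch; merges use it (see
    // FutureMergedMutatedPart::assign below) because a merge result is always
    // materialized on disk.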
AlterConversions getAlterConversionsForPart(const MergeTreeDataPartPtr part) const; + using WriteAheadLogPtr = std::shared_ptr; + WriteAheadLogPtr getWriteAheadLog() const; + MergeTreeDataFormatVersion format_version; Context & global_context; @@ -957,6 +963,9 @@ private: CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, SpacePtr space); bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason); + + WriteAheadLogPtr write_ahead_log; + // mutable std::mutex wal_mutex; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 8bc871476ed..533378d4af6 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -84,7 +84,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_) sum_bytes_uncompressed += part->getTotalColumnsSize().data_uncompressed; } - auto future_part_type = parts_.front()->storage.choosePartType(sum_bytes_uncompressed, sum_rows); + auto future_part_type = parts_.front()->storage.choosePartTypeOnDisk(sum_bytes_uncompressed, sum_rows); assign(std::move(parts_), future_part_type); } @@ -1039,7 +1039,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor need_remove_expired_values = true; /// All columns from part are changed and may be some more that were missing before in part - if (isCompactPart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList())) + if (!isWidePart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList())) { auto part_indices = getIndicesForNewDataPart(data.skip_indices, for_file_renames); mutateAllPartColumns( @@ -1231,7 +1231,7 @@ void MergeTreeDataMergerMutator::splitMutationCommands( { NameSet removed_columns_from_compact_part; NameSet already_changed_columns; - bool is_compact_part = isCompactPart(part); + bool is_wide_part = isWidePart(part); for (const auto & command : commands) { if (command.type == MutationCommand::Type::DELETE @@ -1257,14 +1257,14 @@ void MergeTreeDataMergerMutator::splitMutationCommands( for_file_renames.push_back(command); } - else if (is_compact_part && command.type == MutationCommand::Type::DROP_COLUMN) + else if (!is_wide_part && command.type == MutationCommand::Type::DROP_COLUMN) { removed_columns_from_compact_part.emplace(command.column_name); for_file_renames.push_back(command); } else if (command.type == MutationCommand::Type::RENAME_COLUMN) { - if (is_compact_part) + if (!is_wide_part) { for_interpreter.push_back( { @@ -1282,7 +1282,7 @@ void MergeTreeDataMergerMutator::splitMutationCommands( } } - if (is_compact_part) + if (!is_wide_part) { /// If it's compact part than we don't need to actually remove files from disk /// we just don't read dropped columns @@ -1558,9 +1558,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( merge_entry->bytes_written_uncompressed += block.bytes(); } - new_data_part->minmax_idx = std::move(minmax_idx); - mutating_stream->readSuffix(); out.writeSuffixAndFinalizePart(new_data_part); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 24d65622fe6..6789b22f01a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -1,7 +1,6 @@ #include "MergeTreeDataPartCompact.h" #include #include -#include #include diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp new file mode 100644 index 00000000000..c154ccef4fe --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -0,0 +1,72 @@ +#include "MergeTreeDataPartInMemory.h" +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( + MergeTreeData & storage_, + const String & name_, + const DiskPtr & disk_, + const std::optional & relative_path_) + : IMergeTreeDataPart(storage_, name_, disk_, relative_path_, Type::IN_MEMORY) +{ +} + +MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( + const MergeTreeData & storage_, + const String & name_, + const MergeTreePartInfo & info_, + const DiskPtr & disk_, + const std::optional & relative_path_) + : IMergeTreeDataPart(storage_, name_, info_, disk_, relative_path_, Type::IN_MEMORY) +{ +} + +IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader( + const NamesAndTypesList & columns_to_read, + const MarkRanges & mark_ranges, + UncompressedCache * /* uncompressed_cache */, + MarkCache * /* mark_cache */, + const MergeTreeReaderSettings & reader_settings, + const ValueSizeMap & /* avg_value_size_hints */, + const ReadBufferFromFileBase::ProfileCallback & /* profile_callback */) const +{ + auto ptr = std::static_pointer_cast(shared_from_this()); + return std::make_unique( + ptr, columns_to_read, mark_ranges, reader_settings); +} + +IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( + const NamesAndTypesList & columns_list, + const std::vector & /* indices_to_recalc */, + const CompressionCodecPtr & /* default_codec */, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & /* computed_index_granularity */) const +{ + auto ptr = std::static_pointer_cast(shared_from_this()); + return std::make_unique(ptr, columns_list, writer_settings); +} + + +void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName & /*each_columns_size*/, ColumnSize & /*total_size*/) const +{ + // throw Exception("calculateEachColumnSizesOnDisk of in memory part", ErrorCodes::NOT_IMPLEMENTED); +} + +void MergeTreeDataPartInMemory::loadIndexGranularity() +{ + throw Exception("loadIndexGranularity of in memory part", ErrorCodes::NOT_IMPLEMENTED); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h new file mode 100644 index 00000000000..9fe0c139626 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +namespace DB +{ + +class MergeTreeDataPartInMemory : public IMergeTreeDataPart +{ +public: + MergeTreeDataPartInMemory( + const MergeTreeData & storage_, + const String & name_, + const MergeTreePartInfo & info_, + const DiskPtr & disk_, + const std::optional & relative_path_ = {}); + + MergeTreeDataPartInMemory( + MergeTreeData & storage_, + const String & name_, + const DiskPtr & disk_, + const std::optional & relative_path_ = {}); + + MergeTreeReaderPtr getReader( + const NamesAndTypesList & columns, + const MarkRanges & mark_ranges, + UncompressedCache * uncompressed_cache, + MarkCache * mark_cache, + const MergeTreeReaderSettings & reader_settings_, + const ValueSizeMap & avg_value_size_hints, + const ReadBufferFromFileBase::ProfileCallback & profile_callback) const override; + + MergeTreeWriterPtr 
getWriter( + const NamesAndTypesList & columns_list, + const std::vector & indices_to_recalc, + const CompressionCodecPtr & default_codec_, + const MergeTreeWriterSettings & writer_settings, + const MergeTreeIndexGranularity & computed_index_granularity) const override; + + bool isStoredOnDisk() const override { return false; } + + bool hasColumnFiles(const String & /* column_name */, const IDataType & /* type */) const override { return true; } + + String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } + + void renameTo(const String & /*new_relative_path*/, bool /*remove_new_dir_if_exists*/) const override {} + + mutable Block block; + +private: + void checkConsistency(bool /* require_part_metadata */) const override {} + + /// Loads marks index granularity into memory + void loadIndexGranularity() override; + + /// Compact parts doesn't support per column size, only total size + void calculateEachColumnSizesOnDisk(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; +}; + +using DataPartInMemoryPtr = std::shared_ptr; + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp new file mode 100644 index 00000000000..3d0d67e1ed6 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -0,0 +1,108 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +MergeTreeDataPartWriterInMemory::MergeTreeDataPartWriterInMemory( + const DataPartInMemoryPtr & part_, + const NamesAndTypesList & columns_list_, + const MergeTreeWriterSettings & settings_) + : IMergeTreeDataPartWriter(part_->storage, columns_list_, settings_) + , part(part_) {} + +void MergeTreeDataPartWriterInMemory::write( + const Block & block, const IColumn::Permutation * permutation, + const Block & primary_key_block, const Block & /* skip_indexes_block */) +{ + if (block_written) + throw Exception("DataPartWriterInMemory supports only one write", ErrorCodes::LOGICAL_ERROR); + + Block result_block; + if (permutation) + { + for (const auto & it : columns_list) + { + if (primary_key_block.has(it.name)) + result_block.insert(primary_key_block.getByName(it.name)); + else + { + auto column = block.getByName(it.name); + column.column = column.column->permute(*permutation, 0); + result_block.insert(column); + } + } + } + else + { + result_block = block; + } + + part->block = std::move(result_block); + block_written = true; +} + +void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Block & primary_index_block) +{ + size_t rows = primary_index_block.rows(); + if (!rows) + return; + + index_granularity.appendMark(rows); + index_granularity.appendMark(0); + + size_t primary_columns_num = primary_index_block.columns(); + index_columns.resize(primary_columns_num); + for (size_t i = 0; i < primary_columns_num; ++i) + { + const auto & primary_column = *primary_index_block.getByPosition(i).column; + index_columns[i] = primary_column.cloneEmpty(); + index_columns[i]->insertFrom(primary_column, 0); + index_columns[i]->insertFrom(primary_column, rows - 1); + } +} + +static MergeTreeDataPartChecksum createUncompressedChecksum(size_t size, SipHash & hash) +{ + MergeTreeDataPartChecksum checksum; + checksum.uncompressed_size = size; + hash.get128(checksum.uncompressed_hash.first, checksum.uncompressed_hash.second); + return checksum; +} + +void 
MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) +{ + UNUSED(checksums); + SipHash hash; + part->block.updateHash(hash); + checksums.files["data.bin"] = createUncompressedChecksum(part->block.bytes(), hash); +} + +void MergeTreeDataPartWriterInMemory::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) +{ + UNUSED(checksums); + if (index_columns.empty()) + return; + + SipHash hash; + size_t index_size = 0; + size_t rows = index_columns[0]->size(); + for (size_t i = 0; i < rows; ++i) + { + for (const auto & col : index_columns) + { + col->updateHashWithValue(i, hash); + index_size += col->byteSize(); + } + } + + checksums.files["primary.idx"] = createUncompressedChecksum(index_size, hash); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h new file mode 100644 index 00000000000..c9b57e5e4b6 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h @@ -0,0 +1,29 @@ +#include +#include + +namespace DB +{ + +/// Writes data part in memory. +class MergeTreeDataPartWriterInMemory : public IMergeTreeDataPartWriter +{ +public: + MergeTreeDataPartWriterInMemory( + const DataPartInMemoryPtr & part_, + const NamesAndTypesList & columns_list_, + const MergeTreeWriterSettings & settings_); + + void write(const Block & block, const IColumn::Permutation * permutation, + const Block & primary_key_block, const Block & skip_indexes_block) override; + + void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) override; + + void calculateAndSerializePrimaryIndex(const Block & primary_index_block) override; + void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) override; + +private: + DataPartInMemoryPtr part; + bool block_written = false; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 816af8db3e9..c468dc07583 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -170,6 +170,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( const unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read) const { + LOG_DEBUG(log, "readFromParts size: " << parts.size()); + size_t part_index = 0; /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 23a60ddab78..c508110a4f0 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -243,16 +243,19 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; - /// The name could be non-unique in case of stale files from previous runs. - String full_path = new_data_part->getFullRelativePath(); - - if (new_data_part->disk->exists(full_path)) + if (new_data_part->isStoredOnDisk()) { - LOG_WARNING(log, "Removing old temporary directory " + fullPath(new_data_part->disk, full_path)); - new_data_part->disk->removeRecursive(full_path); - } + /// The name could be non-unique in case of stale files from previous runs. 
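    // (The block below only runs for parts that have a directory at all: an
    // IN_MEMORY part returns isStoredOnDisk() == false, so no stale temporary
    // directory can exist for it and none needs to be created.)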
+ String full_path = new_data_part->getFullRelativePath(); - new_data_part->disk->createDirectories(full_path); + if (new_data_part->disk->exists(full_path)) + { + LOG_WARNING(log, "Removing old temporary directory " + fullPath(new_data_part->disk, full_path)); + new_data_part->disk->removeRecursive(full_path); + } + + new_data_part->disk->createDirectories(full_path); + } /// If we need to calculate some columns to sort. if (data.hasSortingKey() || data.hasSkipIndices()) diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index f5c57659052..756ffc9f38c 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -17,6 +17,8 @@ struct MergeTreeReaderSettings struct MergeTreeWriterSettings { + MergeTreeWriterSettings() = default; + MergeTreeWriterSettings(const Settings & global_settings, bool can_use_adaptive_granularity_, size_t aio_threshold_, bool blocks_are_granules_size_ = false) : min_compress_block_size(global_settings.min_compress_block_size) @@ -31,6 +33,8 @@ struct MergeTreeWriterSettings bool can_use_adaptive_granularity; bool blocks_are_granules_size; + /// true if we write temporary files during alter. size_t estimated_size = 0; }; + } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index c481140cb84..ed0e7e55fc8 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -72,6 +72,8 @@ size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) con return is_adaptive ? getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide(); else if (type == MergeTreeDataPartType::COMPACT) return getAdaptiveMrkSizeCompact(columns_num); + else if (type == MergeTreeDataPartType::IN_MEMORY) + return 0; else throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); } @@ -88,6 +90,8 @@ std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type) return ".mrk2"; else if (part_type == MergeTreeDataPartType::COMPACT) return ".mrk3"; + else if (part_type == MergeTreeDataPartType::IN_MEMORY) + return ""; else throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); } diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a895149e12e..a63397b9b9c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -102,6 +102,7 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, size_t read_rows = 0; size_t num_columns = columns.size(); + checkNumberOfColumns(num_columns); MutableColumns mutable_columns(num_columns); auto column_it = columns.begin(); diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp new file mode 100644 index 00000000000..8c61a879270 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_READ_ALL_DATA; + extern const int ARGUMENT_OUT_OF_BOUND; +} + + +MergeTreeReaderInMemory::MergeTreeReaderInMemory( + DataPartInMemoryPtr data_part_, + NamesAndTypesList columns_, + MarkRanges mark_ranges_, + MergeTreeReaderSettings settings_) + : IMergeTreeReader(data_part_, std::move(columns_), + nullptr, nullptr, 
std::move(mark_ranges_), + std::move(settings_), {}) + , part_in_memory(std::move(data_part_)) +{ +} + +size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool /* continue_reading */, size_t max_rows_to_read, Columns & res_columns) +{ + size_t total_marks = data_part->index_granularity.getMarksCount(); + if (from_mark >= total_marks) + throw Exception("Mark " + toString(from_mark) + " is out of bound. Max mark: " + + toString(total_marks), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + size_t num_columns = res_columns.size(); + checkNumberOfColumns(num_columns); + + size_t part_rows = part_in_memory->block.rows(); + if (total_rows_read >= part_rows) + throw Exception("Cannot read data in MergeTreeReaderInMemory. Rows already read: " + + toString(total_rows_read) + ". Rows in part: " + toString(part_rows), ErrorCodes::CANNOT_READ_ALL_DATA); + + auto column_it = columns.begin(); + size_t rows_read = 0; + for (size_t i = 0; i < num_columns; ++i, ++column_it) + { + auto [name, type] = getColumnFromPart(*column_it); + if (!part_in_memory->block.has(name)) + continue; + + const auto block_column = part_in_memory->block.getByPosition(i).column; + if (total_rows_read == 0 && part_rows <= max_rows_to_read) + { + res_columns[i] = block_column; + rows_read = part_rows; + } + else + { + if (res_columns[i] == nullptr) + res_columns[i] = type->createColumn(); + + auto mutable_column = res_columns[i]->assumeMutable(); + rows_read = std::min(max_rows_to_read, part_rows - total_rows_read); + mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_read); + res_columns[i] = std::move(mutable_column); + } + } + + total_rows_read += rows_read; + return rows_read; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h new file mode 100644 index 00000000000..6d64801682e --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class MergeTreeDataPartInMemory; +using DataPartInMemoryPtr = std::shared_ptr; + +/// Reader for InMemory parts +class MergeTreeReaderInMemory : public IMergeTreeReader +{ +public: + MergeTreeReaderInMemory( + DataPartInMemoryPtr data_part_, + NamesAndTypesList columns_, + MarkRanges mark_ranges_, + MergeTreeReaderSettings settings_); + + /// Return the number of rows has been read or zero if there is no columns to read. + /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark + size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) override; + + bool canReadIncompleteGranules() const override { return true; } + +private: + size_t total_rows_read = 0; + DataPartInMemoryPtr part_in_memory; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 1a03acb5758..c15ed240b82 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -21,7 +21,6 @@ namespace namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int MEMORY_LIMIT_EXCEEDED; } @@ -61,11 +60,7 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si try { size_t num_columns = columns.size(); - - if (res_columns.size() != num_columns) - throw Exception("invalid number of columns passed to MergeTreeReader::readRows. 
" - "Expected " + toString(num_columns) + ", " - "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR); + checkNumberOfColumns(num_columns); /// Pointers to offset columns that are common to the nested data structure columns. /// If append is true, then the value will be equal to nullptr and will be used only to diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index def01b192d5..4228ca2b472 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -36,8 +36,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( data_part{owned_data_part_}, all_mark_ranges(std::move(mark_ranges_)), part_index_in_query(part_index_in_query_), - check_columns(check_columns_), - path(data_part->getFullRelativePath()) + check_columns(check_columns_) { /// Let's estimate total number of rows for progress bar. for (const auto & range : all_mark_ranges) diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 4c64bfb6a18..c1c0cd6f782 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -64,7 +64,6 @@ private: size_t part_index_in_query = 0; bool check_columns; - String path; bool is_first_task = true; Logger * log = &Logger::get("MergeTreeSelectProcessor"); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 02c852b4f4b..f3f0e5a05d3 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -31,6 +31,9 @@ struct MergeTreeSettings : public SettingsCollection /** Data storing format settigns. */ \ M(SettingUInt64, min_bytes_for_wide_part, 0, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ M(SettingUInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ + M(SettingUInt64, min_bytes_for_compact_part, 0, "Minimal uncompressed size in bytes to create part in compact format instead of saving it in RAM", 0) \ + M(SettingUInt64, min_rows_for_compact_part, 0, "Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ + M(SettingBool, in_memory_parts_enable_wal, 0, "", 0) \ \ /** Merge settings. */ \ M(SettingUInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index aa8c550839d..d8784843e56 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -57,7 +57,7 @@ bool MergeTreeThreadSelectBlockInputProcessor::getNewTask() return false; } - const std::string path = task->data_part->getFullRelativePath(); + const std::string part_name = task->data_part->name; /// Allows pool to reduce number of threads in case of too slow reads. 
     /// Allows pool to reduce number of threads in case of too slow reads.
     auto profile_callback = [this](ReadBufferFromFileBase::ProfileInfo info_) { pool->profileFeedback(info_); };
 
@@ -82,7 +82,7 @@ bool MergeTreeThreadSelectBlockInputProcessor::getNewTask()
     else
     {
         /// Otherwise we can reuse readers; they will be seeked to the required mark anyway.
-        if (path != last_readed_part_path)
+        if (part_name != last_readed_part_name)
         {
             auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]);
             /// retain avg_value_size_hints
@@ -97,7 +97,7 @@
         }
     }
 
-    last_readed_part_path = path;
+    last_readed_part_name = part_name;
 
     return true;
 }
diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
index e214696b705..01b227de19c 100644
--- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
+++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h
@@ -43,7 +43,7 @@ private:
     size_t min_marks_to_read;
 
     /// Last part read in this thread
-    std::string last_readed_part_path;
+    std::string last_readed_part_name;
     /// Names from header. Used in order to order columns in read blocks.
     Names ordered_names;
 };
diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
new file mode 100644
index 00000000000..d874a10d3f2
--- /dev/null
+++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
@@ -0,0 +1,104 @@
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_FORMAT_VERSION;
+}
+
+// WALBlockOutputStream::WALBlockOutputStream(WriteBuffer & out_, const Block & header_)
+//     : NativeBlockOutputStream(out_, 0, header_), out(out_) {}
+
+// void WALBlockOutputStream::write(const Block & block, const String & part_name)
+// {
+//     writeIntBinary(0, out);
+//     writeString(part_name, out);
+//     NativeBlockOutputStream::write(block);
+// }
+
+MergeTreeWriteAheadLog::MergeTreeWriteAheadLog(
+    const MergeTreeData & storage_,
+    const DiskPtr & disk_,
+    const String & name)
+    : storage(storage_)
+    , disk(disk_)
+    , path(storage.getFullPathOnDisk(disk) + name)
+    , out(disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append))
+    , block_out(std::make_unique(*out, 0, storage.getSampleBlock())) {}
+
+
+void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name)
+{
+    std::lock_guard lock(write_mutex);
+
+    auto part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version);
+    min_block_number = std::min(min_block_number, part_info.min_block);
+    max_block_number = std::max(max_block_number, part_info.max_block);
+
+    writeIntBinary(static_cast(0), *out); /// version
+    writeStringBinary(part_name, *out);
+    block_out->write(block);
+    block_out->flush();
+
+    if (out->count() > MAX_WAL_BYTES)
+        rotate(lock);
+}
+
+void MergeTreeWriteAheadLog::rotate(const std::lock_guard & /*write_lock*/)
+{
+    String new_name = String(WAL_FILE_NAME) + "_"
+        + toString(min_block_number) + "_"
+        + toString(max_block_number) + WAL_FILE_EXTENSION;
+
+    Poco::File(path).renameTo(storage.getFullPathOnDisk(disk) + new_name);
+    out = disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append);
+    block_out = std::make_unique(*out, 0, storage.getSampleBlock());
+    min_block_number = std::numeric_limits::max();
+    max_block_number = 0;
+}
+
+MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore()
+{
+    std::lock_guard lock(write_mutex);
+
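    /// WAL record layout, matching write() above:
    ///     UInt8   version    -- format version, only 0 is recognized below
    ///     String  part_name  -- length-prefixed (writeStringBinary)
    ///     Block              -- the inserted data in Native format
    /// Records are appended until the file grows past MAX_WAL_BYTES; rotate()
    /// then renames the file to wal_<min_block>_<max_block>.bin and starts a
    /// fresh one. restore() below replays every record, recreating each block
    /// as an in-memory part.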
MergeTreeData::MutableDataPartsVector result; + auto in = disk->readFile(path, DBMS_DEFAULT_BUFFER_SIZE); + NativeBlockInputStream block_in(*in, 0); + + while (!in->eof()) + { + UInt8 version; + String part_name; + readIntBinary(version, *in); + if (version != 0) + throw Exception("Unknown WAL format version: " + toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION); + + readStringBinary(part_name, *in); + auto part = storage.createPart( + part_name, + MergeTreeDataPartType::IN_MEMORY, + MergeTreePartInfo::fromPartName(part_name, storage.format_version), + storage.reserveSpace(0)->getDisk(), + part_name); + + auto block = block_in.read(); + + part->minmax_idx.update(block, storage.minmax_idx_columns); + MergedBlockOutputStream part_out(part, block.getNamesAndTypesList(), {}, nullptr); + part_out.writePrefix(); + part_out.write(block); + part_out.writeSuffixAndFinalizePart(part); + + result.push_back(std::move(part)); + } + + return result; +} + +} diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h new file mode 100644 index 00000000000..2014fba18de --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -0,0 +1,56 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +// class WALBlockOutputStream : public NativeBlockOutputStream +// { +// public: +// WALBlockOutputStream(WriteBuffer & out_, const Block & header_); +// void write(const Block & block, const String & part_name); + +// private: +// WriteBuffer & out; +// }; + +// class WALBlockInputStream : public NativeBlockInputStream +// { +// }; + +class MergeTreeData; + +class MergeTreeWriteAheadLog +{ +public: + constexpr static auto WAL_FILE_NAME = "wal"; + constexpr static auto WAL_FILE_EXTENSION = ".bin"; + constexpr static size_t MAX_WAL_BYTES = 1024; + + MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, + const String & name = String(WAL_FILE_NAME) + WAL_FILE_EXTENSION); + + void write(const Block & block, const String & part_name); + std::vector> restore(); + +private: + void rotate(const std::lock_guard & write_lock); + + const MergeTreeData & storage; + DiskPtr disk; + String path; + + std::unique_ptr out; + std::unique_ptr block_out; + + Int64 min_block_number = std::numeric_limits::max(); + Int64 max_block_number = 0; + + mutable std::mutex write_mutex; +}; + +} diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f10c1b8d533..0a39a66d7ba 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -36,8 +36,11 @@ MergedBlockOutputStream::MergedBlockOutputStream( : IMergedBlockOutputStream(data_part) , columns_list(columns_list_) { - MergeTreeWriterSettings writer_settings(data_part->storage.global_context.getSettings(), - data_part->storage.canUseAdaptiveGranularity(), aio_threshold, blocks_are_granules_size); + MergeTreeWriterSettings writer_settings( + storage.global_context.getSettings(), + storage.canUseAdaptiveGranularity(), + aio_threshold, + blocks_are_granules_size); if (aio_threshold > 0 && !merged_column_to_size.empty()) { @@ -49,7 +52,8 @@ MergedBlockOutputStream::MergedBlockOutputStream( } } - disk->createDirectories(part_path); + if (!part_path.empty()) + disk->createDirectories(part_path); writer = data_part->getWriter(columns_list, skip_indices, default_codec, writer_settings); writer->initPrimaryIndex(); @@ -107,7 +111,7 @@ void 
MergedBlockOutputStream::writeSuffixAndFinalizePart( new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); - new_part->calculateColumnsSizesOnDisk(); + // new_part->calculateColumnsSizesOnDisk(); // TODO: Fix } void MergedBlockOutputStream::finalizePartOnDisk( @@ -165,7 +169,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm return; std::unordered_set skip_indexes_column_names_set; - for (const auto & index : storage.skip_indices) + for (const auto & index : writer->getSkipIndices()) std::copy(index->columns.cbegin(), index->columns.cend(), std::inserter(skip_indexes_column_names_set, skip_indexes_column_names_set.end())); Names skip_indexes_column_names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 8abc02e55e0..9097f78b749 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -60,7 +60,7 @@ private: private: NamesAndTypesList columns_list; - + IMergeTreeDataPart::MinMaxIndex minmax_idx; size_t rows_count = 0; }; From 4069dbcc58f86e25c0e5ea4b255dc5463a12d5c4 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 20 Apr 2020 04:38:38 +0300 Subject: [PATCH 0064/2229] in-memory parts: add waiting for insert --- src/Core/Settings.h | 2 ++ src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 +++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 4 ++++ .../MergeTree/MergeTreeBlockOutputStream.cpp | 12 ++++++++++++ src/Storages/MergeTree/MergeTreeBlockOutputStream.h | 5 +++-- src/Storages/MergeTree/MergeTreeData.h | 1 + src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 11 +++++++++++ src/Storages/MergeTree/MergeTreeDataPartInMemory.h | 5 +++++ src/Storages/MergeTree/MergeTreeSettings.h | 1 + src/Storages/StorageMergeTree.cpp | 9 ++++++++- 10 files changed, 52 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 325abc16f3f..b38a05e73ea 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -89,6 +89,8 @@ struct Settings : public SettingsCollection \ M(SettingBool, optimize_move_to_prewhere, true, "Allows disabling WHERE to PREWHERE optimization in SELECT queries from MergeTree.", 0) \ \ + M(SettingMilliseconds, insert_in_memory_parts_timeout, 600000, "", 0) \ + \ M(SettingUInt64, replication_alter_partitions_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) \ M(SettingUInt64, replication_alter_columns_timeout, 60, "Wait for actions to change the table structure within the specified number of seconds. 
0 - wait unlimited time.", 0) \
     \
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 01dd6d5da47..57bc040ab45 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -914,4 +914,9 @@ bool isWidePart(const MergeTreeDataPartPtr & data_part)
     return (data_part && data_part->getType() == MergeTreeDataPartType::WIDE);
 }
 
+bool isInMemoryPart(const MergeTreeDataPartPtr & data_part)
+{
+    return (data_part && data_part->getType() == MergeTreeDataPartType::IN_MEMORY);
+}
+
 }
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 32c5e6737b0..b1fb2554c76 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -91,6 +91,9 @@ public:
 
     virtual bool supportsVerticalMerge() const { return false; }
 
+    virtual bool waitUntilMerged(size_t /* timeout */) const { return true; }
+    virtual void notifyMerged() const {}
+
     /// NOTE: Returns zeros if column files are not found in checksums.
     /// Otherwise return information about column size on disk.
     ColumnSize getColumnSize(const String & column_name, const IDataType & /* type */) const;
@@ -354,5 +357,6 @@ using MergeTreeDataPartPtr = std::shared_ptr;
 
 bool isCompactPart(const MergeTreeDataPartPtr & data_part);
 bool isWidePart(const MergeTreeDataPartPtr & data_part);
+bool isInMemoryPart(const MergeTreeDataPartPtr & data_part);
 
 }
diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp
index be3caf98ad4..4f9500f973e 100644
--- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp
@@ -6,6 +6,11 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int TIMEOUT_EXCEEDED;
+}
+
 Block MergeTreeBlockOutputStream::getHeader() const
 {
     return storage.getSampleBlock();
@@ -26,6 +31,13 @@ void MergeTreeBlockOutputStream::write(const Block & block)
 
         PartLog::addNewPart(storage.global_context, part, watch.elapsed());
 
+        if (isInMemoryPart(part) && storage.getSettings()->in_memory_parts_insert_sync)
+        {
+            if (!part->waitUntilMerged(in_memory_parts_timeout))
+                throw Exception("Timeout exceeded while waiting to write part "
+                    + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED);
+        }
+
         /// Initiate async merge - it will be done if it's a good time for a merge and if there is space in 'background_pool'.
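The timeout above is backed by a condition variable that lives in MergeTreeDataPartInMemory: waitUntilMerged() blocks under the parts lock until the part's state becomes Outdated, and StorageMergeTree::merge() calls notifyMerged() for every source part once the merge has written the data to disk (both hunks appear later in this commit). A self-contained sketch of the same handshake, with an explicit boolean standing in for the part state; the struct and names are illustrative, not the actual ClickHouse API:

#include <chrono>
#include <condition_variable>
#include <mutex>

struct InMemoryPartSync
{
    std::mutex mutex;
    std::condition_variable is_merged;
    bool merged = false;

    /// Returns false if the part was not persisted within the timeout.
    bool waitUntilMerged(size_t timeout_ms)
    {
        std::unique_lock<std::mutex> lock(mutex);
        return is_merged.wait_for(lock, std::chrono::milliseconds(timeout_ms),
                                  [this] { return merged; });
    }

    /// Called by the background merge once the part's data is on disk.
    void notifyMerged()
    {
        {
            std::lock_guard<std::mutex> lock(mutex);
            merged = true;
        }
        is_merged.notify_all();
    }
};

The patch itself uses notify_one(), which is sufficient while at most one INSERT waits per part; the sketch uses notify_all() to stay correct with several waiters. The background merge that eventually fires notifyMerged() is woken up right below.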
if (storage.merging_mutating_task_handle) storage.merging_mutating_task_handle->wake(); diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.h b/src/Storages/MergeTree/MergeTreeBlockOutputStream.h index 8f957d631d3..d91794bc50e 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.h +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.h @@ -13,8 +13,8 @@ class StorageMergeTree; class MergeTreeBlockOutputStream : public IBlockOutputStream { public: - MergeTreeBlockOutputStream(StorageMergeTree & storage_, size_t max_parts_per_block_) - : storage(storage_), max_parts_per_block(max_parts_per_block_) {} + MergeTreeBlockOutputStream(StorageMergeTree & storage_, size_t max_parts_per_block_, size_t in_memory_parts_timeout_) + : storage(storage_), max_parts_per_block(max_parts_per_block_), in_memory_parts_timeout(in_memory_parts_timeout_) {} Block getHeader() const override; void write(const Block & block) override; @@ -22,6 +22,7 @@ public: private: StorageMergeTree & storage; size_t max_parts_per_block; + size_t in_memory_parts_timeout; }; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 243156dab94..387647c5db3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index c154ccef4fe..41b35757ed8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -58,6 +58,17 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( return std::make_unique(ptr, columns_list, writer_settings); } +bool MergeTreeDataPartInMemory::waitUntilMerged(size_t timeout) const +{ + auto lock = storage.lockParts(); + return is_merged.wait_for(lock, std::chrono::milliseconds(timeout), + [this]() { return state == State::Outdated; }); +} + +void MergeTreeDataPartInMemory::notifyMerged() const +{ + is_merged.notify_one(); +} void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName & /*each_columns_size*/, ColumnSize & /*total_size*/) const { diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 9fe0c139626..29c01805529 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -45,9 +45,14 @@ public: void renameTo(const String & /*new_relative_path*/, bool /*remove_new_dir_if_exists*/) const override {} + bool waitUntilMerged(size_t timeout) const override; + void notifyMerged() const override; + mutable Block block; private: + mutable std::condition_variable is_merged; + void checkConsistency(bool /* require_part_metadata */) const override {} /// Loads marks index granularity into memory diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index f3f0e5a05d3..5544d267f89 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -34,6 +34,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, min_bytes_for_compact_part, 0, "Minimal uncompressed size in bytes to create part in compact format instead of saving it in RAM", 0) \ M(SettingUInt64, min_rows_for_compact_part, 0, "Minimal number of rows to create part in compact format instead of saving it 
in RAM", 0) \ M(SettingBool, in_memory_parts_enable_wal, 0, "", 0) \ + M(SettingBool, in_memory_parts_insert_sync, 0, "", 0) \ \ /** Merge settings. */ \ M(SettingUInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \ diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 1aac6717728..3a2bdf83903 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -158,7 +159,10 @@ std::optional StorageMergeTree::totalBytes() const BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const Context & context) { - return std::make_shared(*this, context.getSettingsRef().max_partitions_per_insert_block); + const auto & settings = context.getSettingsRef(); + return std::make_shared( + *this, settings.max_partitions_per_insert_block, + settings.insert_in_memory_parts_timeout.totalMilliseconds()); } void StorageMergeTree::checkTableCanBeDropped() const @@ -630,6 +634,9 @@ bool StorageMergeTree::merge( throw; } + for (const auto & part : future_part.parts) + part->notifyMerged(); + return true; } From 354325e0cf3cd05f6088ce017fbe904b5a56fff6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 22 Apr 2020 09:23:25 +0300 Subject: [PATCH 0065/2229] checksums --- dbms/src/Dictionaries/SSDCacheDictionary.cpp | 47 ++++++++++++++++---- dbms/src/Dictionaries/SSDCacheDictionary.h | 6 +-- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.cpp b/dbms/src/Dictionaries/SSDCacheDictionary.cpp index 3b6fea18567..32fb3171737 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.cpp +++ b/dbms/src/Dictionaries/SSDCacheDictionary.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace ProfileEvents { @@ -61,6 +62,7 @@ namespace ErrorCodes extern const int TOO_SMALL_BUFFER_SIZE; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; + extern const int CORRUPTED_DATA; } namespace @@ -74,6 +76,7 @@ namespace constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000; constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t BLOCK_CHECKSUM_SIZE = 8; constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); @@ -248,11 +251,12 @@ size_t CachePartition::appendBlock( if (!write_buffer) { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); - uint32_t tmp = 0; + uint64_t tmp = 0; + write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); keys_in_block = 0; - // codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); - // compressed_buffer.emplace(*write_buffer, codec); + //codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); + //compressed_buffer.emplace(*write_buffer, codec); // hashing_buffer.emplace(*compressed_buffer); } @@ -269,6 +273,9 @@ size_t CachePartition::appendBlock( if (sizeof(UInt64) > write_buffer->available()) { write_buffer.reset(); + std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count + uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum + 
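/// Block layout after this commit: bytes [0, 8) hold a CityHash64 checksum of the
/// rest of the block, bytes [8, 12) hold the number of keys in the block
/// (BLOCK_CHECKSUM_SIZE and BLOCK_SPECIAL_FIELDS_SIZE above), and the
/// key/attribute records follow. The key count was patched in just above; the
/// memcpy below stores the checksum computed over everything past the first 8 bytes.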
std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); if (++current_memory_block_id == write_buffer_size) flush(); flushed = true; @@ -291,7 +298,9 @@ size_t CachePartition::appendBlock( if (sizeof(TYPE) > write_buffer->available()) \ { \ write_buffer.reset(); \ - std::memcpy(memory->data() + block_size * current_memory_block_id, &keys_in_block, sizeof(keys_in_block)); /* set count */ \ + std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); /* set count */ \ + uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); /* checksum */ \ + std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); \ if (++current_memory_block_id == write_buffer_size) \ flush(); \ flushed = true; \ @@ -328,7 +337,9 @@ size_t CachePartition::appendBlock( if (sizeof(UInt64) + value.size() > write_buffer->available()) { write_buffer.reset(); - std::memcpy(memory->data() + block_size * current_memory_block_id, &keys_in_block, sizeof(keys_in_block)); // set count + std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count + uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum + std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); if (++current_memory_block_id == write_buffer_size) flush(); flushed = true; @@ -354,7 +365,8 @@ size_t CachePartition::appendBlock( else //if (current_file_block_id < max_size) // next block in write buffer or flushed to ssd { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); - uint32_t tmp = 0; + uint64_t tmp = 0; + write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); keys_in_block = 0; } @@ -540,6 +552,7 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, template void CachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const { + // Do not check checksum while reading from memory. for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; @@ -637,6 +650,16 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_READ_ERROR); + + uint64_t checksum = 0; + ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); + readBinary(checksum, buf_special); + uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(reinterpret_cast(request.aio_buf) + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); + if (checksum != calculated_checksum) + { + throw Exception("Cache data corrupted. 
From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); + } + for (const size_t idx : blocks_to_indices[request_id]) { const auto & [file_index, out_index] = index_to_out[idx]; @@ -718,13 +741,21 @@ void CachePartition::clearOldestBlocks() for (size_t i = 0; i < write_buffer_size; ++i) { ReadBufferFromMemory read_buffer(read_buffer_memory.data() + i * block_size, block_size); + + uint64_t checksum = 0; + readBinary(checksum, read_buffer); + uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(read_buffer_memory.data() + i * block_size + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); + if (checksum != calculated_checksum) + { + throw Exception("Cache data corrupted. From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); + } + uint32_t keys_in_current_block = 0; readBinary(keys_in_current_block, read_buffer); Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); for (uint32_t j = 0; j < keys_in_current_block; ++j) { - //Poco::Logger::get("GC").information(std::to_string(j) + " " + std::to_string(read_buffer.offset())); keys.emplace_back(); readBinary(keys.back(), read_buffer); @@ -756,10 +787,8 @@ void CachePartition::clearOldestBlocks() case AttributeUnderlyingType::utString: { - //Poco::Logger::get("GC").information("read string"); size_t size = 0; readVarUInt(size, read_buffer); - //Poco::Logger::get("GC").information("read string " + std::to_string(size)); read_buffer.ignore(size); } break; diff --git a/dbms/src/Dictionaries/SSDCacheDictionary.h b/dbms/src/Dictionaries/SSDCacheDictionary.h index 305418ed7b7..93758e9182f 100644 --- a/dbms/src/Dictionaries/SSDCacheDictionary.h +++ b/dbms/src/Dictionaries/SSDCacheDictionary.h @@ -254,9 +254,9 @@ private: std::optional> memory; std::optional write_buffer; uint32_t keys_in_block = 0; - // std::optional compressed_buffer; - // std::optional hashing_buffer; - // CompressionCodecPtr codec; + //std::optional compressed_buffer; + //std::optional hashing_buffer; + //CompressionCodecPtr codec; size_t current_memory_block_id = 0; size_t current_file_block_id = 0; From b01ea01e8727b8aeb6e1ca8f6ed6ff722a0c46e6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 22 Apr 2020 20:27:35 +0300 Subject: [PATCH 0066/2229] metadata on ssd --- src/Dictionaries/SSDCacheDictionary.cpp | 258 +++++++++++++----------- src/Dictionaries/SSDCacheDictionary.h | 12 +- 2 files changed, 140 insertions(+), 130 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 32fb3171737..a46d18fbd0a 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -218,29 +218,25 @@ CachePartition::~CachePartition() size_t CachePartition::appendDefaults( const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin) { - std::unique_lock lock(rw_lock); + /*std::unique_lock lock(rw_lock); const auto & ids = std::get>(new_keys.values); for (size_t index = begin; index < ids.size(); ++index) { - //auto & index_and_metadata = key_to_index_and_metadata[ids[index]]; IndexAndMetadata index_and_metadata; index_and_metadata.metadata = metadata[index]; index_and_metadata.metadata.setDefault(); key_to_index_and_metadata.set(ids[index], index_and_metadata); } - - return ids.size() - begin; + */ + return 
appendBlock(new_keys, Attributes{}, metadata, begin); } size_t CachePartition::appendBlock( const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); - //if (current_file_block_id >= max_size) - // return 0; - - if (new_attributes.size() != attributes_structure.size()) + if (!new_attributes.empty() && new_attributes.size() != attributes_structure.size()) throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; const auto & ids = std::get>(new_keys.values); @@ -248,18 +244,34 @@ size_t CachePartition::appendBlock( if (!memory) memory.emplace(block_size * write_buffer_size, BUFFER_ALIGNMENT); - if (!write_buffer) - { + + auto init_write_buffer = [&]() { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); uint64_t tmp = 0; write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); keys_in_block = 0; + }; + + if (!write_buffer) + { + init_write_buffer(); //codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); //compressed_buffer.emplace(*write_buffer, codec); // hashing_buffer.emplace(*compressed_buffer); } + bool flushed = false; + auto finish_block = [&]() { + write_buffer.reset(); + std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count + uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum + std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); + if (++current_memory_block_id == write_buffer_size) + flush(); + flushed = true; + }; + for (size_t index = begin; index < ids.size();) { IndexAndMetadata index_and_metadata; @@ -268,28 +280,21 @@ size_t CachePartition::appendBlock( index_and_metadata.index.setAddressInBlock(write_buffer->offset()); index_and_metadata.metadata = metadata[index]; - bool flushed = false; - - if (sizeof(UInt64) > write_buffer->available()) + flushed = false; + if (2 * sizeof(UInt64) > write_buffer->available()) // place for key and metadata { - write_buffer.reset(); - std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count - uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum - std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); - if (++current_memory_block_id == write_buffer_size) - flush(); - flushed = true; + finish_block(); } else { writeBinary(ids[index], *write_buffer); + writeBinary(metadata[index].data, *write_buffer); } for (const auto & attribute : new_attributes) { if (flushed) break; - // TODO:: переделать через столбцы + getDataAt switch (attribute.type) { #define DISPATCH(TYPE) \ @@ -297,13 +302,7 @@ size_t CachePartition::appendBlock( { \ if (sizeof(TYPE) > write_buffer->available()) \ { \ - write_buffer.reset(); \ - std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); /* set count */ \ - uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); 
/* checksum */ \ - std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); \ - if (++current_memory_block_id == write_buffer_size) \ - flush(); \ - flushed = true; \ + finish_block(); \ continue; \ } \ else \ @@ -336,13 +335,7 @@ size_t CachePartition::appendBlock( const auto & value = std::get>(attribute.values)[index]; if (sizeof(UInt64) + value.size() > write_buffer->available()) { - write_buffer.reset(); - std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count - uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum - std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); - if (++current_memory_block_id == write_buffer_size) - flush(); - flushed = true; + finish_block(); continue; } else @@ -356,28 +349,15 @@ size_t CachePartition::appendBlock( if (!flushed) { - //key_to_index_and_metadata[ids[index]] = index_and_metadata; key_to_index_and_metadata.set(ids[index], index_and_metadata); ids_buffer.push_back(ids[index]); ++index; ++keys_in_block; } - else //if (current_file_block_id < max_size) // next block in write buffer or flushed to ssd + else // next block in write buffer or flushed to ssd { - write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); - uint64_t tmp = 0; - write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); - write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); - keys_in_block = 0; + init_write_buffer(); } - /*else // flushed to ssd, end of current file - { - //write_buffer.emplace(memory->data() + current_memory_block_id * block_size + BLOCK_SPECIAL_FIELDS_SIZE, block_size - BLOCK_SPECIAL_FIELDS_SIZE); - keys_in_block = 0; - //clearOldestBlocks(); - memory.reset(); - return index - begin; - }*/ } return ids.size() - begin; } @@ -480,16 +460,27 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray { auto set_value = [&](const size_t index, ReadBuffer & buf) { - ignoreFromBufferToAttributeIndex(attribute_index, buf); - readBinary(out[index], buf); + buf.ignore(sizeof(Key)); // key + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) { + if (metadata.isDefault()) { + out[index] = get_default(index); + } else { + ignoreFromBufferToAttributeIndex(attribute_index, buf); + readBinary(out[index], buf); + } + found[index] = true; + } }; - auto set_default = [&](const size_t index) + /*auto set_default = [&](const size_t index) { out[index] = get_default(index); - }; + };*/ - getImpl(ids, set_value, set_default, found, now); + getImpl(ids, set_value, found); } void CachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, @@ -498,26 +489,55 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra { auto set_value = [&](const size_t index, ReadBuffer & buf) { - ignoreFromBufferToAttributeIndex(attribute_index, buf); - size_t size = 0; - readVarUInt(size, buf); - char * string_ptr = arena.alloc(size); - memcpy(string_ptr, buf.position(), size); - refs[index].data = string_ptr; - refs[index].size = size; + buf.ignore(sizeof(Key)); // key + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) { + if (metadata.isDefault()) { + default_ids.push_back(index); + } else { + 
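/// Record layout after this commit: the UInt64 key, then the Metadata word
/// (expiration timestamp plus the is-default flag, packed via the KEY_METADATA_*
/// masks), then the attribute values in declaration order. The key was skipped
/// and the metadata consumed above, so only the attributes that precede
/// attribute_index are left to skip here.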
ignoreFromBufferToAttributeIndex(attribute_index, buf); + size_t size = 0; + readVarUInt(size, buf); + char * string_ptr = arena.alloc(size); + memcpy(string_ptr, buf.position(), size); + refs[index].data = string_ptr; + refs[index].size = size; + } + found[index] = true; + } }; - auto set_default = [&](const size_t index) + /*auto set_default = [&](const size_t index) { + buf.ignore(sizeof(UInt64)); // key default_ids.push_back(index); - }; + };*/ - getImpl(ids, set_value, set_default, found, now); + getImpl(ids, set_value, found); } -template -void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, SetDefault & set_default, +void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + buf.ignore(sizeof(Key)); // key + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) { + out[index] = !metadata.isDefault(); + } + }; + + getImpl(ids, set_value, found); +} + +template +void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, + std::vector & found) const { std::shared_lock lock(rw_lock); PaddedPODArray indices(ids.size()); @@ -528,16 +548,15 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, { indices[i].setNotExists(); } - else if (key_to_index_and_metadata.get(ids[i], index_and_metadata) && index_and_metadata.metadata.expiresAt() > now) + else if (key_to_index_and_metadata.get(ids[i], index_and_metadata)/* && index_and_metadata.metadata.expiresAt() > now*/) { - if (unlikely(index_and_metadata.metadata.isDefault())) + /*if (unlikely(index_and_metadata.metadata.isDefault())) { indices[i].setNotExists(); - set_default(i); + //set_default(i); } - else - indices[i] = index_and_metadata.index; - found[i] = true; + else*/ + indices[i] = index_and_metadata.index; } else { @@ -758,40 +777,45 @@ void CachePartition::clearOldestBlocks() { keys.emplace_back(); readBinary(keys.back(), read_buffer); - - for (size_t attr = 0; attr < attributes_structure.size(); ++attr) + Metadata metadata; + readBinary(metadata.data, read_buffer); + + if (!metadata.isDefault()) { - - switch (attributes_structure[attr]) + for (size_t attr = 0; attr < attributes_structure.size(); ++attr) { - #define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - read_buffer.ignore(sizeof(TYPE)); \ - break; - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) - #undef DISPATCH - - case AttributeUnderlyingType::utString: + switch (attributes_structure[attr]) { - size_t size = 0; - readVarUInt(size, read_buffer); - read_buffer.ignore(size); + #define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + read_buffer.ignore(sizeof(TYPE)); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) + #undef DISPATCH + + case AttributeUnderlyingType::utString: + { + size_t size = 0; + readVarUInt(size, read_buffer); + read_buffer.ignore(size); + } + break; } - break; } } } @@ -813,7 +837,7 @@ void CachePartition::clearOldestBlocks() void 
CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const { - buf.ignore(sizeof(UInt64)); + //buf.ignore(2 * sizeof(UInt64)); // key and metadata for (size_t i = 0; i < attribute_index; ++i) { switch (attributes_structure[i]) @@ -850,23 +874,6 @@ void CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_ind } } -void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const -{ - std::shared_lock lock(rw_lock); - for (size_t i = 0; i < ids.size(); ++i) - { - IndexAndMetadata index_and_metadata; - if (!key_to_index_and_metadata.get(ids[i], index_and_metadata) || index_and_metadata.metadata.expiresAt() <= now) - { - out[i] = HAS_NOT_FOUND; - } - else - { - out[i] = !index_and_metadata.metadata.isDefault(); - } - } -} - size_t CachePartition::getId() const { return file_id; @@ -884,13 +891,13 @@ size_t CachePartition::getElementCount() const return key_to_index_and_metadata.size(); } -PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point now) const +PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; PaddedPODArray array; for (const auto & [key, index_and_metadata] : key_to_index_and_metadata) - if (!index_and_metadata.second.metadata.isDefault() && index_and_metadata.second.metadata.expiresAt() > now) + if (!index_and_metadata.second.metadata.isDefault() /* && index_and_metadata.second.metadata.expiresAt() > now */) array.push_back(key); return array; } @@ -974,15 +981,20 @@ void CacheStorage::getString(const size_t attribute_index, const PaddedPODArray< void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { + for (size_t i = 0; i < ids.size(); ++i) + out[i] = HAS_NOT_FOUND; + std::vector found(ids.size(), false); + { std::shared_lock lock(rw_lock); for (auto & partition : partitions) - partition->has(ids, out, now); + partition->has(ids, out, found, now); for (size_t i = 0; i < ids.size(); ++i) if (out[i] == HAS_NOT_FOUND) not_found[ids[i]].push_back(i); } + query_count.fetch_add(ids.size(), std::memory_order_relaxed); hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 93758e9182f..650e6f3666e 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -168,7 +168,8 @@ public: StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::vector & default_ids, std::chrono::system_clock::time_point now) const; - void has(const PaddedPODArray & ids, ResultArrayType & out, std::chrono::system_clock::time_point now) const; + void has(const PaddedPODArray & ids, ResultArrayType & out, + std::vector & found, std::chrono::system_clock::time_point now) const; struct Attribute { @@ -215,9 +216,8 @@ public: size_t getElementCount() const; private: - template - void getImpl(const PaddedPODArray & ids, SetFunc & set, SetDefault & set_default, - std::vector & found, std::chrono::system_clock::time_point now) const; + template + void getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found) const; template void getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const; @@ -245,17 +245,15 @@ private: Metadata 
metadata{}; }; - //mutable std::unordered_map key_to_index_and_metadata; mutable CLRUCache key_to_index_and_metadata; Attribute keys_buffer; + //std::vector metadata_buffer; const std::vector attributes_structure; std::optional> memory; std::optional write_buffer; uint32_t keys_in_block = 0; - //std::optional compressed_buffer; - //std::optional hashing_buffer; //CompressionCodecPtr codec; size_t current_memory_block_id = 0; From 04347a94d4c09ff76d605f3d59d373279c135818 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Wed, 22 Apr 2020 21:00:08 +0300 Subject: [PATCH 0067/2229] get rid of metadata in ram --- src/Dictionaries/SSDCacheDictionary.cpp | 81 ++++++------------------- src/Dictionaries/SSDCacheDictionary.h | 10 +-- 2 files changed, 21 insertions(+), 70 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index a46d18fbd0a..4ec24b452bd 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -181,7 +181,7 @@ CachePartition::CachePartition( , write_buffer_size(write_buffer_size_) , max_stored_keys(max_stored_keys_) , path(dir_path + "/" + std::to_string(file_id)) - , key_to_index_and_metadata(max_stored_keys) + , key_to_index(max_stored_keys) , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; @@ -218,17 +218,6 @@ CachePartition::~CachePartition() size_t CachePartition::appendDefaults( const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin) { - /*std::unique_lock lock(rw_lock); - - const auto & ids = std::get>(new_keys.values); - for (size_t index = begin; index < ids.size(); ++index) - { - IndexAndMetadata index_and_metadata; - index_and_metadata.metadata = metadata[index]; - index_and_metadata.metadata.setDefault(); - key_to_index_and_metadata.set(ids[index], index_and_metadata); - } - */ return appendBlock(new_keys, Attributes{}, metadata, begin); } @@ -257,8 +246,6 @@ size_t CachePartition::appendBlock( { init_write_buffer(); //codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); - //compressed_buffer.emplace(*write_buffer, codec); - // hashing_buffer.emplace(*compressed_buffer); } bool flushed = false; @@ -274,11 +261,10 @@ size_t CachePartition::appendBlock( for (size_t index = begin; index < ids.size();) { - IndexAndMetadata index_and_metadata; - index_and_metadata.index.setInMemory(true); - index_and_metadata.index.setBlockId(current_memory_block_id); - index_and_metadata.index.setAddressInBlock(write_buffer->offset()); - index_and_metadata.metadata = metadata[index]; + Index cache_index; + cache_index.setInMemory(true); + cache_index.setBlockId(current_memory_block_id); + cache_index.setAddressInBlock(write_buffer->offset()); flushed = false; if (2 * sizeof(UInt64) > write_buffer->available()) // place for key and metadata @@ -331,7 +317,6 @@ size_t CachePartition::appendBlock( case AttributeUnderlyingType::utString: { - //LOG_DEBUG(&Poco::Logger::get("kek"), "string write"); const auto & value = std::get>(attribute.values)[index]; if (sizeof(UInt64) + value.size() > write_buffer->available()) { @@ -349,7 +334,7 @@ size_t CachePartition::appendBlock( if (!flushed) { - key_to_index_and_metadata.set(ids[index], index_and_metadata); + key_to_index.set(ids[index], cache_index); ids_buffer.push_back(ids[index]); ++index; ++keys_in_block; @@ -432,17 +417,14 @@ void CachePartition::flush() /// commit changes in index for (size_t row = 0; row < ids.size(); ++row) { - IndexAndMetadata 
index_and_metadata; - if (key_to_index_and_metadata.get(ids[row], index_and_metadata)) { - auto & index = index_and_metadata.index; + Index index; + if (key_to_index.get(ids[row], index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. { index.setInMemory(false); index.setBlockId(current_file_block_id + index.getBlockId()); } - key_to_index_and_metadata.set(ids[row], index_and_metadata); - } else { - // Key was evicted from cache. + key_to_index.set(ids[row], index); } } @@ -475,11 +457,6 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray } }; - /*auto set_default = [&](const size_t index) - { - out[index] = get_default(index); - };*/ - getImpl(ids, set_value, found); } @@ -509,12 +486,6 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra } }; - /*auto set_default = [&](const size_t index) - { - buf.ignore(sizeof(UInt64)); // key - default_ids.push_back(index); - };*/ - getImpl(ids, set_value, found); } @@ -543,25 +514,13 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, PaddedPODArray indices(ids.size()); for (size_t i = 0; i < ids.size(); ++i) { - IndexAndMetadata index_and_metadata; + Index index; if (found[i]) - { indices[i].setNotExists(); - } - else if (key_to_index_and_metadata.get(ids[i], index_and_metadata)/* && index_and_metadata.metadata.expiresAt() > now*/) - { - /*if (unlikely(index_and_metadata.metadata.isDefault())) - { - indices[i].setNotExists(); - //set_default(i); - } - else*/ - indices[i] = index_and_metadata.index; - } + else if (key_to_index.get(ids[i], index)) + indices[i] = index; else - { indices[i].setNotExists(); - } } getValueFromMemory(indices, set); @@ -825,11 +784,11 @@ void CachePartition::clearOldestBlocks() const size_t finish_block = start_block + block_size * write_buffer_size; for (const auto& key : keys) { - IndexAndMetadata index_and_metadata; - if (key_to_index_and_metadata.get(key, index_and_metadata)) { - size_t block_id = index_and_metadata.index.getBlockId(); + Index index; + if (key_to_index.get(key, index)) { + size_t block_id = index.getBlockId(); if (start_block <= block_id && block_id < finish_block) { - key_to_index_and_metadata.erase(key); + key_to_index.erase(key); } } } @@ -888,7 +847,7 @@ double CachePartition::getLoadFactor() const size_t CachePartition::getElementCount() const { std::shared_lock lock(rw_lock); - return key_to_index_and_metadata.size(); + return key_to_index.size(); } PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const @@ -896,16 +855,14 @@ PaddedPODArray CachePartition::getCachedIds(const std::chro const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; PaddedPODArray array; - for (const auto & [key, index_and_metadata] : key_to_index_and_metadata) - if (!index_and_metadata.second.metadata.isDefault() /* && index_and_metadata.second.metadata.expiresAt() > now */) - array.push_back(key); + for (const auto & [key, index] : key_to_index) + array.push_back(key); // TODO: exclude default return array; } void CachePartition::remove() { std::unique_lock lock(rw_lock); - //Poco::File(path + BIN_FILE_EXT).remove(); std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); } diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 650e6f3666e..547e9d1e538 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ 
b/src/Dictionaries/SSDCacheDictionary.h @@ -100,6 +100,7 @@ using AttributeValueVariant = std::variant< Float64, String>; + class CachePartition { public: @@ -239,16 +240,9 @@ private: int fd = -1; - struct IndexAndMetadata final - { - Index index{}; - Metadata metadata{}; - }; - - mutable CLRUCache key_to_index_and_metadata; + mutable CLRUCache key_to_index; Attribute keys_buffer; - //std::vector metadata_buffer; const std::vector attributes_structure; std::optional> memory; From cebe0d5850066c7f93305a058c2f80d51d388f59 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 23 Apr 2020 14:44:12 +0300 Subject: [PATCH 0068/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 60 ++++++++++++++----------- src/Dictionaries/SSDCacheDictionary.h | 41 ++++++++++------- 2 files changed, 59 insertions(+), 42 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 4ec24b452bd..a38d21cad9c 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include "DictionaryBlockInputStream.h" #include "DictionaryFactory.h" #include @@ -187,9 +186,7 @@ CachePartition::CachePartition( keys_buffer.type = AttributeUnderlyingType::utUInt64; keys_buffer.values = CachePartition::Attribute::Container(); - Poco::File directory(dir_path); - if (!directory.exists()) - directory.createDirectory(); + std::filesystem::create_directories(std::filesystem::path{dir_path}); { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -234,7 +231,8 @@ size_t CachePartition::appendBlock( if (!memory) memory.emplace(block_size * write_buffer_size, BUFFER_ALIGNMENT); - auto init_write_buffer = [&]() { + auto init_write_buffer = [&]() + { write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); uint64_t tmp = 0; write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); @@ -249,7 +247,8 @@ size_t CachePartition::appendBlock( } bool flushed = false; - auto finish_block = [&]() { + auto finish_block = [&]() + { write_buffer.reset(); std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum @@ -349,9 +348,8 @@ size_t CachePartition::appendBlock( void CachePartition::flush() { - if (current_file_block_id >= max_size) { + if (current_file_block_id >= max_size) clearOldestBlocks(); - } const auto & ids = std::get>(keys_buffer.values); if (ids.empty()) @@ -418,7 +416,8 @@ void CachePartition::flush() for (size_t row = 0; row < ids.size(); ++row) { Index index; - if (key_to_index.get(ids[row], index)) { + if (key_to_index.get(ids[row], index)) + { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
{ index.setInMemory(false); @@ -446,10 +445,12 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray Metadata metadata; readVarUInt(metadata.data, buf); - if (metadata.expiresAt() > now) { - if (metadata.isDefault()) { + if (metadata.expiresAt() > now) + { + if (metadata.isDefault()) out[index] = get_default(index); - } else { + else + { ignoreFromBufferToAttributeIndex(attribute_index, buf); readBinary(out[index], buf); } @@ -471,9 +472,10 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra readVarUInt(metadata.data, buf); if (metadata.expiresAt() > now) { - if (metadata.isDefault()) { + if (metadata.isDefault()) default_ids.push_back(index); - } else { + else + { ignoreFromBufferToAttributeIndex(attribute_index, buf); size_t size = 0; readVarUInt(size, buf); @@ -498,9 +500,8 @@ void CachePartition::has(const PaddedPODArray & ids, ResultArrayType now) { + if (metadata.expiresAt() > now) out[index] = !metadata.isDefault(); - } }; getImpl(ids, set_value, found); @@ -654,9 +655,9 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, ++to_pop; /// add new io tasks - const size_t new_tasks_count = std::min(read_buffer_size - (to_push - to_pop), requests.size() - to_push); + const int new_tasks_count = std::min(read_buffer_size - (to_push - to_pop), requests.size() - to_push); - size_t pushed = 0; + int pushed = 0; while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) { if (errno != EINTR) @@ -731,14 +732,14 @@ void CachePartition::clearOldestBlocks() uint32_t keys_in_current_block = 0; readBinary(keys_in_current_block, read_buffer); Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); - + for (uint32_t j = 0; j < keys_in_current_block; ++j) { keys.emplace_back(); readBinary(keys.back(), read_buffer); Metadata metadata; readBinary(metadata.data, read_buffer); - + if (!metadata.isDefault()) { for (size_t attr = 0; attr < attributes_structure.size(); ++attr) @@ -785,11 +786,11 @@ void CachePartition::clearOldestBlocks() for (const auto& key : keys) { Index index; - if (key_to_index.get(key, index)) { + if (key_to_index.get(key, index)) + { size_t block_id = index.getBlockId(); - if (start_block <= block_id && block_id < finish_block) { + if (start_block <= block_id && block_id < finish_block) key_to_index.erase(key); - } } } } @@ -1253,6 +1254,7 @@ SSDCacheDictionary::SSDCacheDictionary( path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) , log(&Poco::Logger::get("SSDCacheDictionary")) { + LOG_INFO(log, "Using storage path '" << path << "'."); if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; @@ -1368,7 +1370,8 @@ void SSDCacheDictionary::getItemsNumberImpl( storage.update( source_ptr, required_ids, - [&](const auto id, const auto row, const auto & new_attributes) { + [&](const auto id, const auto row, const auto & new_attributes) + { for (const size_t out_row : not_found_ids[id]) out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, @@ -1495,7 +1498,8 @@ void SSDCacheDictionary::has(const PaddedPODArray & ids, PaddedPODArray class CLRUCache { - using Iter = std::list::iterator; + using Iter = typename std::list::iterator; public: - CLRUCache(size_t max_size_) : max_size(max_size_) { + 
CLRUCache(size_t max_size_) : max_size(max_size_) + { } - void set(K key, V val) { + void set(K key, V val) + { auto it = cache.find(key); - if (it == std::end(cache)) { + if (it == std::end(cache)) + { auto & item = cache[key]; item.first = queue.insert(std::end(queue), key); item.second = val; - if (queue.size() > max_size) { + if (queue.size() > max_size) + { cache.erase(queue.front()); queue.pop_front(); } - } else { + } + else + { queue.erase(it->second.first); it->second.first = queue.insert(std::end(queue), key); it->second.second = val; } } - bool get(K key, V & val) { + bool get(K key, V & val) + { auto it = cache.find(key); - if (it == std::end(cache)) { + if (it == std::end(cache)) return false; - } val = it->second.second; queue.erase(it->second.first); it->second.first = queue.insert(std::end(queue), key); return true; } - void erase(K key) { + void erase(K key) + { auto it = cache.find(key); queue.erase(it->second.first); cache.erase(it); } - size_t size() const { + size_t size() const + { return cache.size(); } - auto begin() { + auto begin() + { return std::begin(cache); } - auto end() { + auto end() + { return std::end(cache); } @@ -401,7 +411,7 @@ public: bool hasHierarchy() const override { return false; } - void toParent(const PaddedPODArray & /* ids */, PaddedPODArray & /* out */ ) const override {} + void toParent(const PaddedPODArray &, PaddedPODArray &) const override { } std::exception_ptr getLastException() const override { return storage.getLastException(); } @@ -489,10 +499,11 @@ private: template void getItemsNumberImpl( const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, DefaultGetter && get_default) const; + template void getItemsStringImpl(const size_t attribute_index, const PaddedPODArray & ids, ColumnString * out, DefaultGetter && get_default) const; - + const std::string name; const DictionaryStructure dict_struct; mutable DictionarySourcePtr source_ptr; From 60648b3d49f06fa56eae0a96b4e6358460a3c748 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 23 Apr 2020 21:03:38 +0300 Subject: [PATCH 0069/2229] fix eintr --- src/Dictionaries/SSDCacheDictionary.cpp | 15 +++++++----- src/Dictionaries/SSDCacheDictionary.h | 31 ++++++++++++------------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index a38d21cad9c..ac3eb017db0 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -448,7 +448,7 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray if (metadata.expiresAt() > now) { if (metadata.isDefault()) - out[index] = get_default(index); + out[index] = get_default(index); else { ignoreFromBufferToAttributeIndex(attribute_index, buf); @@ -471,7 +471,8 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra Metadata metadata; readVarUInt(metadata.data, buf); - if (metadata.expiresAt() > now) { + if (metadata.expiresAt() > now) + { if (metadata.isDefault()) default_ids.push_back(index); else @@ -695,14 +696,16 @@ void CachePartition::clearOldestBlocks() io_event event{}; AIOContext aio_context(1); - if (io_submit(aio_context.ctx, 1, &request_ptr) != 1) + while (io_submit(aio_context.ctx, 1, &request_ptr) != 1) { - throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + if (errno != EINTR) + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", 
ErrorCodes::CANNOT_IO_SUBMIT); } - if (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) != 1) + while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) != 1) { - throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + if (errno != EINTR) + throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); } if (event.res != static_cast(request.aio_nbytes)) diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index c4b09dbf74b..2899dbe319d 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -1,24 +1,24 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "DictionaryStructure.h" #include "IDictionary.h" #include "IDictionarySource.h" -#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include namespace DB { @@ -43,7 +43,7 @@ public: if (queue.size() > max_size) { cache.erase(queue.front()); - queue.pop_front(); + queue.pop_front(); } } else @@ -110,7 +110,6 @@ using AttributeValueVariant = std::variant< Float64, String>; - class CachePartition { public: From a9085760330c9b2c5bbf2e706dae6e5639e6bcc9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 23 Apr 2020 22:07:03 +0300 Subject: [PATCH 0070/2229] fix double params --- src/Parsers/ASTDictionary.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Parsers/ASTDictionary.cpp b/src/Parsers/ASTDictionary.cpp index 0aa311b5be2..ff5e06e4744 100644 --- a/src/Parsers/ASTDictionary.cpp +++ b/src/Parsers/ASTDictionary.cpp @@ -67,6 +67,8 @@ ASTPtr ASTDictionaryLayout::clone() const auto res = std::make_shared(*this); res->children.clear(); res->layout_type = layout_type; + res->parameters.clear(); + res->has_brackets = has_brackets; for (const auto & parameter : parameters) { res->parameters.emplace_back(parameter.first, nullptr); From 06945d83ecaed4092208392e017ca8111d225844 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 25 Apr 2020 11:12:13 +0300 Subject: [PATCH 0071/2229] impr --- src/Dictionaries/SSDCacheDictionary.cpp | 1 + src/Dictionaries/SSDCacheDictionary.h | 32 ++++++++++++++++--------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index ac3eb017db0..55655bea839 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -786,6 +786,7 @@ void CachePartition::clearOldestBlocks() const size_t start_block = current_file_block_id % max_size; const size_t finish_block = start_block + block_size * write_buffer_size; + Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); for (const auto& key : keys) { Index index; diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 2899dbe319d..685efba2600 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -27,6 +27,13 @@ template class CLRUCache { using Iter = typename std::list::iterator; + + struct Cell + { + Iter iter; + V val; + }; + public: CLRUCache(size_t max_size_) : max_size(max_size_) { @@ -38,8 +45,8 @@ public: if (it == std::end(cache)) { auto & item = cache[key]; - item.first = queue.insert(std::end(queue), key); - item.second = val; + 
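// [Editor's note] The "fix eintr" patch above turns one-shot checks of io_submit()
// and io_getevents() into retry loops: both calls can be interrupted by a signal
// before completing, in which case they fail with errno == EINTR and should simply
// be reissued; only other errno values are genuine errors. A generic sketch of the
// idiom (hypothetical helper, not part of ClickHouse):
#include <cerrno>
#include <system_error>

// Retries `call` while it fails with EINTR. Assumes a POSIX-style function that
// returns a negative value and sets errno on failure.
template <typename Call>
long retryOnEintr(Call && call)
{
    long res;
    do
    {
        res = call();
    } while (res < 0 && errno == EINTR);   // interrupted: retry transparently
    if (res < 0)
        throw std::system_error(errno, std::generic_category(), "syscall failed");
    return res;
}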
item.iter = queue.insert(std::end(queue), key); + item.val = val; if (queue.size() > max_size) { cache.erase(queue.front()); @@ -48,9 +55,9 @@ public: } else { - queue.erase(it->second.first); - it->second.first = queue.insert(std::end(queue), key); - it->second.second = val; + queue.erase(it->second.iter); + it->second.iter = queue.insert(std::end(queue), key); + it->second.val = val; } } @@ -59,17 +66,20 @@ public: auto it = cache.find(key); if (it == std::end(cache)) return false; - val = it->second.second; - queue.erase(it->second.first); - it->second.first = queue.insert(std::end(queue), key); + val = it->second.val; + queue.erase(it->second.iter); + it->second.iter = queue.insert(std::end(queue), key); return true; } - void erase(K key) + bool erase(K key) { auto it = cache.find(key); - queue.erase(it->second.first); + if (it == std::end(cache)) + return false; + queue.erase(it->second.iter); cache.erase(it); + return true; } size_t size() const @@ -88,7 +98,7 @@ public: } private: - std::unordered_map> cache; + std::unordered_map cache; std::list queue; size_t max_size; }; From 4f968fb11d58e80014cf779e8e1d91e481dfc935 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 25 Apr 2020 15:05:01 +0300 Subject: [PATCH 0072/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 5 ++--- src/Dictionaries/SSDCacheDictionary.h | 11 +++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 55655bea839..2f9e7a800f7 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -615,7 +615,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, while (to_pop < requests.size()) { /// get io tasks from previous iteration - size_t popped = 0; + int popped = 0; while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) < 0) { if (errno != EINTR) @@ -857,8 +857,7 @@ size_t CachePartition::getElementCount() const PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { - const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; - + std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. 
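// [Editor's note] On the lock change just below: getCachedIds() now takes
// std::unique_lock rather than a shared (reader) lock because iterating this storage
// is not read-only. The LRU cache promotes a key on every lookup, so even "reads"
// can invalidate the queue's begin/end iterators for a concurrent iterator. A
// condensed sketch of the rule of thumb (hypothetical class, for illustration only):
#include <mutex>
#include <shared_mutex>

struct LruLikeCache
{
    mutable std::shared_mutex rw_lock;

    size_t sizeEstimate() const
    {
        std::shared_lock lock(rw_lock);    // a pure read: shared access is enough
        return 0;                          // (placeholder body)
    }

    void lookupAndPromote()
    {
        std::unique_lock lock(rw_lock);    // lookup mutates the LRU order: exclusive
        // ... splice the accessed key to the back of the eviction queue ...
    }
};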
PaddedPODArray array; for (const auto & [key, index] : key_to_index) array.push_back(key); // TODO: exclude default diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 685efba2600..4d207777c76 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -41,6 +41,7 @@ public: void set(K key, V val) { + std::lock_guard lock(mutex); auto it = cache.find(key); if (it == std::end(cache)) { @@ -63,6 +64,7 @@ public: bool get(K key, V & val) { + std::lock_guard lock(mutex); auto it = cache.find(key); if (it == std::end(cache)) return false; @@ -74,6 +76,7 @@ public: bool erase(K key) { + std::lock_guard lock(mutex); auto it = cache.find(key); if (it == std::end(cache)) return false; @@ -82,18 +85,21 @@ public: return true; } - size_t size() const + size_t size() { + std::lock_guard lock(mutex); return cache.size(); } auto begin() { + std::lock_guard lock(mutex); return std::begin(cache); } auto end() { + std::lock_guard lock(mutex); return std::end(cache); } @@ -101,6 +107,7 @@ private: std::unordered_map cache; std::list queue; size_t max_size; + std::mutex mutex; }; using AttributeValueVariant = std::variant< @@ -357,7 +364,7 @@ private: mutable std::chrono::system_clock::time_point backoff_end_time; // stats - mutable size_t bytes_allocated = 0; + //mutable size_t bytes_allocated = 0; mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; From c5f8ebd98ca681dbc92e0be796b84bc7e5cc1108 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 26 Apr 2020 11:41:07 +0300 Subject: [PATCH 0073/2229] fix deadlock --- src/Dictionaries/SSDCacheDictionary.cpp | 64 ++++++++++++------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 2f9e7a800f7..d8edf543df7 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1079,40 +1079,40 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); PaddedPODArray metadata; - - for (const auto & id_found_pair : remaining_ids) - { - if (id_found_pair.second) - { - ++found_num; - continue; - } - ++not_found_num; - - const auto id = id_found_pair.first; - - if (update_error_count) - { - /// TODO: use the old values. - - /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. - std::rethrow_exception(last_update_exception); - } - - // Set key - std::get>(new_keys.values).push_back(id); - - std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; - metadata.emplace_back(); - metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); - metadata.back().setDefault(); - - /// inform caller that the cell has not been found - on_id_not_found(id); - } - { const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + + for (const auto & id_found_pair : remaining_ids) + { + if (id_found_pair.second) + { + ++found_num; + continue; + } + ++not_found_num; + + const auto id = id_found_pair.first; + + if (update_error_count) + { + /// TODO: use the old values. + + /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. 
+ std::rethrow_exception(last_update_exception); + } + + // Set key + std::get>(new_keys.values).push_back(id); + + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + metadata.emplace_back(); + metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); + metadata.back().setDefault(); + + /// inform caller that the cell has not been found + on_id_not_found(id); + } + if (not_found_num) append_defaults(new_keys, metadata); } From 84d73c88fdd426be71acf11f68f5805c36ae0018 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:20:27 +0300 Subject: [PATCH 0074/2229] Added Ruby integrations activecube --- docs/en/interfaces/third-party/integrations.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 9f4b0f2fa65..6b1c170252c 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -96,5 +96,11 @@ toc_title: Integrations - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From d212bc2ebc42d72839606b33c84d32bbbf9b2d48 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:39:37 +0300 Subject: [PATCH 0075/2229] Added Ruby integrations --- docs/ru/interfaces/third-party/integrations.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index d16404e2f27..508c6734045 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -91,5 +91,10 @@ - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [Оригинальная статья](https://clickhouse.tech/docs/ru/interfaces/third-party/integrations/) From fd53a8c15502f88f910ff60b3e8bec2d7bd28954 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:40:39 +0300 Subject: [PATCH 0076/2229] Added Ruby integration --- docs/es/interfaces/third-party/integrations.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/es/interfaces/third-party/integrations.md b/docs/es/interfaces/third-party/integrations.md index d706278a280..50fd9f1ce3b 100644 --- a/docs/es/interfaces/third-party/integrations.md +++ b/docs/es/interfaces/third-party/integrations.md @@ -98,5 +98,10 @@ toc_title: "Integraci\xF3n" - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [Método de codificación de datos:](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Artículo Original](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From 
d9648bc8348a133f946db76591edcb6da9cdf9f2 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:41:45 +0300 Subject: [PATCH 0077/2229] Added Ruby integration --- docs/fa/interfaces/third-party/integrations.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index 0ad13d1fd9e..f9e0411201a 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -95,5 +95,10 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - اکسیر - [Ecto](https://github.com/elixir-ecto/ecto) - [حذف جستجو](https://github.com/appodeal/clickhouse_ecto) - +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) + [مقاله اصلی](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From 1279c4a0f3bfd7c3a1d11cc2d31969c9a1109cf1 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:46:08 +0300 Subject: [PATCH 0078/2229] Added ruby integrations --- docs/fr/interfaces/third-party/integrations.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/fr/interfaces/third-party/integrations.md b/docs/fr/interfaces/third-party/integrations.md index 565be2b9604..100e4e34f54 100644 --- a/docs/fr/interfaces/third-party/integrations.md +++ b/docs/fr/interfaces/third-party/integrations.md @@ -95,5 +95,10 @@ toc_title: "Int\xE9gration" - Elixir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Article Original](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From 7e25a352f386445690500e6ceea125265f9ba9ab Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:47:15 +0300 Subject: [PATCH 0079/2229] Added ruby integrations --- docs/ja/interfaces/third-party/integrations.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/ja/interfaces/third-party/integrations.md b/docs/ja/interfaces/third-party/integrations.md index 74d0a51e7de..f45a6734b2c 100644 --- a/docs/ja/interfaces/third-party/integrations.md +++ b/docs/ja/interfaces/third-party/integrations.md @@ -95,5 +95,10 @@ toc_title: "\u7D71\u5408" - エリクサー - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [元の記事](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From 06c7b8ba4c951c4c84d08c9516e3220edd2c3a3a Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:48:03 +0300 Subject: [PATCH 0080/2229] Added ruby integrations --- docs/tr/interfaces/third-party/integrations.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/tr/interfaces/third-party/integrations.md b/docs/tr/interfaces/third-party/integrations.md index 2216e68a4c4..fdfbbc18b65 100644 --- a/docs/tr/interfaces/third-party/integrations.md +++ b/docs/tr/interfaces/third-party/integrations.md @@ 
-95,5 +95,10 @@ toc_title: Entegrasyonlar - İksir - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [Orijinal makale](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) From 8cb574fd907339c97038c7e1dfb10862d31fd496 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Mon, 27 Apr 2020 11:48:39 +0300 Subject: [PATCH 0081/2229] Added ruby integrations --- docs/zh/interfaces/third-party/integrations.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 128a4060c2d..ecd33e93a4d 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -89,5 +89,10 @@ - 仙丹 - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) [来源文章](https://clickhouse.tech/docs/zh/interfaces/third-party/integrations/) From 1789d6fa8213761d8309c925ea67ea291c8f0230 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 29 Apr 2020 02:07:11 +0300 Subject: [PATCH 0082/2229] add a test just in case --- .../test_polymorphic_parts/test.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index f7256de9d9a..2382dba863f 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -52,6 +52,7 @@ node1 = cluster.add_instance('node1', config_dir="configs", with_zookeeper=True) node2 = cluster.add_instance('node2', config_dir="configs", with_zookeeper=True) settings_default = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} +settings_compact_only = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 1000000, 'min_bytes_for_wide_part' : 0} settings_not_adaptive = {'index_granularity' : 64, 'index_granularity_bytes' : 0, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} node3 = cluster.add_instance('node3', config_dir="configs", with_zookeeper=True) @@ -69,6 +70,7 @@ def start_cluster(): cluster.start() create_tables('polymorphic_table', [node1, node2], [settings_default, settings_default], "shard1") + create_tables('compact_parts_only', [node1, node2], [settings_compact_only, settings_compact_only], "shard1") create_tables('non_adaptive_table', [node1, node2], [settings_not_adaptive, settings_default], "shard1") create_tables('polymorphic_table_compact', [node3, node4], [settings_compact, settings_wide], "shard2") create_tables('polymorphic_table_wide', [node3, node4], [settings_wide, settings_compact], "shard2") @@ -138,6 +140,31 @@ def test_polymorphic_parts_basics(start_cluster, first_node, second_node): second_node.query("SELECT count(ss) FROM polymorphic_table") == "2000\n" second_node.query("SELECT uniqExact(ss) FROM polymorphic_table") == "600\n" +# Checks mostly that merge from compact part to 
compact part works. +def test_compact_parts_only(start_cluster): + for i in range(20): + insert_random_data('compact_parts_only', node1, 100) + insert_random_data('compact_parts_only', node2, 100) + + node1.query("SYSTEM SYNC REPLICA compact_parts_only", timeout=20) + node2.query("SYSTEM SYNC REPLICA compact_parts_only", timeout=20) + + assert node1.query("SELECT count() FROM compact_parts_only") == "4000\n" + assert node2.query("SELECT count() FROM compact_parts_only") == "4000\n" + + assert node1.query("SELECT DISTINCT part_type FROM system.parts WHERE table = 'compact_parts_only' AND active") == "Compact\n" + assert node2.query("SELECT DISTINCT part_type FROM system.parts WHERE table = 'compact_parts_only' AND active") == "Compact\n" + + node1.query("OPTIMIZE TABLE compact_parts_only FINAL") + node2.query("SYSTEM SYNC REPLICA compact_parts_only", timeout=20) + assert node2.query("SELECT count() FROM compact_parts_only") == "4000\n" + + expected = "Compact\t1\n" + assert TSV(node1.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 'compact_parts_only' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) + assert TSV(node2.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 'compact_parts_only' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) + # Check that follower replicas create parts of the same type, which leader has chosen at merge. @pytest.mark.parametrize( From 42997bce868ee1d75cc1d63d24d43a603a967064 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 29 Apr 2020 20:14:49 +0300 Subject: [PATCH 0083/2229] in-memory parts: replication --- src/Storages/MergeTree/DataPartsExchange.cpp | 154 +++++++++++++----- src/Storages/MergeTree/DataPartsExchange.h | 10 +- .../MergeTreeDataPartWriterOnDisk.cpp | 1 + .../MergeTree/MergeTreeWriteAheadLog.cpp | 2 +- .../MergeTree/MergeTreeWriteAheadLog.h | 2 +- .../ReplicatedMergeTreeBlockOutputStream.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 12 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/System/StorageSystemParts.cpp | 14 +- .../test_polymorphic_parts/test.py | 50 ++++-- 10 files changed, 178 insertions(+), 70 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index c656fbf0c58..1ecffdef5e1 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,6 +1,9 @@ #include +#include +#include #include #include +#include #include #include #include @@ -54,6 +57,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo int client_protocol_version = parse(params.get("client_protocol_version", "0")); String part_name = params.get("part"); + String part_type = params.get("part_type", "Wide"); // TODO: correct type with old versions const auto data_settings = data.getSettings(); @@ -84,24 +88,16 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo LOG_TRACE(log, "Sending part " << part_name); try - { + { auto storage_lock = data.lockStructureForShare( - false, RWLockImpl::NO_QUERY, data.getSettings()->lock_acquire_timeout_for_background_operations); + false, RWLockImpl::NO_QUERY, data.getSettings()->lock_acquire_timeout_for_background_operations); - MergeTreeData::DataPartPtr part = findPart(part_name); + auto part = findPart(part_name); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedSend}; - /// We'll take a list of files from the list of checksums. 
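// [Editor's note] The protocol extension in this commit stays backward compatible by
// giving the new HTTP form field a default: a replica built before this patch never
// sends "part_type", so the server assumes "Wide", the only format such senders could
// have produced. A std-only sketch of the defaulting pattern (hypothetical names):
#include <map>
#include <string>

std::string getParam(const std::map<std::string, std::string> & params,
                     const std::string & key, const std::string & fallback)
{
    auto it = params.find(key);
    return it == params.end() ? fallback : it->second;   // absent field => legacy value
}

// Usage sketch: auto part_type = getParam(form, "part_type", "Wide");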
- MergeTreeData::DataPart::Checksums checksums = part->checksums; - /// Add files that are not in the checksum list. - checksums.files["checksums.txt"]; - checksums.files["columns.txt"]; - - MergeTreeData::DataPart::Checksums data_checksums; - if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) - writeBinary(checksums.getTotalSizeOnDisk(), out); + writeBinary(part->checksums.getTotalSizeOnDisk(), out); if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS) { @@ -110,37 +106,10 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo writeBinary(ttl_infos_buffer.str(), out); } - writeBinary(checksums.files.size(), out); - for (const auto & it : checksums.files) - { - String file_name = it.first; - - auto disk = part->disk; - String path = part->getFullRelativePath() + file_name; - - UInt64 size = disk->getFileSize(path); - - writeStringBinary(it.first, out); - writeBinary(size, out); - - auto file_in = disk->readFile(path); - HashingWriteBuffer hashing_out(out); - copyData(*file_in, hashing_out, blocker.getCounter()); - - if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); - - if (hashing_out.count() != size) - throw Exception("Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); - - writePODBinary(hashing_out.getHash(), out); - - if (file_name != "checksums.txt" && - file_name != "columns.txt") - data_checksums.addFile(file_name, hashing_out.count(), hashing_out.getHash()); - } - - part->checksums.checkEqual(data_checksums, false); + if (part_type == "InMemory") + sendPartFromMemory(part, out, storage_lock); + else + sendPartFromDisk(part, out, storage_lock); } catch (const NetException &) { @@ -160,6 +129,61 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo } } +void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, TableStructureReadLockHolder &) +{ + auto part_in_memory = dynamic_cast(part.get()); + if (!part_in_memory) + throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::NO_SUCH_DATA_PART); // TODO error code + + NativeBlockOutputStream block_out(out, 0, data.getSampleBlock()); + block_out.write(part_in_memory->block); + + // TODO send checksums +} + +void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, TableStructureReadLockHolder &) +{ + /// We'll take a list of files from the list of checksums. + MergeTreeData::DataPart::Checksums checksums = part->checksums; + /// Add files that are not in the checksum list. 
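// [Editor's note] The send loop below streams every file through a hashing buffer,
// so the checksum is computed as a side effect of the copy and the receiver can
// verify each file without a second pass over the data. A std-only sketch of the
// same idea, using a toy FNV-1a hash instead of ClickHouse's HashingWriteBuffer:
#include <cstdint>
#include <istream>
#include <ostream>

// Copies `in` to `out` while folding every byte into a running hash; the caller
// then writes the hash after the data so the peer can recompute and compare it.
uint64_t copyAndHash(std::istream & in, std::ostream & out)
{
    uint64_t hash = 1469598103934665603ULL;            // FNV-1a offset basis
    char c;
    while (in.get(c))
    {
        hash = (hash ^ static_cast<unsigned char>(c)) * 1099511628211ULL;
        out.put(c);
    }
    return hash;
}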
+ checksums.files["checksums.txt"]; + checksums.files["columns.txt"]; + + MergeTreeData::DataPart::Checksums data_checksums; + + writeBinary(checksums.files.size(), out); + for (const auto & it : checksums.files) + { + String file_name = it.first; + + auto disk = part->disk; + String path = part->getFullRelativePath() + file_name; + + UInt64 size = disk->getFileSize(path); + + writeStringBinary(it.first, out); + writeBinary(size, out); + + auto file_in = disk->readFile(path); + HashingWriteBuffer hashing_out(out); + copyData(*file_in, hashing_out, blocker.getCounter()); + + if (blocker.isCancelled()) + throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + + if (hashing_out.count() != size) + throw Exception("Unexpected size of file " + path, ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + + writePODBinary(hashing_out.getHash(), out); + + if (file_name != "checksums.txt" && + file_name != "columns.txt") + data_checksums.addFile(file_name, hashing_out.count(), hashing_out.getHash()); + } + + part->checksums.checkEqual(data_checksums, false); +} + MergeTreeData::DataPartPtr Service::findPart(const String & name) { /// It is important to include PreCommitted and Outdated parts here because remote replicas cannot reliably @@ -174,6 +198,7 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name) MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( const String & part_name, + const String & part_type, const String & replica_path, const String & host, int port, @@ -196,6 +221,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( { {"endpoint", getEndpointId(replica_path)}, {"part", part_name}, + {"part_type", part_type}, {"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS)}, {"compress", "false"} }); @@ -244,10 +270,48 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( reservation = data.makeEmptyReservationOnLargestDisk(); } - return downloadPart(part_name, replica_path, to_detached, tmp_prefix_, std::move(reservation), in); + return part_type == "InMemory" ? 
downloadPartToMemory(part_name, replica_path, in) + : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, std::move(reservation), in); } -MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( +MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( + const String & part_name, + const String & /* replica_path */, + PooledReadWriteBufferFromHTTP & in) +{ + NativeBlockInputStream block_in(in, 0); + auto block = block_in.read(); + MergeTreeData::MutableDataPartPtr new_data_part = + std::make_shared(data, part_name, nullptr); + + new_data_part->is_temp = true; + new_data_part->setColumns(block.getNamesAndTypesList()); + new_data_part->minmax_idx.update(block, data.minmax_idx_columns); + + auto partition_block = block; + data.partition_key_expr->execute(partition_block); + auto & partition = new_data_part->partition.value; + size_t partition_columns_num = data.partition_key_sample.columns(); + partition.resize(partition_columns_num); + + for (size_t i = 0; i < partition_columns_num; ++i) + { + const auto & column_name = data.partition_key_sample.getByPosition(i).name; + const auto & partition_column = partition_block.getByName(column_name).column; + partition[i] = (*partition_column)[0]; + } + + MergedBlockOutputStream part_out(new_data_part, block.getNamesAndTypesList(), {}, nullptr); + part_out.writePrefix(); + part_out.write(block); + part_out.writeSuffixAndFinalizePart(new_data_part); + + // TODO validate checksums + + return new_data_part; +} + +MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( const String & part_name, const String & replica_path, bool to_detached, diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index c0e8c0d2331..f17836cf9f3 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -31,6 +31,8 @@ public: private: MergeTreeData::DataPartPtr findPart(const String & name); + void sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, TableStructureReadLockHolder & storage_lock); + void sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, TableStructureReadLockHolder & storage_lock); private: /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, @@ -52,6 +54,7 @@ public: /// Downloads a part to tmp_directory. If to_detached - downloads to the `detached` directory. MergeTreeData::MutableDataPartPtr fetchPart( const String & part_name, + const String & part_type, const String & replica_path, const String & host, int port, @@ -66,7 +69,7 @@ public: ActionBlocker blocker; private: - MergeTreeData::MutableDataPartPtr downloadPart( + MergeTreeData::MutableDataPartPtr downloadPartToDisk( const String & part_name, const String & replica_path, bool to_detached, @@ -74,6 +77,11 @@ private: const ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in); + MergeTreeData::MutableDataPartPtr downloadPartToMemory( + const String & part_name, + const String & replica_path, + PooledReadWriteBufferFromHTTP & in); + MergeTreeData & data; Logger * log; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index cdd7e592513..2e8c068aaf8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -222,6 +222,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. 
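// [Editor's note] Context for the writer comment above: the MergeTree primary index
// is sparse, holding one primary-key value per granule of `index_granularity` rows
// rather than one per row. A distilled sketch of that emission rule (hypothetical
// types, not the writer's real interface):
#include <cstddef>
#include <vector>

template <typename Key>
std::vector<Key> buildSparseIndex(const std::vector<Key> & pk_column, size_t index_granularity)
{
    std::vector<Key> index;
    for (size_t row = 0; row < pk_column.size(); row += index_granularity)
        index.push_back(pk_column[row]);   // the first key of each granule becomes a mark
    return index;
}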
The index contains Primary Key value for each `index_granularity` row. + while (current_mark < ) for (size_t i = index_offset; i < rows;) { if (storage.hasPrimaryKey()) diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index d874a10d3f2..5d2d9270a89 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -60,7 +60,7 @@ void MergeTreeWriteAheadLog::rotate(const std::lock_guard & /*write_ out = disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); block_out = std::make_unique(*out, 0, storage.getSampleBlock()); min_block_number = std::numeric_limits::max(); - max_block_number = 0; + max_block_number = std::numeric_limits::min(); } MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 2014fba18de..4868012f6c0 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -48,7 +48,7 @@ private: std::unique_ptr block_out; Int64 min_block_number = std::numeric_limits::max(); - Int64 max_block_number = 0; + Int64 max_block_number = std::numeric_limits::min(); mutable std::mutex write_mutex; }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 72255081e6b..37ef8e61ef2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -239,6 +239,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo log_entry.new_part_name = part_name; log_entry.quorum = quorum; log_entry.block_id = block_id; + log_entry.new_part_type = part->getType(); /// Simultaneously add information about the part to all the necessary places in ZooKeeper and remove block_number_lock. diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3f907541a3c..424c054be54 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1400,7 +1400,8 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry) try { String part_name = entry.actual_new_part_name.empty() ? 
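// [Editor's note] On the MergeTreeWriteAheadLog change above, where max_block_number
// is now seeded with numeric_limits::min() instead of 0: running min/max accumulators
// must start at the opposite extremes, otherwise values below the seed are masked and
// an empty log is indistinguishable from one whose maximum is 0. Minimal illustration:
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

int main()
{
    std::vector<int64_t> block_numbers{-7, -3};
    int64_t min_block = std::numeric_limits<int64_t>::max();
    int64_t max_block = std::numeric_limits<int64_t>::min();   // not 0
    for (int64_t n : block_numbers)
    {
        min_block = std::min(min_block, n);
        max_block = std::max(max_block, n);
    }
    // Seeding max_block with 0 would wrongly report 0 here instead of -3.
    return max_block == -3 ? 0 : 1;
}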
entry.new_part_name : entry.actual_new_part_name; - if (!fetchPart(part_name, zookeeper_path + "/replicas/" + replica, false, entry.quorum)) + String part_type = entry.new_part_type.toString(); + if (!fetchPart(part_name, part_type, zookeeper_path + "/replicas/" + replica, false, entry.quorum)) return false; } catch (Exception & e) @@ -1744,7 +1745,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (interserver_scheme != address.scheme) throw Exception("Interserver schemas are different '" + interserver_scheme + "' != '" + address.scheme + "', can't fetch part from " + address.host, ErrorCodes::LOGICAL_ERROR); - part_desc->res_part = fetcher.fetchPart(part_desc->found_new_part_name, source_replica_path, + part_desc->res_part = fetcher.fetchPart(part_desc->found_new_part_name, "Wide", source_replica_path, // TODO: fix part type address.host, address.replication_port, timeouts, user, password, interserver_scheme, false, TMP_PREFIX + "fetch_"); /// TODO: check columns_version of fetched part @@ -2693,7 +2694,8 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) } -bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & source_replica_path, bool to_detached, size_t quorum) +bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & part_type, + const String & source_replica_path, bool to_detached, size_t quorum) { const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -2798,7 +2800,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin ErrorCodes::LOGICAL_ERROR); return fetcher.fetchPart( - part_name, source_replica_path, + part_name, part_type, source_replica_path, address.host, address.replication_port, timeouts, user_password.first, user_password.second, interserver_scheme, to_detached); }; @@ -4305,7 +4307,7 @@ void StorageReplicatedMergeTree::fetchPartition(const ASTPtr & partition, const { try { - fetchPart(part, best_replica_path, true, 0); + fetchPart(part, "Wide", best_replica_path, true, 0); // TODO: fix part type } catch (const DB::Exception & e) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 01dd32614f9..589826fc2c6 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -454,7 +454,7 @@ private: * If quorum != 0, then the node for tracking the quorum is updated. * Returns false if part is already fetching right now. 
*/ - bool fetchPart(const String & part_name, const String & replica_path, bool to_detached, size_t quorum); + bool fetchPart(const String & part_name, const String & part_type, const String & replica_path, bool to_detached, size_t quorum); /// Required only to avoid races between executeLogEntry and fetchPartition std::unordered_set currently_fetching_parts; diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 2dfbf415100..e5a367239b4 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -19,7 +19,7 @@ StorageSystemParts::StorageSystemParts(const std::string & name_) { {"partition", std::make_shared()}, {"name", std::make_shared()}, - {"part_type", std::make_shared()}, + {"part_type", std::make_shared()}, {"active", std::make_shared()}, {"marks", std::make_shared()}, {"rows", std::make_shared()}, @@ -111,8 +111,16 @@ void StorageSystemParts::processNextStorage(MutableColumns & columns_, const Sto columns_[i++]->insert(info.database); columns_[i++]->insert(info.table); columns_[i++]->insert(info.engine); - columns_[i++]->insert(part->disk->getName()); - columns_[i++]->insert(part->getFullPath()); + if (part->isStoredOnDisk()) + { + columns_[i++]->insert(part->disk->getName()); + columns_[i++]->insert(part->getFullPath()); + } + else + { + columns_[i++]->insertDefault(); + columns_[i++]->insertDefault(); + } if (has_state_column) columns_[i++]->insert(part->stateString()); diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 2382dba863f..cfcfc633c02 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -36,8 +36,8 @@ def create_tables(name, nodes, node_settings, shard): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/{shard}/{name}', '{repl}') PARTITION BY toYYYYMM(date) ORDER BY id - SETTINGS index_granularity = {index_granularity}, index_granularity_bytes = {index_granularity_bytes}, - min_rows_for_wide_part = {min_rows_for_wide_part}, min_bytes_for_wide_part = {min_bytes_for_wide_part} + SETTINGS index_granularity = 64, index_granularity_bytes = {index_granularity_bytes}, + min_rows_for_wide_part = {min_rows_for_wide_part}, min_rows_for_compact_part = {min_rows_for_compact_part} '''.format(name=name, shard=shard, repl=i, **settings)) def create_tables_old_format(name, nodes, shard): @@ -51,19 +51,24 @@ def create_tables_old_format(name, nodes, shard): node1 = cluster.add_instance('node1', config_dir="configs", with_zookeeper=True) node2 = cluster.add_instance('node2', config_dir="configs", with_zookeeper=True) -settings_default = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} -settings_compact_only = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 1000000, 'min_bytes_for_wide_part' : 0} -settings_not_adaptive = {'index_granularity' : 64, 'index_granularity_bytes' : 0, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} +settings_default = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 0} +settings_compact_only = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 1000000, 'min_rows_for_compact_part' : 0} +settings_not_adaptive = {'index_granularity_bytes' : 0, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 0} node3 = 
cluster.add_instance('node3', config_dir="configs", with_zookeeper=True) node4 = cluster.add_instance('node4', config_dir="configs", main_configs=['configs/no_leader.xml'], with_zookeeper=True) -settings_compact = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} -settings_wide = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 0, 'min_bytes_for_wide_part' : 0} +settings_compact = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 0} +settings_wide = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 0, 'min_rows_for_compact_part' : 0} node5 = cluster.add_instance('node5', config_dir='configs', main_configs=['configs/compact_parts.xml'], with_zookeeper=True) node6 = cluster.add_instance('node6', config_dir='configs', main_configs=['configs/compact_parts.xml'], with_zookeeper=True) +settings_in_memory = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 256} + +node9 = cluster.add_instance('node9', config_dir="configs", with_zookeeper=True) +node10 = cluster.add_instance('node10', config_dir="configs", with_zookeeper=True) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -75,6 +80,7 @@ def start_cluster(): create_tables('polymorphic_table_compact', [node3, node4], [settings_compact, settings_wide], "shard2") create_tables('polymorphic_table_wide', [node3, node4], [settings_wide, settings_compact], "shard2") create_tables_old_format('polymorphic_table', [node5, node6], "shard3") + create_tables('in_memory_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") yield cluster @@ -84,8 +90,8 @@ def start_cluster(): @pytest.mark.parametrize( ('first_node', 'second_node'), [ - (node1, node2), - (node5, node6) + (node1, node2), # compact parts + (node5, node6), # compact parts, old-format ] ) def test_polymorphic_parts_basics(start_cluster, first_node, second_node): @@ -198,8 +204,8 @@ def test_different_part_types_on_replicas(start_cluster, table, part_type): node7 = cluster.add_instance('node7', config_dir="configs", with_zookeeper=True, image='yandex/clickhouse-server:19.17.8.54', stay_alive=True, with_installed_binary=True) node8 = cluster.add_instance('node8', config_dir="configs", with_zookeeper=True) -settings7 = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760} -settings8 = {'index_granularity' : 64, 'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_bytes_for_wide_part' : 0} +settings7 = {'index_granularity_bytes' : 10485760} +settings8 = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 0} @pytest.fixture(scope="module") def start_cluster_diff_versions(): @@ -212,7 +218,7 @@ def start_cluster_diff_versions(): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/shard5/{name}', '1') PARTITION BY toYYYYMM(date) ORDER BY id - SETTINGS index_granularity = {index_granularity}, index_granularity_bytes = {index_granularity_bytes} + SETTINGS index_granularity = 64, index_granularity_bytes = {index_granularity_bytes} '''.format(name=name, **settings7) ) @@ -222,7 +228,7 @@ def start_cluster_diff_versions(): ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/shard5/{name}', '2') PARTITION BY toYYYYMM(date) ORDER BY id - SETTINGS index_granularity = {index_granularity}, index_granularity_bytes = {index_granularity_bytes}, + SETTINGS 
index_granularity = 64, index_granularity_bytes = {index_granularity_bytes}, min_rows_for_wide_part = {min_rows_for_wide_part}, min_bytes_for_wide_part = {min_bytes_for_wide_part} '''.format(name=name, **settings8) ) @@ -287,3 +293,21 @@ def test_polymorphic_parts_non_adaptive(start_cluster): "WHERE table = 'non_adaptive_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV("Wide\t2\n") assert node1.contains_in_log(" default.non_adaptive_table: Table can't create parts with adaptive granularity") + +def test_in_memory(start_cluster): + node9.query("SYSTEM STOP MERGES") + node10.query("SYSTEM STOP MERGES") + + for size in [200, 200, 300, 600]: + insert_random_data('in_memory_table', node9, size) + node10.query("SYSTEM SYNC REPLICA in_memory_table", timeout=20) + + assert node9.query("SELECT count() FROM in_memory_table") == "1300\n" + assert node10.query("SELECT count() FROM in_memory_table") == "1300\n" + + expected = "Compact\t1\nInMemory\t2\nWide\t1\n" + + assert TSV(node9.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) + assert TSV(node10.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) From fac0439efb0b52e435d166636101503cb1bdffb2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 30 Apr 2020 23:50:31 +0300 Subject: [PATCH 0084/2229] complex_key --- src/Dictionaries/SSDCacheDictionary.cpp | 119 +- src/Dictionaries/SSDCacheDictionary.h | 28 +- .../SSDComplexKeyCacheDictionary.cpp | 1805 +++++++++++++++++ .../SSDComplexKeyCacheDictionary.h | 790 ++++++++ src/Dictionaries/registerDictionaries.cpp | 1 + src/Dictionaries/registerDictionaries.h | 1 + src/Functions/FunctionsExternalDictionaries.h | 7 +- 7 files changed, 2676 insertions(+), 75 deletions(-) create mode 100644 src/Dictionaries/SSDComplexKeyCacheDictionary.cpp create mode 100644 src/Dictionaries/SSDComplexKeyCacheDictionary.h diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index d8edf543df7..108ed19c862 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -105,65 +105,65 @@ namespace } } -CachePartition::Metadata::time_point_t CachePartition::Metadata::expiresAt() const +SSDCachePartition::Metadata::time_point_t SSDCachePartition::Metadata::expiresAt() const { return ext::safe_bit_cast(data & KEY_METADATA_EXPIRES_AT_MASK); } -void CachePartition::Metadata::setExpiresAt(const time_point_t & t) +void SSDCachePartition::Metadata::setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } -bool CachePartition::Metadata::isDefault() const +bool SSDCachePartition::Metadata::isDefault() const { return (data & KEY_METADATA_IS_DEFAULT_MASK) == KEY_METADATA_IS_DEFAULT_MASK; } -void CachePartition::Metadata::setDefault() +void SSDCachePartition::Metadata::setDefault() { data |= KEY_METADATA_IS_DEFAULT_MASK; } -bool CachePartition::Index::inMemory() const +bool SSDCachePartition::Index::inMemory() const { return (index & KEY_IN_MEMORY) == KEY_IN_MEMORY; } -bool CachePartition::Index::exists() const +bool SSDCachePartition::Index::exists() const { return index != NOT_EXISTS; } -void CachePartition::Index::setNotExists() +void SSDCachePartition::Index::setNotExists() { index = NOT_EXISTS; } -void CachePartition::Index::setInMemory(const bool in_memory) +void SSDCachePartition::Index::setInMemory(const 
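// [Editor's note] The Index accessors being renamed in this hunk pack three fields
// into a single UInt64, per the bit constants declared alongside the partition:
// bit 63 flags "still in the write buffer", the low 16 bits address a position inside
// a block, and the bits in between carry the block id. A compact restatement of that
// layout (illustrative only, simplified relative to the real masks):
#include <cstdint>

struct PackedIndex
{
    static constexpr uint64_t IN_MEMORY = 1ULL << 63;
    static constexpr uint64_t OFFSET_BITS = 16;
    static constexpr uint64_t OFFSET_MASK = (1ULL << OFFSET_BITS) - 1;

    uint64_t raw = 0;

    bool inMemory() const { return raw & IN_MEMORY; }
    uint64_t offsetInBlock() const { return raw & OFFSET_MASK; }
    uint64_t blockId() const { return (raw & ~IN_MEMORY) >> OFFSET_BITS; }

    void setOffsetInBlock(uint64_t off) { raw = (raw & ~OFFSET_MASK) | off; }
    void setBlockId(uint64_t id) { raw = (raw & (IN_MEMORY | OFFSET_MASK)) | (id << OFFSET_BITS); }
};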
bool in_memory) { index = (index & ~KEY_IN_MEMORY) | (static_cast(in_memory) << KEY_IN_MEMORY_BIT); } -size_t CachePartition::Index::getAddressInBlock() const +size_t SSDCachePartition::Index::getAddressInBlock() const { return index & INDEX_IN_BLOCK_MASK; } -void CachePartition::Index::setAddressInBlock(const size_t address_in_block) +void SSDCachePartition::Index::setAddressInBlock(const size_t address_in_block) { index = (index & ~INDEX_IN_BLOCK_MASK) | address_in_block; } -size_t CachePartition::Index::getBlockId() const +size_t SSDCachePartition::Index::getBlockId() const { return (index & BLOCK_INDEX_MASK) >> INDEX_IN_BLOCK_BITS; } -void CachePartition::Index::setBlockId(const size_t block_id) +void SSDCachePartition::Index::setBlockId(const size_t block_id) { index = (index & ~BLOCK_INDEX_MASK) | (block_id << INDEX_IN_BLOCK_BITS); } -CachePartition::CachePartition( +SSDCachePartition::SSDCachePartition( const AttributeUnderlyingType & /* key_structure */, const std::vector & attributes_structure_, const std::string & dir_path, @@ -184,7 +184,7 @@ CachePartition::CachePartition( , attributes_structure(attributes_structure_) { keys_buffer.type = AttributeUnderlyingType::utUInt64; - keys_buffer.values = CachePartition::Attribute::Container(); + keys_buffer.values = SSDCachePartition::Attribute::Container(); std::filesystem::create_directories(std::filesystem::path{dir_path}); @@ -206,19 +206,19 @@ CachePartition::CachePartition( } } -CachePartition::~CachePartition() +SSDCachePartition::~SSDCachePartition() { std::unique_lock lock(rw_lock); ::close(fd); } -size_t CachePartition::appendDefaults( +size_t SSDCachePartition::appendDefaults( const Attribute & new_keys, const PaddedPODArray & metadata, const size_t begin) { return appendBlock(new_keys, Attributes{}, metadata, begin); } -size_t CachePartition::appendBlock( +size_t SSDCachePartition::appendBlock( const Attribute & new_keys, const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) { std::unique_lock lock(rw_lock); @@ -346,7 +346,7 @@ size_t CachePartition::appendBlock( return ids.size() - begin; } -void CachePartition::flush() +void SSDCachePartition::flush() { if (current_file_block_id >= max_size) clearOldestBlocks(); @@ -435,7 +435,7 @@ void CachePartition::flush() } template -void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, +void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::vector & found, GetDefault & get_default, std::chrono::system_clock::time_point now) const { @@ -461,7 +461,7 @@ void CachePartition::getValue(const size_t attribute_index, const PaddedPODArray getImpl(ids, set_value, found); } -void CachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, +void SSDCachePartition::getString(const size_t attribute_index, const PaddedPODArray & ids, StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, std::vector & default_ids, std::chrono::system_clock::time_point now) const { @@ -492,7 +492,7 @@ void CachePartition::getString(const size_t attribute_index, const PaddedPODArra getImpl(ids, set_value, found); } -void CachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, +void SSDCachePartition::has(const PaddedPODArray & ids, ResultArrayType & out, std::vector & found, std::chrono::system_clock::time_point now) const { auto set_value = [&](const size_t index, ReadBuffer & buf) @@ -509,7 +509,7 @@ void CachePartition::has(const 
PaddedPODArray & ids, ResultArrayType -void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, +void SSDCachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found) const { std::shared_lock lock(rw_lock); @@ -530,7 +530,7 @@ void CachePartition::getImpl(const PaddedPODArray & ids, SetFunc & set, } template -void CachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const +void SSDCachePartition::getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const { // Do not check checksum while reading from memory. for (size_t i = 0; i < indices.size(); ++i) @@ -547,7 +547,7 @@ void CachePartition::getValueFromMemory(const PaddedPODArray & indices, S } template -void CachePartition::getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const +void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const { std::vector> index_to_out; for (size_t i = 0; i < indices.size(); ++i) @@ -668,7 +668,7 @@ void CachePartition::getValueFromStorage(const PaddedPODArray & indices, } } -void CachePartition::clearOldestBlocks() +void SSDCachePartition::clearOldestBlocks() { Poco::Logger::get("GC").information("GC clear -----------------"); // write_buffer_size, because we need to erase the whole buffer. @@ -799,9 +799,8 @@ void CachePartition::clearOldestBlocks() } } -void CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const +void SSDCachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const { - //buf.ignore(2 * sizeof(UInt64)); // key and metadata for (size_t i = 0; i < attribute_index; ++i) { switch (attributes_structure[i]) @@ -838,24 +837,24 @@ void CachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_ind } } -size_t CachePartition::getId() const +size_t SSDCachePartition::getId() const { return file_id; } -double CachePartition::getLoadFactor() const +double SSDCachePartition::getLoadFactor() const { std::shared_lock lock(rw_lock); return static_cast(current_file_block_id) / max_size; } -size_t CachePartition::getElementCount() const +size_t SSDCachePartition::getElementCount() const { std::shared_lock lock(rw_lock); return key_to_index.size(); } -PaddedPODArray CachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const +PaddedPODArray SSDCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. 
PaddedPODArray array; @@ -864,13 +863,13 @@ PaddedPODArray CachePartition::getCachedIds(const std::chro return array; } -void CachePartition::remove() +void SSDCachePartition::remove() { std::unique_lock lock(rw_lock); std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); } -CacheStorage::CacheStorage( +SSDCacheStorage::SSDCacheStorage( const AttributeTypes & attributes_structure_, const std::string & path_, const size_t max_partitions_count_, @@ -887,11 +886,11 @@ CacheStorage::CacheStorage( , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) , max_stored_keys(max_stored_keys_) - , log(&Poco::Logger::get("CacheStorage")) + , log(&Poco::Logger::get("SSDCacheStorage")) { } -CacheStorage::~CacheStorage() +SSDCacheStorage::~SSDCacheStorage() { std::unique_lock lock(rw_lock); partition_delete_queue.splice(std::end(partition_delete_queue), partitions); @@ -899,7 +898,7 @@ CacheStorage::~CacheStorage() } template -void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, +void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, GetDefault & get_default, std::chrono::system_clock::time_point now) const { @@ -919,7 +918,7 @@ void CacheStorage::getValue(const size_t attribute_index, const PaddedPODArray & ids, +void SSDCacheStorage::getString(const size_t attribute_index, const PaddedPODArray & ids, StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, std::vector & default_ids, std::chrono::system_clock::time_point now) const { @@ -939,7 +938,7 @@ void CacheStorage::getString(const size_t attribute_index, const PaddedPODArray< hit_count.fetch_add(ids.size() - not_found.size(), std::memory_order_release); } -void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, +void SSDCacheStorage::has(const PaddedPODArray & ids, ResultArrayType & out, std::unordered_map> & not_found, std::chrono::system_clock::time_point now) const { for (size_t i = 0; i < ids.size(); ++i) @@ -961,12 +960,12 @@ void CacheStorage::has(const PaddedPODArray & ids, ResultArrayType -void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, +void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, const DictionaryLifetime lifetime) { - auto append_block = [this](const CachePartition::Attribute & new_keys, - const CachePartition::Attributes & new_attributes, const PaddedPODArray & metadata) + auto append_block = [this](const SSDCachePartition::Attribute & new_keys, + const SSDCachePartition::Attributes & new_attributes, const PaddedPODArray & metadata) { size_t inserted = 0; while (inserted < metadata.size()) @@ -975,7 +974,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendBlock(new_keys, new_attributes, metadata, inserted); if (inserted < metadata.size()) { - partitions.emplace_front(std::make_unique( + partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 
0 : partitions.front()->getId() + 1), partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); @@ -1017,9 +1016,9 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values); + const auto & ids = std::get>(new_keys.values); - PaddedPODArray metadata(ids.size()); + PaddedPODArray metadata(ids.size()); for (const auto i : ext::range(0, ids.size())) { @@ -1053,7 +1052,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & metadata) + auto append_defaults = [this](const SSDCachePartition::Attribute & new_keys, const PaddedPODArray & metadata) { size_t inserted = 0; while (inserted < metadata.size()) @@ -1062,7 +1061,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vectorappendDefaults(new_keys, metadata, inserted); if (inserted < metadata.size()) { - partitions.emplace_front(std::make_unique( + partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 0 : partitions.front()->getId() + 1), partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); @@ -1074,11 +1073,11 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector(); + new_keys.values = SSDCachePartition::Attribute::Container(); - PaddedPODArray metadata; + PaddedPODArray metadata; { const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; @@ -1102,7 +1101,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector>(new_keys.values).push_back(id); + std::get>(new_keys.values).push_back(id); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata.emplace_back(); @@ -1122,7 +1121,7 @@ void CacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector CacheStorage::getCachedIds() const +PaddedPODArray SSDCacheStorage::getCachedIds() const { PaddedPODArray array; @@ -1138,7 +1137,7 @@ PaddedPODArray CacheStorage::getCachedIds() const return array; } -double CacheStorage::getLoadFactor() const +double SSDCacheStorage::getLoadFactor() const { double result = 0; std::shared_lock lock(rw_lock); @@ -1147,7 +1146,7 @@ double CacheStorage::getLoadFactor() const return result / partitions.size(); } -size_t CacheStorage::getElementCount() const +size_t SSDCacheStorage::getElementCount() const { size_t result = 0; std::shared_lock lock(rw_lock); @@ -1156,7 +1155,7 @@ size_t CacheStorage::getElementCount() const return result; } -void CacheStorage::collectGarbage() +void SSDCacheStorage::collectGarbage() { // add partitions to queue while (partitions.size() > max_partitions_count) @@ -1172,10 +1171,10 @@ void CacheStorage::collectGarbage() } } -CachePartition::Attributes CacheStorage::createAttributesFromBlock( +SSDCachePartition::Attributes SSDCacheStorage::createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure) { - CachePartition::Attributes attributes; + SSDCachePartition::Attributes attributes; const auto columns = block.getColumns(); for (size_t i = 0; i < structure.size(); ++i) @@ -1186,7 +1185,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ { \ - CachePartition::Attribute::Container values(column->size()); \ + SSDCachePartition::Attribute::Container values(column->size()); \ memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ 
attributes.emplace_back(); \ attributes.back().type = structure[i]; \ @@ -1213,7 +1212,7 @@ CachePartition::Attributes CacheStorage::createAttributesFromBlock( case AttributeUnderlyingType::utString: { attributes.emplace_back(); - CachePartition::Attribute::Container values(column->size()); + SSDCachePartition::Attribute::Container values(column->size()); for (size_t j = 0; j < column->size(); ++j) { const auto ref = column->getDataAt(j); @@ -1376,7 +1375,7 @@ void SSDCacheDictionary::getItemsNumberImpl( [&](const auto id, const auto row, const auto & new_attributes) { for (const size_t out_row : not_found_ids[id]) - out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; + out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const size_t id) { @@ -1455,7 +1454,7 @@ void SSDCacheDictionary::getItemsStringImpl(const size_t attribute_index, const required_ids, [&](const auto id, const auto row, const auto & new_attributes) { - update_result[id] = std::get>(new_attributes[attribute_index].values)[row]; + update_result[id] = std::get>(new_attributes[attribute_index].values)[row]; }, [&](const size_t) {}, getLifetime()); diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 4d207777c76..0409a100aa6 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -127,7 +127,7 @@ using AttributeValueVariant = std::variant< Float64, String>; -class CachePartition +class SSDCachePartition { public: struct Index final @@ -170,7 +170,7 @@ public: using Offsets = std::vector; using Key = IDictionary::Key; - CachePartition( + SSDCachePartition( const AttributeUnderlyingType & key_structure, const std::vector & attributes_structure, const std::string & dir_path, @@ -181,7 +181,7 @@ public: const size_t write_buffer_size, const size_t max_stored_keys); - ~CachePartition(); + ~SSDCachePartition(); template using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; @@ -280,16 +280,16 @@ private: size_t current_file_block_id = 0; }; -using CachePartitionPtr = std::shared_ptr; +using SSDCachePartitionPtr = std::shared_ptr; -class CacheStorage +class SSDCacheStorage { public: using AttributeTypes = std::vector; - using Key = CachePartition::Key; + using Key = SSDCachePartition::Key; - CacheStorage( + SSDCacheStorage( const AttributeTypes & attributes_structure, const std::string & path, const size_t max_partitions_count, @@ -299,10 +299,10 @@ public: const size_t write_buffer_size, const size_t max_stored_keys); - ~CacheStorage(); + ~SSDCacheStorage(); template - using ResultArrayType = CachePartition::ResultArrayType; + using ResultArrayType = SSDCachePartition::ResultArrayType; template void getValue(const size_t attribute_index, const PaddedPODArray & ids, @@ -336,7 +336,7 @@ public: double getLoadFactor() const; private: - CachePartition::Attributes createAttributesFromBlock( + SSDCachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); void collectGarbage(); @@ -352,8 +352,8 @@ private: const size_t max_stored_keys; mutable std::shared_mutex rw_lock; - std::list partitions; - std::list partition_delete_queue; + std::list partitions; + std::list partition_delete_queue; Logger * const log; @@ -432,7 +432,7 @@ public: std::exception_ptr getLastException() const override { return storage.getLastException(); } template - using ResultArrayType = CacheStorage::ResultArrayType; + using 
ResultArrayType = SSDCacheStorage::ResultArrayType; #define DECLARE(TYPE) \ void get##TYPE(const std::string & attribute_name, const PaddedPODArray & ids, ResultArrayType & out) const; @@ -535,7 +535,7 @@ private: std::map attribute_index_by_name; std::vector null_values; - mutable CacheStorage storage; + mutable SSDCacheStorage storage; Logger * const log; mutable size_t bytes_allocated = 0; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp new file mode 100644 index 00000000000..83b9f0f25a7 --- /dev/null +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -0,0 +1,1805 @@ +#include "SSDComplexKeyCacheDictionary.h" + +#include +#include +#include +#include +#include +#include +#include +#include "DictionaryBlockInputStream.h" +#include "DictionaryFactory.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event DictCacheKeysRequested; + extern const Event DictCacheKeysRequestedMiss; + extern const Event DictCacheKeysRequestedFound; + extern const Event DictCacheKeysExpired; + extern const Event DictCacheKeysNotFound; + extern const Event DictCacheKeysHit; + extern const Event DictCacheRequestTimeNs; + extern const Event DictCacheRequests; + extern const Event DictCacheLockWriteNs; + extern const Event DictCacheLockReadNs; + extern const Event FileOpen; + extern const Event WriteBufferAIOWrite; + extern const Event WriteBufferAIOWriteBytes; +} + +namespace CurrentMetrics +{ + extern const Metric DictCacheRequests; + extern const Metric Write; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AIO_READ_ERROR; + extern const int AIO_WRITE_ERROR; + extern const int BAD_ARGUMENTS; + extern const int CANNOT_FSYNC; + extern const int CANNOT_IO_GETEVENTS; + extern const int CANNOT_IO_SUBMIT; + extern const int CANNOT_OPEN_FILE; + extern const int FILE_DOESNT_EXIST; + extern const int LOGICAL_ERROR; + extern const int TOO_SMALL_BUFFER_SIZE; + extern const int TYPE_MISMATCH; + extern const int UNSUPPORTED_METHOD; + extern const int CORRUPTED_DATA; +} + +namespace +{ + constexpr size_t DEFAULT_SSD_BLOCK_SIZE = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t DEFAULT_FILE_SIZE = 4 * 1024 * 1024 * 1024ULL; + constexpr size_t DEFAULT_PARTITIONS_COUNT = 16; + constexpr size_t DEFAULT_READ_BUFFER_SIZE = 16 * DEFAULT_SSD_BLOCK_SIZE; + constexpr size_t DEFAULT_WRITE_BUFFER_SIZE = DEFAULT_SSD_BLOCK_SIZE; + + constexpr size_t DEFAULT_MAX_STORED_KEYS = 100000; + + constexpr size_t BUFFER_ALIGNMENT = DEFAULT_AIO_FILE_BLOCK_SIZE; + constexpr size_t BLOCK_CHECKSUM_SIZE = 8; + constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; + + static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); + static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; + + constexpr size_t KEY_IN_MEMORY_BIT = 63; + constexpr size_t KEY_IN_MEMORY = (1ULL << KEY_IN_MEMORY_BIT); + constexpr size_t BLOCK_INDEX_BITS = 32; + constexpr size_t INDEX_IN_BLOCK_BITS = 16; + constexpr size_t INDEX_IN_BLOCK_MASK = (1ULL << INDEX_IN_BLOCK_BITS) - 1; + constexpr size_t BLOCK_INDEX_MASK = ((1ULL << (BLOCK_INDEX_BITS + INDEX_IN_BLOCK_BITS)) - 1) ^ INDEX_IN_BLOCK_MASK; + + constexpr size_t NOT_EXISTS = -1; + + constexpr UInt8 HAS_NOT_FOUND = 2; + + //constexpr UInt16 MAX_KEY_SIZE = std::numeric_limits::max(); + + const std::string BIN_FILE_EXT = ".bin"; + const std::string IND_FILE_EXT = ".idx"; + + int 
preallocateDiskSpace(int fd, size_t len) + { + #if defined(__FreeBSD__) + return posix_fallocate(fd, 0, len); + #else + return fallocate(fd, 0, 0, len); + #endif + } +} + +SSDComplexKeyCachePartition::Metadata::time_point_t SSDComplexKeyCachePartition::Metadata::expiresAt() const +{ + return ext::safe_bit_cast(data & KEY_METADATA_EXPIRES_AT_MASK); +} +void SSDComplexKeyCachePartition::Metadata::setExpiresAt(const time_point_t & t) +{ + data = ext::safe_bit_cast(t); +} + +bool SSDComplexKeyCachePartition::Metadata::isDefault() const +{ + return (data & KEY_METADATA_IS_DEFAULT_MASK) == KEY_METADATA_IS_DEFAULT_MASK; +} +void SSDComplexKeyCachePartition::Metadata::setDefault() +{ + data |= KEY_METADATA_IS_DEFAULT_MASK; +} + +bool SSDComplexKeyCachePartition::Index::inMemory() const +{ + return (index & KEY_IN_MEMORY) == KEY_IN_MEMORY; +} + +bool SSDComplexKeyCachePartition::Index::exists() const +{ + return index != NOT_EXISTS; +} + +void SSDComplexKeyCachePartition::Index::setNotExists() +{ + index = NOT_EXISTS; +} + +void SSDComplexKeyCachePartition::Index::setInMemory(const bool in_memory) +{ + index = (index & ~KEY_IN_MEMORY) | (static_cast(in_memory) << KEY_IN_MEMORY_BIT); +} + +size_t SSDComplexKeyCachePartition::Index::getAddressInBlock() const +{ + return index & INDEX_IN_BLOCK_MASK; +} + +void SSDComplexKeyCachePartition::Index::setAddressInBlock(const size_t address_in_block) +{ + index = (index & ~INDEX_IN_BLOCK_MASK) | address_in_block; +} + +size_t SSDComplexKeyCachePartition::Index::getBlockId() const +{ + return (index & BLOCK_INDEX_MASK) >> INDEX_IN_BLOCK_BITS; +} + +void SSDComplexKeyCachePartition::Index::setBlockId(const size_t block_id) +{ + index = (index & ~BLOCK_INDEX_MASK) | (block_id << INDEX_IN_BLOCK_BITS); +} + +SSDComplexKeyCachePartition::SSDComplexKeyCachePartition( + const AttributeUnderlyingType & /* key_structure */, + const std::vector & attributes_structure_, + const std::string & dir_path, + const size_t file_id_, + const size_t max_size_, + const size_t block_size_, + const size_t read_buffer_size_, + const size_t write_buffer_size_, + const size_t max_stored_keys_) + : file_id(file_id_) + , max_size(max_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) + , path(dir_path + "/" + std::to_string(file_id)) + , key_to_index(max_stored_keys, keys_pool) + , attributes_structure(attributes_structure_) +{ + std::filesystem::create_directories(std::filesystem::path{dir_path}); + + { + ProfileEvents::increment(ProfileEvents::FileOpen); + + const std::string filename = path + BIN_FILE_EXT; + fd = ::open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_DIRECT, 0666); + if (fd == -1) + { + auto error_code = (errno == ENOENT) ? 
ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE; + throwFromErrnoWithPath("Cannot open file " + filename, filename, error_code); + } + + if (preallocateDiskSpace(fd, max_size * block_size) < 0) + { + throwFromErrnoWithPath("Cannot preallocate space for the file " + filename, filename, ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + } +} + +SSDComplexKeyCachePartition::~SSDComplexKeyCachePartition() +{ + std::unique_lock lock(rw_lock); + ::close(fd); +} + +size_t SSDComplexKeyCachePartition::appendDefaults( + const KeyRefs & keys_in, + const PaddedPODArray & metadata, + const size_t begin) +{ + std::unique_lock lock(rw_lock); + KeyRefs keys(keys_in.size()); + for (size_t i = 0; i < keys_in.size(); ++i) + { + keys[i] = keys_pool.copyKeyFrom(keys_in[i]); + } + return append(keys, Attributes{}, metadata, begin); +} + +size_t SSDComplexKeyCachePartition::appendBlock( + const Columns & key_columns, const DataTypes & /* key_types */, + const Attributes & new_attributes, const PaddedPODArray & metadata, const size_t begin) +{ + std::unique_lock lock(rw_lock); + if (!new_attributes.empty() && new_attributes.size() != attributes_structure.size()) + throw Exception{"Wrong columns number in block.", ErrorCodes::BAD_ARGUMENTS}; + + const auto keys_size = key_columns.size(); + KeyRefs keys(key_columns.front()->size()); + { + StringRefs tmp_keys_refs(keys_size); + for (size_t i = 0; i < key_columns.front()->size(); ++i) + { + keys[i] = keys_pool.allocKey(i, key_columns, tmp_keys_refs); + } + } + + return append(keys, new_attributes, metadata, begin); +} + +size_t SSDComplexKeyCachePartition::append( + const KeyRefs & keys, + const Attributes & new_attributes, + const PaddedPODArray & metadata, + const size_t begin) +{ + if (!memory) + memory.emplace(block_size * write_buffer_size, BUFFER_ALIGNMENT); + + auto init_write_buffer = [&]() + { + write_buffer.emplace(memory->data() + current_memory_block_id * block_size, block_size); + uint64_t tmp = 0; + write_buffer->write(reinterpret_cast(&tmp), BLOCK_CHECKSUM_SIZE); + write_buffer->write(reinterpret_cast(&tmp), BLOCK_SPECIAL_FIELDS_SIZE); + keys_in_block = 0; + }; + + if (!write_buffer) + { + init_write_buffer(); + } + + bool flushed = false; + auto finish_block = [&]() + { + write_buffer.reset(); + std::memcpy(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, &keys_in_block, sizeof(keys_in_block)); // set count + uint64_t checksum = CityHash_v1_0_2::CityHash64(memory->data() + block_size * current_memory_block_id + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); // checksum + std::memcpy(memory->data() + block_size * current_memory_block_id, &checksum, sizeof(checksum)); + if (++current_memory_block_id == write_buffer_size) + flush(); + flushed = true; + }; + + for (size_t index = begin; index < keys.size();) + { + Poco::Logger::get("test").information("wb off: " + std::to_string(write_buffer->offset())); + Index cache_index; + cache_index.setInMemory(true); + cache_index.setBlockId(current_memory_block_id); + cache_index.setAddressInBlock(write_buffer->offset()); + + flushed = false; + if (keys[index].fullSize() + sizeof(UInt64) > write_buffer->available()) // place for key and metadata + { + finish_block(); + } + else + { + keys_pool.writeKey(keys[index], *write_buffer); + writeBinary(metadata[index].data, *write_buffer); + } + + Poco::Logger::get("test key").information("wb off: " + std::to_string(write_buffer->offset())); + + for (const auto & attribute : new_attributes) + { + if (flushed) + break; + switch 
(attribute.type)
+            {
+#define DISPATCH(TYPE) \
+                case AttributeUnderlyingType::ut##TYPE: \
+                { \
+                    if (sizeof(TYPE) > write_buffer->available()) \
+                    { \
+                        finish_block(); \
+                        continue; \
+                    } \
+                    else \
+                    { \
+                        const auto & values = std::get<Attribute::Container<TYPE>>(attribute.values); \
+                        writeBinary(values[index], *write_buffer); \
+                    } \
+                } \
+                break;
+
+                DISPATCH(UInt8)
+                DISPATCH(UInt16)
+                DISPATCH(UInt32)
+                DISPATCH(UInt64)
+                DISPATCH(UInt128)
+                DISPATCH(Int8)
+                DISPATCH(Int16)
+                DISPATCH(Int32)
+                DISPATCH(Int64)
+                DISPATCH(Decimal32)
+                DISPATCH(Decimal64)
+                DISPATCH(Decimal128)
+                DISPATCH(Float32)
+                DISPATCH(Float64)
+#undef DISPATCH
+
+                case AttributeUnderlyingType::utString:
+                {
+                    const auto & value = std::get<Attribute::Container<String>>(attribute.values)[index];
+                    if (sizeof(UInt64) + value.size() > write_buffer->available())
+                    {
+                        finish_block();
+                        continue;
+                    }
+                    else
+                    {
+                        writeStringBinary(value, *write_buffer);
+                    }
+                }
+                break;
+            }
+        }
+
+        if (!flushed)
+        {
+            key_to_index.set(keys[index], cache_index);
+            keys_buffer.push_back(keys[index]);
+            ++index;
+            ++keys_in_block;
+        }
+        else // next block in write buffer or flushed to ssd
+        {
+            init_write_buffer();
+        }
+        Poco::Logger::get("test final").information("wb off: " + std::to_string(write_buffer->offset()));
+    }
+    return keys.size() - begin;
+}
+
+void SSDComplexKeyCachePartition::flush()
+{
+    if (current_file_block_id >= max_size)
+        clearOldestBlocks();
+
+    if (keys_buffer.empty())
+        return;
+
+    Poco::Logger::get("partition").information("Flushing partition " + std::to_string(file_id) + ", block: " + std::to_string(current_file_block_id));
+
+    AIOContext aio_context{1};
+
+    iocb write_request{};
+    iocb * write_request_ptr{&write_request};
+
+#if defined(__FreeBSD__)
+    write_request.aio.aio_lio_opcode = LIO_WRITE;
+    write_request.aio.aio_fildes = fd;
+    write_request.aio.aio_buf = reinterpret_cast<volatile void *>(memory->data());
+    write_request.aio.aio_nbytes = block_size * write_buffer_size;
+    write_request.aio.aio_offset = block_size * current_file_block_id;
+#else
+    write_request.aio_lio_opcode = IOCB_CMD_PWRITE;
+    write_request.aio_fildes = fd;
+    write_request.aio_buf = reinterpret_cast<UInt64>(memory->data());
+    write_request.aio_nbytes = block_size * write_buffer_size;
+    write_request.aio_offset = block_size * current_file_block_id;
+#endif
+
+    Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes));
+
+    while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0)
+    {
+        if (errno != EINTR)
+            throw Exception("Cannot submit request for asynchronous IO on file " + path + BIN_FILE_EXT, ErrorCodes::CANNOT_IO_SUBMIT);
+    }
+
+    CurrentMetrics::Increment metric_increment_write{CurrentMetrics::Write};
+
+    io_event event;
+    while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) < 0)
+    {
+        if (errno != EINTR)
+            throw Exception("Failed to wait for asynchronous IO completion on file " + path + BIN_FILE_EXT, ErrorCodes::CANNOT_IO_GETEVENTS);
+    }
+
+    // Unpoison the memory returned from an uninstrumented system function.
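+    // (io_getevents is reached through a raw syscall wrapper, so MemorySanitizer cannot
+    // see the kernel filling in `event`; without the explicit unpoison below, reading
+    // event.res would be flagged as a use of uninitialized memory in msan builds.)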
+ __msan_unpoison(&event, sizeof(event)); + + ssize_t bytes_written; +#if defined(__FreeBSD__) + bytes_written = aio_return(reinterpret_cast(event.udata)); +#else + bytes_written = event.res; +#endif + + ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); + ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); + + if (bytes_written != static_cast(write_request.aio_nbytes)) + throw Exception("Not all data was written for asynchronous IO on file " + path + BIN_FILE_EXT + ". returned: " + std::to_string(bytes_written), ErrorCodes::AIO_WRITE_ERROR); + + if (::fsync(fd) < 0) + throwFromErrnoWithPath("Cannot fsync " + path + BIN_FILE_EXT, path + BIN_FILE_EXT, ErrorCodes::CANNOT_FSYNC); + + /// commit changes in index + for (size_t row = 0; row < keys_buffer.size(); ++row) + { + Index index; + if (key_to_index.get(keys_buffer[row], index)) + { + if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. + { + index.setInMemory(false); + index.setBlockId(current_file_block_id + index.getBlockId()); + } + key_to_index.set(keys_buffer[row], index); + } + } + + current_file_block_id += write_buffer_size; + current_memory_block_id = 0; + + /// clear buffer + keys_buffer.clear(); +} + +template +void SSDComplexKeyCachePartition::getValue( + const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, std::vector & found, GetDefault & get_default, + std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + keys_pool.ignoreKey(buf); + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) + { + if (metadata.isDefault()) + out[index] = get_default(index); + else + { + ignoreFromBufferToAttributeIndex(attribute_index, buf); + readBinary(out[index], buf); + } + found[index] = true; + } + }; + + getImpl(key_columns, key_types, set_value, found); +} + +void SSDComplexKeyCachePartition::getString(const size_t attribute_index, + const Columns & key_columns, const DataTypes & key_types, + StringRefs & refs, ArenaWithFreeLists & arena, std::vector & found, + std::vector & default_ids, + std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + keys_pool.ignoreKey(buf); + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) + { + if (metadata.isDefault()) + default_ids.push_back(index); + else + { + ignoreFromBufferToAttributeIndex(attribute_index, buf); + size_t size = 0; + readVarUInt(size, buf); + char * string_ptr = arena.alloc(size); + memcpy(string_ptr, buf.position(), size); + refs[index].data = string_ptr; + refs[index].size = size; + } + found[index] = true; + } + }; + + getImpl(key_columns, key_types, set_value, found); +} + +void SSDComplexKeyCachePartition::has( + const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out, + std::vector & found, std::chrono::system_clock::time_point now) const +{ + auto set_value = [&](const size_t index, ReadBuffer & buf) + { + keys_pool.ignoreKey(buf); + Metadata metadata; + readVarUInt(metadata.data, buf); + + if (metadata.expiresAt() > now) + out[index] = !metadata.isDefault(); + }; + + getImpl(key_columns, key_types, set_value, found); +} + +template +void SSDComplexKeyCachePartition::getImpl( + const Columns & key_columns, const DataTypes & /* key_types */, + SetFunc & set, std::vector & found) const +{ + 
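+    // The lookup proceeds in three steps:
+    //   1. under a shared lock, resolve each requested key to an Index via key_to_index
+    //      (rows already marked `found` get a "not exists" index and are skipped);
+    //   2. serve indices that still point into the in-memory write buffer (getValueFromMemory);
+    //   3. batch the remaining indices into AIO reads from the data file (getValueFromStorage).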
TemporalComplexKeysPool tmp_keys_pool;
+    StringRefs tmp_refs(key_columns.size());
+
+    std::shared_lock lock(rw_lock);
+    PaddedPODArray<Index> indices(key_columns.front()->size());
+    for (size_t i = 0; i < key_columns.front()->size(); ++i)
+    {
+        auto key = tmp_keys_pool.allocKey(i, key_columns, tmp_refs);
+        SCOPE_EXIT(tmp_keys_pool.rollback(key));
+        Index index;
+        if (found[i])
+            indices[i].setNotExists();
+        else if (key_to_index.get(key, index))
+            indices[i] = index;
+        else
+            indices[i].setNotExists();
+    }
+
+    getValueFromMemory(indices, set);
+    getValueFromStorage(indices, set);
+}
+
+template <typename SetFunc>
+void SSDComplexKeyCachePartition::getValueFromMemory(const PaddedPODArray<Index> & indices, SetFunc & set) const
+{
+    // Do not check checksum while reading from memory.
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+        const auto & index = indices[i];
+        if (index.exists() && index.inMemory())
+        {
+            const size_t offset = index.getBlockId() * block_size + index.getAddressInBlock();
+
+            ReadBufferFromMemory read_buffer(memory->data() + offset, block_size * write_buffer_size - offset);
+            set(i, read_buffer);
+        }
+    }
+}
+
+template <typename SetFunc>
+void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray<Index> & indices, SetFunc & set) const
+{
+    std::vector<std::pair<Index, size_t>> index_to_out;
+    for (size_t i = 0; i < indices.size(); ++i)
+    {
+        const auto & index = indices[i];
+        if (index.exists() && !index.inMemory())
+            index_to_out.emplace_back(index, i);
+    }
+    if (index_to_out.empty())
+        return;
+
+    /// sort by (block_id, offset_in_block)
+    std::sort(std::begin(index_to_out), std::end(index_to_out));
+
+    Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT);
+
+    // TODO: merge requests
+    std::vector<iocb> requests;
+    std::vector<iocb *> pointers;
+    std::vector<std::vector<size_t>> blocks_to_indices;
+    requests.reserve(index_to_out.size());
+    pointers.reserve(index_to_out.size());
+    blocks_to_indices.reserve(index_to_out.size());
+    for (size_t i = 0; i < index_to_out.size(); ++i)
+    {
+        if (!requests.empty() &&
+            static_cast<size_t>(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * block_size)
+        {
+            blocks_to_indices.back().push_back(i);
+            continue;
+        }
+
+        iocb request{};
+#if defined(__FreeBSD__)
+        request.aio.aio_lio_opcode = LIO_READ;
+        request.aio.aio_fildes = fd;
+        request.aio.aio_buf = reinterpret_cast<volatile void *>(
+            reinterpret_cast<UInt64>(read_buffer.data()) + block_size * (requests.size() % read_buffer_size));
+        request.aio.aio_nbytes = block_size;
+        request.aio.aio_offset = index_to_out[i].first.getBlockId() * block_size;
+        request.aio_data = requests.size();
+#else
+        request.aio_lio_opcode = IOCB_CMD_PREAD;
+        request.aio_fildes = fd;
+        request.aio_buf = reinterpret_cast<UInt64>(read_buffer.data()) + block_size * (requests.size() % read_buffer_size);
+        request.aio_nbytes = block_size;
+        request.aio_offset = index_to_out[i].first.getBlockId() * block_size;
+        request.aio_data = requests.size();
+#endif
+        requests.push_back(request);
+        pointers.push_back(&requests.back());
+        blocks_to_indices.emplace_back();
+        blocks_to_indices.back().push_back(i);
+    }
+
+    AIOContext aio_context(read_buffer_size);
+
+    std::vector<bool> processed(requests.size(), false);
+    std::vector<io_event> events(requests.size());
+    for (auto & event : events)
+        event.res = -1; // TODO: remove
+
+    size_t to_push = 0;
+    size_t to_pop = 0;
+    while (to_pop < requests.size())
+    {
+        /// get io tasks from previous iteration
+        int popped = 0;
+        while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) < 0)
+        {
+            if (errno != EINTR)
+                
throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); + } + + for (size_t i = to_pop; i < to_pop + popped; ++i) + { + const auto request_id = events[i].data; + const auto & request = requests[request_id]; + if (events[i].res != static_cast(request.aio_nbytes)) + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + + "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + + "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_READ_ERROR); + + uint64_t checksum = 0; + ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); + readBinary(checksum, buf_special); + uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(reinterpret_cast(request.aio_buf) + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); + if (checksum != calculated_checksum) + { + throw Exception("Cache data corrupted. From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); + } + + for (const size_t idx : blocks_to_indices[request_id]) + { + const auto & [file_index, out_index] = index_to_out[idx]; + ReadBufferFromMemory buf( + reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), + block_size - file_index.getAddressInBlock()); + set(out_index, buf); + } + + processed[request_id] = true; + } + + while (to_pop < requests.size() && processed[to_pop]) + ++to_pop; + + /// add new io tasks + const int new_tasks_count = std::min(read_buffer_size - (to_push - to_pop), requests.size() - to_push); + + int pushed = 0; + while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) + { + if (errno != EINTR) + throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); + } + to_push += pushed; + } +} + +void SSDComplexKeyCachePartition::clearOldestBlocks() +{ + Poco::Logger::get("GC").information("GC clear -----------------"); + // write_buffer_size, because we need to erase the whole buffer. 
+    Memory read_buffer_memory(block_size * write_buffer_size, BUFFER_ALIGNMENT);
+
+    iocb request{};
+#if defined(__FreeBSD__)
+    request.aio.aio_lio_opcode = LIO_READ;
+    request.aio.aio_fildes = fd;
+    request.aio.aio_buf = reinterpret_cast<volatile void *>(reinterpret_cast<UInt64>(read_buffer_memory.data()));
+    request.aio.aio_nbytes = block_size * write_buffer_size;
+    request.aio.aio_offset = (current_file_block_id % max_size) * block_size;
+    request.aio_data = 0;
+#else
+    request.aio_lio_opcode = IOCB_CMD_PREAD;
+    request.aio_fildes = fd;
+    request.aio_buf = reinterpret_cast<UInt64>(read_buffer_memory.data());
+    request.aio_nbytes = block_size * write_buffer_size;
+    request.aio_offset = (current_file_block_id % max_size) * block_size;
+    request.aio_data = 0;
+#endif
+
+    {
+        iocb * request_ptr = &request;
+        io_event event{};
+        AIOContext aio_context(1);
+
+        while (io_submit(aio_context.ctx, 1, &request_ptr) != 1)
+        {
+            if (errno != EINTR)
+                throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT);
+        }
+
+        while (io_getevents(aio_context.ctx, 1, 1, &event, nullptr) != 1)
+        {
+            if (errno != EINTR)
+                throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS);
+        }
+
+        if (event.res != static_cast<ssize_t>(request.aio_nbytes))
+        {
+            throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ". "
+                + "aio_nbytes=" + std::to_string(request.aio_nbytes)
+                + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR);
+        }
+    }
+
+    TemporalComplexKeysPool tmp_keys_pool;
+    KeyRefs keys;
+    keys.reserve(write_buffer_size);
+
+    // TODO: write the number of values
+    for (size_t i = 0; i < write_buffer_size; ++i)
+    {
+        ReadBufferFromMemory read_buffer(read_buffer_memory.data() + i * block_size, block_size);
+
+        uint64_t checksum = 0;
+        readBinary(checksum, read_buffer);
+        uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(read_buffer_memory.data() + i * block_size + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE);
+        if (checksum != calculated_checksum)
+        {
+            throw Exception("Cache data corrupted. 
From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); + } + + uint32_t keys_in_current_block = 0; + readBinary(keys_in_current_block, read_buffer); + Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); + + for (uint32_t j = 0; j < keys_in_current_block; ++j) + { + keys.emplace_back(); + tmp_keys_pool.readKey(keys.back(), read_buffer); + + Metadata metadata; + readBinary(metadata.data, read_buffer); + + if (!metadata.isDefault()) + { + for (size_t attr = 0; attr < attributes_structure.size(); ++attr) + { + + switch (attributes_structure[attr]) + { + #define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + read_buffer.ignore(sizeof(TYPE)); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) + #undef DISPATCH + + case AttributeUnderlyingType::utString: + { + size_t size = 0; + readVarUInt(size, read_buffer); + read_buffer.ignore(size); + } + break; + } + } + } + } + } + + const size_t start_block = current_file_block_id % max_size; + const size_t finish_block = start_block + block_size * write_buffer_size; + Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); + for (const auto& key : keys) + { + Index index; + if (key_to_index.get(key, index)) + { + size_t block_id = index.getBlockId(); + if (start_block <= block_id && block_id < finish_block) + key_to_index.erase(key); + } + } +} + +void SSDComplexKeyCachePartition::ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const +{ + for (size_t i = 0; i < attribute_index; ++i) + { + switch (attributes_structure[i]) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + buf.ignore(sizeof(TYPE)); \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + { + size_t size = 0; + readVarUInt(size, buf); + buf.ignore(size); + } + break; + } + } +} + +size_t SSDComplexKeyCachePartition::getId() const +{ + return file_id; +} + +double SSDComplexKeyCachePartition::getLoadFactor() const +{ + std::shared_lock lock(rw_lock); + return static_cast(current_file_block_id) / max_size; +} + +size_t SSDComplexKeyCachePartition::getElementCount() const +{ + std::shared_lock lock(rw_lock); + return key_to_index.size(); +} + +PaddedPODArray SSDComplexKeyCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const +{ + std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. 
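+    // (The LRU-ordered key_to_index structure may rearrange entries even on lookups,
+    // so an exclusive lock is taken although this is logically a read-only traversal.)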
+ PaddedPODArray array; + for (const auto & [key, index] : key_to_index) + array.push_back(key); // TODO: exclude default + return array; +} + +void SSDComplexKeyCachePartition::remove() +{ + std::unique_lock lock(rw_lock); + std::filesystem::remove(std::filesystem::path(path + BIN_FILE_EXT)); +} + +SSDComplexKeyCacheStorage::SSDComplexKeyCacheStorage( + const AttributeTypes & attributes_structure_, + const std::string & path_, + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_, + const size_t write_buffer_size_, + const size_t max_stored_keys_) + : attributes_structure(attributes_structure_) + , path(path_) + , max_partitions_count(max_partitions_count_) + , partition_size(partition_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) + , log(&Poco::Logger::get("SSDComplexKeyCacheStorage")) +{ +} + +SSDComplexKeyCacheStorage::~SSDComplexKeyCacheStorage() +{ + std::unique_lock lock(rw_lock); + partition_delete_queue.splice(std::end(partition_delete_queue), partitions); + collectGarbage(); +} + +template +void SSDComplexKeyCacheStorage::getValue( + const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, + GetDefault & get_default, std::chrono::system_clock::time_point now) const +{ + size_t n = key_columns.front()->size(); + std::vector found(n, false); + + { + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->getValue(attribute_index, key_columns, key_types, out, found, get_default, now); + } + + size_t count_not_found = 0; + StringRefs tmp_refs(key_columns.size()); + for (size_t i = 0; i < n; ++i) + { + if (!found[i]) + { + auto key = not_found_pool.allocKey(i, key_columns, tmp_refs); + not_found[key].push_back(i); + ++count_not_found; + } + } + + query_count.fetch_add(n, std::memory_order_relaxed); + hit_count.fetch_add(n - count_not_found, std::memory_order_release); +} + +void SSDComplexKeyCacheStorage::getString( + const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types, + StringRefs & refs, ArenaWithFreeLists & arena, + std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, + std::vector & default_ids, std::chrono::system_clock::time_point now) const +{ + size_t n = key_columns.front()->size(); + std::vector found(n, false); + + { + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + partition->getString(attribute_index, key_columns, key_types, refs, arena, found, default_ids, now); + } + + size_t count_not_found = 0; + StringRefs tmp_refs(key_columns.size()); + for (size_t i = 0; i < n; ++i) + { + if (!found[i]) + { + auto key = not_found_pool.allocKey(i, key_columns, tmp_refs); + not_found[key].push_back(i); + ++count_not_found; + } + } + + query_count.fetch_add(n, std::memory_order_relaxed); + hit_count.fetch_add(n - count_not_found, std::memory_order_release); +} + +void SSDComplexKeyCacheStorage::has( + const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out, + std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const +{ + size_t n = key_columns.front()->size(); + for (size_t i = 0; i < n; ++i) + out[i] = HAS_NOT_FOUND; + std::vector found(n, false); + + { + std::shared_lock 
lock(rw_lock); + for (auto & partition : partitions) + partition->has(key_columns, key_types, out, found, now); + } + + size_t count_not_found = 0; + StringRefs tmp_refs(key_columns.size()); + for (size_t i = 0; i < n; ++i) + { + if (out[i] == HAS_NOT_FOUND) + { + auto key = not_found_pool.allocKey(i, key_columns, tmp_refs); + not_found[key].push_back(i); + ++count_not_found; + } + } + + query_count.fetch_add(n, std::memory_order_relaxed); + hit_count.fetch_add(n - count_not_found, std::memory_order_release); +} + +template +void SSDComplexKeyCacheStorage::update( + DictionarySourcePtr & source_ptr, + const Columns & key_columns, + const DataTypes & key_types, + const KeyRefs & required_keys, + const std::vector & required_rows, + PresentIdHandler && on_updated, + AbsentIdHandler && on_key_not_found, + const DictionaryLifetime lifetime) +{ + auto append_block = [&key_types, this]( + const Columns & new_keys, + const SSDComplexKeyCachePartition::Attributes & new_attributes, + const PaddedPODArray & metadata) + { + size_t inserted = 0; + while (inserted < metadata.size()) + { + if (!partitions.empty()) + inserted += partitions.front()->appendBlock( + new_keys, key_types, new_attributes, metadata, inserted); + if (inserted < metadata.size()) + { + partitions.emplace_front(std::make_unique( + AttributeUnderlyingType::utUInt64, attributes_structure, path, + (partitions.empty() ? 0 : partitions.front()->getId() + 1), + partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + } + } + + collectGarbage(); + }; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::DictCacheRequests}; + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequested, required_keys.size()); + + std::unordered_map remaining_keys{required_keys.size()}; + for (const auto & key : required_keys) + remaining_keys.insert({key, 0}); + + const auto now = std::chrono::system_clock::now(); + + { + const auto keys_size = key_columns.size(); + StringRefs keys(keys_size); + TemporalComplexKeysPool tmp_keys_pool; + + const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; + + if (now > backoff_end_time) + { + try + { + if (update_error_count) + { + /// Recover after error: we have to clone the source here because + /// it could keep connections which should be reset after error. 
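+                    /// (clone() is expected to rebuild the source from its configuration,
+                    /// discarding any pooled connections left in a broken state.)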
+                    source_ptr = source_ptr->clone();
+                }
+
+                Stopwatch watch;
+                auto stream = source_ptr->loadKeys(key_columns, required_rows);
+                stream->readPrefix();
+
+                while (const auto block = stream->read())
+                {
+                    const auto new_key_columns = ext::map<Columns>(
+                        ext::range(0, keys_size),
+                        [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
+
+                    const auto new_attributes = createAttributesFromBlock(block, keys_size, attributes_structure);
+
+                    const auto rows_num = block.rows();
+
+                    PaddedPODArray<SSDComplexKeyCachePartition::Metadata> metadata(rows_num);
+
+                    for (const auto i : ext::range(0, rows_num))
+                    {
+                        auto key = tmp_keys_pool.allocKey(i, new_key_columns, keys);
+                        SCOPE_EXIT(tmp_keys_pool.rollback(key));
+
+                        std::uniform_int_distribution<UInt64> distribution{lifetime.min_sec, lifetime.max_sec};
+                        metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine)));
+                        /// mark corresponding id as found
+                        on_updated(key, i, new_attributes);
+                        remaining_keys[key] = 1;
+                    }
+
+                    append_block(new_key_columns, new_attributes, metadata);
+                }
+
+                stream->readSuffix();
+
+                update_error_count = 0;
+                last_update_exception = std::exception_ptr{};
+                backoff_end_time = std::chrono::system_clock::time_point{};
+
+                ProfileEvents::increment(ProfileEvents::DictCacheRequestTimeNs, watch.elapsed());
+            }
+            catch (...)
+            {
+                ++update_error_count;
+                last_update_exception = std::current_exception();
+                backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, update_error_count));
+
+                tryLogException(last_update_exception, log,
+                    "Could not update ssd cache dictionary, next update is scheduled at " + ext::to_string(backoff_end_time));
+            }
+        }
+    }
+
+    auto append_defaults = [this](
+        const KeyRefs & new_keys,
+        const PaddedPODArray<SSDComplexKeyCachePartition::Metadata> & metadata)
+    {
+        size_t inserted = 0;
+        while (inserted < metadata.size())
+        {
+            if (!partitions.empty())
+                inserted += partitions.front()->appendDefaults(
+                    new_keys, metadata, inserted);
+            if (inserted < metadata.size())
+            {
+                partitions.emplace_front(std::make_unique<SSDComplexKeyCachePartition>(
+                    AttributeUnderlyingType::utUInt64, attributes_structure, path,
+                    (partitions.empty() ? 0 : partitions.front()->getId() + 1),
+                    partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys));
+            }
+        }
+
+        collectGarbage();
+    };
+
+    size_t not_found_num = 0, found_num = 0;
+    /// Check which ids have not been found and require setting null_value
+    KeyRefs default_keys;
+
+    PaddedPODArray<SSDComplexKeyCachePartition::Metadata> metadata;
+    {
+        const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
+
+        for (const auto & key_found_pair : remaining_keys)
+        {
+            if (key_found_pair.second)
+            {
+                ++found_num;
+                continue;
+            }
+            ++not_found_num;
+
+            const auto key = key_found_pair.first;
+
+            if (update_error_count)
+            {
+                /// TODO: use the old values here.
+
+                /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. 
+ std::rethrow_exception(last_update_exception); + } + + std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; + metadata.emplace_back(); + metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); + metadata.back().setDefault(); + + default_keys.push_back(key); + + /// inform caller that the cell has not been found + on_key_not_found(key); + } + + if (not_found_num) + append_defaults(default_keys, metadata); + } + + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedMiss, not_found_num); + ProfileEvents::increment(ProfileEvents::DictCacheKeysRequestedFound, found_num); + ProfileEvents::increment(ProfileEvents::DictCacheRequests); +} + +PaddedPODArray SSDComplexKeyCacheStorage::getCachedIds() const +{ + /*PaddedPODArray array; + + const auto now = std::chrono::system_clock::now(); + + std::shared_lock lock(rw_lock); + for (auto & partition : partitions) + { + const auto cached_in_partition = partition->getCachedIds(now); + array.insert(std::begin(cached_in_partition), std::end(cached_in_partition)); + }*/ + + return {}; +} + +double SSDComplexKeyCacheStorage::getLoadFactor() const +{ + double result = 0; + std::shared_lock lock(rw_lock); + for (const auto & partition : partitions) + result += partition->getLoadFactor(); + return result / partitions.size(); +} + +size_t SSDComplexKeyCacheStorage::getElementCount() const +{ + size_t result = 0; + std::shared_lock lock(rw_lock); + for (const auto & partition : partitions) + result += partition->getElementCount(); + return result; +} + +void SSDComplexKeyCacheStorage::collectGarbage() +{ + // add partitions to queue + while (partitions.size() > max_partitions_count) + { + partition_delete_queue.splice(std::end(partition_delete_queue), partitions, std::prev(std::end(partitions))); + } + + // drop unused partitions + while (!partition_delete_queue.empty() && partition_delete_queue.front().use_count() == 1) + { + partition_delete_queue.front()->remove(); + partition_delete_queue.pop_front(); + } +} + +SSDComplexKeyCachePartition::Attributes SSDComplexKeyCacheStorage::createAttributesFromBlock( + const Block & block, const size_t begin_column, const std::vector & structure) +{ + SSDComplexKeyCachePartition::Attributes attributes; + + const auto columns = block.getColumns(); + for (size_t i = 0; i < structure.size(); ++i) + { + const auto & column = columns[i + begin_column]; + switch (structure[i]) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + SSDComplexKeyCachePartition::Attribute::Container values(column->size()); \ + memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ + attributes.emplace_back(); \ + attributes.back().type = structure[i]; \ + attributes.back().values = std::move(values); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + { + attributes.emplace_back(); + SSDComplexKeyCachePartition::Attribute::Container values(column->size()); + for (size_t j = 0; j < column->size(); ++j) + { + const auto ref = column->getDataAt(j); + values[j].resize(ref.size); + memcpy(values[j].data(), ref.data, ref.size); + } + attributes.back().type = structure[i]; + attributes.back().values = std::move(values); + } + break; + } + } + 
+ return attributes; +} + +SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_, + const std::string & path_, + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_, + const size_t write_buffer_size_, + const size_t max_stored_keys_) + : name(name_) + , dict_struct(dict_struct_) + , source_ptr(std::move(source_ptr_)) + , dict_lifetime(dict_lifetime_) + , path(path_) + , max_partitions_count(max_partitions_count_) + , partition_size(partition_size_) + , block_size(block_size_) + , read_buffer_size(read_buffer_size_) + , write_buffer_size(write_buffer_size_) + , max_stored_keys(max_stored_keys_) + , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), + path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) + , log(&Poco::Logger::get("SSDComplexKeyCacheDictionary")) +{ + LOG_INFO(log, "Using storage path '" << path << "'."); + if (!this->source_ptr->supportsSelectiveLoad()) + throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD}; + + createAttributes(); +} + +#define DECLARE(TYPE) \ + void SSDComplexKeyCacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ + const auto null_value = std::get(null_values[index]); \ + getItemsNumberImpl( \ + index, \ + key_columns, \ + key_types, \ + out, \ + [&](const size_t) { return null_value; }); \ + } + + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +#define DECLARE(TYPE) \ + void SSDComplexKeyCacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ + getItemsNumberImpl( \ + index, \ + key_columns, \ + key_types, \ + out, \ + [&](const size_t row) { return def[row]; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +#define DECLARE(TYPE) \ + void SSDComplexKeyCacheDictionary::get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const \ + { \ + const auto index = getAttributeIndex(attribute_name); \ + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ + getItemsNumberImpl( \ + index, \ + key_columns, \ 
+ key_types, \ + out, \ + [&](const size_t) { return def; }); \ + } + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + +template +void SSDComplexKeyCacheDictionary::getItemsNumberImpl( + const size_t attribute_index, + const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, DefaultGetter && get_default) const +{ + const auto now = std::chrono::system_clock::now(); + + TemporalComplexKeysPool not_found_pool; + std::unordered_map> not_found_keys; + storage.getValue(attribute_index, key_columns, key_types, out, not_found_keys, not_found_pool, get_default, now); + if (not_found_keys.empty()) + return; + + std::vector required_keys(not_found_keys.size()); + std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; }); + std::vector required_rows; + required_rows.reserve(required_keys.size()); + for (const auto & key_ref : required_keys) + required_rows.push_back(not_found_keys[key_ref].front()); + + storage.update( + source_ptr, + key_columns, + key_types, + required_keys, + required_rows, + [&](const auto key, const auto row, const auto & new_attributes) + { + for (const size_t out_row : not_found_keys[key]) + out[out_row] = std::get>(new_attributes[attribute_index].values)[row]; + }, + [&](const auto key) + { + for (const size_t row : not_found_keys[key]) + out[row] = get_default(row); + }, + getLifetime()); +} + +void SSDComplexKeyCacheDictionary::getString( + const std::string & attribute_name, + const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const +{ + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); + + const auto null_value = StringRef{std::get(null_values[index])}; + + getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return null_value; }); +} + +void SSDComplexKeyCacheDictionary::getString( + const std::string & attribute_name, + const Columns & key_columns, const DataTypes & key_types, + const ColumnString * const def, ColumnString * const out) const +{ + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); + + getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t row) { return def->getDataAt(row); }); +} + +void SSDComplexKeyCacheDictionary::getString( + const std::string & attribute_name, + const Columns & key_columns, + const DataTypes & key_types, + const String & def, + ColumnString * const out) const +{ + const auto index = getAttributeIndex(attribute_name); + checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::utString); + + getItemsStringImpl(index, key_columns, key_types, out, [&](const size_t) { return StringRef{def}; }); +} + +template +void SSDComplexKeyCacheDictionary::getItemsStringImpl( + const size_t attribute_index, + const Columns & key_columns, + const DataTypes & key_types, + ColumnString * out, + DefaultGetter && get_default) const +{ + const auto now = std::chrono::system_clock::now(); + + TemporalComplexKeysPool not_found_pool; + std::unordered_map> 
not_found_keys; + + const size_t n = key_columns.front()->size(); + + StringRefs refs(n); + ArenaWithFreeLists string_arena; + std::vector default_rows; + storage.getString( + attribute_index, key_columns, key_types, + refs, string_arena, not_found_keys, not_found_pool, default_rows, now); + std::sort(std::begin(default_rows), std::end(default_rows)); + + if (not_found_keys.empty()) + { + size_t default_index = 0; + for (size_t row = 0; row < n; ++row) + { + if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row)) + { + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); + ++default_index; + } + else + out->insertData(refs[row].data, refs[row].size); + } + return; + } + + std::vector required_keys(not_found_keys.size()); + std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; }); + + std::unordered_map update_result; + + std::vector required_rows; + required_rows.reserve(required_keys.size()); + for (const auto & key_ref : required_keys) + required_rows.push_back(not_found_keys[key_ref].front()); + + storage.update( + source_ptr, + key_columns, + key_types, + required_keys, + required_rows, + [&](const auto key, const auto row, const auto & new_attributes) + { + update_result[key] = std::get>(new_attributes[attribute_index].values)[row]; + }, + [&](const auto) {}, + getLifetime()); + + TemporalComplexKeysPool tmp_keys_pool; + StringRefs tmp_refs(key_columns.size()); + size_t default_index = 0; + for (size_t row = 0; row < n; ++row) + { + const auto key = tmp_keys_pool.allocKey(row, key_columns, tmp_refs); + SCOPE_EXIT(tmp_keys_pool.rollback(key)); + if (unlikely(default_index != default_rows.size() && default_rows[default_index] == row)) + { + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); + ++default_index; + } + else if (auto it = not_found_keys.find(key); it == std::end(not_found_keys)) + { + out->insertData(refs[row].data, refs[row].size); + } + else if (auto it_update = update_result.find(key); it_update != std::end(update_result)) + { + out->insertData(it_update->second.data(), it_update->second.size()); + } + else + { + auto to_insert = get_default(row); + out->insertData(to_insert.data, to_insert.size); + } + } +} + +void SSDComplexKeyCacheDictionary::has( + const Columns & key_columns, + const DataTypes & key_types, + PaddedPODArray & out) const +{ + const auto now = std::chrono::system_clock::now(); + + std::unordered_map> not_found_keys; + TemporalComplexKeysPool not_found_pool; + storage.has(key_columns, key_types, out, not_found_keys, not_found_pool, now); + if (not_found_keys.empty()) + return; + + std::vector required_keys(not_found_keys.size()); + std::transform(std::begin(not_found_keys), std::end(not_found_keys), std::begin(required_keys), [](const auto & pair) { return pair.first; }); + + std::vector required_rows; + required_rows.reserve(required_keys.size()); + for (const auto & key_ref : required_keys) + required_rows.push_back(not_found_keys[key_ref].front()); + + storage.update( + source_ptr, + key_columns, + key_types, + required_keys, + required_rows, + [&](const auto key, const auto, const auto &) + { + for (const size_t out_row : not_found_keys[key]) + out[out_row] = true; + }, + [&](const auto key) + { + for (const size_t row : not_found_keys[key]) + out[row] = false; + }, + getLifetime()); +} + +BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream( + const 
Names & /* column_names */, size_t /* max_block_size*/) const
+{
+    //using BlockInputStreamType = DictionaryBlockInputStream;
+    return nullptr; //std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, storage.getCachedIds(), column_names);
+}
+
+size_t SSDComplexKeyCacheDictionary::getAttributeIndex(const std::string & attr_name) const
+{
+    auto it = attribute_index_by_name.find(attr_name);
+    if (it == std::end(attribute_index_by_name))
+        throw Exception{"Attribute `" + attr_name + "` does not exist.", ErrorCodes::BAD_ARGUMENTS};
+    return it->second;
+}
+
+template <typename T>
+AttributeValueVariant SSDComplexKeyCacheDictionary::createAttributeNullValueWithTypeImpl(const Field & null_value)
+{
+    AttributeValueVariant var_null_value = static_cast<T>(null_value.get<NearestFieldType<T>>());
+    bytes_allocated += sizeof(T);
+    return var_null_value;
+}
+
+template <>
+AttributeValueVariant SSDComplexKeyCacheDictionary::createAttributeNullValueWithTypeImpl<String>(const Field & null_value)
+{
+    AttributeValueVariant var_null_value = null_value.get<String>();
+    bytes_allocated += sizeof(StringRef);
+    return var_null_value;
+}
+
+AttributeValueVariant SSDComplexKeyCacheDictionary::createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value)
+{
+    switch (type)
+    {
+#define DISPATCH(TYPE) \
+case AttributeUnderlyingType::ut##TYPE: \
+    return createAttributeNullValueWithTypeImpl<TYPE>(null_value);
+
+        DISPATCH(UInt8)
+        DISPATCH(UInt16)
+        DISPATCH(UInt32)
+        DISPATCH(UInt64)
+        DISPATCH(UInt128)
+        DISPATCH(Int8)
+        DISPATCH(Int16)
+        DISPATCH(Int32)
+        DISPATCH(Int64)
+        DISPATCH(Decimal32)
+        DISPATCH(Decimal64)
+        DISPATCH(Decimal128)
+        DISPATCH(Float32)
+        DISPATCH(Float64)
+        DISPATCH(String)
+#undef DISPATCH
+    }
+    throw Exception{"Unknown attribute type: " + std::to_string(static_cast<size_t>(type)), ErrorCodes::TYPE_MISMATCH};
+}
+
+void SSDComplexKeyCacheDictionary::createAttributes()
+{
+    null_values.reserve(dict_struct.attributes.size());
+    for (size_t i = 0; i < dict_struct.attributes.size(); ++i)
+    {
+        const auto & attribute = dict_struct.attributes[i];
+
+        attribute_index_by_name.emplace(attribute.name, i);
+        null_values.push_back(createAttributeNullValueWithType(attribute.underlying_type, attribute.null_value));
+
+        if (attribute.hierarchical)
+            throw Exception{name + ": hierarchical attributes are not supported for dictionary of type " + getTypeName(),
+                ErrorCodes::TYPE_MISMATCH};
+    }
+}
+
+void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory)
+{
+    auto create_layout = [=](const std::string & name,
+                             const DictionaryStructure & dict_struct,
+                             const Poco::Util::AbstractConfiguration & config,
+                             const std::string & config_prefix,
+                             DictionarySourcePtr source_ptr) -> DictionaryPtr
+    {
+        if (dict_struct.id)
+            throw Exception{"'id' is not supported for dictionary of layout 'ssd_complex_key'", ErrorCodes::UNSUPPORTED_METHOD};
+
+        if (dict_struct.range_min || dict_struct.range_max)
+            throw Exception{name
+                + ": elements .structure.range_min and .structure.range_max should be defined only "
+                  "for a dictionary of layout 'range_hashed'",
+                ErrorCodes::BAD_ARGUMENTS};
+        const auto & layout_prefix = config_prefix + ".layout";
+
+        const auto max_partitions_count = config.getInt(layout_prefix + ".ssd_complex_key.max_partitions_count", DEFAULT_PARTITIONS_COUNT);
+        if (max_partitions_count <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS};
+
+        const auto block_size = config.getInt(layout_prefix + ".ssd_complex_key.block_size", DEFAULT_SSD_BLOCK_SIZE);
+        if (block_size <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS};
+
+        const auto partition_size = config.getInt64(layout_prefix + ".ssd_complex_key.partition_size", DEFAULT_FILE_SIZE);
+        if (partition_size <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS};
+        if (partition_size % block_size != 0)
+            throw Exception{name + ": partition_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS};
+
+        const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd_complex_key.read_buffer_size", DEFAULT_READ_BUFFER_SIZE);
+        if (read_buffer_size <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS};
+        if (read_buffer_size % block_size != 0)
+            throw Exception{name + ": read_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS};
+
+        const auto write_buffer_size = config.getInt64(layout_prefix + ".ssd_complex_key.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE);
+        if (write_buffer_size <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS};
+        if (write_buffer_size % block_size != 0)
+            throw Exception{name + ": write_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS};
+
+        auto path = config.getString(layout_prefix + ".ssd_complex_key.path");
+        if (path.empty())
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have empty path",
+                ErrorCodes::BAD_ARGUMENTS};
+        if (path.at(0) != '/')
+            path = std::filesystem::path{config.getString("path")}.concat(path).string();
+
+        const auto max_stored_keys = config.getInt64(layout_prefix + ".ssd_complex_key.max_stored_keys", DEFAULT_MAX_STORED_KEYS);
+        if (max_stored_keys <= 0)
+            throw Exception{name + ": dictionary of layout 'ssd_complex_key' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS};
+
+        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
+        return std::make_unique<SSDComplexKeyCacheDictionary>(
+            name, dict_struct, std::move(source_ptr), dict_lifetime, path,
+            max_partitions_count, partition_size / block_size, block_size,
+            read_buffer_size / block_size, write_buffer_size / block_size,
+            max_stored_keys);
+    };
+    factory.registerLayout("ssd_complex_key", create_layout, true);
+}
+
+}
diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
new file mode 100644
index 00000000000..6e64078cffd
--- /dev/null
+++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h
@@ -0,0 +1,790 @@
+#pragma once
+
+#include "DictionaryStructure.h"
+#include "IDictionary.h"
+#include "IDictionarySource.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+class KeyRef
+{
+public:
+    explicit KeyRef(char * data) : ptr(data) {}
+
+    KeyRef() : ptr(nullptr) {}
+
+    inline UInt16 size() const {
+        return *reinterpret_cast<const UInt16 *>(ptr);
+    }
+
+    inline size_t fullSize() const {
+        return static_cast<size_t>(size()) + sizeof(UInt16);
+    }
+
+    inline char * data() const {
+        return ptr + sizeof(UInt16);
+    }
+
+    inline char * fullData() const {
+        return ptr;
+    }
+
+    inline char * fullData() {
+        return ptr;
+    }
+
+    inline const StringRef getRef() const {
+        return
StringRef(data(), size()); + } + + inline bool operator==(const KeyRef & other) const { + return getRef() == other.getRef(); + } + + inline bool operator<(const KeyRef & other) const { + return getRef() < other.getRef(); + } + +private: + char * ptr; +}; + +using KeyRefs = std::vector; +} + +namespace std +{ + template <> + struct hash + { + size_t operator() (DB::KeyRef key_ref) const + { + return hasher(key_ref.getRef()); + } + + std::hash hasher; + }; +} + +namespace DB +{ + +using AttributeValueVariant = std::variant< + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Int8, + Int16, + Int32, + Int64, + Decimal32, + Decimal64, + Decimal128, + Float32, + Float64, + String>; + +template +class ComplexKeysPoolImpl +{ +public: + KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) + { + if constexpr (std::is_same_v) + { + // not working now + const auto res = arena->alloc(); + auto place = res; + + for (const auto & key_column : key_columns) + { + const StringRef key = key_column->getDataAt(row); + memcpy(place, key.data, key.size); + place += key.size; + } + + return KeyRef(res); + } + else + { + const auto keys_size = key_columns.size(); + UInt16 sum_keys_size{}; + + for (size_t j = 0; j < keys_size; ++j) + { + keys[j] = key_columns[j]->getDataAt(row); + sum_keys_size += keys[j].size; + if (!key_columns[j]->valuesHaveFixedSize()) // String + sum_keys_size += sizeof(size_t) + 1; + } + + auto place = arena.alloc(sum_keys_size + sizeof(sum_keys_size)); + + auto key_start = place; + memcpy(key_start, &sum_keys_size, sizeof(sum_keys_size)); + key_start += sizeof(sum_keys_size); + for (size_t j = 0; j < keys_size; ++j) + { + if (!key_columns[j]->valuesHaveFixedSize()) // String + { + auto start = key_start; + auto key_size = keys[j].size + 1; + memcpy(key_start, &key_size, sizeof(size_t)); + key_start += sizeof(size_t); + memcpy(key_start, keys[j].data, keys[j].size); + key_start += keys[j].size; + *key_start = '\0'; + ++key_start; + keys[j].data = start; + keys[j].size += sizeof(size_t) + 1; + } + else + { + memcpy(key_start, keys[j].data, keys[j].size); + keys[j].data = key_start; + key_start += keys[j].size; + } + } + + return KeyRef(place); + } + } + + KeyRef copyKeyFrom(const KeyRef & key) + { + char * data = arena.alloc(key.fullSize()); + memcpy(data, key.fullData(), key.fullSize()); + return KeyRef(data); + } + + void freeKey(const KeyRef & key) + { + if constexpr (std::is_same_v) + arena.free(key.fullData(), key.fullSize()); + else if constexpr (std::is_same_v) + arena.free(key.fullData()); + else + throw Exception("Free not supported.", ErrorCodes::LOGICAL_ERROR); + } + + void rollback(const KeyRef & key) + { + if constexpr (std::is_same_v) + arena.rollback(key.fullSize()); + else + throw Exception("Rollback not supported.", ErrorCodes::LOGICAL_ERROR); + } + + void writeKey(const KeyRef & key, WriteBuffer & buf) + { + buf.write(key.fullData(), key.fullSize()); + } + + void readKey(KeyRef & key, ReadBuffer & buf) + { + UInt16 sz; + readBinary(sz, buf); + char * data = nullptr; + if constexpr (std::is_same_v) + data = arena.alloc(); + else + data = arena.alloc(sz + sizeof(sz)); + memcpy(data, &sz, sizeof(sz)); + buf.read(data + sizeof(sz), sz); + key = KeyRef(data); + } + + void ignoreKey(ReadBuffer & buf) const + { + UInt16 sz; + readBinary(sz, buf); + buf.ignore(sz); + } + +private: + A arena; +}; + +using TemporalComplexKeysPool = ComplexKeysPoolImpl; +using ComplexKeysPool = ComplexKeysPoolImpl; +//using FixedComplexKeysPool = ComplexKeysPoolImpl; + 
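The key layout that allocKey builds above is worth seeing in isolation. Each composite key is a single contiguous buffer: a UInt16 header holding the total payload size, then every fixed-size column copied verbatim, and every string column stored as a size_t length word (the string size plus one for the terminating NUL), the raw bytes, and a trailing '\0'. KeyRef::size() reads the header and fullSize() adds the header's own two bytes back. Below is a minimal standalone sketch of that packing scheme in plain C++; packKey, its int32/string column pair, and all other names are invented for illustration and are not part of this patch.

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <string>
#include <vector>

/// Packs one fixed-size column and one string column the way allocKey does:
/// [UInt16 payload size][fixed bytes][size_t len+1][string bytes]['\0']
std::vector<char> packKey(int32_t fixed_column, const std::string & string_column)
{
    const size_t string_stored = sizeof(size_t) + string_column.size() + 1;
    const auto payload = static_cast<uint16_t>(sizeof(fixed_column) + string_stored);

    std::vector<char> buf(sizeof(uint16_t) + payload);
    char * pos = buf.data();

    std::memcpy(pos, &payload, sizeof(payload));            /// header = KeyRef::size()
    pos += sizeof(payload);
    std::memcpy(pos, &fixed_column, sizeof(fixed_column));  /// fixed-size value, raw bytes
    pos += sizeof(fixed_column);
    const size_t len_with_nul = string_column.size() + 1;   /// length word stores size + 1
    std::memcpy(pos, &len_with_nul, sizeof(len_with_nul));
    pos += sizeof(len_with_nul);
    std::memcpy(pos, string_column.data(), string_column.size());
    pos += string_column.size();
    *pos = '\0';                                            /// trailing NUL, as in allocKey

    return buf;
}

int main()
{
    const auto key = packKey(42, "clickhouse");
    uint16_t payload;
    std::memcpy(&payload, key.data(), sizeof(payload));
    /// fullSize() = size() + sizeof(UInt16); data() skips the header.
    std::printf("size()=%u fullSize()=%zu\n", static_cast<unsigned>(payload), payload + sizeof(uint16_t));
}

Storing an explicit length word plus a NUL for every variable-size part is what lets the buffer be written to disk and read back verbatim (writeKey/readKey above) without consulting the column types again.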
+template <typename K, typename V, typename Pool>
+class ComplexKeyLRUCache
+{
+    using Iter = typename std::list<K>::iterator;
+
+    struct Cell
+    {
+        Iter iter;
+        V val;
+    };
+
+public:
+    ComplexKeyLRUCache(size_t max_size_, Pool & keys_pool_)
+        : max_size(max_size_)
+        , keys_pool(keys_pool_)
+    {
+    }
+
+    void set(K key, V val)
+    {
+        std::lock_guard lock(mutex);
+        auto it = cache.find(key);
+        if (it == std::end(cache))
+        {
+            auto & item = cache[key];
+            item.iter = queue.insert(std::end(queue), key);
+            item.val = val;
+            if (queue.size() > max_size)
+            {
+                /// Erase the LRU entry from the map before freeing its key,
+                /// otherwise cache.erase() would hash an already freed key.
+                auto lru_key = queue.front();
+                cache.erase(lru_key);
+                keys_pool.freeKey(lru_key);
+                queue.pop_front();
+            }
+        }
+        else
+        {
+            queue.erase(it->second.iter);
+            it->second.iter = queue.insert(std::end(queue), key);
+            it->second.val = val;
+        }
+    }
+
+    bool get(K key, V & val)
+    {
+        std::lock_guard lock(mutex);
+        auto it = cache.find(key);
+        if (it == std::end(cache))
+            return false;
+        val = it->second.val;
+        queue.erase(it->second.iter);
+        it->second.iter = queue.insert(std::end(queue), key);
+        return true;
+    }
+
+    bool erase(K key)
+    {
+        std::lock_guard lock(mutex);
+        auto it = cache.find(key);
+        if (it == std::end(cache))
+            return false;
+
+        queue.erase(it->second.iter);
+        cache.erase(it);
+        keys_pool.freeKey(key);
+        return true;
+    }
+
+    size_t size()
+    {
+        std::lock_guard lock(mutex);
+        return cache.size();
+    }
+
+    auto begin()
+    {
+        std::lock_guard lock(mutex);
+        return std::begin(cache);
+    }
+
+    auto end()
+    {
+        std::lock_guard lock(mutex);
+        return std::end(cache);
+    }
+
+private:
+    std::unordered_map<K, Cell> cache;
+    std::list<K> queue;
+    size_t max_size;
+    Pool & keys_pool;
+    std::mutex mutex;
+};
+
+class SSDComplexKeyCachePartition
+{
+public:
+    struct Index final
+    {
+        bool inMemory() const;
+        void setInMemory(const bool in_memory);
+
+        bool exists() const;
+        void setNotExists();
+
+        size_t getAddressInBlock() const;
+        void setAddressInBlock(const size_t address_in_block);
+
+        size_t getBlockId() const;
+        void setBlockId(const size_t block_id);
+
+        bool operator< (const Index & rhs) const { return index < rhs.index; }
+
+        /// Stores `is_in_memory` flag, block id, address in uncompressed block
+        uint64_t index = 0;
+    };
+
+    struct Metadata final
+    {
+        using time_point_t = std::chrono::system_clock::time_point;
+        using time_point_rep_t = time_point_t::rep;
+        using time_point_urep_t = std::make_unsigned_t<time_point_rep_t>;
+
+        time_point_t expiresAt() const;
+        void setExpiresAt(const time_point_t & t);
+
+        bool isDefault() const;
+        void setDefault();
+
+        /// Stores both expiration time and `is_default` flag in the most significant bit
+        time_point_urep_t data = 0;
+    };
+
+    using Offset = size_t;
+    using Offsets = std::vector<Offset>;
+
+
+    SSDComplexKeyCachePartition(
+        const AttributeUnderlyingType & key_structure,
+        const std::vector<AttributeUnderlyingType> & attributes_structure,
+        const std::string & dir_path,
+        const size_t file_id,
+        const size_t max_size,
+        const size_t block_size,
+        const size_t read_buffer_size,
+        const size_t write_buffer_size,
+        const size_t max_stored_keys);
+
+    ~SSDComplexKeyCachePartition();
+
+    template <typename T>
+    using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
+
+    template <typename Out, typename GetDefault>
+    void getValue(const size_t attribute_index,
+        const Columns & key_columns, const DataTypes & key_types,
+        ResultArrayType<Out> & out, std::vector<bool> & found, GetDefault & get_default,
+        std::chrono::system_clock::time_point now) const;
+
+    void getString(const size_t attribute_index,
+        const Columns & key_columns, const DataTypes & key_types,
+        StringRefs & refs, ArenaWithFreeLists & arena, std::vector<bool> & found,
+        std::vector<size_t> &
default_ids, std::chrono::system_clock::time_point now) const; + + void has(const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, std::vector & found, + std::chrono::system_clock::time_point now) const; + + struct Attribute + { + template + using Container = std::vector; + + AttributeUnderlyingType type; + std::variant< + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container, + Container> values; + }; + using Attributes = std::vector; + + size_t appendBlock( + const Columns & key_columns, + const DataTypes & key_types, + const Attributes & new_attributes, + const PaddedPODArray & metadata, + const size_t begin); + + size_t appendDefaults( + const KeyRefs & keys, + const PaddedPODArray & metadata, + const size_t begin); + + void clearOldestBlocks(); + + void flush(); + + void remove(); + + size_t getId() const; + + PaddedPODArray getCachedIds(const std::chrono::system_clock::time_point now) const; + + double getLoadFactor() const; + + size_t getElementCount() const; + +private: + size_t append( + const KeyRefs & keys, + const Attributes & new_attributes, + const PaddedPODArray & metadata, + const size_t begin); + + template + void getImpl(const Columns & key_columns, const DataTypes & key_types, + SetFunc & set, std::vector & found) const; + + template + void getValueFromMemory(const PaddedPODArray & indices, SetFunc & set) const; + + template + void getValueFromStorage(const PaddedPODArray & indices, SetFunc & set) const; + + void ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const; + + /*KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const; + void freeKey(const KeyRef key) const; + + void writeKey(KeyRef key, WriteBuffer & buf); + template + void readKey(KeyRef & key, ArenaForKey & arena, ReadBuffer & buf); + void ignoreKey(ReadBuffer & buf);*/ + + const size_t file_id; + const size_t max_size; + const size_t block_size; + const size_t read_buffer_size; + const size_t write_buffer_size; + const size_t max_stored_keys; + const std::string path; + + mutable std::shared_mutex rw_lock; + + int fd = -1; + + ComplexKeysPool keys_pool; + mutable ComplexKeyLRUCache key_to_index; + KeyRefs keys_buffer; + + const std::vector attributes_structure; + + std::optional> memory; + std::optional write_buffer; + uint32_t keys_in_block = 0; + //CompressionCodecPtr codec; + + size_t current_memory_block_id = 0; + size_t current_file_block_id = 0; +}; + +using SSDComplexKeyCachePartitionPtr = std::shared_ptr; + + +class SSDComplexKeyCacheStorage +{ +public: + using AttributeTypes = std::vector; + + SSDComplexKeyCacheStorage( + const AttributeTypes & attributes_structure, + const std::string & path, + const size_t max_partitions_count, + const size_t partition_size, + const size_t block_size, + const size_t read_buffer_size, + const size_t write_buffer_size, + const size_t max_stored_keys); + + ~SSDComplexKeyCacheStorage(); + + template + using ResultArrayType = SSDComplexKeyCachePartition::ResultArrayType; + + template + void getValue(const size_t attribute_index, const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, + GetDefault & get_default, std::chrono::system_clock::time_point now) const; + + void getString(const size_t attribute_index, const Columns & key_columns, const DataTypes 
& key_types, + StringRefs & refs, ArenaWithFreeLists & arena, std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, + std::vector & default_ids, std::chrono::system_clock::time_point now) const; + + void has(const Columns & key_columns, const DataTypes & key_types, ResultArrayType & out, + std::unordered_map> & not_found, + TemporalComplexKeysPool & not_found_pool, std::chrono::system_clock::time_point now) const; + + template + void update(DictionarySourcePtr & source_ptr, + const Columns & key_columns, const DataTypes & key_types, + const KeyRefs & required_keys, const std::vector & required_rows, + PresentIdHandler && on_updated, AbsentIdHandler && on_key_not_found, + const DictionaryLifetime lifetime); + + PaddedPODArray getCachedIds() const; + + std::exception_ptr getLastException() const { return last_update_exception; } + + const std::string & getPath() const { return path; } + + size_t getQueryCount() const { return query_count.load(std::memory_order_relaxed); } + + size_t getHitCount() const { return hit_count.load(std::memory_order_acquire); } + + size_t getElementCount() const; + + double getLoadFactor() const; + +private: + SSDComplexKeyCachePartition::Attributes createAttributesFromBlock( + const Block & block, const size_t begin_column, const std::vector & structure); + + void collectGarbage(); + + const AttributeTypes attributes_structure; + + const std::string path; + const size_t max_partitions_count; + const size_t partition_size; + const size_t block_size; + const size_t read_buffer_size; + const size_t write_buffer_size; + const size_t max_stored_keys; + + mutable std::shared_mutex rw_lock; + std::list partitions; + std::list partition_delete_queue; + + Logger * const log; + + mutable pcg64 rnd_engine; + + mutable std::exception_ptr last_update_exception; + mutable size_t update_error_count = 0; + mutable std::chrono::system_clock::time_point backoff_end_time; + + // stats + //mutable size_t bytes_allocated = 0; + + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; +}; + + +class SSDComplexKeyCacheDictionary final : public IDictionaryBase +{ +public: + SSDComplexKeyCacheDictionary( + const std::string & name_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const DictionaryLifetime dict_lifetime_, + const std::string & path, + const size_t max_partitions_count_, + const size_t partition_size_, + const size_t block_size_, + const size_t read_buffer_size_, + const size_t write_buffer_size_, + const size_t max_stored_keys_); + + const std::string & getDatabase() const override { return name; } + const std::string & getName() const override { return name; } + const std::string & getFullName() const override { return getName(); } + + std::string getKeyDescription() const { return dict_struct.getKeyDescription(); } + + std::string getTypeName() const override { return "SSDComplexKeyCache"; } + + size_t getBytesAllocated() const override { return 0; } // TODO: ? 
+ + size_t getQueryCount() const override { return storage.getQueryCount(); } + + double getHitRate() const override + { + return static_cast(storage.getHitCount()) / storage.getQueryCount(); + } + + size_t getElementCount() const override { return storage.getElementCount(); } + + double getLoadFactor() const override { return storage.getLoadFactor(); } + + bool supportUpdates() const override { return false; } + + std::shared_ptr clone() const override + { + return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, + max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); + } + + const IDictionarySource * getSource() const override { return source_ptr.get(); } + + const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + + const DictionaryStructure & getStructure() const override { return dict_struct; } + + bool isInjective(const std::string & attribute_name) const override + { + return dict_struct.attributes[getAttributeIndex(attribute_name)].injective; + } + + /*bool hasHierarchy() const { return false; } + + void toParent(const PaddedPODArray &, PaddedPODArray &) const { }*/ + + std::exception_ptr getLastException() const override { return storage.getLastException(); } + + template + using ResultArrayType = SSDComplexKeyCacheStorage::ResultArrayType; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, + const DataTypes & key_types, ColumnString * out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const PaddedPODArray & def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, + const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const; + +#define DECLARE(TYPE) \ + void get##TYPE( \ + const std::string & attribute_name, \ + const Columns & key_columns, \ + const DataTypes & key_types, \ + const TYPE def, \ + ResultArrayType & out) const; + DECLARE(UInt8) + DECLARE(UInt16) + DECLARE(UInt32) + DECLARE(UInt64) + DECLARE(UInt128) + DECLARE(Int8) + DECLARE(Int16) + DECLARE(Int32) + DECLARE(Int64) + DECLARE(Float32) + DECLARE(Float64) + DECLARE(Decimal32) + DECLARE(Decimal64) + DECLARE(Decimal128) +#undef DECLARE + + void getString(const std::string & attribute_name, const Columns & key_columns, + const DataTypes & key_types, const String & def, ColumnString * const out) const; + + void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray & out) const; + + BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; + +private: + size_t getAttributeIndex(const 
std::string & attr_name) const; + + template + AttributeValueVariant createAttributeNullValueWithTypeImpl(const Field & null_value); + AttributeValueVariant createAttributeNullValueWithType(const AttributeUnderlyingType type, const Field & null_value); + void createAttributes(); + + template + void getItemsNumberImpl( + const size_t attribute_index, + const Columns & key_columns, const DataTypes & key_types, + ResultArrayType & out, DefaultGetter && get_default) const; + + template + void getItemsStringImpl( + const size_t attribute_index, + const Columns & key_columns, const DataTypes & key_types, + ColumnString * out, DefaultGetter && get_default) const; + + const std::string name; + const DictionaryStructure dict_struct; + mutable DictionarySourcePtr source_ptr; + const DictionaryLifetime dict_lifetime; + + const std::string path; + const size_t max_partitions_count; + const size_t partition_size; + const size_t block_size; + const size_t read_buffer_size; + const size_t write_buffer_size; + const size_t max_stored_keys; + + std::map attribute_index_by_name; + std::vector null_values; + mutable SSDComplexKeyCacheStorage storage; + Logger * const log; + + mutable size_t bytes_allocated = 0; +}; + +} diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index d2f37ad650b..f3df13374aa 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -32,6 +32,7 @@ void registerDictionaries() registerDictionaryHashed(factory); registerDictionaryCache(factory); registerDictionarySSDCache(factory); + registerDictionarySSDComplexKeyCache(factory); registerDictionaryPolygon(factory); } } diff --git a/src/Dictionaries/registerDictionaries.h b/src/Dictionaries/registerDictionaries.h index 1a4127b2d4c..131c0dd0af4 100644 --- a/src/Dictionaries/registerDictionaries.h +++ b/src/Dictionaries/registerDictionaries.h @@ -25,6 +25,7 @@ void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); void registerDictionarySSDCache(DictionaryFactory & factory); +void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory); void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaries(); diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index ec6f2d878c9..b359c7403f8 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -136,7 +137,7 @@ private: !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && - !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && #endif @@ -311,6 +312,7 @@ private: !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && #endif @@ -496,6 +498,7 @@ private: !executeDispatch(block, arguments, 
result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && #endif @@ -837,6 +840,7 @@ private: !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && #endif @@ -1100,6 +1104,7 @@ private: !executeDispatch(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && #endif From 8e3442bae494f412a22bb1aa094035506d93cf29 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Thu, 30 Apr 2020 23:51:06 +0300 Subject: [PATCH 0085/2229] complex_key_test --- ...01280_ssd_complex_key_dictionary.reference | 0 .../01280_ssd_complex_key_dictionary.sql | 133 ++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference create mode 100644 tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql new file mode 100644 index 00000000000..2b9288d9257 --- /dev/null +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql @@ -0,0 +1,133 @@ +SET send_logs_level = 'none'; + +DROP DATABASE IF EXISTS database_for_dict; + +CREATE DATABASE database_for_dict Engine = Ordinary; + +DROP TABLE IF EXISTS database_for_dict.table_for_dict; + +CREATE TABLE database_for_dict.table_for_dict +( + k1 String, + k2 Int32, + a UInt64, + b Int32, + c String +) +ENGINE = MergeTree() +ORDER BY (k1, k2); + +INSERT INTO database_for_dict.table_for_dict VALUES (toString(1), 3, 100, -100, 'clickhouse'), (toString(2), -1, 3, 4, 'database'), (toString(5), -3, 6, 7, 'columns'), (toString(10), -20, 9, 8, ''); +INSERT INTO database_for_dict.table_for_dict SELECT toString(number), number + 1, 0, -1, 'a' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 370; +INSERT INTO database_for_dict.table_for_dict SELECT toString(number), number + 10, 0, -1, 'b' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 370, 370; +INSERT INTO database_for_dict.table_for_dict SELECT toString(number), number + 100, 0, -1, 'c' FROM system.numbers WHERE number NOT IN (1, 2, 5, 10) LIMIT 700, 370; + +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +CREATE DICTIONARY database_for_dict.ssd_dict +( + k1 String, + k2 Int32, + a UInt64 DEFAULT 0, + b Int32 DEFAULT -1, + c String DEFAULT 'none' +) +PRIMARY KEY k1, k2 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1000 MAX 2000) +LAYOUT(SSD_COMPLEX_KEY(PARTITION_SIZE 8192 PATH 
'/var/lib/clickhouse/clickhouse_dicts/0d')); + +SELECT 'TEST_SMALL'; +SELECT 'VALUE FROM RAM BUFFER'; + +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('1', toInt32(3))); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('1', toInt32(3))); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('1', toInt32(3))); + +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('1', toInt32(3))); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('1', toInt32(3))); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('1', toInt32(3))); + +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('2', toInt32(-1))); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('2', toInt32(-1))); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('2', toInt32(-1))); + +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('5', toInt32(-3))); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('5', toInt32(-3))); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('5', toInt32(-3))); + +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', tuple('10', toInt32(-20))); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', tuple('10', toInt32(-20))); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', tuple('10', toInt32(-20))); + +DROP DICTIONARY database_for_dict.ssd_dict; + +DROP TABLE IF EXISTS database_for_dict.keys_table; + +CREATE TABLE database_for_dict.keys_table +( + k1 String, + k2 Int32 +) +ENGINE = StripeLog(); + +INSERT INTO database_for_dict.keys_table VALUES ('1', 3); +INSERT INTO database_for_dict.keys_table SELECT toString(intHash64(number + 1) % 1200), 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 370; +INSERT INTO database_for_dict.keys_table VALUES ('2', -1); +INSERT INTO database_for_dict.keys_table SELECT toString(intHash64(number + 1) % 1200), 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 370, 370; +INSERT INTO database_for_dict.keys_table VALUES ('5', -3); +INSERT INTO database_for_dict.keys_table SELECT toString(intHash64(number + 1) % 1200), 11 + intHash64(number) % 1200 FROM system.numbers LIMIT 700, 370; +INSERT INTO database_for_dict.keys_table VALUES ('10', -20); + +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +CREATE DICTIONARY database_for_dict.ssd_dict +( + k1 String, + k2 Int32, + a UInt64 DEFAULT 0, + b Int32 DEFAULT -1, + c String DEFAULT 'none' +) +PRIMARY KEY k1, k2 +SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) +LIFETIME(MIN 1000 MAX 2000) +LAYOUT(SSD_COMPLEX_KEY(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); + +SELECT 'UPDATE DICTIONARY'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', (k1, k2))) FROM database_for_dict.keys_table; + +SELECT 'VALUE FROM DISK'; +-- -100 +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('1', 3)); + +-- 'clickhouse' +SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('1', 3)); + +SELECT 'VALUE FROM RAM BUFFER'; +-- 8 +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('10', -20)); + +-- '' +SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('10', -20)); + +SELECT 'VALUES FROM DISK AND RAM BUFFER'; +-- 118 +SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', (k1, k2))) FROM database_for_dict.keys_table; + +SELECT 'HAS'; +-- 1006 +SELECT count() FROM database_for_dict.keys_table 
WHERE dictHas('database_for_dict.ssd_dict', (k1, k2)); + +SELECT 'VALUES NOT FROM TABLE'; +-- 0 -1 none +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', 0)), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', 0)), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', 0)); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', 0)), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', 0)), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', 0)); + +SELECT 'DUPLICATE KEYS'; +SELECT arrayJoin([('1', 3), ('2', -1), ('', 0), ('', 0), ('2', -1), ('1', 3)]) AS keys, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(keys)); +--SELECT +DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; + +DROP TABLE IF EXISTS database_for_dict.keys_table; From b3cfce523c24e560a76f652a423cbad252e5cf5d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 May 2020 04:24:51 +0300 Subject: [PATCH 0086/2229] in-memory parts: test for basic functionality --- .../01130_in_memory_parts.reference | 27 +++++++++++++++ .../0_stateless/01130_in_memory_parts.sql | 33 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 tests/queries/0_stateless/01130_in_memory_parts.reference create mode 100644 tests/queries/0_stateless/01130_in_memory_parts.sql diff --git a/tests/queries/0_stateless/01130_in_memory_parts.reference b/tests/queries/0_stateless/01130_in_memory_parts.reference new file mode 100644 index 00000000000..dbf39a0b48f --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts.reference @@ -0,0 +1,27 @@ +Simple selects +0 0 +1 1 +2 2 +3 0 +4 1 +50 2 +51 0 +52 1 +53 2 +54 0 +34 +0 +Mutations and Alters +66 +1 1 +2 2 +4 1 +5 2 +7 1 +[1,1] +[] +[4,16] +[] +[7,49] +1 1 +2 1 diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql new file mode 100644 index 00000000000..4c09eb19937 --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS in_memory; +CREATE TABLE in_memory (a UInt32, b UInt32) + ENGINE = MergeTree ORDER BY a + SETTINGS min_rows_for_compact_part = 0; + +INSERT INTO in_memory SELECT number, number % 3 FROM numbers(100); + +SELECT 'Simple selects'; + +SELECT * FROM in_memory ORDER BY a LIMIT 5; +SELECT * FROM in_memory ORDER BY a LIMIT 5 OFFSET 50; +SELECT count() FROM in_memory WHERE b = 0 SETTINGS max_block_size = 10; +-- Check index +SELECT count() FROM in_memory WHERE a > 100 SETTINGS max_rows_to_read = 0, force_primary_key = 1; + +SELECT 'Mutations and Alters'; +SET mutations_sync = 1; + +ALTER TABLE in_memory DELETE WHERE b = 0; + +SELECT count() FROM in_memory; +SELECT * FROM in_memory ORDER BY a LIMIT 5; + +ALTER TABLE in_memory ADD COLUMN arr Array(UInt64); +ALTER TABLE in_memory UPDATE arr = [a, a * a] WHERE b = 1; + +SELECT arr FROM in_memory ORDER BY a LIMIT 5; + +ALTER TABLE in_memory MODIFY COLUMN b String; +ALTER TABLE in_memory RENAME COLUMN b to str; +SELECT DISTINCT str, length(str) FROM in_memory ORDER BY str; + +DROP TABLE in_memory; From 14e8592e47716a2c998091cd5d83ddc366b76d3c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 May 2020 04:27:31 +0300 Subject: [PATCH 0087/2229] in-memory parts: send checksums --- src/Storages/MergeTree/DataPartsExchange.cpp | 18 +++++++++++------- src/Storages/MergeTree/MergeTreeData.cpp | 2 -- .../MergeTree/MergeTreeReaderInMemory.cpp | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git 
a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 7d1c0fb43b5..bbee34e4a0b 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -29,6 +29,8 @@ namespace ErrorCodes extern const int CANNOT_WRITE_TO_OSTREAM; extern const int CHECKSUM_DOESNT_MATCH; extern const int INSECURE_PATH; + extern const int CORRUPTED_DATA; + extern const int LOGICAL_ERROR; } namespace DataPartsExchange @@ -133,12 +135,11 @@ void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteB { auto part_in_memory = dynamic_cast(part.get()); if (!part_in_memory) - throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::NO_SUCH_DATA_PART); // TODO error code - - NativeBlockOutputStream block_out(out, 0, data.getSampleBlock()); - block_out.write(part_in_memory->block); + throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::LOGICAL_ERROR); - // TODO send checksums + NativeBlockOutputStream block_out(out, 0, data.getSampleBlock()); + part->checksums.write(out); + block_out.write(part_in_memory->block); } void Service::sendPartFromDisk(const MergeTreeData::DataPartPtr & part, WriteBuffer & out, TableStructureReadLockHolder &) @@ -279,6 +280,10 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( const String & /* replica_path */, PooledReadWriteBufferFromHTTP & in) { + MergeTreeData::DataPart::Checksums checksums; + if (!checksums.read(in)) + throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA); + NativeBlockInputStream block_in(in, 0); auto block = block_in.read(); MergeTreeData::MutableDataPartPtr new_data_part = @@ -305,8 +310,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( part_out.writePrefix(); part_out.write(block); part_out.writeSuffixAndFinalizePart(new_data_part); - - // TODO validate checksums + new_data_part->checksums.checkEqual(checksums, /* have_uncompressed = */ true); return new_data_part; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fa7bd9f77e5..9fd80765f0e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2740,8 +2740,6 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS } } - LOG_DEBUG(log, "MergeTreeData::getDataPartsVector: " << res.size()); - return res; } diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 8c61a879270..671b36dfe86 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -49,7 +49,7 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool /* continue_read if (!part_in_memory->block.has(name)) continue; - const auto block_column = part_in_memory->block.getByPosition(i).column; + const auto block_column = part_in_memory->block.getByName(name).column; if (total_rows_read == 0 && part_rows <= max_rows_to_read) { res_columns[i] = block_column; From 4878c91d07768374c9e81b5d483bbbf8d67f8928 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 5 May 2020 18:06:16 +0300 Subject: [PATCH 0088/2229] in-memory parts: better restore from wal --- src/Storages/MergeTree/DataPartsExchange.cpp | 16 +----- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 50 +++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 1 - 
src/Storages/MergeTree/MergeTreePartition.cpp | 14 ++++++ src/Storages/MergeTree/MergeTreePartition.h | 2 + src/Storages/MergeTree/MergeTreeSettings.h | 2 +- .../MergeTree/MergeTreeWriteAheadLog.cpp | 14 ++---- .../MergeTree/MergeTreeWriteAheadLog.h | 18 +------ .../test_polymorphic_parts/test.py | 41 +++++++++++++-- 10 files changed, 88 insertions(+), 71 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index bbee34e4a0b..673c774ce5a 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -90,7 +90,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo LOG_TRACE(log, "Sending part " << part_name); try - { + { auto storage_lock = data.lockStructureForShare( false, RWLockImpl::NO_QUERY, data.getSettings()->lock_acquire_timeout_for_background_operations); @@ -292,19 +292,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( new_data_part->is_temp = true; new_data_part->setColumns(block.getNamesAndTypesList()); new_data_part->minmax_idx.update(block, data.minmax_idx_columns); - - auto partition_block = block; - data.partition_key_expr->execute(partition_block); - auto & partition = new_data_part->partition.value; - size_t partition_columns_num = data.partition_key_sample.columns(); - partition.resize(partition_columns_num); - - for (size_t i = 0; i < partition_columns_num; ++i) - { - const auto & column_name = data.partition_key_sample.getByPosition(i).name; - const auto & partition_column = partition_block.getByName(column_name).column; - partition[i] = (*partition_column)[0]; - } + new_data_part->partition.create(data, block, 0); MergedBlockOutputStream part_out(new_data_part, block.getNamesAndTypesList(), {}, nullptr); part_out.writePrefix(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index b1fb2554c76..8943a9fcb1f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -354,6 +354,7 @@ private: using MergeTreeDataPartState = IMergeTreeDataPart::State; using MergeTreeDataPartPtr = std::shared_ptr; +using MergeTreeMutableDataPartPtr = std::shared_ptr; bool isCompactPart(const MergeTreeDataPartPtr & data_part); bool isWidePart(const MergeTreeDataPartPtr & data_part); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9fd80765f0e..67a30934e2c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -248,8 +248,8 @@ MergeTreeData::MergeTreeData( if (settings->in_memory_parts_enable_wal) { - auto disk = reserveSpace(0)->getDisk(); - write_ahead_log = std::make_shared(*this, disk); + auto disk = makeEmptyReservationOnLargestDisk()->getDisk(); + write_ahead_log = std::make_shared(*this, std::move(disk)); } } @@ -859,6 +859,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) const auto settings = getSettings(); std::vector> part_names_with_disks; + MutableDataPartsVector parts_from_wal; Strings part_file_names; auto disks = getStoragePolicy()->getDisks(); @@ -899,19 +900,23 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) part_names_with_disks.emplace_back(it->name(), disk_ptr); if (startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) - loadDataPartsFromWAL(disk_ptr, it->name()); + { + MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); + auto current_parts = wal.restore(); + for 
(auto & part : current_parts) + parts_from_wal.push_back(std::move(part)); + } } } auto part_lock = lockParts(); - // TODO: fix. - // data_parts_indexes.clear(); + data_parts_indexes.clear(); - // if (part_names_with_disks.empty()) - // { - // LOG_DEBUG(log, "There is no data parts"); - // return; - // } + if (part_names_with_disks.empty() && parts_from_wal.empty()) + { + LOG_DEBUG(log, "There is no data parts"); + return; + } /// Parallel loading of data parts. size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_names_with_disks.size()); @@ -1043,6 +1048,16 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) pool.wait(); + for (auto & part : parts_from_wal) + { + part->modification_time = time(nullptr); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->state = DataPartState::Committed; + + if (!data_parts_indexes.insert(part).second) + throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); + } + if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR); @@ -1110,21 +1125,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) LOG_DEBUG(log, "Loaded data parts (" << data_parts_indexes.size() << " items)"); } -void MergeTreeData::loadDataPartsFromWAL(const DiskPtr & disk, const String & file_name) -{ - MergeTreeWriteAheadLog wal(*this, disk, file_name); - auto parts = wal.restore(); - for (auto & part : parts) - { - part->modification_time = time(nullptr); - /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later - part->state = DataPartState::Committed; - - if (!data_parts_indexes.insert(part).second) - throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); - } -} - /// Is the part directory old. /// True if its modification time and the modification time of all files inside it is less then threshold. diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 618668b0d87..4ae1a4bb0cb 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -370,7 +370,6 @@ public: /// Load the set of data parts from disk. Call once - immediately after the object is created. 
void loadDataParts(bool skip_sanity_checks); - void loadDataPartsFromWAL(const DiskPtr & disk, const String & file_name); String getLogName() const { return log_name; } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 000d0abad43..3124b16a138 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -152,4 +152,18 @@ void MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr checksums.files["partition.dat"].file_hash = out_hashing.getHash(); } +void MergeTreePartition::create(const MergeTreeData & storage, Block block, size_t row) +{ + storage.partition_key_expr->execute(block); + size_t partition_columns_num = storage.partition_key_sample.columns(); + value.resize(partition_columns_num); + + for (size_t i = 0; i < partition_columns_num; ++i) + { + const auto & column_name = storage.partition_key_sample.getByPosition(i).name; + const auto & partition_column = block.getByName(column_name).column; + partition_column->get(row, value[i]); + } +} + } diff --git a/src/Storages/MergeTree/MergeTreePartition.h b/src/Storages/MergeTree/MergeTreePartition.h index 2a589339ba8..d91022f655f 100644 --- a/src/Storages/MergeTree/MergeTreePartition.h +++ b/src/Storages/MergeTree/MergeTreePartition.h @@ -36,6 +36,8 @@ public: void store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const; void assign(const MergeTreePartition & other) { value.assign(other.value); } + + void create(const MergeTreeData & storage, Block block, size_t row); }; } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 20010eb8f4c..68e240f9d7e 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,7 +33,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(SettingUInt64, min_bytes_for_compact_part, 0, "Minimal uncompressed size in bytes to create part in compact format instead of saving it in RAM", 0) \ M(SettingUInt64, min_rows_for_compact_part, 0, "Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ - M(SettingBool, in_memory_parts_enable_wal, 0, "", 0) \ + M(SettingBool, in_memory_parts_enable_wal, 1, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(SettingBool, in_memory_parts_insert_sync, 0, "", 0) \ \ /** Merge settings. 
*/ \ diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 84091f904e6..e5c0c370ae2 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -12,15 +12,6 @@ namespace ErrorCodes extern const int UNKNOWN_FORMAT_VERSION; } -// WALBlockOutputStream::WALBlockOutputStream(WriteBuffer & out_, const Block & header_) -// : NativeBlockOutputStream(out_, 0, header_), out(out_) {} - -// void WALBlockOutputStream::write(const Block & block, const String & part_name) -// { -// writeIntBinary(0, out); -// writeString(part_name, out); -// NativeBlockOutputStream::write(block); -// } MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( const MergeTreeData & storage_, @@ -28,7 +19,7 @@ MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( const String & name) : storage(storage_) , disk(disk_) - , path(storage.getFullPathOnDisk(disk) + name) + , path(storage.getRelativeDataPath() + name) { init(); } @@ -93,8 +84,9 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() part_name); auto block = block_in.read(); - part->minmax_idx.update(block, storage.minmax_idx_columns); + part->partition.create(storage, block, 0); + MergedBlockOutputStream part_out(part, block.getNamesAndTypesList(), {}, nullptr); part_out.writePrefix(); part_out.write(block); diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 50bb9aa5e13..7a0e5759624 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -8,20 +8,6 @@ namespace DB { -// class WALBlockOutputStream : public NativeBlockOutputStream -// { -// public: -// WALBlockOutputStream(WriteBuffer & out_, const Block & header_); -// void write(const Block & block, const String & part_name); - -// private: -// WriteBuffer & out; -// }; - -// class WALBlockInputStream : public NativeBlockInputStream -// { -// }; - class MergeTreeData; class MergeTreeWriteAheadLog @@ -29,13 +15,13 @@ class MergeTreeWriteAheadLog public: constexpr static auto WAL_FILE_NAME = "wal"; constexpr static auto WAL_FILE_EXTENSION = ".bin"; - constexpr static size_t MAX_WAL_BYTES = 1024; + constexpr static size_t MAX_WAL_BYTES = 1024 * 1024 * 1024; MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, const String & name = String(WAL_FILE_NAME) + WAL_FILE_EXTENSION); void write(const Block & block, const String & part_name); - std::vector> restore(); + std::vector restore(); private: void init(); diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 1cd917a12bb..8cca8aa1072 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -39,7 +39,8 @@ def create_tables(name, nodes, node_settings, shard): PARTITION BY toYYYYMM(date) ORDER BY id SETTINGS index_granularity = 64, index_granularity_bytes = {index_granularity_bytes}, - min_rows_for_wide_part = {min_rows_for_wide_part}, min_rows_for_compact_part = {min_rows_for_compact_part} + min_rows_for_wide_part = {min_rows_for_wide_part}, min_rows_for_compact_part = {min_rows_for_compact_part}, + in_memory_parts_enable_wal = 1 '''.format(name=name, shard=shard, repl=i, **settings)) def create_tables_old_format(name, nodes, shard): @@ -68,8 +69,8 @@ node6 = cluster.add_instance('node6', config_dir='configs', main_configs=['confi settings_in_memory = 
{'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 256} -node9 = cluster.add_instance('node9', config_dir="configs", with_zookeeper=True) -node10 = cluster.add_instance('node10', config_dir="configs", with_zookeeper=True) +node9 = cluster.add_instance('node9', config_dir="configs", with_zookeeper=True, stay_alive=True) +node10 = cluster.add_instance('node10', config_dir="configs", with_zookeeper=True, stay_alive=True) @pytest.fixture(scope="module") def start_cluster(): @@ -83,6 +84,7 @@ def start_cluster(): create_tables('polymorphic_table_wide', [node3, node4], [settings_wide, settings_compact], "shard2") create_tables_old_format('polymorphic_table', [node5, node6], "shard3") create_tables('in_memory_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") + create_tables('wal_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") yield cluster @@ -314,6 +316,39 @@ def test_in_memory(start_cluster): assert TSV(node10.query("SELECT part_type, count() FROM system.parts " \ "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) +def test_in_memory_wal(start_cluster): + node9.query("SYSTEM STOP MERGES") + node10.query("SYSTEM STOP MERGES") + + for i in range(5): + insert_random_data('wal_table', node9, 50) + node10.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + + assert node9.query("SELECT count() FROM wal_table") == "250\n" + assert node10.query("SELECT count() FROM wal_table") == "250\n" + + assert node9.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == '5\n' + assert node10.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == '5\n' + + # WAL works at inserts + node9.restart_clickhouse(kill=True) + time.sleep(5) + assert node9.query("SELECT count() FROM wal_table") == "250\n" + + # WAL works at fetches + node10.restart_clickhouse(kill=True) + time.sleep(5) + assert node10.query("SELECT count() FROM wal_table") == "250\n" + + node9.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0") + insert_random_data('wal_table', node9, 50) + assert node9.query("SELECT count() FROM wal_table") == "300\n" + + # Data is lost without WAL + node9.restart_clickhouse(kill=True) + time.sleep(5) + assert node9.query("SELECT count() FROM wal_table") == "250\n" + def test_polymorphic_parts_index(start_cluster): node1.query(''' CREATE TABLE index_compact(a UInt32, s String) From ca621483a5ddb30b8e306e5d44179e7c94ddfd95 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:04 +0300 Subject: [PATCH 0089/2229] Update docs/en/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 6b1c170252c..993c4e8dffd 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -99,6 +99,7 @@ toc_title: Integrations - Ruby - [Ruby on rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From 845dab0245a321f5c6bab7cb431b2830e15f1d19 Mon Sep 17 
00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:20 +0300 Subject: [PATCH 0090/2229] Update docs/en/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/en/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 993c4e8dffd..a7538112c22 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -97,7 +97,7 @@ toc_title: Integrations - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) From b218198ab84040d639a34008eaf7ffa86d00eb23 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:28 +0300 Subject: [PATCH 0091/2229] Update docs/es/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/es/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/es/interfaces/third-party/integrations.md b/docs/es/interfaces/third-party/integrations.md index 50fd9f1ce3b..416cab32a1d 100644 --- a/docs/es/interfaces/third-party/integrations.md +++ b/docs/es/interfaces/third-party/integrations.md @@ -99,7 +99,7 @@ toc_title: "Integraci\xF3n" - [Ecto](https://github.com/elixir-ecto/ecto) - [Método de codificación de datos:](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From 6d9e48a7756b53cd63e7dfbd7d161b89f2d9443f Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:37 +0300 Subject: [PATCH 0092/2229] Update docs/fa/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/fa/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index f9e0411201a..cda8fa250d1 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -96,7 +96,7 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - [Ecto](https://github.com/elixir-ecto/ecto) - [حذف جستجو](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From f0c6aa32dede17815c354594ab70411cf42fed9f Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:44 +0300 Subject: [PATCH 0093/2229] Update docs/fr/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/fr/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/fr/interfaces/third-party/integrations.md b/docs/fr/interfaces/third-party/integrations.md index 100e4e34f54..42341b97bea 100644 --- a/docs/fr/interfaces/third-party/integrations.md +++ 
b/docs/fr/interfaces/third-party/integrations.md @@ -96,7 +96,7 @@ toc_title: "Int\xE9gration" - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From a352ed42291d6d6afee2b36246866c6f70384852 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:10:58 +0300 Subject: [PATCH 0094/2229] Update docs/ru/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/ru/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 508c6734045..fe26c85e62c 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -92,7 +92,7 @@ - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From 0347fe936411abe24847f450b20bdfcfb528bbbb Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:11:12 +0300 Subject: [PATCH 0095/2229] Update docs/tr/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/tr/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tr/interfaces/third-party/integrations.md b/docs/tr/interfaces/third-party/integrations.md index fdfbbc18b65..19eca91b65e 100644 --- a/docs/tr/interfaces/third-party/integrations.md +++ b/docs/tr/interfaces/third-party/integrations.md @@ -96,7 +96,7 @@ toc_title: Entegrasyonlar - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From e576cfe52f75f3d02603c80d88d1d1627880215a Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:11:20 +0300 Subject: [PATCH 0096/2229] Update docs/zh/interfaces/third-party/integrations.md Co-authored-by: Ivan Blinkov --- docs/zh/interfaces/third-party/integrations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index ecd33e93a4d..a874b92b0cf 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -90,7 +90,7 @@ - [Ecto](https://github.com/elixir-ecto/ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - Ruby - - [Ruby on rails](https://rubyonrails.org/) + - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From c39b85ea6952a31896fbe35260f38b7313852294 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 
2020 19:11:31 +0300 Subject: [PATCH 0097/2229] Update docs/es/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/es/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/es/interfaces/third-party/integrations.md b/docs/es/interfaces/third-party/integrations.md index 416cab32a1d..dc28db3dff2 100644 --- a/docs/es/interfaces/third-party/integrations.md +++ b/docs/es/interfaces/third-party/integrations.md @@ -101,6 +101,7 @@ toc_title: "Integraci\xF3n" - Ruby - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From 4ac9dca348f906a491a33a06ea81cd0c9ace9016 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:11:41 +0300 Subject: [PATCH 0098/2229] Update docs/fa/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/fa/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index cda8fa250d1..e7f174f5f2d 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -98,6 +98,7 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - Ruby - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From c928547f82b78308f15be47fa8e76964f3debe89 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:11:48 +0300 Subject: [PATCH 0099/2229] Update docs/fr/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/fr/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/fr/interfaces/third-party/integrations.md b/docs/fr/interfaces/third-party/integrations.md index 42341b97bea..544c0e8c3b1 100644 --- a/docs/fr/interfaces/third-party/integrations.md +++ b/docs/fr/interfaces/third-party/integrations.md @@ -98,6 +98,7 @@ toc_title: "Int\xE9gration" - Ruby - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From e9f7319d59130916a5dd2c484dd3eda2bb91eac7 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:11:54 +0300 Subject: [PATCH 0100/2229] Update docs/ja/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/ja/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ja/interfaces/third-party/integrations.md b/docs/ja/interfaces/third-party/integrations.md index f45a6734b2c..09e68239b4b 100644 --- a/docs/ja/interfaces/third-party/integrations.md +++ b/docs/ja/interfaces/third-party/integrations.md @@ -98,6 +98,7 @@ toc_title: "\u7D71\u5408" - Ruby - [Ruby on rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - 
[ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From dd8d091c19028fb169bd00f443671f03d4018ab6 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:12:02 +0300 Subject: [PATCH 0101/2229] Update docs/ru/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/ru/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index fe26c85e62c..47e33a88a20 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -94,6 +94,7 @@ - Ruby - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From c43e01e712f14d6bf4b881be0cd984361e6ad9e7 Mon Sep 17 00:00:00 2001 From: Aleksey Date: Wed, 6 May 2020 19:12:09 +0300 Subject: [PATCH 0102/2229] Update docs/tr/interfaces/third-party/integrations.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/tr/interfaces/third-party/integrations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/tr/interfaces/third-party/integrations.md b/docs/tr/interfaces/third-party/integrations.md index 19eca91b65e..ac42757980c 100644 --- a/docs/tr/interfaces/third-party/integrations.md +++ b/docs/tr/interfaces/third-party/integrations.md @@ -98,6 +98,7 @@ toc_title: Entegrasyonlar - Ruby - [Ruby on Rails](https://rubyonrails.org/) - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - [GraphQL](https://github.com/graphql) - [activecube-graphql](https://github.com/bitquery/activecube-graphql) From 4a396090f8cfb2a19df73ddb6e85682792bd8479 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 7 May 2020 01:24:00 +0300 Subject: [PATCH 0103/2229] fix build --- src/Storages/StorageReplicatedMergeTree.cpp | 1 - src/Storages/tests/gtest_aux_funcs_for_adaptive_granularity.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a5cd4b6629a..9b138af30bf 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2714,7 +2714,6 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) } - void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); diff --git a/src/Storages/tests/gtest_aux_funcs_for_adaptive_granularity.cpp b/src/Storages/tests/gtest_aux_funcs_for_adaptive_granularity.cpp index 7488b6ea44a..d9ddb8e9722 100644 --- a/src/Storages/tests/gtest_aux_funcs_for_adaptive_granularity.cpp +++ b/src/Storages/tests/gtest_aux_funcs_for_adaptive_granularity.cpp @@ -3,7 +3,7 @@ #include // I know that inclusion of .cpp is not good at all -#include +#include using namespace DB; static Block getBlockWithSize(size_t required_size_in_bytes, size_t size_of_row_in_bytes) From 270268f4a28ddd3dca4a2dea0f2962d471e93ec0 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 14:36:23 +0300 Subject: [PATCH 0104/2229] fix addr --- 
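Context for this change: append() previously stored the caller's KeyRef directly in keys_buffer, so the stored reference could outlive the pool that owned its bytes; the patch instead copies every key into a partition-owned keys_buffer_pool (keys_buffer.push_back(keys_buffer_pool->copyKeyFrom(...))) that is reset only after flush(), and threads a caller-owned TemporalComplexKeysPool through SSDComplexKeyCacheStorage::update(). Below is a minimal sketch of the lifetime bug such a copy avoids; Arena, KeyRef, allocKey and copyKeyFrom here are simplified stand-ins, not the real ClickHouse types:

    #include <cstddef>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <vector>

    // Toy bump allocator: rollback() releases the most recent allocation,
    // the way a temporary keys pool is rolled back after a lookup.
    struct Arena
    {
        char buf[1024];
        std::size_t used = 0;
        char * alloc(std::size_t n) { char * p = buf + used; used += n; return p; }
        void rollback(std::size_t n) { used -= n; }
    };

    struct KeyRef { const char * ptr = nullptr; std::size_t size = 0; };

    KeyRef allocKey(Arena & pool, const std::string & s)
    {
        char * p = pool.alloc(s.size());
        std::memcpy(p, s.data(), s.size());
        return {p, s.size()};
    }

    // What copyKeyFrom() does conceptually: take ownership by copying the
    // bytes into a pool that lives as long as the buffer holding the ref.
    KeyRef copyKeyFrom(Arena & pool, KeyRef key)
    {
        char * p = pool.alloc(key.size);
        std::memcpy(p, key.ptr, key.size);
        return {p, key.size};
    }

    int main()
    {
        Arena tmp_pool, buffer_pool;
        std::vector<KeyRef> keys_buffer;

        KeyRef k = allocKey(tmp_pool, "clickhouse");
        keys_buffer.push_back(copyKeyFrom(buffer_pool, k)); // safe: owned copy
        // keys_buffer.push_back(k); // would dangle once tmp_pool is rolled back
        tmp_pool.rollback(k.size);

        std::cout << std::string(keys_buffer[0].ptr, keys_buffer[0].size) << '\n';
    }
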
.../SSDComplexKeyCacheDictionary.cpp | 35 ++++++++++++++----- .../SSDComplexKeyCacheDictionary.h | 35 ++++++++++++++----- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 83b9f0f25a7..172cb91f012 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -268,6 +268,10 @@ size_t SSDComplexKeyCachePartition::append( { init_write_buffer(); } + if (!keys_buffer_pool) + { + keys_buffer_pool.emplace(); + } bool flushed = false; auto finish_block = [&]() @@ -360,7 +364,7 @@ size_t SSDComplexKeyCachePartition::append( if (!flushed) { key_to_index.set(keys[index], cache_index); - keys_buffer.push_back(keys[index]); + keys_buffer.push_back(keys_buffer_pool->copyKeyFrom(keys[index])); ++index; ++keys_in_block; } @@ -442,6 +446,7 @@ void SSDComplexKeyCachePartition::flush() for (size_t row = 0; row < keys_buffer.size(); ++row) { Index index; + Poco::Logger::get("get:").information("sz = " + std::to_string(keys_buffer[row].size())); if (key_to_index.get(keys_buffer[row], index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. @@ -451,6 +456,7 @@ void SSDComplexKeyCachePartition::flush() } key_to_index.set(keys_buffer[row], index); } + Poco::Logger::get("get:").information("finish"); } current_file_block_id += write_buffer_size; @@ -458,6 +464,8 @@ void SSDComplexKeyCachePartition::flush() /// clear buffer keys_buffer.clear(); + keys_buffer_pool.reset(); + keys_buffer_pool.emplace(); } template @@ -754,7 +762,6 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() TemporalComplexKeysPool tmp_keys_pool; KeyRefs keys; - keys.reserve(write_buffer_size); // TODO: write the number of values for (size_t i = 0; i < write_buffer_size; ++i) @@ -777,6 +784,8 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() { keys.emplace_back(); tmp_keys_pool.readKey(keys.back(), read_buffer); + Poco::Logger::get("ClearOldestBlocks").information("ktest: sz=" + std::to_string(keys.back().size()) + + " data=" + std::to_string(reinterpret_cast(keys.back().fullData()))); Metadata metadata; readBinary(metadata.data, read_buffer); @@ -827,13 +836,18 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); for (const auto& key : keys) { + Poco::Logger::get("ClearOldestBlocks").information("ktest: null=" + std::to_string(key.isNull())); + Poco::Logger::get("ClearOldestBlocks").information("ktest: data=" + std::to_string(reinterpret_cast(key.fullData()))); + Poco::Logger::get("ClearOldestBlocks").information("ktest: sz=" + std::to_string(key.size()) + " fz=" + std::to_string(key.fullSize())); Index index; if (key_to_index.get(key, index)) { + Poco::Logger::get("ClearOldestBlocks").information("erase"); size_t block_id = index.getBlockId(); if (start_block <= block_id && block_id < finish_block) key_to_index.erase(key); } + Poco::Logger::get("ClearOldestBlocks").information("finish"); } } @@ -1038,6 +1052,7 @@ void SSDComplexKeyCacheStorage::update( const DataTypes & key_types, const KeyRefs & required_keys, const std::vector & required_rows, + TemporalComplexKeysPool & tmp_keys_pool, PresentIdHandler && on_updated, AbsentIdHandler && on_key_not_found, const DictionaryLifetime lifetime) @@ -1056,9 +1071,9 @@ void SSDComplexKeyCacheStorage::update( if (inserted < metadata.size()) {
partitions.emplace_front(std::make_unique( - AttributeUnderlyingType::utUInt64, attributes_structure, path, - (partitions.empty() ? 0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + AttributeUnderlyingType::utUInt64, attributes_structure, path, + (partitions.empty() ? 0 : partitions.front()->getId() + 1), + partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1077,7 +1092,6 @@ void SSDComplexKeyCacheStorage::update( { const auto keys_size = key_columns.size(); StringRefs keys(keys_size); - TemporalComplexKeysPool tmp_keys_pool; const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs}; @@ -1111,7 +1125,7 @@ void SSDComplexKeyCacheStorage::update( for (const auto i : ext::range(0, rows_num)) { auto key = tmp_keys_pool.allocKey(i, new_key_columns, keys); - SCOPE_EXIT(tmp_keys_pool.rollback(key)); + //SCOPE_EXIT(tmp_keys_pool.rollback(key)); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; metadata[i].setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); @@ -1475,12 +1489,14 @@ void SSDComplexKeyCacheDictionary::getItemsNumberImpl( for (const auto & key_ref : required_keys) required_rows.push_back(not_found_keys[key_ref].front()); + TemporalComplexKeysPool tmp_keys_pool; storage.update( source_ptr, key_columns, key_types, required_keys, required_rows, + tmp_keys_pool, [&](const auto key, const auto row, const auto & new_attributes) { for (const size_t out_row : not_found_keys[key]) @@ -1580,12 +1596,14 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl( for (const auto & key_ref : required_keys) required_rows.push_back(not_found_keys[key_ref].front()); + TemporalComplexKeysPool tmp_keys_pool; storage.update( source_ptr, key_columns, key_types, required_keys, required_rows, + tmp_keys_pool, [&](const auto key, const auto row, const auto & new_attributes) { update_result[key] = std::get>(new_attributes[attribute_index].values)[row]; @@ -1593,7 +1611,6 @@ void SSDComplexKeyCacheDictionary::getItemsStringImpl( [&](const auto) {}, getLifetime()); - TemporalComplexKeysPool tmp_keys_pool; StringRefs tmp_refs(key_columns.size()); size_t default_index = 0; for (size_t row = 0; row < n; ++row) @@ -1643,12 +1660,14 @@ void SSDComplexKeyCacheDictionary::has( for (const auto & key_ref : required_keys) required_rows.push_back(not_found_keys[key_ref].front()); + TemporalComplexKeysPool tmp_keys_pool; storage.update( source_ptr, key_columns, key_types, required_keys, required_rows, + tmp_keys_pool, [&](const auto key, const auto, const auto &) { for (const size_t out_row : not_found_keys[key]) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 6e64078cffd..2bb71ed52b1 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -36,13 +35,20 @@ public: KeyRef() : ptr(nullptr) {} inline UInt16 size() const { - return *reinterpret_cast(ptr); + UInt16 sz; + memcpy(&sz, ptr, sizeof(sz)); + return sz; + //return *reinterpret_cast(ptr); } inline size_t fullSize() const { return static_cast(size()) + sizeof(UInt16); } + inline bool isNull() const { + return ptr == nullptr; + } + inline char * data() const { return ptr + sizeof(UInt16); } @@ -114,6 +120,7 @@ class ComplexKeysPoolImpl public: KeyRef allocKey(const size_t 
row, const Columns & key_columns, StringRefs & keys) { + std::lock_guard lock(m); if constexpr (std::is_same_v) { // not working now @@ -151,7 +158,7 @@ public: { if (!key_columns[j]->valuesHaveFixedSize()) // String { - auto start = key_start; + //auto start = key_start; auto key_size = keys[j].size + 1; memcpy(key_start, &key_size, sizeof(size_t)); key_start += sizeof(size_t); @@ -159,13 +166,13 @@ public: key_start += keys[j].size; *key_start = '\0'; ++key_start; - keys[j].data = start; - keys[j].size += sizeof(size_t) + 1; + //keys[j].data = start; + //keys[j].size += sizeof(size_t) + 1; } else { memcpy(key_start, keys[j].data, keys[j].size); - keys[j].data = key_start; + //keys[j].data = key_start; key_start += keys[j].size; } } @@ -176,13 +183,17 @@ public: KeyRef copyKeyFrom(const KeyRef & key) { + std::lock_guard lock(m); + //Poco::Logger::get("test cpy").information("--- --- --- "); char * data = arena.alloc(key.fullSize()); + //Poco::Logger::get("test cpy").information("--- --- --- finish"); memcpy(data, key.fullData(), key.fullSize()); return KeyRef(data); } void freeKey(const KeyRef & key) { + std::lock_guard lock(m); if constexpr (std::is_same_v) arena.free(key.fullData(), key.fullSize()); else if constexpr (std::is_same_v) @@ -193,6 +204,7 @@ public: void rollback(const KeyRef & key) { + std::lock_guard lock(m); if constexpr (std::is_same_v) arena.rollback(key.fullSize()); else @@ -206,8 +218,10 @@ public: void readKey(KeyRef & key, ReadBuffer & buf) { + std::lock_guard lock(m); UInt16 sz; readBinary(sz, buf); + Poco::Logger::get("test read key").information("sz " + std::to_string(sz)); char * data = nullptr; if constexpr (std::is_same_v) data = arena.alloc(); @@ -216,6 +230,7 @@ public: memcpy(data, &sz, sizeof(sz)); buf.read(data + sizeof(sz), sz); key = KeyRef(data); + Poco::Logger::get("test read key").information("ksz = " + std::to_string(key.size())); } void ignoreKey(ReadBuffer & buf) const @@ -226,6 +241,7 @@ public: } private: + std::mutex m; A arena; }; @@ -270,7 +286,7 @@ public: else { queue.erase(it->second.iter); - it->second.iter = queue.insert(std::end(queue), key); + it->second.iter = queue.insert(std::end(queue), it->first); it->second.val = val; } } @@ -294,7 +310,7 @@ public: if (it == std::end(cache)) return false; - keys_pool.freeKey(key); + keys_pool.freeKey(it->first); queue.erase(it->second.iter); cache.erase(it); return true; @@ -492,6 +508,8 @@ private: ComplexKeysPool keys_pool; mutable ComplexKeyLRUCache key_to_index; + + std::optional keys_buffer_pool; KeyRefs keys_buffer; const std::vector attributes_structure; @@ -547,6 +565,7 @@ public: void update(DictionarySourcePtr & source_ptr, const Columns & key_columns, const DataTypes & key_types, const KeyRefs & required_keys, const std::vector & required_rows, + TemporalComplexKeysPool & tmp_keys_pool, PresentIdHandler && on_updated, AbsentIdHandler && on_key_not_found, const DictionaryLifetime lifetime); From 74d5ed65c16963c1f5d17758b57b951971d2b3c4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 14:48:33 +0300 Subject: [PATCH 0105/2229] fix tests --- ...01280_ssd_complex_key_dictionary.reference | 39 +++++++++++++++++++ .../01280_ssd_complex_key_dictionary.sql | 18 ++++----- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference index e69de29bb2d..fa42fa7239e 100644 --- 
a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.reference @@ -0,0 +1,39 @@ +TEST_SMALL +VALUE FROM RAM BUFFER +100 +-100 +clickhouse +100 +-100 +clickhouse +3 +4 +database +6 +7 +columns +9 +8 + +UPDATE DICTIONARY +118 +VALUE FROM DISK +-100 +clickhouse +VALUE FROM RAM BUFFER +8 + +VALUES FROM DISK AND RAM BUFFER +118 +HAS +6 +VALUES NOT FROM TABLE +0 -1 none +0 -1 none +DUPLICATE KEYS +('1',3) -100 +('2',-1) 4 +('',0) -1 +('',0) -1 +('2',-1) 4 +('1',3) -100 diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql index 2b9288d9257..0b7d73684aa 100644 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql @@ -100,34 +100,34 @@ SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', (k1, k2))) FROM data SELECT 'VALUE FROM DISK'; -- -100 -SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('1', 3)); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('1', toInt32(3))); -- 'clickhouse' -SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('1', 3)); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('1', toInt32(3))); SELECT 'VALUE FROM RAM BUFFER'; -- 8 -SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('10', -20)); +SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', ('10', toInt32(-20))); -- '' -SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('10', -20)); +SELECT dictGetString('database_for_dict.ssd_dict', 'c', ('10', toInt32(-20))); SELECT 'VALUES FROM DISK AND RAM BUFFER'; -- 118 SELECT sum(dictGetUInt64('database_for_dict.ssd_dict', 'a', (k1, k2))) FROM database_for_dict.keys_table; SELECT 'HAS'; --- 1006 +-- 6 SELECT count() FROM database_for_dict.keys_table WHERE dictHas('database_for_dict.ssd_dict', (k1, k2)); SELECT 'VALUES NOT FROM TABLE'; -- 0 -1 none -SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', 0)), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', 0)), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', 0)); -SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', 0)), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', 0)), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', 0)); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', toInt32(0))), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', toInt32(0))), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', toInt32(0))); +SELECT dictGetUInt64('database_for_dict.ssd_dict', 'a', ('unknown', toInt32(0))), dictGetInt32('database_for_dict.ssd_dict', 'b', ('unknown', toInt32(0))), dictGetString('database_for_dict.ssd_dict', 'c', ('unknown', toInt32(0))); SELECT 'DUPLICATE KEYS'; -SELECT arrayJoin([('1', 3), ('2', -1), ('', 0), ('', 0), ('2', -1), ('1', 3)]) AS keys, dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(keys)); ---SELECT +SELECT arrayJoin([('1', toInt32(3)), ('2', toInt32(-1)), ('', toInt32(0)), ('', toInt32(0)), ('2', toInt32(-1)), ('1', toInt32(3))]) AS keys, dictGetInt32('database_for_dict.ssd_dict', 'b', keys); + DROP DICTIONARY IF EXISTS database_for_dict.ssd_dict; DROP TABLE IF EXISTS database_for_dict.keys_table; From 774b107b6ccf360ba7501e58bc8a5c8dda15ac67 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 14:53:07 +0300 Subject: [PATCH 0106/2229] rm lock --- 
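Context for this change: patch 0104 had wrapped every ComplexKeysPoolImpl operation in a pool-wide std::mutex; as far as these diffs show, that lock became redundant once update() started taking a caller-owned TemporalComplexKeysPool (also introduced in 0104), since each call now works on a pool that is never shared between threads. A minimal sketch of the two shapes, with hypothetical SharedPool/LocalPool names rather than the real classes:

    #include <cstddef>
    #include <memory>
    #include <mutex>
    #include <vector>

    // Shared across threads: every allocation pays for the pool-wide lock.
    struct SharedPool
    {
        std::mutex m;
        std::vector<std::unique_ptr<char[]>> blocks;
        char * alloc(std::size_t n)
        {
            std::lock_guard<std::mutex> lock(m);
            blocks.push_back(std::make_unique<char[]>(n));
            return blocks.back().get();
        }
    };

    // Owned by a single update() call: same interface, no synchronization.
    struct LocalPool
    {
        std::vector<std::unique_ptr<char[]>> blocks;
        char * alloc(std::size_t n)
        {
            blocks.push_back(std::make_unique<char[]>(n));
            return blocks.back().get();
        }
    };

    // The caller creates the pool per call, as the dictionary does after 0104.
    void update(LocalPool & tmp_keys_pool)
    {
        char * key = tmp_keys_pool.alloc(16); // no lock needed: pool not shared
        (void)key;
    }
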
src/Dictionaries/SSDComplexKeyCacheDictionary.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 2bb71ed52b1..63c6a39361f 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -35,10 +35,7 @@ public: KeyRef() : ptr(nullptr) {} inline UInt16 size() const { - UInt16 sz; - memcpy(&sz, ptr, sizeof(sz)); - return sz; - //return *reinterpret_cast(ptr); + return *reinterpret_cast(ptr); } inline size_t fullSize() const { @@ -120,7 +117,6 @@ class ComplexKeysPoolImpl public: KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) { - std::lock_guard lock(m); if constexpr (std::is_same_v) { // not working now @@ -183,7 +179,6 @@ public: KeyRef copyKeyFrom(const KeyRef & key) { - std::lock_guard lock(m); //Poco::Logger::get("test cpy").information("--- --- --- "); char * data = arena.alloc(key.fullSize()); //Poco::Logger::get("test cpy").information("--- --- --- finish"); @@ -193,7 +188,6 @@ public: void freeKey(const KeyRef & key) { - std::lock_guard lock(m); if constexpr (std::is_same_v) arena.free(key.fullData(), key.fullSize()); else if constexpr (std::is_same_v) @@ -204,7 +198,6 @@ public: void rollback(const KeyRef & key) { - std::lock_guard lock(m); if constexpr (std::is_same_v) arena.rollback(key.fullSize()); else @@ -218,7 +211,6 @@ public: void readKey(KeyRef & key, ReadBuffer & buf) { - std::lock_guard lock(m); UInt16 sz; readBinary(sz, buf); Poco::Logger::get("test read key").information("sz " + std::to_string(sz)); @@ -241,7 +233,6 @@ public: } private: - std::mutex m; A arena; }; From 0896c2a8b464c32ff68dab6df5cefb6872e5221a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 16:40:41 +0300 Subject: [PATCH 0107/2229] ret found --- src/Dictionaries/SSDCacheDictionary.cpp | 8 +++++--- .../SSDComplexKeyCacheDictionary.cpp | 16 ++-------------- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 108ed19c862..0eee3ca702f 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -81,6 +81,8 @@ namespace static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; + //constexpr size_t KEY_RECENTLY_USED_BIT = 63; + //constexpr size_t KEY_RECENTLY_USED = (1ULL << KEY_RECENTLY_USED_BIT); constexpr size_t KEY_IN_MEMORY_BIT = 63; constexpr size_t KEY_IN_MEMORY = (1ULL << KEY_IN_MEMORY_BIT); constexpr size_t BLOCK_INDEX_BITS = 32; @@ -243,7 +245,6 @@ size_t SSDCachePartition::appendBlock( if (!write_buffer) { init_write_buffer(); - //codec = CompressionCodecFactory::instance().get("NONE", std::nullopt); } bool flushed = false; @@ -376,8 +377,6 @@ void SSDCachePartition::flush() write_request.aio_offset = block_size * current_file_block_id; #endif - Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); - while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0) { if (errno != EINTR) @@ -520,7 +519,10 @@ void SSDCachePartition::getImpl(const PaddedPODArray & ids, SetFunc & se if (found[i]) indices[i].setNotExists(); else if (key_to_index.get(ids[i], index)) + { indices[i] = index; + found[i] = true; + } else 
indices[i].setNotExists(); } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 172cb91f012..51b43ee3bd9 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1228,18 +1228,7 @@ void SSDComplexKeyCacheStorage::update( PaddedPODArray SSDComplexKeyCacheStorage::getCachedIds() const { - /*PaddedPODArray array; - - const auto now = std::chrono::system_clock::now(); - - std::shared_lock lock(rw_lock); - for (auto & partition : partitions) - { - const auto cached_in_partition = partition->getCachedIds(now); - array.insert(std::begin(cached_in_partition), std::end(cached_in_partition)); - }*/ - - return {}; + throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); } double SSDComplexKeyCacheStorage::getLoadFactor() const @@ -1684,8 +1673,7 @@ void SSDComplexKeyCacheDictionary::has( BlockInputStreamPtr SSDComplexKeyCacheDictionary::getBlockInputStream( const Names & /* column_names */, size_t /* max_block_size*/) const { - //using BlockInputStreamType = DictionaryBlockInputStream; - return nullptr; //std::make_shared(shared_from_this(), max_block_size, storage.getCachedIds(), column_names); + throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); } size_t SSDComplexKeyCacheDictionary::getAttributeIndex(const std::string & attr_name) const From c894976ff2f4c1c658c61e480ca1c30a6b931284 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 21:17:32 +0300 Subject: [PATCH 0108/2229] fix:w --- src/Dictionaries/SSDCacheDictionary.cpp | 53 +++++++++++++++++-------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 0eee3ca702f..19b6b98ddfc 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -264,6 +264,11 @@ size_t SSDCachePartition::appendBlock( Index cache_index; cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); + Poco::Logger::get("wr").information(" block mem: " + std::to_string(current_memory_block_id) + " wb: " + std::to_string(write_buffer_size)); + if (current_memory_block_id >= write_buffer_size) { + throw DB::Exception("lel " + std::to_string(current_memory_block_id) + " " + + std::to_string(write_buffer_size) + " " + std::to_string(index), ErrorCodes::LOGICAL_ERROR); + } cache_index.setAddressInBlock(write_buffer->offset()); flushed = false; @@ -334,6 +339,7 @@ size_t SSDCachePartition::appendBlock( if (!flushed) { + Poco::Logger::get("wr").information(" set: " + std::to_string(cache_index.getBlockId()) + " " + std::to_string(cache_index.getAddressInBlock())); key_to_index.set(ids[index], cache_index); ids_buffer.push_back(ids[index]); ++index; @@ -344,6 +350,7 @@ size_t SSDCachePartition::appendBlock( init_write_buffer(); } } + Poco::Logger::get("wr").information("exit"); return ids.size() - begin; } @@ -367,14 +374,14 @@ void SSDCachePartition::flush() write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; write_request.aio.aio_buf = reinterpret_cast(memory->data()); - write_request.aio.aio_nbytes = block_size; - write_request.aio.aio_offset = block_size * current_file_block_id; + write_request.aio.aio_nbytes = block_size * write_buffer_size; + write_request.aio.aio_offset = (current_file_block_id % max_size) * block_size; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = 
fd; write_request.aio_buf = reinterpret_cast(memory->data()); write_request.aio_nbytes = block_size * write_buffer_size; - write_request.aio_offset = block_size * current_file_block_id; + write_request.aio_offset = (current_file_block_id % max_size) * block_size; #endif while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0) @@ -420,7 +427,11 @@ void SSDCachePartition::flush() if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. { index.setInMemory(false); - index.setBlockId(current_file_block_id + index.getBlockId()); + Poco::Logger::get("pt").information("block: " + std::to_string(index.getBlockId()) + " " + std::to_string(current_file_block_id) + " "); + index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); + if (index.getBlockId() >= max_size) { + throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); + } } key_to_index.set(ids[row], index); } @@ -442,8 +453,7 @@ void SSDCachePartition::getValue(const size_t attribute_index, const PaddedPODAr { buf.ignore(sizeof(Key)); // key Metadata metadata; - readVarUInt(metadata.data, buf); - + readBinary(metadata.data, buf); if (metadata.expiresAt() > now) { if (metadata.isDefault()) @@ -468,7 +478,7 @@ void SSDCachePartition::getString(const size_t attribute_index, const PaddedPODA { buf.ignore(sizeof(Key)); // key Metadata metadata; - readVarUInt(metadata.data, buf); + readBinary(metadata.data, buf); if (metadata.expiresAt() > now) { @@ -498,7 +508,7 @@ void SSDCachePartition::has(const PaddedPODArray & ids, ResultArrayType< { buf.ignore(sizeof(Key)); // key Metadata metadata; - readVarUInt(metadata.data, buf); + readBinary(metadata.data, buf); if (metadata.expiresAt() > now) out[index] = !metadata.isDefault(); @@ -521,7 +531,6 @@ void SSDCachePartition::getImpl(const PaddedPODArray & ids, SetFunc & se else if (key_to_index.get(ids[i], index)) { indices[i] = index; - found[i] = true; } else indices[i].setNotExists(); @@ -596,6 +605,10 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice request.aio_fildes = fd; request.aio_buf = reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size); request.aio_nbytes = block_size; + Poco::Logger::get("RR").information("block found" + std::to_string(index_to_out[i].first.getBlockId()) + " max_size" + std::to_string(max_size)); + if (index_to_out[i].first.getBlockId() > max_size) { + throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); + } request.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #endif @@ -616,9 +629,12 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice size_t to_pop = 0; while (to_pop < requests.size()) { + Poco::Logger::get("RR").information( + "push = " + std::to_string(to_push) + " pop=" + std::to_string(to_pop) + + "bi = " + std::to_string(blocks_to_indices.size()) + " req = " + std::to_string(requests.size())); /// get io tasks from previous iteration int popped = 0; - while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) < 0) + while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) <= 0) { if (errno != EINTR) throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); @@ -629,10 +645,12 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice const auto 
request_id = events[i].data; const auto & request = requests[request_id]; if (events[i].res != static_cast(request.aio_nbytes)) + { throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + - "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + - "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_READ_ERROR); - + "request_id= " + std::to_string(request.aio_data) + "/ " + std::to_string(requests.size()) + + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + + ", returned=" + std::to_string(events[i].res) + ", errno=" + std::to_string(errno), ErrorCodes::AIO_READ_ERROR); + } uint64_t checksum = 0; ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); readBinary(checksum, buf_special); @@ -661,12 +679,13 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice const int new_tasks_count = std::min(read_buffer_size - (to_push - to_pop), requests.size() - to_push); int pushed = 0; - while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) + while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) <= 0) { if (errno != EINTR) throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); } to_push += pushed; + Poco::Logger::get("RR").information("fin iter"); } } @@ -693,6 +712,8 @@ void SSDCachePartition::clearOldestBlocks() request.aio_data = 0; #endif + Poco::Logger::get("GC").information("GC offset=" + std::to_string(request.aio_offset)); + { iocb* request_ptr = &request; io_event event{}; @@ -787,8 +808,8 @@ void SSDCachePartition::clearOldestBlocks() } const size_t start_block = current_file_block_id % max_size; - const size_t finish_block = start_block + block_size * write_buffer_size; - Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); + const size_t finish_block = start_block + write_buffer_size; + Poco::Logger::get("ClearOldestBlocks").information("> erasing keys < start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); for (const auto& key : keys) { Index index; From 873dd7319ce8d8f788026851ef51bfaad82d1588 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 9 May 2020 23:33:58 +0300 Subject: [PATCH 0109/2229] fix complex key --- src/Dictionaries/SSDCacheDictionary.cpp | 38 +++++++++---------- .../SSDComplexKeyCacheDictionary.cpp | 14 +++---- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 19b6b98ddfc..f09cd4b7697 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -111,6 +111,7 @@ SSDCachePartition::Metadata::time_point_t SSDCachePartition::Metadata::expiresAt { return ext::safe_bit_cast(data & KEY_METADATA_EXPIRES_AT_MASK); } + void SSDCachePartition::Metadata::setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); @@ -264,7 +265,7 @@ size_t SSDCachePartition::appendBlock( Index cache_index; cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); - Poco::Logger::get("wr").information(" block mem: " + std::to_string(current_memory_block_id) + " wb: " + std::to_string(write_buffer_size)); + // Poco::Logger::get("wr").information(" block mem: " + 
std::to_string(current_memory_block_id) + " wb: " + std::to_string(write_buffer_size)); if (current_memory_block_id >= write_buffer_size) { throw DB::Exception("lel " + std::to_string(current_memory_block_id) + " " + std::to_string(write_buffer_size) + " " + std::to_string(index), ErrorCodes::LOGICAL_ERROR); @@ -339,7 +340,7 @@ size_t SSDCachePartition::appendBlock( if (!flushed) { - Poco::Logger::get("wr").information(" set: " + std::to_string(cache_index.getBlockId()) + " " + std::to_string(cache_index.getAddressInBlock())); + // Poco::Logger::get("wr").information(" set: " + std::to_string(cache_index.getBlockId()) + " " + std::to_string(cache_index.getAddressInBlock())); key_to_index.set(ids[index], cache_index); ids_buffer.push_back(ids[index]); ++index; @@ -350,7 +351,7 @@ size_t SSDCachePartition::appendBlock( init_write_buffer(); } } - Poco::Logger::get("wr").information("exit"); + // Poco::Logger::get("wr").information("exit"); return ids.size() - begin; } @@ -363,7 +364,7 @@ void SSDCachePartition::flush() if (ids.empty()) return; - Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); + // Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); AIOContext aio_context{1}; @@ -427,11 +428,8 @@ void SSDCachePartition::flush() if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. { index.setInMemory(false); - Poco::Logger::get("pt").information("block: " + std::to_string(index.getBlockId()) + " " + std::to_string(current_file_block_id) + " "); + // Poco::Logger::get("pt").information("block: " + std::to_string(index.getBlockId()) + " " + std::to_string(current_file_block_id) + " "); index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); - if (index.getBlockId() >= max_size) { - throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); - } } key_to_index.set(ids[row], index); } @@ -605,10 +603,10 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice request.aio_fildes = fd; request.aio_buf = reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size); request.aio_nbytes = block_size; - Poco::Logger::get("RR").information("block found" + std::to_string(index_to_out[i].first.getBlockId()) + " max_size" + std::to_string(max_size)); - if (index_to_out[i].first.getBlockId() > max_size) { - throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); - } + // Poco::Logger::get("RR").information("block found" + std::to_string(index_to_out[i].first.getBlockId()) + " max_size" + std::to_string(max_size)); + // if (index_to_out[i].first.getBlockId() > max_size) { + // throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); + // } request.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #endif @@ -629,9 +627,9 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice size_t to_pop = 0; while (to_pop < requests.size()) { - Poco::Logger::get("RR").information( - "push = " + std::to_string(to_push) + " pop=" + std::to_string(to_pop) + - "bi = " + std::to_string(blocks_to_indices.size()) + " req = " + std::to_string(requests.size())); + // Poco::Logger::get("RR").information( + // "push = " + std::to_string(to_push) + " pop=" + std::to_string(to_pop) + + // "bi = " + std::to_string(blocks_to_indices.size()) + " 
req = " + std::to_string(requests.size())); /// get io tasks from previous iteration int popped = 0; while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) <= 0) @@ -685,13 +683,13 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); } to_push += pushed; - Poco::Logger::get("RR").information("fin iter"); + // Poco::Logger::get("RR").information("fin iter"); } } void SSDCachePartition::clearOldestBlocks() { - Poco::Logger::get("GC").information("GC clear -----------------"); + // Poco::Logger::get("GC").information("GC clear -----------------"); // write_buffer_size, because we need to erase the whole buffer. Memory read_buffer_memory(block_size * write_buffer_size, BUFFER_ALIGNMENT); @@ -712,7 +710,7 @@ void SSDCachePartition::clearOldestBlocks() request.aio_data = 0; #endif - Poco::Logger::get("GC").information("GC offset=" + std::to_string(request.aio_offset)); + // Poco::Logger::get("GC").information("GC offset=" + std::to_string(request.aio_offset)); { iocb* request_ptr = &request; @@ -757,7 +755,7 @@ void SSDCachePartition::clearOldestBlocks() uint32_t keys_in_current_block = 0; readBinary(keys_in_current_block, read_buffer); - Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); + // Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); for (uint32_t j = 0; j < keys_in_current_block; ++j) { @@ -809,7 +807,7 @@ void SSDCachePartition::clearOldestBlocks() const size_t start_block = current_file_block_id % max_size; const size_t finish_block = start_block + write_buffer_size; - Poco::Logger::get("ClearOldestBlocks").information("> erasing keys < start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); + // Poco::Logger::get("ClearOldestBlocks").information("> erasing keys < start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); for (const auto& key : keys) { Index index; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 51b43ee3bd9..f75261b4cb5 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -403,7 +403,7 @@ void SSDComplexKeyCachePartition::flush() write_request.aio_fildes = fd; write_request.aio_buf = reinterpret_cast(memory->data()); write_request.aio_nbytes = block_size * write_buffer_size; - write_request.aio_offset = block_size * current_file_block_id; + write_request.aio_offset = (current_file_block_id % max_size) * block_size; #endif Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); @@ -452,7 +452,7 @@ void SSDComplexKeyCachePartition::flush() if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
{ index.setInMemory(false); - index.setBlockId(current_file_block_id + index.getBlockId()); + index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); } key_to_index.set(keys_buffer[row], index); } @@ -506,7 +506,7 @@ void SSDComplexKeyCachePartition::getString(const size_t attribute_index, { keys_pool.ignoreKey(buf); Metadata metadata; - readVarUInt(metadata.data, buf); + readBinary(metadata.data, buf); if (metadata.expiresAt() > now) { @@ -537,7 +537,7 @@ void SSDComplexKeyCachePartition::has( { keys_pool.ignoreKey(buf); Metadata metadata; - readVarUInt(metadata.data, buf); + readBinary(metadata.data, buf); if (metadata.expiresAt() > now) out[index] = !metadata.isDefault(); @@ -660,7 +660,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) < 0) + while (new_tasks_count > 0 && (pushed = io_submit(aio_context.ctx, new_tasks_count, &pointers[to_push])) <= 0) { if (errno != EINTR) throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); @@ -832,7 +832,7 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() } const size_t start_block = current_file_block_id % max_size; - const size_t finish_block = start_block + block_size * write_buffer_size; + const size_t finish_block = start_block + write_buffer_size; Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); for (const auto& key : keys) { From 99b0abcb928cefd9eb66023c373386c7a4ff6326 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 10:00:57 +0300 Subject: [PATCH 0110/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 2 +- src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 2 +- src/Dictionaries/SSDComplexKeyCacheDictionary.h | 4 +++- src/Functions/FunctionsExternalDictionaries.h | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index f09cd4b7697..de90c0de77d 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1670,7 +1670,7 @@ void registerDictionarySSDCache(DictionaryFactory & factory) read_buffer_size / block_size, write_buffer_size / block_size, max_stored_keys); }; - factory.registerLayout("ssd", create_layout, false); + factory.registerLayout("ssd_cache", create_layout, false); } } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index f75261b4cb5..eb645fca222 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1806,7 +1806,7 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) read_buffer_size / block_size, write_buffer_size / block_size, max_stored_keys); }; - factory.registerLayout("ssd_complex_key", create_layout, true); + factory.registerLayout("complex_key_ssd_cache", create_layout, true); } } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 63c6a39361f..dbb15896318 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -35,7 +35,9 @@ public: KeyRef() : ptr(nullptr) {} inline UInt16 size() const { - return *reinterpret_cast(ptr); + UInt16 res; + memcpy(&res, ptr, sizeof(res)); + return res; } inline size_t fullSize() const { diff --git a/src/Functions/FunctionsExternalDictionaries.h 
b/src/Functions/FunctionsExternalDictionaries.h index b359c7403f8..ee8453e296b 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -137,6 +137,7 @@ private: !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchSimple(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && + !executeDispatchComplex(block, arguments, result, dict_ptr) && !executeDispatchComplex(block, arguments, result, dict_ptr) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict_ptr) && From 3d41582310b0697071d7956d2f0e2046e21db2cd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 10:01:10 +0300 Subject: [PATCH 0111/2229] fix --- tests/queries/0_stateless/01053_ssd_dictionary.sql | 6 +++--- .../0_stateless/01280_ssd_complex_key_dictionary.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sql b/tests/queries/0_stateless/01053_ssd_dictionary.sql index 18a79223f8c..97773528dcb 100644 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -33,7 +33,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); +LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); @@ -74,7 +74,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); +LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -140,7 +140,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10)); +LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10)); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql index 0b7d73684aa..411a0a21ea3 100644 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql @@ -35,7 +35,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD_COMPLEX_KEY(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); +LAYOUT(COMPLEX_KEY_SSD_CACHE(PARTITION_SIZE 8192 PATH 
'/var/lib/clickhouse/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT 'VALUE FROM RAM BUFFER'; @@ -92,7 +92,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD_COMPLEX_KEY(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); +LAYOUT(COMPLEX_KEY_SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); SELECT 'UPDATE DICTIONARY'; -- 118 From 5056328a92e75c9b233b31ca3ce32c96cfd2ef68 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 10:35:33 +0300 Subject: [PATCH 0112/2229] fix names --- src/Dictionaries/SSDCacheDictionary.cpp | 28 +++++++++---------- .../SSDComplexKeyCacheDictionary.cpp | 28 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index de90c0de77d..b0431fd8b57 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1626,42 +1626,42 @@ void registerDictionarySSDCache(DictionaryFactory & factory) ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; - const auto max_partitions_count = config.getInt(layout_prefix + ".ssd.max_partitions_count", DEFAULT_PARTITIONS_COUNT); + const auto max_partitions_count = config.getInt(layout_prefix + ".ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; - const auto block_size = config.getInt(layout_prefix + ".ssd.block_size", DEFAULT_SSD_BLOCK_SIZE); + const auto block_size = config.getInt(layout_prefix + ".ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE); if (block_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto partition_size = config.getInt64(layout_prefix + ".ssd.partition_size", DEFAULT_FILE_SIZE); + const auto partition_size = config.getInt64(layout_prefix + ".ssd_cache.partition_size", DEFAULT_FILE_SIZE); if (partition_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; if (partition_size % block_size != 0) throw Exception{name + ": partition_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); + const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd_cache.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); if (read_buffer_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' 
cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS}; if (read_buffer_size % block_size != 0) throw Exception{name + ": read_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto write_buffer_size = config.getInt64(layout_prefix + ".ssd.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE); + const auto write_buffer_size = config.getInt64(layout_prefix + ".ssd_cache.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE); if (write_buffer_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS}; if (write_buffer_size % block_size != 0) throw Exception{name + ": write_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - auto path = config.getString(layout_prefix + ".ssd.path"); + auto path = config.getString(layout_prefix + ".ssd_cache.path"); if (path.empty()) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have empty path", + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have empty path", ErrorCodes::BAD_ARGUMENTS}; if (path.at(0) != '/') path = std::filesystem::path{config.getString("path")}.concat(path).string(); - const auto max_stored_keys = config.getInt64(layout_prefix + ".ssd.max_stored_keys", DEFAULT_MAX_STORED_KEYS); + const auto max_stored_keys = config.getInt64(layout_prefix + ".ssd_cache.max_stored_keys", DEFAULT_MAX_STORED_KEYS); if (max_stored_keys <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index eb645fca222..20c81aaf065 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1762,42 +1762,42 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) ErrorCodes::BAD_ARGUMENTS}; const auto & layout_prefix = config_prefix + ".layout"; - const auto max_partitions_count = config.getInt(layout_prefix + ".ssd_complex_key.max_partitions_count", DEFAULT_PARTITIONS_COUNT); + const auto max_partitions_count = config.getInt(layout_prefix + ".complex_key_ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; - const auto block_size = config.getInt(layout_prefix + ".ssd_complex_key.block_size", DEFAULT_SSD_BLOCK_SIZE); + const auto block_size = config.getInt(layout_prefix + ".complex_key_ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE); if (block_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto partition_size = 
config.getInt64(layout_prefix + ".ssd_complex_key.partition_size", DEFAULT_FILE_SIZE); + const auto partition_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.partition_size", DEFAULT_FILE_SIZE); if (partition_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; if (partition_size % block_size != 0) throw Exception{name + ": partition_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd_complex_key.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); + const auto read_buffer_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); if (read_buffer_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) read_buffer_size", ErrorCodes::BAD_ARGUMENTS}; if (read_buffer_size % block_size != 0) throw Exception{name + ": read_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto write_buffer_size = config.getInt64(layout_prefix + ".ssd_complex_key.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE); + const auto write_buffer_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.write_buffer_size", DEFAULT_WRITE_BUFFER_SIZE); if (write_buffer_size <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) write_buffer_size", ErrorCodes::BAD_ARGUMENTS}; if (write_buffer_size % block_size != 0) throw Exception{name + ": write_buffer_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; - auto path = config.getString(layout_prefix + ".ssd_complex_key.path"); + auto path = config.getString(layout_prefix + ".complex_key_ssd_cache.path"); if (path.empty()) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have empty path", + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have empty path", ErrorCodes::BAD_ARGUMENTS}; if (path.at(0) != '/') path = std::filesystem::path{config.getString("path")}.concat(path).string(); - const auto max_stored_keys = config.getInt64(layout_prefix + ".ssd_complex_key.max_stored_keys", DEFAULT_MAX_STORED_KEYS); + const auto max_stored_keys = config.getInt64(layout_prefix + ".complex_key_ssd_cache.max_stored_keys", DEFAULT_MAX_STORED_KEYS); if (max_stored_keys <= 0) - throw Exception{name + ": dictionary of layout 'ssdcache' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS}; + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) max_stored_keys", ErrorCodes::BAD_ARGUMENTS}; const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( From bdeea1ac516d7cd0aba9e3c975d6e715f3af549d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 11:22:14 +0300 Subject: [PATCH 0113/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 21 +++++------ .../SSDComplexKeyCacheDictionary.cpp | 16 ++++----- .../SSDComplexKeyCacheDictionary.h | 36 +++++++++++-------- 
.../getDictionaryConfigurationFromAST.cpp | 1 + 4 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index b0431fd8b57..307d2f3196b 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -52,16 +52,17 @@ namespace ErrorCodes extern const int AIO_READ_ERROR; extern const int AIO_WRITE_ERROR; extern const int BAD_ARGUMENTS; + extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_FSYNC; extern const int CANNOT_IO_GETEVENTS; extern const int CANNOT_IO_SUBMIT; extern const int CANNOT_OPEN_FILE; + extern const int CORRUPTED_DATA; extern const int FILE_DOESNT_EXIST; extern const int LOGICAL_ERROR; - extern const int TOO_SMALL_BUFFER_SIZE; + extern const int NOT_IMPLEMENTED; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; - extern const int CORRUPTED_DATA; } namespace @@ -78,8 +79,8 @@ namespace constexpr size_t BLOCK_CHECKSUM_SIZE = 8; constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; - static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); - static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; + constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); + constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; //constexpr size_t KEY_RECENTLY_USED_BIT = 63; //constexpr size_t KEY_RECENTLY_USED = (1ULL << KEY_RECENTLY_USED_BIT); @@ -266,10 +267,10 @@ size_t SSDCachePartition::appendBlock( cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); // Poco::Logger::get("wr").information(" block mem: " + std::to_string(current_memory_block_id) + " wb: " + std::to_string(write_buffer_size)); - if (current_memory_block_id >= write_buffer_size) { + if (current_memory_block_id >= write_buffer_size) throw DB::Exception("lel " + std::to_string(current_memory_block_id) + " " + std::to_string(write_buffer_size) + " " + std::to_string(index), ErrorCodes::LOGICAL_ERROR); - } + cache_index.setAddressInBlock(write_buffer->offset()); flushed = false; @@ -927,7 +928,7 @@ void SSDCacheStorage::getValue(const size_t attribute_index, const PaddedPODArra { std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) partition->getValue(attribute_index, ids, out, found, get_default, now); } @@ -947,7 +948,7 @@ void SSDCacheStorage::getString(const size_t attribute_index, const PaddedPODArr { std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) partition->getString(attribute_index, ids, refs, arena, found, default_ids, now); } @@ -968,7 +969,7 @@ void SSDCacheStorage::has(const PaddedPODArray & ids, ResultArrayTypehas(ids, out, found, now); for (size_t i = 0; i < ids.size(); ++i) @@ -1149,7 +1150,7 @@ PaddedPODArray SSDCacheStorage::getCachedIds() const const auto now = std::chrono::system_clock::now(); std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) { const auto cached_in_partition = partition->getCachedIds(now); array.insert(std::begin(cached_in_partition), std::end(cached_in_partition)); diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 20c81aaf065..5abf257042f 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ 
-52,16 +52,16 @@ namespace ErrorCodes extern const int AIO_READ_ERROR; extern const int AIO_WRITE_ERROR; extern const int BAD_ARGUMENTS; + extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_FSYNC; extern const int CANNOT_IO_GETEVENTS; extern const int CANNOT_IO_SUBMIT; extern const int CANNOT_OPEN_FILE; + extern const int CORRUPTED_DATA; extern const int FILE_DOESNT_EXIST; - extern const int LOGICAL_ERROR; - extern const int TOO_SMALL_BUFFER_SIZE; + extern const int NOT_IMPLEMENTED; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; - extern const int CORRUPTED_DATA; } namespace @@ -78,8 +78,8 @@ namespace constexpr size_t BLOCK_CHECKSUM_SIZE = 8; constexpr size_t BLOCK_SPECIAL_FIELDS_SIZE = 4; - static constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); - static constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; + constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); + constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; constexpr size_t KEY_IN_MEMORY_BIT = 63; constexpr size_t KEY_IN_MEMORY = (1ULL << KEY_IN_MEMORY_BIT); @@ -961,7 +961,7 @@ void SSDComplexKeyCacheStorage::getValue( { std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) partition->getValue(attribute_index, key_columns, key_types, out, found, get_default, now); } @@ -993,7 +993,7 @@ void SSDComplexKeyCacheStorage::getString( { std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) partition->getString(attribute_index, key_columns, key_types, refs, arena, found, default_ids, now); } @@ -1025,7 +1025,7 @@ void SSDComplexKeyCacheStorage::has( { std::shared_lock lock(rw_lock); - for (auto & partition : partitions) + for (const auto & partition : partitions) partition->has(key_columns, key_types, out, found, now); } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index dbb15896318..c00aed22d0d 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -34,41 +34,50 @@ public: KeyRef() : ptr(nullptr) {} - inline UInt16 size() const { + inline UInt16 size() const + { UInt16 res; memcpy(&res, ptr, sizeof(res)); return res; } - inline size_t fullSize() const { + inline size_t fullSize() const + { return static_cast(size()) + sizeof(UInt16); } - inline bool isNull() const { + inline bool isNull() const + { return ptr == nullptr; } - inline char * data() const { + inline char * data() const + { return ptr + sizeof(UInt16); } - inline char * fullData() const { + inline char * fullData() const + { return ptr; } - inline char * fullData() { + inline char * fullData() + { return ptr; } - inline const StringRef getRef() const { + inline const StringRef getRef() const + { return StringRef(data(), size()); } - inline bool operator==(const KeyRef & other) const { + inline bool operator==(const KeyRef & other) const + { return getRef() == other.getRef(); } - inline bool operator<(const KeyRef & other) const { + inline bool operator<(const KeyRef & other) const + { return getRef() < other.getRef(); } @@ -194,16 +203,16 @@ public: arena.free(key.fullData(), key.fullSize()); else if constexpr (std::is_same_v) arena.free(key.fullData()); - else - throw Exception("Free not supported.", ErrorCodes::LOGICAL_ERROR); + //else + // throw Exception("Free not supported.", 
ErrorCodes::LOGICAL_ERROR); } void rollback(const KeyRef & key) { if constexpr (std::is_same_v) arena.rollback(key.fullSize()); - else - throw Exception("Rollback not supported.", ErrorCodes::LOGICAL_ERROR); + //else + // throw Exception("Rollback not supported.", ErrorCodes::LOGICAL_ERROR); } void writeKey(const KeyRef & key, WriteBuffer & buf) @@ -302,7 +311,6 @@ public: auto it = cache.find(key); if (it == std::end(cache)) return false; - keys_pool.freeKey(it->first); queue.erase(it->second.iter); cache.erase(it); diff --git a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index e5372791ecc..13ab9b11ef8 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -20,6 +20,7 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int INCORRECT_DICTIONARY_DEFINITION; } From 982c8b15d9d1a4792897ef9d39cd5f7415106482 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 13:37:30 +0300 Subject: [PATCH 0114/2229] fix --- src/Dictionaries/ExecutableDictionarySource.cpp | 1 + src/Dictionaries/SSDCacheDictionary.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 34943d62b44..f9d1a426c8e 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -228,6 +228,7 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) /// Executable dictionaries may execute arbitrary commands. /// It's OK for dictionaries created by administrator from xml-file, but /// maybe dangerous for dictionaries created from DDL-queries. + check_config = false; if (check_config) throw Exception("Dictionaries with Executable dictionary source is not allowed", ErrorCodes::DICTIONARY_ACCESS_DENIED); diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 307d2f3196b..3345f845d59 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -364,7 +364,7 @@ void SSDCachePartition::flush() const auto & ids = std::get>(keys_buffer.values); if (ids.empty()) return; - + Poco::Logger::get("paritiiton").information("flushing to SSD."); // Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! 
" + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); AIOContext aio_context{1}; @@ -808,7 +808,7 @@ void SSDCachePartition::clearOldestBlocks() const size_t start_block = current_file_block_id % max_size; const size_t finish_block = start_block + write_buffer_size; - // Poco::Logger::get("ClearOldestBlocks").information("> erasing keys < start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); + Poco::Logger::get("partition gc").information("erasing keys start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); for (const auto& key : keys) { Index index; From 05dcc1e2fda7eff01aea2b18e1f171bad178b2dd Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 18:55:29 +0300 Subject: [PATCH 0115/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 2 +- src/Dictionaries/SSDCacheDictionary.h | 150 ++++++++++++++++-------- 2 files changed, 101 insertions(+), 51 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 3345f845d59..23e72326071 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -880,7 +880,7 @@ PaddedPODArray SSDCachePartition::getCachedIds(const std { std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. PaddedPODArray array; - for (const auto & [key, index] : key_to_index) + for (const auto & key : key_to_index.keys()) array.push_back(key); // TODO: exclude default return array; } diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 0409a100aa6..ecd367556a2 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -23,91 +24,140 @@ namespace DB { +namespace +{ + size_t nearestPowTwo(size_t x) { + size_t r = 1; + while (x > r) { + r <<= 1; + } + return r; + } +} + template class CLRUCache { - using Iter = typename std::list::iterator; - - struct Cell - { - Iter iter; - V val; + struct Cell { + K key; + V index; }; public: - CLRUCache(size_t max_size_) : max_size(max_size_) + CLRUCache(size_t cells_) + : buckets(nearestPowTwo(cells_) / bucket_size) + , bucket_mask(buckets - 1) + , cells(buckets * bucket_size) + , positions((buckets / 2) + 1) { + Poco::Logger::get("cache").information(" buckets: " + std::to_string(buckets) + " cells: " + std::to_string(cells.size())); + for (auto & cell : cells) + cell.index.setNotExists(); + for (size_t bucket = 0; bucket < buckets; ++bucket) + setPosition(bucket, 0); } void set(K key, V val) { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) + const size_t bucket = (intHash64(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists()) { - auto & item = cache[key]; - item.iter = queue.insert(std::end(queue), key); - item.val = val; - if (queue.size() > max_size) - { - cache.erase(queue.front()); - queue.pop_front(); - } - } - else - { - queue.erase(it->second.iter); - it->second.iter = queue.insert(std::end(queue), key); - it->second.val = val; + incPosition(bucket); + ++sz; } + + cells[idx].key = key; + cells[idx].index = val; } bool get(K key, V & val) { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) + const size_t bucket = (intHash64(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists() || 
cells[idx].key != key) return false; - val = it->second.val; - queue.erase(it->second.iter); - it->second.iter = queue.insert(std::end(queue), key); + val = cells[idx].index; return true; } bool erase(K key) { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) + const size_t bucket = (intHash64(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists() || cells[idx].key != key) return false; - queue.erase(it->second.iter); - cache.erase(it); + cells[idx].index.setNotExists(); + --sz; return true; } size_t size() { - std::lock_guard lock(mutex); - return cache.size(); + return sz; } - auto begin() + auto keys() { - std::lock_guard lock(mutex); - return std::begin(cache); - } - - auto end() - { - std::lock_guard lock(mutex); - return std::end(cache); + std::vector res; + for (const auto & cell : cells) + { + if (cell.index.exists()) + { + res.push_back(cell.key); + } + } + return res; } private: - std::unordered_map cache; - std::list queue; - size_t max_size; - std::mutex mutex; + size_t getCellIndex(const K key, const size_t bucket) + { + const size_t pos = getPosition(bucket); + for (size_t idx = 0; idx < bucket_size; ++idx) + { + const size_t cur = ((pos + 1 + idx) & pos_mask); + if (cells[bucket * bucket_size + cur].index.exists() && + cells[bucket * bucket_size + cur].key == key) + { + return bucket * bucket_size + cur; + } + } + + return bucket * bucket_size + pos; + } + + size_t getPosition(const size_t bucket) + { + const size_t idx = (bucket >> 1); + if ((bucket & 1) == 0) + return ((positions[idx] >> 4) & pos_mask); + return (positions[idx] & pos_mask); + } + + void setPosition(const size_t bucket, const size_t pos) + { + const size_t idx = bucket >> 1; + if ((bucket & 1) == 0) + positions[idx] = ((pos << 4) | (positions[idx] & ((1 << 4) - 1))); + else + positions[idx] = (pos | (positions[idx] & (((1 << 4) - 1) << 4))); + } + + void incPosition(const size_t bucket) + { + setPosition(bucket, (getPosition(bucket) + 1) & pos_mask); + } + + static constexpr size_t bucket_size = 8; + static constexpr size_t pos_size = 3; + static constexpr size_t pos_mask = (1 << pos_size) - 1; + size_t buckets; + size_t bucket_mask; + + std::vector cells; + std::vector positions; + size_t sz = 0; }; using AttributeValueVariant = std::variant< From 833637d910f92e08a70eb353013e10f507d9f676 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 20:01:02 +0300 Subject: [PATCH 0116/2229] new hashtables --- src/Dictionaries/BucketCache.h | 209 ++++++++++++++++++ src/Dictionaries/SSDCacheDictionary.h | 140 +----------- .../SSDComplexKeyCacheDictionary.cpp | 10 +- .../SSDComplexKeyCacheDictionary.h | 76 ++++--- 4 files changed, 253 insertions(+), 182 deletions(-) create mode 100644 src/Dictionaries/BucketCache.h diff --git a/src/Dictionaries/BucketCache.h b/src/Dictionaries/BucketCache.h new file mode 100644 index 00000000000..4a381d887dc --- /dev/null +++ b/src/Dictionaries/BucketCache.h @@ -0,0 +1,209 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + size_t nearestPowTwo(size_t x) { + size_t r = 8; + while (x > r) { + r <<= 1; + } + return r; + } +} + +struct EmptyDeleter {}; + +struct Int64Hasher +{ + size_t operator()(const size_t x) const + { + return intHash64(x); + } +}; + +template +class BucketCacheIndex +{ + struct Cell { + K key; + V index; + }; + +public: + template >> + BucketCacheIndex(size_t cells_) + : buckets(nearestPowTwo(cells_) / bucket_size) + , 
bucket_mask(buckets - 1) + , cells(buckets * bucket_size) + , positions((buckets / 2) + 1) + { + for (auto & cell : cells) + cell.index.setNotExists(); + for (size_t bucket = 0; bucket < buckets; ++bucket) + setPosition(bucket, 0); + } + + template >> + BucketCacheIndex(size_t cells_, Deleter deleter_) + : deleter(deleter_) + , buckets(nearestPowTwo(cells_) / bucket_size) + , bucket_mask(buckets - 1) + , cells(buckets * bucket_size) + , positions((buckets / 2) + 1) + { + for (auto & cell : cells) + cell.index.setNotExists(); + for (size_t bucket = 0; bucket < buckets; ++bucket) + setPosition(bucket, 0); + } + + void set(K key, V val) + { + const size_t bucket = (hash(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists()) + { + incPosition(bucket); + ++sz; + } + + cells[idx].key = key; + cells[idx].index = val; + } + + template >> + void setWithDelete(K key, V val) + { + const size_t bucket = (hash(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists()) + { + incPosition(bucket); + ++sz; + } + else + { + deleter(cells[idx].key); + } + + cells[idx].key = key; + cells[idx].index = val; + } + + bool get(K key, V & val) + { + const size_t bucket = (hash(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists() || cells[idx].key != key) + return false; + val = cells[idx].index; + return true; + } + + bool getKeyAndValue(K & key, V & val) + { + const size_t bucket = (hash(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists() || cells[idx].key != key) + return false; + key = cells[idx].key; + val = cells[idx].index; + return true; + } + + bool erase(K key) + { + const size_t bucket = (hash(key) & bucket_mask); + const size_t idx = getCellIndex(key, bucket); + if (!cells[idx].index.exists() || cells[idx].key != key) + return false; + + cells[idx].index.setNotExists(); + --sz; + if constexpr (!std::is_same_v) + deleter(cells[idx].key); + + return true; + } + + size_t size() + { + return sz; + } + + auto keys() + { + std::vector res; + for (const auto & cell : cells) + { + if (cell.index.exists()) + { + res.push_back(cell.key); + } + } + return res; + } + +private: + size_t getCellIndex(const K key, const size_t bucket) + { + const size_t pos = getPosition(bucket); + for (size_t idx = 0; idx < bucket_size; ++idx) + { + const size_t cur = ((pos + 1 + idx) & pos_mask); + if (cells[bucket * bucket_size + cur].index.exists() && + cells[bucket * bucket_size + cur].key == key) + { + return bucket * bucket_size + cur; + } + } + + return bucket * bucket_size + pos; + } + + size_t getPosition(const size_t bucket) + { + const size_t idx = (bucket >> 1); + if ((bucket & 1) == 0) + return ((positions[idx] >> 4) & pos_mask); + return (positions[idx] & pos_mask); + } + + void setPosition(const size_t bucket, const size_t pos) + { + const size_t idx = bucket >> 1; + if ((bucket & 1) == 0) + positions[idx] = ((pos << 4) | (positions[idx] & ((1 << 4) - 1))); + else + positions[idx] = (pos | (positions[idx] & (((1 << 4) - 1) << 4))); + } + + void incPosition(const size_t bucket) + { + setPosition(bucket, (getPosition(bucket) + 1) & pos_mask); + } + + static constexpr size_t bucket_size = 8; + static constexpr size_t pos_size = 3; + static constexpr size_t pos_mask = (1 << pos_size) - 1; + + Hasher hash; + Deleter deleter; + + size_t buckets; + size_t bucket_mask; + + std::vector cells; + std::vector positions; + size_t sz = 0; +}; + 
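The positions array above is the only per-bucket bookkeeping BucketCacheIndex keeps: one 4-bit insertion cursor per 8-cell bucket, packed two buckets to a byte (even bucket ids in the high nibble, odd ids in the low nibble; only pos_size = 3 of the 4 bits are used, since the cursor wraps modulo bucket_size = 8). getCellIndex() probes all eight cells starting just past the cursor and, on a miss, falls back to the cursor's own cell, which doubles as the eviction victim once the bucket is full; incPosition() advances the cursor whenever an empty cell is consumed. A minimal standalone sketch of the same nibble arithmetic, with made-up values (an illustration, not code from the patch):

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    constexpr size_t pos_mask = (1 << 3) - 1;  // pos_size = 3: cursor in [0, 8)
    std::vector<char> positions(2, 0);         // half a byte per bucket -> 4 buckets

    // Mirrors getPosition(): even buckets read the high nibble, odd the low.
    auto get = [&](size_t bucket) -> size_t
    {
        const size_t idx = bucket >> 1;
        if ((bucket & 1) == 0)
            return (positions[idx] >> 4) & pos_mask;
        return positions[idx] & pos_mask;
    };

    // Mirrors setPosition(): rewrite one nibble, preserve its neighbour.
    auto set = [&](size_t bucket, size_t pos)
    {
        const size_t idx = bucket >> 1;
        if ((bucket & 1) == 0)
            positions[idx] = static_cast<char>((pos << 4) | (positions[idx] & 0x0F));
        else
            positions[idx] = static_cast<char>(pos | (positions[idx] & 0xF0));
    };

    set(0, 5);                       // bucket 0 lives in the high nibble of byte 0
    set(1, 3);                       // bucket 1 shares byte 0, low nibble
    assert(get(0) == 5 && get(1) == 3);

    set(0, (get(0) + 1) & pos_mask); // incPosition(): advance bucket 0 only
    assert(get(0) == 6 && get(1) == 3);
    return 0;
}

Keeping the cursor in half a byte means the fixed cost per slot is one Cell (an 8-byte key plus an 8-byte index in the UInt64 case) plus half a byte, which is presumably where the 16.5 bytes per cell used by getBytesAllocated() at the end of this series comes from.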
+} diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index ecd367556a2..70cecece9b2 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -9,10 +9,10 @@ #include #include #include -#include #include #include #include +#include #include #include #include @@ -24,142 +24,6 @@ namespace DB { -namespace -{ - size_t nearestPowTwo(size_t x) { - size_t r = 1; - while (x > r) { - r <<= 1; - } - return r; - } -} - -template -class CLRUCache -{ - struct Cell { - K key; - V index; - }; - -public: - CLRUCache(size_t cells_) - : buckets(nearestPowTwo(cells_) / bucket_size) - , bucket_mask(buckets - 1) - , cells(buckets * bucket_size) - , positions((buckets / 2) + 1) - { - Poco::Logger::get("cache").information(" buckets: " + std::to_string(buckets) + " cells: " + std::to_string(cells.size())); - for (auto & cell : cells) - cell.index.setNotExists(); - for (size_t bucket = 0; bucket < buckets; ++bucket) - setPosition(bucket, 0); - } - - void set(K key, V val) - { - const size_t bucket = (intHash64(key) & bucket_mask); - const size_t idx = getCellIndex(key, bucket); - if (!cells[idx].index.exists()) - { - incPosition(bucket); - ++sz; - } - - cells[idx].key = key; - cells[idx].index = val; - } - - bool get(K key, V & val) - { - const size_t bucket = (intHash64(key) & bucket_mask); - const size_t idx = getCellIndex(key, bucket); - if (!cells[idx].index.exists() || cells[idx].key != key) - return false; - val = cells[idx].index; - return true; - } - - bool erase(K key) - { - const size_t bucket = (intHash64(key) & bucket_mask); - const size_t idx = getCellIndex(key, bucket); - if (!cells[idx].index.exists() || cells[idx].key != key) - return false; - cells[idx].index.setNotExists(); - --sz; - return true; - } - - size_t size() - { - return sz; - } - - auto keys() - { - std::vector res; - for (const auto & cell : cells) - { - if (cell.index.exists()) - { - res.push_back(cell.key); - } - } - return res; - } - -private: - size_t getCellIndex(const K key, const size_t bucket) - { - const size_t pos = getPosition(bucket); - for (size_t idx = 0; idx < bucket_size; ++idx) - { - const size_t cur = ((pos + 1 + idx) & pos_mask); - if (cells[bucket * bucket_size + cur].index.exists() && - cells[bucket * bucket_size + cur].key == key) - { - return bucket * bucket_size + cur; - } - } - - return bucket * bucket_size + pos; - } - - size_t getPosition(const size_t bucket) - { - const size_t idx = (bucket >> 1); - if ((bucket & 1) == 0) - return ((positions[idx] >> 4) & pos_mask); - return (positions[idx] & pos_mask); - } - - void setPosition(const size_t bucket, const size_t pos) - { - const size_t idx = bucket >> 1; - if ((bucket & 1) == 0) - positions[idx] = ((pos << 4) | (positions[idx] & ((1 << 4) - 1))); - else - positions[idx] = (pos | (positions[idx] & (((1 << 4) - 1) << 4))); - } - - void incPosition(const size_t bucket) - { - setPosition(bucket, (getPosition(bucket) + 1) & pos_mask); - } - - static constexpr size_t bucket_size = 8; - static constexpr size_t pos_size = 3; - static constexpr size_t pos_mask = (1 << pos_size) - 1; - size_t buckets; - size_t bucket_mask; - - std::vector cells; - std::vector positions; - size_t sz = 0; -}; - using AttributeValueVariant = std::variant< UInt8, UInt16, @@ -316,7 +180,7 @@ private: int fd = -1; - mutable CLRUCache key_to_index; + mutable BucketCacheIndex key_to_index; Attribute keys_buffer; const std::vector attributes_structure; diff --git 
a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 5abf257042f..24579fe4943 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -182,7 +182,7 @@ SSDComplexKeyCachePartition::SSDComplexKeyCachePartition( , write_buffer_size(write_buffer_size_) , max_stored_keys(max_stored_keys_) , path(dir_path + "/" + std::to_string(file_id)) - , key_to_index(max_stored_keys, keys_pool) + , key_to_index(max_stored_keys, KeyDeleter(keys_pool)) , attributes_structure(attributes_structure_) { std::filesystem::create_directories(std::filesystem::path{dir_path}); @@ -363,7 +363,7 @@ size_t SSDComplexKeyCachePartition::append( if (!flushed) { - key_to_index.set(keys[index], cache_index); + key_to_index.setWithDelete(keys[index], cache_index); keys_buffer.push_back(keys_buffer_pool->copyKeyFrom(keys[index])); ++index; ++keys_in_block; @@ -447,7 +447,7 @@ void SSDComplexKeyCachePartition::flush() { Index index; Poco::Logger::get("get:").information("sz = " + std::to_string(keys_buffer[row].size())); - if (key_to_index.get(keys_buffer[row], index)) + if (key_to_index.getKeyAndValue(keys_buffer[row], index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. { @@ -910,8 +910,8 @@ PaddedPODArray SSDComplexKeyCachePartition::getCachedIds(const std::chro { std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. PaddedPODArray array; - for (const auto & [key, index] : key_to_index) - array.push_back(key); // TODO: exclude default + //for (const auto & [key, index] : key_to_index) + //array.push_back(key); // TODO: exclude default return array; } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index c00aed22d0d..9bec8a00252 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,11 @@ public: return getRef() == other.getRef(); } + inline bool operator!=(const KeyRef & other) const + { + return !(*this == other); + } + inline bool operator<(const KeyRef & other) const { return getRef() < other.getRef(); @@ -128,22 +134,7 @@ class ComplexKeysPoolImpl public: KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) { - if constexpr (std::is_same_v) - { - // not working now - const auto res = arena->alloc(); - auto place = res; - - for (const auto & key_column : key_columns) - { - const StringRef key = key_column->getDataAt(row); - memcpy(place, key.data, key.size); - place += key.size; - } - - return KeyRef(res); - } - else + if constexpr (!std::is_same_v) { const auto keys_size = key_columns.size(); UInt16 sum_keys_size{}; @@ -165,7 +156,6 @@ public: { if (!key_columns[j]->valuesHaveFixedSize()) // String { - //auto start = key_start; auto key_size = keys[j].size + 1; memcpy(key_start, &key_size, sizeof(size_t)); key_start += sizeof(size_t); @@ -173,26 +163,36 @@ public: key_start += keys[j].size; *key_start = '\0'; ++key_start; - //keys[j].data = start; - //keys[j].size += sizeof(size_t) + 1; } else { memcpy(key_start, keys[j].data, keys[j].size); - //keys[j].data = key_start; key_start += keys[j].size; } } return KeyRef(place); } + else + { + // not working now + const auto res = arena->alloc(); + auto place = res; + + for (const auto & key_column : key_columns) + { + const 
StringRef key = key_column->getDataAt(row); + memcpy(place, key.data, key.size); + place += key.size; + } + + return KeyRef(res); + } } KeyRef copyKeyFrom(const KeyRef & key) { - //Poco::Logger::get("test cpy").information("--- --- --- "); char * data = arena.alloc(key.fullSize()); - //Poco::Logger::get("test cpy").information("--- --- --- finish"); memcpy(data, key.fullData(), key.fullSize()); return KeyRef(data); } @@ -201,18 +201,14 @@ public: { if constexpr (std::is_same_v) arena.free(key.fullData(), key.fullSize()); - else if constexpr (std::is_same_v) - arena.free(key.fullData()); - //else - // throw Exception("Free not supported.", ErrorCodes::LOGICAL_ERROR); + /*else if constexpr (std::is_same_v) + arena.free(key.fullData());*/ } void rollback(const KeyRef & key) { if constexpr (std::is_same_v) arena.rollback(key.fullSize()); - //else - // throw Exception("Rollback not supported.", ErrorCodes::LOGICAL_ERROR); } void writeKey(const KeyRef & key, WriteBuffer & buf) @@ -249,7 +245,6 @@ private: using TemporalComplexKeysPool = ComplexKeysPoolImpl; using ComplexKeysPool = ComplexKeysPoolImpl; -//using FixedComplexKeysPool = ComplexKeysPoolImpl; template class ComplexKeyLRUCache @@ -343,6 +338,18 @@ private: std::mutex mutex; }; +struct KeyDeleter +{ + KeyDeleter(ComplexKeysPool & keys_pool_) : keys_pool(keys_pool_) {} + + void operator()(const KeyRef key) const + { + keys_pool.freeKey(key); + } + + ComplexKeysPool & keys_pool; +}; + class SSDComplexKeyCachePartition { public: @@ -487,14 +494,6 @@ private: void ignoreFromBufferToAttributeIndex(const size_t attribute_index, ReadBuffer & buf) const; - /*KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) const; - void freeKey(const KeyRef key) const; - - void writeKey(KeyRef key, WriteBuffer & buf); - template - void readKey(KeyRef & key, ArenaForKey & arena, ReadBuffer & buf); - void ignoreKey(ReadBuffer & buf);*/ - const size_t file_id; const size_t max_size; const size_t block_size; @@ -508,7 +507,7 @@ private: int fd = -1; ComplexKeysPool keys_pool; - mutable ComplexKeyLRUCache key_to_index; + mutable BucketCacheIndex, KeyDeleter> key_to_index; std::optional keys_buffer_pool; KeyRefs keys_buffer; @@ -518,7 +517,6 @@ private: std::optional> memory; std::optional write_buffer; uint32_t keys_in_block = 0; - //CompressionCodecPtr codec; size_t current_memory_block_id = 0; size_t current_file_block_id = 0; From 40823524a57ab18e26f25e679bb91f27d8ab5fc1 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 20:31:45 +0300 Subject: [PATCH 0117/2229] impr --- src/Dictionaries/BucketCache.h | 9 +- src/Dictionaries/SSDCacheDictionary.cpp | 27 +++--- src/Dictionaries/SSDCacheDictionary.h | 10 +- .../SSDComplexKeyCacheDictionary.cpp | 67 ++++++------- .../SSDComplexKeyCacheDictionary.h | 94 ++++++++----------- 5 files changed, 91 insertions(+), 116 deletions(-) diff --git a/src/Dictionaries/BucketCache.h b/src/Dictionaries/BucketCache.h index 4a381d887dc..7b5f56ba679 100644 --- a/src/Dictionaries/BucketCache.h +++ b/src/Dictionaries/BucketCache.h @@ -10,11 +10,11 @@ namespace DB namespace { - size_t nearestPowTwo(size_t x) { + size_t nearestPowTwo(size_t x) + { size_t r = 8; - while (x > r) { + while (x > r) r <<= 1; - } return r; } } @@ -32,7 +32,8 @@ struct Int64Hasher template class BucketCacheIndex { - struct Cell { + struct Cell + { K key; V index; }; diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 23e72326071..4b5a79f2a2b 100644 --- 
a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -60,7 +60,6 @@ namespace ErrorCodes extern const int CORRUPTED_DATA; extern const int FILE_DOESNT_EXIST; extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; } @@ -895,7 +894,7 @@ SSDCacheStorage::SSDCacheStorage( const AttributeTypes & attributes_structure_, const std::string & path_, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -903,7 +902,7 @@ SSDCacheStorage::SSDCacheStorage( : attributes_structure(attributes_structure_) , path(path_) , max_partitions_count(max_partitions_count_) - , partition_size(partition_size_) + , file_size(file_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) @@ -999,7 +998,7 @@ void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1086,7 +1085,7 @@ void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1258,7 +1257,7 @@ SSDCacheDictionary::SSDCacheDictionary( const DictionaryLifetime dict_lifetime_, const std::string & path_, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -1269,13 +1268,13 @@ SSDCacheDictionary::SSDCacheDictionary( , dict_lifetime(dict_lifetime_) , path(path_) , max_partitions_count(max_partitions_count_) - , partition_size(partition_size_) + , file_size(file_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) , max_stored_keys(max_stored_keys_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) + path, max_partitions_count, file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) , log(&Poco::Logger::get("SSDCacheDictionary")) { LOG_INFO(log, "Using storage path '" << path << "'."); @@ -1635,11 +1634,11 @@ void registerDictionarySSDCache(DictionaryFactory & factory) if (block_size <= 0) throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto partition_size = config.getInt64(layout_prefix + ".ssd_cache.partition_size", DEFAULT_FILE_SIZE); - if (partition_size <= 0) - throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; - if (partition_size % block_size != 0) - throw Exception{name + ": partition_size must be a 
multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; + const auto file_size = config.getInt64(layout_prefix + ".ssd_cache.file_size", DEFAULT_FILE_SIZE); + if (file_size <= 0) + throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) file_size", ErrorCodes::BAD_ARGUMENTS}; + if (file_size % block_size != 0) + throw Exception{name + ": file_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; const auto read_buffer_size = config.getInt64(layout_prefix + ".ssd_cache.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); if (read_buffer_size <= 0) @@ -1667,7 +1666,7 @@ void registerDictionarySSDCache(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( name, dict_struct, std::move(source_ptr), dict_lifetime, path, - max_partitions_count, partition_size / block_size, block_size, + max_partitions_count, file_size / block_size, block_size, read_buffer_size / block_size, write_buffer_size / block_size, max_stored_keys); }; diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 70cecece9b2..b209391cafa 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -207,7 +207,7 @@ public: const AttributeTypes & attributes_structure, const std::string & path, const size_t max_partitions_count, - const size_t partition_size, + const size_t file_size, const size_t block_size, const size_t read_buffer_size, const size_t write_buffer_size, @@ -259,7 +259,7 @@ private: const std::string path; const size_t max_partitions_count; - const size_t partition_size; + const size_t file_size; const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; @@ -295,7 +295,7 @@ public: const DictionaryLifetime dict_lifetime_, const std::string & path, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -325,7 +325,7 @@ public: std::shared_ptr clone() const override { return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, - max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); + max_partitions_count, file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -441,7 +441,7 @@ private: const std::string path; const size_t max_partitions_count; - const size_t partition_size; + const size_t file_size; const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 24579fe4943..748b7f6c445 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -287,7 +287,7 @@ size_t SSDComplexKeyCachePartition::append( for (size_t index = begin; index < keys.size();) { - Poco::Logger::get("test").information("wb off: " + std::to_string(write_buffer->offset())); + //Poco::Logger::get("test").information("wb off: " + std::to_string(write_buffer->offset())); Index cache_index; cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); @@ -304,7 +304,7 @@ size_t SSDComplexKeyCachePartition::append( writeBinary(metadata[index].data, *write_buffer); } - Poco::Logger::get("test 
key").information("wb off: " + std::to_string(write_buffer->offset())); + //Poco::Logger::get("test key").information("wb off: " + std::to_string(write_buffer->offset())); for (const auto & attribute : new_attributes) { @@ -372,7 +372,7 @@ size_t SSDComplexKeyCachePartition::append( { init_write_buffer(); } - Poco::Logger::get("test final").information("wb off: " + std::to_string(write_buffer->offset())); + //Poco::Logger::get("test final").information("wb off: " + std::to_string(write_buffer->offset())); } return keys.size() - begin; } @@ -385,7 +385,7 @@ void SSDComplexKeyCachePartition::flush() if (keys_buffer.empty()) return; - Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); + //Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); AIOContext aio_context{1}; @@ -406,7 +406,7 @@ void SSDComplexKeyCachePartition::flush() write_request.aio_offset = (current_file_block_id % max_size) * block_size; #endif - Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); + //Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0) { @@ -446,7 +446,7 @@ void SSDComplexKeyCachePartition::flush() for (size_t row = 0; row < keys_buffer.size(); ++row) { Index index; - Poco::Logger::get("get:").information("sz = " + std::to_string(keys_buffer[row].size())); + //Poco::Logger::get("get:").information("sz = " + std::to_string(keys_buffer[row].size())); if (key_to_index.getKeyAndValue(keys_buffer[row], index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
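Several hunks in this commit lean on the packed index entry these partitions use throughout: append() stamps setInMemory(true), a memory block id and a byte offset when a row lands in the RAM write buffer, and flush() rewrites only entries that still report inMemory(), so a key appended twice keeps just its latest location. The patches define KEY_IN_MEMORY_BIT = 63 and BLOCK_INDEX_BITS = 32; the self-contained sketch below reproduces that flag-plus-fields packing in a single 64-bit word, with a 16-bit in-block address width assumed purely for illustration (the real field widths live in SSDCacheDictionary.cpp):

#include <cassert>
#include <cstdint>

// Sketch of a packed cache-index entry. Bit 63 and the 32-bit block id width
// come from the patches; the 16-bit address width is an assumption of this demo.
struct IndexSketch
{
    static constexpr uint64_t in_memory_bit = 63;
    static constexpr uint64_t block_bits = 32;
    static constexpr uint64_t addr_bits = 16;                        // assumed
    static constexpr uint64_t addr_mask = (1ULL << addr_bits) - 1;
    static constexpr uint64_t block_mask = ((1ULL << block_bits) - 1) << addr_bits;

    uint64_t data = 0;

    bool inMemory() const { return (data >> in_memory_bit) & 1; }
    void setInMemory(bool v)
    {
        data = v ? (data | (1ULL << in_memory_bit))
                 : (data & ~(1ULL << in_memory_bit));
    }

    uint64_t getBlockId() const { return (data & block_mask) >> addr_bits; }
    void setBlockId(uint64_t id) { data = (data & ~block_mask) | (id << addr_bits); }

    uint64_t getAddressInBlock() const { return data & addr_mask; }
    void setAddressInBlock(uint64_t off) { data = (data & ~addr_mask) | (off & addr_mask); }
};

int main()
{
    IndexSketch idx;
    idx.setInMemory(true);      // row is still in the write buffer
    idx.setBlockId(3);          // id of the memory block holding it
    idx.setAddressInBlock(96);  // offset of the row inside that block
    assert(idx.inMemory() && idx.getBlockId() == 3 && idx.getAddressInBlock() == 96);

    // What flush() does conceptually: repoint the entry at its on-disk block.
    // Entries already pointing at disk are left alone, which is exactly the
    // inMemory() check in the hunk above.
    idx.setInMemory(false);
    idx.setBlockId(7);
    assert(!idx.inMemory() && idx.getBlockId() == 7 && idx.getAddressInBlock() == 96);
    return 0;
}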
@@ -456,7 +456,7 @@ void SSDComplexKeyCachePartition::flush() } key_to_index.set(keys_buffer[row], index); } - Poco::Logger::get("get:").information("finish"); + //Poco::Logger::get("get:").information("finish"); } current_file_block_id += write_buffer_size; @@ -714,7 +714,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray erasing keys <"); + //Poco::Logger::get("ClearOldestBlocks").information("> erasing keys <"); for (const auto& key : keys) { - Poco::Logger::get("ClearOldestBlocks").information("ktest: null=" + std::to_string(key.isNull())); - Poco::Logger::get("ClearOldestBlocks").information("ktest: data=" + std::to_string(reinterpret_cast(key.fullData()))); - Poco::Logger::get("ClearOldestBlocks").information("ktest: sz=" + std::to_string(key.size()) + " fz=" + std::to_string(key.fullSize())); + //Poco::Logger::get("ClearOldestBlocks").information("ktest: null=" + std::to_string(key.isNull())); + //Poco::Logger::get("ClearOldestBlocks").information("ktest: data=" + std::to_string(reinterpret_cast(key.fullData()))); + //Poco::Logger::get("ClearOldestBlocks").information("ktest: sz=" + std::to_string(key.size()) + " fz=" + std::to_string(key.fullSize())); Index index; if (key_to_index.get(key, index)) { - Poco::Logger::get("ClearOldestBlocks").information("erase"); + //Poco::Logger::get("ClearOldestBlocks").information("erase"); size_t block_id = index.getBlockId(); if (start_block <= block_id && block_id < finish_block) key_to_index.erase(key); } - Poco::Logger::get("ClearOldestBlocks").information("finish"); + //Poco::Logger::get("ClearOldestBlocks").information("finish"); } } @@ -908,11 +908,7 @@ size_t SSDComplexKeyCachePartition::getElementCount() const PaddedPODArray SSDComplexKeyCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { - std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. - PaddedPODArray array; - //for (const auto & [key, index] : key_to_index) - //array.push_back(key); // TODO: exclude default - return array; + throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); } void SSDComplexKeyCachePartition::remove() @@ -925,7 +921,7 @@ SSDComplexKeyCacheStorage::SSDComplexKeyCacheStorage( const AttributeTypes & attributes_structure_, const std::string & path_, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -933,7 +929,7 @@ SSDComplexKeyCacheStorage::SSDComplexKeyCacheStorage( : attributes_structure(attributes_structure_) , path(path_) , max_partitions_count(max_partitions_count_) - , partition_size(partition_size_) + , file_size(file_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) @@ -1073,7 +1069,7 @@ void SSDComplexKeyCacheStorage::update( partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 
0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1119,7 +1115,6 @@ void SSDComplexKeyCacheStorage::update( const auto new_attributes = createAttributesFromBlock(block, keys_size, attributes_structure); const auto rows_num = block.rows(); - PaddedPODArray metadata(rows_num); for (const auto i : ext::range(0, rows_num)) @@ -1172,7 +1167,7 @@ void SSDComplexKeyCacheStorage::update( partitions.emplace_front(std::make_unique( AttributeUnderlyingType::utUInt64, attributes_structure, path, (partitions.empty() ? 0 : partitions.front()->getId() + 1), - partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); + file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys)); } } @@ -1330,7 +1325,7 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( const DictionaryLifetime dict_lifetime_, const std::string & path_, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -1341,13 +1336,13 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( , dict_lifetime(dict_lifetime_) , path(path_) , max_partitions_count(max_partitions_count_) - , partition_size(partition_size_) + , file_size(file_size_) , block_size(block_size_) , read_buffer_size(read_buffer_size_) , write_buffer_size(write_buffer_size_) , max_stored_keys(max_stored_keys_) , storage(ext::map(dict_struct.attributes, [](const auto & attribute) { return attribute.underlying_type; }), - path, max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) + path, max_partitions_count, file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys) , log(&Poco::Logger::get("SSDComplexKeyCacheDictionary")) { LOG_INFO(log, "Using storage path '" << path << "'."); @@ -1770,11 +1765,11 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) if (block_size <= 0) throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) block_size", ErrorCodes::BAD_ARGUMENTS}; - const auto partition_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.partition_size", DEFAULT_FILE_SIZE); - if (partition_size <= 0) - throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) partition_size", ErrorCodes::BAD_ARGUMENTS}; - if (partition_size % block_size != 0) - throw Exception{name + ": partition_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; + const auto file_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.file_size", DEFAULT_FILE_SIZE); + if (file_size <= 0) + throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) file_size", ErrorCodes::BAD_ARGUMENTS}; + if (file_size % block_size != 0) + throw Exception{name + ": file_size must be a multiple of block_size", ErrorCodes::BAD_ARGUMENTS}; const auto read_buffer_size = config.getInt64(layout_prefix + ".complex_key_ssd_cache.read_buffer_size", DEFAULT_READ_BUFFER_SIZE); if (read_buffer_size <= 0) @@ -1802,7 +1797,7 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; return std::make_unique( name, dict_struct, std::move(source_ptr), dict_lifetime, 
path, - max_partitions_count, partition_size / block_size, block_size, + max_partitions_count, file_size / block_size, block_size, read_buffer_size / block_size, write_buffer_size / block_size, max_stored_keys); }; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 9bec8a00252..2d9409d2053 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -134,60 +134,42 @@ class ComplexKeysPoolImpl public: KeyRef allocKey(const size_t row, const Columns & key_columns, StringRefs & keys) { - if constexpr (!std::is_same_v) + const auto keys_size = key_columns.size(); + UInt16 sum_keys_size{}; + + for (size_t j = 0; j < keys_size; ++j) { - const auto keys_size = key_columns.size(); - UInt16 sum_keys_size{}; - - for (size_t j = 0; j < keys_size; ++j) - { - keys[j] = key_columns[j]->getDataAt(row); - sum_keys_size += keys[j].size; - if (!key_columns[j]->valuesHaveFixedSize()) // String - sum_keys_size += sizeof(size_t) + 1; - } - - auto place = arena.alloc(sum_keys_size + sizeof(sum_keys_size)); - - auto key_start = place; - memcpy(key_start, &sum_keys_size, sizeof(sum_keys_size)); - key_start += sizeof(sum_keys_size); - for (size_t j = 0; j < keys_size; ++j) - { - if (!key_columns[j]->valuesHaveFixedSize()) // String - { - auto key_size = keys[j].size + 1; - memcpy(key_start, &key_size, sizeof(size_t)); - key_start += sizeof(size_t); - memcpy(key_start, keys[j].data, keys[j].size); - key_start += keys[j].size; - *key_start = '\0'; - ++key_start; - } - else - { - memcpy(key_start, keys[j].data, keys[j].size); - key_start += keys[j].size; - } - } - - return KeyRef(place); + keys[j] = key_columns[j]->getDataAt(row); + sum_keys_size += keys[j].size; + if (!key_columns[j]->valuesHaveFixedSize()) // String + sum_keys_size += sizeof(size_t) + 1; } - else + + auto place = arena.alloc(sum_keys_size + sizeof(sum_keys_size)); + + auto key_start = place; + memcpy(key_start, &sum_keys_size, sizeof(sum_keys_size)); + key_start += sizeof(sum_keys_size); + for (size_t j = 0; j < keys_size; ++j) { - // not working now - const auto res = arena->alloc(); - auto place = res; - - for (const auto & key_column : key_columns) + if (!key_columns[j]->valuesHaveFixedSize()) // String { - const StringRef key = key_column->getDataAt(row); - memcpy(place, key.data, key.size); - place += key.size; + auto key_size = keys[j].size + 1; + memcpy(key_start, &key_size, sizeof(size_t)); + key_start += sizeof(size_t); + memcpy(key_start, keys[j].data, keys[j].size); + key_start += keys[j].size; + *key_start = '\0'; + ++key_start; + } + else + { + memcpy(key_start, keys[j].data, keys[j].size); + key_start += keys[j].size; } - - return KeyRef(res); } + + return KeyRef(place); } KeyRef copyKeyFrom(const KeyRef & key) @@ -201,8 +183,6 @@ public: { if constexpr (std::is_same_v) arena.free(key.fullData(), key.fullSize()); - /*else if constexpr (std::is_same_v) - arena.free(key.fullData());*/ } void rollback(const KeyRef & key) @@ -220,7 +200,7 @@ public: { UInt16 sz; readBinary(sz, buf); - Poco::Logger::get("test read key").information("sz " + std::to_string(sz)); + //Poco::Logger::get("test read key").information("sz " + std::to_string(sz)); char * data = nullptr; if constexpr (std::is_same_v) data = arena.alloc(); @@ -229,7 +209,7 @@ public: memcpy(data, &sz, sizeof(sz)); buf.read(data + sizeof(sz), sz); key = KeyRef(data); - Poco::Logger::get("test read key").information("ksz = " + std::to_string(key.size())); + 
//Poco::Logger::get("test read key").information("ksz = " + std::to_string(key.size())); } void ignoreKey(ReadBuffer & buf) const @@ -534,7 +514,7 @@ public: const AttributeTypes & attributes_structure, const std::string & path, const size_t max_partitions_count, - const size_t partition_size, + const size_t file_size, const size_t block_size, const size_t read_buffer_size, const size_t write_buffer_size, @@ -592,7 +572,7 @@ private: const std::string path; const size_t max_partitions_count; - const size_t partition_size; + const size_t file_size; const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; @@ -628,7 +608,7 @@ public: const DictionaryLifetime dict_lifetime_, const std::string & path, const size_t max_partitions_count_, - const size_t partition_size_, + const size_t file_size_, const size_t block_size_, const size_t read_buffer_size_, const size_t write_buffer_size_, @@ -660,7 +640,7 @@ public: std::shared_ptr clone() const override { return std::make_shared(name, dict_struct, source_ptr->clone(), dict_lifetime, path, - max_partitions_count, partition_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); + max_partitions_count, file_size, block_size, read_buffer_size, write_buffer_size, max_stored_keys); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -791,7 +771,7 @@ private: const std::string path; const size_t max_partitions_count; - const size_t partition_size; + const size_t file_size; const size_t block_size; const size_t read_buffer_size; const size_t write_buffer_size; From 607981a4581ed9c76e6fcd5fdc4d71cb134afd04 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 20:32:03 +0300 Subject: [PATCH 0118/2229] impr test --- tests/queries/0_stateless/01053_ssd_dictionary.sql | 6 +++--- .../0_stateless/01280_ssd_complex_key_dictionary.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01053_ssd_dictionary.sql b/tests/queries/0_stateless/01053_ssd_dictionary.sql index 97773528dcb..416d26bd637 100644 --- a/tests/queries/0_stateless/01053_ssd_dictionary.sql +++ b/tests/queries/0_stateless/01053_ssd_dictionary.sql @@ -33,7 +33,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); +LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT dictGetInt32('database_for_dict.ssd_dict', 'b', toUInt64(1)); @@ -74,7 +74,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); +LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); SELECT 'UPDATE DICTIONARY'; -- 118 @@ -140,7 +140,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY id SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(SSD_CACHE(PARTITION_SIZE 8192 PATH 
'/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10)); +LAYOUT(SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/2d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 1024 MAX_STORED_KEYS 10)); SELECT 'UPDATE DICTIONARY (MT)'; -- 118 diff --git a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql index 411a0a21ea3..952a8c2ff55 100644 --- a/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql +++ b/tests/queries/0_stateless/01280_ssd_complex_key_dictionary.sql @@ -35,7 +35,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(COMPLEX_KEY_SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); +LAYOUT(COMPLEX_KEY_SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/0d')); SELECT 'TEST_SMALL'; SELECT 'VALUE FROM RAM BUFFER'; @@ -92,7 +92,7 @@ CREATE DICTIONARY database_for_dict.ssd_dict PRIMARY KEY k1, k2 SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict')) LIFETIME(MIN 1000 MAX 2000) -LAYOUT(COMPLEX_KEY_SSD_CACHE(PARTITION_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); +LAYOUT(COMPLEX_KEY_SSD_CACHE(FILE_SIZE 8192 PATH '/var/lib/clickhouse/clickhouse_dicts/1d' BLOCK_SIZE 512 WRITE_BUFFER_SIZE 4096 MAX_STORED_KEYS 1000000)); SELECT 'UPDATE DICTIONARY'; -- 118 From cf93fa9cc33b3576c85e174a43c82e504b1498e0 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 21:01:23 +0300 Subject: [PATCH 0119/2229] bytes allocated --- src/Dictionaries/BucketCache.h | 17 ++-- src/Dictionaries/SSDCacheDictionary.cpp | 17 +++- src/Dictionaries/SSDCacheDictionary.h | 9 +- .../SSDComplexKeyCacheDictionary.cpp | 7 ++ .../SSDComplexKeyCacheDictionary.h | 99 ++----------------- 5 files changed, 45 insertions(+), 104 deletions(-) diff --git a/src/Dictionaries/BucketCache.h b/src/Dictionaries/BucketCache.h index 7b5f56ba679..9e0e83bf192 100644 --- a/src/Dictionaries/BucketCache.h +++ b/src/Dictionaries/BucketCache.h @@ -99,7 +99,7 @@ public: cells[idx].index = val; } - bool get(K key, V & val) + bool get(K key, V & val) const { const size_t bucket = (hash(key) & bucket_mask); const size_t idx = getCellIndex(key, bucket); @@ -109,7 +109,7 @@ public: return true; } - bool getKeyAndValue(K & key, V & val) + bool getKeyAndValue(K & key, V & val) const { const size_t bucket = (hash(key) & bucket_mask); const size_t idx = getCellIndex(key, bucket); @@ -135,12 +135,17 @@ public: return true; } - size_t size() + size_t size() const { return sz; } - auto keys() + size_t capacity() const + { + return cells.size(); + } + + auto keys() const { std::vector res; for (const auto & cell : cells) @@ -154,7 +159,7 @@ public: } private: - size_t getCellIndex(const K key, const size_t bucket) + size_t getCellIndex(const K key, const size_t bucket) const { const size_t pos = getPosition(bucket); for (size_t idx = 0; idx < bucket_size; ++idx) @@ -170,7 +175,7 @@ private: return bucket * bucket_size + pos; } - size_t getPosition(const size_t bucket) + size_t getPosition(const size_t bucket) const { const size_t idx = (bucket >> 1); if ((bucket & 1) == 0) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp 
index 4b5a79f2a2b..210463264bf 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -81,8 +81,6 @@ namespace constexpr UInt64 KEY_METADATA_EXPIRES_AT_MASK = std::numeric_limits::max(); constexpr UInt64 KEY_METADATA_IS_DEFAULT_MASK = ~KEY_METADATA_EXPIRES_AT_MASK; - //constexpr size_t KEY_RECENTLY_USED_BIT = 63; - //constexpr size_t KEY_RECENTLY_USED = (1ULL << KEY_RECENTLY_USED_BIT); constexpr size_t KEY_IN_MEMORY_BIT = 63; constexpr size_t KEY_IN_MEMORY = (1ULL << KEY_IN_MEMORY_BIT); constexpr size_t BLOCK_INDEX_BITS = 32; @@ -875,6 +873,12 @@ size_t SSDCachePartition::getElementCount() const return key_to_index.size(); } +size_t SSDCachePartition::getBytesAllocated() const +{ + std::shared_lock lock(rw_lock); + return 16.5 * key_to_index.capacity() + (memory ? memory->size() : 0); +} + PaddedPODArray SSDCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. @@ -1176,6 +1180,15 @@ size_t SSDCacheStorage::getElementCount() const return result; } +size_t SSDCacheStorage::getBytesAllocated() const +{ + size_t result = 0; + std::shared_lock lock(rw_lock); + for (const auto & partition : partitions) + result += partition->getBytesAllocated(); + return result; +} + void SSDCacheStorage::collectGarbage() { // add partitions to queue diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index b209391cafa..09c5f79da0d 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -156,6 +156,8 @@ public: size_t getElementCount() const; + size_t getBytesAllocated() const; + private: template void getImpl(const PaddedPODArray & ids, SetFunc & set, std::vector & found) const; @@ -249,6 +251,8 @@ public: double getLoadFactor() const; + size_t getBytesAllocated() const; + private: SSDCachePartition::Attributes createAttributesFromBlock( const Block & block, const size_t begin_column, const std::vector & structure); @@ -277,9 +281,6 @@ private: mutable size_t update_error_count = 0; mutable std::chrono::system_clock::time_point backoff_end_time; - // stats - //mutable size_t bytes_allocated = 0; - mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; }; @@ -307,7 +308,7 @@ public: std::string getTypeName() const override { return "SSDCache"; } - size_t getBytesAllocated() const override { return 0; } // TODO: ? + size_t getBytesAllocated() const override { return storage.getBytesAllocated(); } size_t getQueryCount() const override { return storage.getQueryCount(); } diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 748b7f6c445..0a97c59f524 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -906,6 +906,13 @@ size_t SSDComplexKeyCachePartition::getElementCount() const return key_to_index.size(); } +size_t SSDComplexKeyCachePartition::getBytesAllocated() const +{ + std::shared_lock lock(rw_lock); + return 16.5 * key_to_index.capacity() + keys_pool.size() + + (keys_buffer_pool ? keys_buffer_pool->size() : 0) + (memory ? 
memory->size() : 0); +} + PaddedPODArray SSDComplexKeyCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const { throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 2d9409d2053..b6717d16f65 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -219,6 +219,11 @@ public: buf.ignore(sz); } + size_t size() const + { + return arena.size(); + } + private: A arena; }; @@ -226,98 +231,6 @@ private: using TemporalComplexKeysPool = ComplexKeysPoolImpl; using ComplexKeysPool = ComplexKeysPoolImpl; -template -class ComplexKeyLRUCache -{ - using Iter = typename std::list::iterator; - - struct Cell - { - Iter iter; - V val; - }; - -public: - ComplexKeyLRUCache(size_t max_size_, Pool & keys_pool_) - : max_size(max_size_) - , keys_pool(keys_pool_) - { - } - - void set(K key, V val) - { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) - { - auto & item = cache[key]; - item.iter = queue.insert(std::end(queue), key); - item.val = val; - if (queue.size() > max_size) - { - keys_pool.freeKey(queue.front()); - cache.erase(queue.front()); - queue.pop_front(); - } - } - else - { - queue.erase(it->second.iter); - it->second.iter = queue.insert(std::end(queue), it->first); - it->second.val = val; - } - } - - bool get(K key, V & val) - { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) - return false; - val = it->second.val; - queue.erase(it->second.iter); - it->second.iter = queue.insert(std::end(queue), key); - return true; - } - - bool erase(K key) - { - std::lock_guard lock(mutex); - auto it = cache.find(key); - if (it == std::end(cache)) - return false; - keys_pool.freeKey(it->first); - queue.erase(it->second.iter); - cache.erase(it); - return true; - } - - size_t size() - { - std::lock_guard lock(mutex); - return cache.size(); - } - - auto begin() - { - std::lock_guard lock(mutex); - return std::begin(cache); - } - - auto end() - { - std::lock_guard lock(mutex); - return std::end(cache); - } - -private: - std::unordered_map cache; - std::list queue; - size_t max_size; - Pool & keys_pool; - std::mutex mutex; -}; - struct KeyDeleter { KeyDeleter(ComplexKeysPool & keys_pool_) : keys_pool(keys_pool_) {} @@ -455,6 +368,8 @@ public: size_t getElementCount() const; + size_t getBytesAllocated() const; + private: size_t append( const KeyRefs & keys, From 5e9cb4060fc0e6f6c086a418457200ffe77a8e86 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 10 May 2020 22:58:11 +0300 Subject: [PATCH 0120/2229] fix --- src/Dictionaries/BucketCache.h | 2 +- .../ExecutableDictionarySource.cpp | 1 - src/Dictionaries/SSDCacheDictionary.cpp | 130 +++++++++--------- src/Dictionaries/SSDCacheDictionary.h | 3 - 4 files changed, 67 insertions(+), 69 deletions(-) diff --git a/src/Dictionaries/BucketCache.h b/src/Dictionaries/BucketCache.h index 9e0e83bf192..9a231c7a1b5 100644 --- a/src/Dictionaries/BucketCache.h +++ b/src/Dictionaries/BucketCache.h @@ -10,7 +10,7 @@ namespace DB namespace { - size_t nearestPowTwo(size_t x) + inline size_t nearestPowTwo(size_t x) { size_t r = 8; while (x > r) diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index f9d1a426c8e..34943d62b44 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ 
b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -228,7 +228,6 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory) /// Executable dictionaries may execute arbitrary commands. /// It's OK for dictionaries created by administrator from xml-file, but /// maybe dangerous for dictionaries created from DDL-queries. - check_config = false; if (check_config) throw Exception("Dictionaries with Executable dictionary source is not allowed", ErrorCodes::DICTIONARY_ACCESS_DENIED); diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 210463264bf..e3053e18e8e 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -297,7 +297,7 @@ size_t SSDCachePartition::appendBlock( } \ else \ { \ - const auto & values = std::get>(attribute.values); \ + const auto & values = std::get>(attribute.values); /* NOLINT */ \ writeBinary(values[index], *write_buffer); \ } \ } \ @@ -764,10 +764,9 @@ void SSDCachePartition::clearOldestBlocks() if (!metadata.isDefault()) { - for (size_t attr = 0; attr < attributes_structure.size(); ++attr) + for (const auto & attribute : attributes_structure) { - - switch (attributes_structure[attr]) + switch (attribute) { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ @@ -984,6 +983,67 @@ void SSDCacheStorage::has(const PaddedPODArray & ids, ResultArrayType & structure) +{ + SSDCachePartition::Attributes attributes; + + const auto columns = block.getColumns(); + for (size_t i = 0; i < structure.size(); ++i) + { + const auto & column = columns[i + begin_column]; + switch (structure[i]) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + SSDCachePartition::Attribute::Container values(column->size()); \ + memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ + attributes.emplace_back(); \ + attributes.back().type = structure[i]; \ + attributes.back().values = std::move(values); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + { + attributes.emplace_back(); + SSDCachePartition::Attribute::Container values(column->size()); + for (size_t j = 0; j < column->size(); ++j) + { + const auto ref = column->getDataAt(j); + values[j].resize(ref.size); + memcpy(values[j].data(), ref.data, ref.size); + } + attributes.back().type = structure[i]; + attributes.back().values = std::move(values); + } + break; + } + } + + return attributes; +} +} + template void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector & requested_ids, PresentIdHandler && on_updated, AbsentIdHandler && on_id_not_found, @@ -1205,64 +1265,6 @@ void SSDCacheStorage::collectGarbage() } } -SSDCachePartition::Attributes SSDCacheStorage::createAttributesFromBlock( - const Block & block, const size_t begin_column, const std::vector & structure) -{ - SSDCachePartition::Attributes attributes; - - const auto columns = block.getColumns(); - for (size_t i = 0; i < structure.size(); ++i) - { - const auto & column = columns[i + begin_column]; - switch (structure[i]) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - SSDCachePartition::Attribute::Container values(column->size()); \ - memcpy(&values[0], 
column->getRawData().data, sizeof(TYPE) * values.size()); \ - attributes.emplace_back(); \ - attributes.back().type = structure[i]; \ - attributes.back().values = std::move(values); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - { - attributes.emplace_back(); - SSDCachePartition::Attribute::Container values(column->size()); - for (size_t j = 0; j < column->size(); ++j) - { - const auto ref = column->getDataAt(j); - values[j].resize(ref.size); - memcpy(values[j].data(), ref.data, ref.size); - } - attributes.back().type = structure[i]; - attributes.back().values = std::move(values); - } - break; - } - } - - return attributes; -} - SSDCacheDictionary::SSDCacheDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, @@ -1303,8 +1305,8 @@ SSDCacheDictionary::SSDCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ - const auto null_value = std::get(null_values[index]); \ - getItemsNumberImpl( \ + const auto null_value = std::get(null_values[index]); /* NOLINT */ \ + getItemsNumberImpl( /* NOLINT */ \ index, \ ids, \ out, \ diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 09c5f79da0d..6352d3a2522 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -254,9 +254,6 @@ public: size_t getBytesAllocated() const; private: - SSDCachePartition::Attributes createAttributesFromBlock( - const Block & block, const size_t begin_column, const std::vector & structure); - void collectGarbage(); const AttributeTypes attributes_structure; From c26144968ad6202d6003e859bda32ed143dbeac9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 11 May 2020 17:44:46 +0300 Subject: [PATCH 0121/2229] fix --- src/Dictionaries/BucketCache.h | 2 +- src/Dictionaries/SSDCacheDictionary.cpp | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Dictionaries/BucketCache.h b/src/Dictionaries/BucketCache.h index 9a231c7a1b5..262aef21019 100644 --- a/src/Dictionaries/BucketCache.h +++ b/src/Dictionaries/BucketCache.h @@ -162,7 +162,7 @@ private: size_t getCellIndex(const K key, const size_t bucket) const { const size_t pos = getPosition(bucket); - for (size_t idx = 0; idx < bucket_size; ++idx) + for (int idx = 7; idx >= 0; --idx) { const size_t cur = ((pos + 1 + idx) & pos_mask); if (cells[bucket * bucket_size + cur].index.exists() && diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index e3053e18e8e..a065b367101 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -418,10 +418,10 @@ void SSDCachePartition::flush() throwFromErrnoWithPath("Cannot fsync " + path + BIN_FILE_EXT, path + BIN_FILE_EXT, ErrorCodes::CANNOT_FSYNC); /// commit changes in index - for (size_t row = 0; row < ids.size(); ++row) + for (const auto & id : ids) { Index index; - if (key_to_index.get(ids[row], index)) + if (key_to_index.get(id, index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
{ @@ -429,7 +429,7 @@ void SSDCachePartition::flush() // Poco::Logger::get("pt").information("block: " + std::to_string(index.getBlockId()) + " " + std::to_string(current_file_block_id) + " "); index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); } - key_to_index.set(ids[row], index); + key_to_index.set(id, index); } } @@ -1307,10 +1307,10 @@ SSDCacheDictionary::SSDCacheDictionary( checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ const auto null_value = std::get(null_values[index]); /* NOLINT */ \ getItemsNumberImpl( /* NOLINT */ \ - index, \ - ids, \ - out, \ - [&](const size_t) { return null_value; }); \ + index, /* NOLINT */ \ + ids, /* NOLINT */ \ + out, /* NOLINT */ \ + [&](const size_t) { return null_value; }); /* NOLINT */ \ } DECLARE(UInt8) From 4739b87732449af62b8b7dff26bc8103075d859d Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Thu, 14 May 2020 05:14:50 +0300 Subject: [PATCH 0122/2229] Add -Distinct combinator --- .../AggregateFunctionDistinct.cpp | 53 +++++++++ .../AggregateFunctionDistinct.h | 108 ++++++++++++++++++ .../registerAggregateFunctions.cpp | 1 + .../registerAggregateFunctions.h | 1 + 4 files changed, 163 insertions(+) create mode 100644 src/AggregateFunctions/AggregateFunctionDistinct.cpp create mode 100644 src/AggregateFunctions/AggregateFunctionDistinct.h diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp new file mode 100644 index 00000000000..d477a04568f --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include "registerAggregateFunctions.h" + +namespace DB +{ + + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + } + + class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator + { + public: + String getName() const override { return "Distinct"; } + + DataTypes transformArguments(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + +// return DataTypes(arguments.begin(), std::prev(arguments.end())); + DataTypes nested_arguments; + for (const auto & type : arguments) + { + nested_arguments.push_back(type); +// if (const DataTypeArray * array = typeid_cast(type.get())) +// nested_arguments.push_back(array->getNestedType()); +// else +// throw Exception("Illegal type " + type->getName() + " of argument" +// " for aggregate function with " + getName() + " suffix. 
Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + return nested_arguments; + } + + AggregateFunctionPtr transformAggregateFunction( + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + { + return std::make_shared(nested_function, arguments); + } + }; + + void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory) + { + factory.registerCombinator(std::make_shared()); + } + +} diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h new file mode 100644 index 00000000000..160e113d23b --- /dev/null +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -0,0 +1,108 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Adaptor for aggregate functions. + * Adding -Distinct suffix to aggregate function +**/ + +class AggregateFunctionDistinct final : public IAggregateFunctionHelper { +private: + mutable std::mutex mutex; + AggregateFunctionPtr nested_func; + mutable HashSet< + UInt128, + UInt128TrivialHash, + HashTableGrower<3>, + HashTableAllocatorWithStackMemory> storage; + +public: + AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments) + : IAggregateFunctionHelper(arguments, {}) + , nested_func(nested) + { + if (arguments.empty()) + throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + + String getName() const override { + return nested_func->getName() + "Distinct"; + } + + DataTypePtr getReturnType() const override { + return nested_func->getReturnType(); + } + + void create(AggregateDataPtr place) const override + { + nested_func->create(place); + } + + void destroy(AggregateDataPtr place) const noexcept override { + nested_func->destroy(place); + } + + size_t sizeOfData() const override + { + return nested_func->sizeOfData(); + } + + size_t alignOfData() const override + { + return nested_func->alignOfData(); + } + + bool hasTrivialDestructor() const override { + return nested_func->hasTrivialDestructor(); + } + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + UInt128 key; + SipHash hash; + columns[0]->updateHashWithValue(row_num, hash); + hash.get128(key.low, key.high); + { + std::lock_guard lock(mutex); + if (!storage.insert(key).second) { + return; + } + } + nested_func->add(place, columns, row_num, arena); + } + + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { + nested_func->merge(place, rhs, arena); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { + nested_func->serialize(place, buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { + nested_func->deserialize(place, buf, arena); + } + + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { + nested_func->insertResultInto(place, to); + } + + bool allocatesMemoryInArena() const override { + return nested_func->allocatesMemoryInArena(); + } +}; + +} diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp index a9ab1d4f8ea..a8d0cf6e37c 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.cpp +++ 
b/src/AggregateFunctions/registerAggregateFunctions.cpp @@ -58,6 +58,7 @@ void registerAggregateFunctions() registerAggregateFunctionCombinatorNull(factory); registerAggregateFunctionCombinatorOrFill(factory); registerAggregateFunctionCombinatorResample(factory); + registerAggregateFunctionCombinatorDistinct(factory); } } diff --git a/src/AggregateFunctions/registerAggregateFunctions.h b/src/AggregateFunctions/registerAggregateFunctions.h index 88cdf4a504d..981273141f9 100644 --- a/src/AggregateFunctions/registerAggregateFunctions.h +++ b/src/AggregateFunctions/registerAggregateFunctions.h @@ -45,6 +45,7 @@ void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory &); void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &); +void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &); void registerAggregateFunctions(); From 6e2b93e5af00317f9612fbc9535cd6c8e00a5406 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Thu, 14 May 2020 22:37:53 +0300 Subject: [PATCH 0123/2229] Stylefix --- .../AggregateFunctionDistinct.h | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 160e113d23b..bab78aa88bf 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -11,24 +11,35 @@ namespace DB { -namespace ErrorCodes -{ +namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +struct AggregateFunctionDistinctData { + using Key = UInt128; + + HashSet< + Key, + UInt128TrivialHash, + HashTableGrower<3>, + HashTableAllocatorWithStackMemory + > data; + std::mutex mutex; + + bool ALWAYS_INLINE TryToInsert(const Key& key) { + std::lock_guard lock(mutex); + return data.insert(key).second; + } +}; + /** Adaptor for aggregate functions. 
* Adding -Distinct suffix to aggregate function **/ class AggregateFunctionDistinct final : public IAggregateFunctionHelper { private: - mutable std::mutex mutex; AggregateFunctionPtr nested_func; - mutable HashSet< - UInt128, - UInt128TrivialHash, - HashTableGrower<3>, - HashTableAllocatorWithStackMemory> storage; + mutable AggregateFunctionDistinctData storage; public: AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments) @@ -71,17 +82,14 @@ public: } void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - UInt128 key; SipHash hash; columns[0]->updateHashWithValue(row_num, hash); + + UInt128 key; hash.get128(key.low, key.high); - { - std::lock_guard lock(mutex); - if (!storage.insert(key).second) { - return; - } - } - nested_func->add(place, columns, row_num, arena); + + if (storage.TryToInsert(key)) + nested_func->add(place, columns, row_num, arena); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { From da81c56b5e5a33e1b36c5949775f22c5a78c350f Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Thu, 14 May 2020 22:46:01 +0300 Subject: [PATCH 0124/2229] Delete extra lines --- src/AggregateFunctions/AggregateFunctionDistinct.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index d477a04568f..369b4a5f7df 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -23,16 +23,9 @@ namespace DB throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); -// return DataTypes(arguments.begin(), std::prev(arguments.end())); DataTypes nested_arguments; - for (const auto & type : arguments) - { + for (const auto & type : arguments) { nested_arguments.push_back(type); -// if (const DataTypeArray * array = typeid_cast(type.get())) -// nested_arguments.push_back(array->getNestedType()); -// else -// throw Exception("Illegal type " + type->getName() + " of argument" -// " for aggregate function with " + getName() + " suffix. 
Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return nested_arguments; From 95677432e5326267ca5639aa5d92d4a4caaab4ac Mon Sep 17 00:00:00 2001 From: bobrovskij artemij Date: Thu, 14 May 2020 02:20:45 +0300 Subject: [PATCH 0125/2229] MongoDB engine (read-only) --- src/Access/AccessType.h | 1 + src/Dictionaries/MongoDBDictionarySource.cpp | 44 ++---- src/Dictionaries/MongoDBDictionarySource.h | 7 + src/Storages/StorageMongoDB.cpp | 133 +++++++++++++++++++ src/Storages/StorageMongoDB.h | 60 +++++++++ src/Storages/registerStorages.cpp | 2 + src/Storages/registerStorages.h | 2 + 7 files changed, 219 insertions(+), 30 deletions(-) create mode 100644 src/Storages/StorageMongoDB.cpp create mode 100644 src/Storages/StorageMongoDB.h diff --git a/src/Access/AccessType.h b/src/Access/AccessType.h index d0665a6e55f..db31d87cfa6 100644 --- a/src/Access/AccessType.h +++ b/src/Access/AccessType.h @@ -150,6 +150,7 @@ enum class AccessType M(FILE, "", GLOBAL, SOURCES) \ M(URL, "", GLOBAL, SOURCES) \ M(REMOTE, "", GLOBAL, SOURCES) \ + M(MONGO, "", GLOBAL, SOURCES) \ M(MYSQL, "", GLOBAL, SOURCES) \ M(ODBC, "", GLOBAL, SOURCES) \ M(JDBC, "", GLOBAL, SOURCES) \ diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 7247d8a4613..da1db35437d 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -5,32 +5,19 @@ namespace DB { -namespace ErrorCodes -{ - extern const int SUPPORT_IS_DISABLED; -} -void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) -{ - auto create_table_source = [=](const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block, - const Context & /* context */, - bool /* check_config */) -> DictionarySourcePtr { -#if USE_POCO_MONGODB - return std::make_unique(dict_struct, config, config_prefix + ".mongodb", sample_block); -#else - (void)dict_struct; - (void)config; - (void)config_prefix; - (void)sample_block; - throw Exception{"Dictionary source of type `mongodb` is disabled because poco library was built without mongodb support.", - ErrorCodes::SUPPORT_IS_DISABLED}; -#endif - }; - factory.registerSource("mongodb", create_table_source); -} + void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) + { + auto create_table_source = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */, + bool /* check_config */) -> DictionarySourcePtr { + return std::make_unique(dict_struct, config, config_prefix + ".mongodb", sample_block); + }; + factory.registerSource("mongodb", create_table_source); + } } @@ -69,8 +56,7 @@ static const UInt64 max_block_size = 8192; # if POCO_VERSION < 0x01070800 /// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485 -static void -authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password) +void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password) { Poco::MongoDB::Database db(database); @@ -238,8 +224,7 @@ MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & MongoDBDictionarySource::~MongoDBDictionarySource() = default; -static std::unique_ptr -createCursor(const 
std::string & database, const std::string & collection, const Block & sample_block_to_select) +std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select) { auto cursor = std::make_unique(database, collection); @@ -249,7 +234,6 @@ createCursor(const std::string & database, const std::string & collection, const for (const auto & column : sample_block_to_select) cursor->query().returnFieldSelector().add(column.name, 1); - return cursor; } diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index bf4669248dc..b4339d40a95 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -17,6 +17,7 @@ namespace Util namespace MongoDB { class Connection; + class Cursor; } } @@ -28,6 +29,12 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } +# if POCO_VERSION < 0x01070800 +void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password); +# endif + +std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select); + /// Allows loading dictionaries from a MongoDB collection class MongoDBDictionarySource final : public IDictionarySource { diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp new file mode 100644 index 00000000000..68a3c3eb1da --- /dev/null +++ b/src/Storages/StorageMongoDB.cpp @@ -0,0 +1,133 @@ +#include "StorageMongoDB.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; + extern const int MONGODB_CANNOT_AUTHENTICATE; +} + +StorageMongoDB::StorageMongoDB( + const StorageID & table_id_, + const std::string & host_, + short unsigned int port_, + const std::string & database_name_, + const std::string & collection_name_, + const std::string & username_, + const std::string & password_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const Context & context_) + : IStorage(table_id_) + , host(host_) + , port(port_) + , database_name(database_name_) + , collection_name(collection_name_) + , username(username_) + , password(password_) + , global_context(context_) + , connection{std::make_shared(host, port)} +{ + setColumns(columns_); + setConstraints(constraints_); +} + + +Pipes StorageMongoDB::read( + const Names & column_names, + const SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned) +{ + check(column_names); + +#if POCO_VERSION >= 0x01070800 + Poco::MongoDB::Database poco_db(database_name); + if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) + throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); +#else + authenticate(*connection, database_name, username, password); +#endif + + Block sample_block; + for (const String & column_name : column_names) + { + auto column_data = getColumn(column_name); + sample_block.insert({ column_data.type, column_data.name }); + } + + Pipes pipes; + pipes.emplace_back(std::make_shared( 
+            std::make_shared(connection, createCursor(database_name, collection_name, sample_block), sample_block, max_block_size)));
+
+    return pipes;
+}
+
+void registerStorageMongoDB(StorageFactory & factory)
+{
+    factory.registerStorage("MongoDB", [](const StorageFactory::Arguments & args)
+    {
+        ASTs & engine_args = args.engine_args;
+
+        if (engine_args.size() != 5)
+            throw Exception(
+                "Storage MongoDB requires 5 parameters: MongoDB('host:port', database, collection, 'user', 'password').",
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        for (auto & engine_arg : engine_args)
+            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.local_context);
+
+        /// 27017 is the default MongoDB port.
+        auto parsed_host_port = parseAddress(engine_args[0]->as().value.safeGet(), 27017);
+
+        const String & remote_database = engine_args[1]->as().value.safeGet();
+        const String & collection = engine_args[2]->as().value.safeGet();
+        const String & username = engine_args[3]->as().value.safeGet();
+        const String & password = engine_args[4]->as().value.safeGet();
+
+
+        return StorageMongoDB::create(
+            args.table_id,
+            parsed_host_port.first,
+            parsed_host_port.second,
+            remote_database,
+            collection,
+            username,
+            password,
+            args.columns,
+            args.constraints,
+            args.context);
+    },
+    {
+        .source_access_type = AccessType::MONGO,
+    });
+}
+
+}
diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h
new file mode 100644
index 00000000000..c037972f36b
--- /dev/null
+++ b/src/Storages/StorageMongoDB.h
@@ -0,0 +1,60 @@
+
+#pragma once
+
+#include "config_core.h"
+
+#include
+
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+/* Implements storage in the MongoDB database.
+ * Use ENGINE = MongoDB('host:port', database, collection, 'user', 'password')
+ * Read only.
+ */ + +class StorageMongoDB final : public ext::shared_ptr_helper, public IStorage +{ + friend struct ext::shared_ptr_helper; +public: + StorageMongoDB( + const StorageID & table_id_, + const std::string & host_, + short unsigned int port_, + const std::string & database_name_, + const std::string & collection_name_, + const std::string & username_, + const std::string & password_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const Context & context_); + + std::string getName() const override { return "MongoDB"; } + + Pipes read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + +private: + std::string host; + short unsigned int port; + std::string database_name; + std::string collection_name; + std::string username; + std::string password; + + Context global_context; + std::shared_ptr connection; +}; + +} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index f5fab52285d..8201bbb8ffa 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -46,6 +46,8 @@ void registerStorages() registerStorageMySQL(factory); #endif + registerStorageMongoDB(factory); + #if USE_RDKAFKA registerStorageKafka(factory); #endif diff --git a/src/Storages/registerStorages.h b/src/Storages/registerStorages.h index 63a758f5b38..aee2b3edd9e 100644 --- a/src/Storages/registerStorages.h +++ b/src/Storages/registerStorages.h @@ -40,6 +40,8 @@ void registerStorageJDBC(StorageFactory & factory); void registerStorageMySQL(StorageFactory & factory); #endif +void registerStorageMongoDB(StorageFactory & factory); + #if USE_RDKAFKA void registerStorageKafka(StorageFactory & factory); #endif From 4c03f4870366feb230f231bf7ad54a0c670ca59a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 May 2020 23:08:15 +0300 Subject: [PATCH 0126/2229] in-memory parts: better restoring --- src/Storages/MergeTree/DataPartsExchange.cpp | 17 +++-- src/Storages/MergeTree/DataPartsExchange.h | 1 - .../MergeTree/IMergeTreeDataPartWriter.cpp | 6 +- .../MergeTree/IMergeTreeDataPartWriter.h | 1 + src/Storages/MergeTree/MergeSelector.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 12 ++-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 + .../MergeTreeDataPartWriterInMemory.cpp | 43 ++++-------- .../MergeTreeDataPartWriterInMemory.h | 1 - .../MergeTreeDataPartWriterOnDisk.cpp | 1 - .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 5 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 - .../MergeTree/MergeTreeWriteAheadLog.cpp | 68 ++++++++++++++----- .../MergeTree/MergeTreeWriteAheadLog.h | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 11 ++- src/Storages/StorageReplicatedMergeTree.h | 2 +- tests/integration/helpers/network.py | 2 +- .../configs/do_not_merge.xml | 6 ++ .../test_polymorphic_parts/test.py | 63 ++++++++++++----- .../01130_in_memory_parts.reference | 8 +++ .../0_stateless/01130_in_memory_parts.sql | 8 ++- 21 files changed, 168 insertions(+), 99 deletions(-) create mode 100644 tests/integration/test_polymorphic_parts/configs/do_not_merge.xml diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 673c774ce5a..f61d80e63ac 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -40,6 +40,7 @@ namespace { constexpr auto 
REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE = 1;
 constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS = 2;
+constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE = 3;


 std::string getEndpointId(const std::string & node_id)
@@ -59,7 +60,6 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo
     int client_protocol_version = parse(params.get("client_protocol_version", "0"));

     String part_name = params.get("part");
-    String part_type = params.get("part_type", "Wide"); // TODO: correct type with old versions

     const auto data_settings = data.getSettings();
@@ -79,7 +79,7 @@
     }

     /// We pretend to work as older server version, to be sure that client will correctly process our version
-    response.addCookie({"server_protocol_version", toString(std::min(client_protocol_version, REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS))});
+    response.addCookie({"server_protocol_version", toString(std::min(client_protocol_version, REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE))});

     ++total_sends;
     SCOPE_EXIT({--total_sends;});
@@ -108,7 +108,10 @@
             writeBinary(ttl_infos_buffer.str(), out);
         }

-        if (part_type == "InMemory")
+        if (client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
+            writeStringBinary(part->getType().toString(), out);
+
+        if (isInMemoryPart(part))
             sendPartFromMemory(part, out, storage_lock);
         else
             sendPartFromDisk(part, out, storage_lock);
@@ -199,7 +202,6 @@ MergeTreeData::DataPartPtr Service::findPart(const String & name)

 MergeTreeData::MutableDataPartPtr Fetcher::fetchPart(
     const String & part_name,
-    const String & part_type,
     const String & replica_path,
     const String & host,
     int port,
@@ -222,8 +224,7 @@
     {
         {"endpoint", getEndpointId(replica_path)},
         {"part", part_name},
-        {"part_type", part_type},
-        {"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE_AND_TTL_INFOS)},
+        {"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)},
         {"compress", "false"}
     });
@@ -271,6 +272,10 @@
         reservation = data.makeEmptyReservationOnLargestDisk();
     }

+    String part_type = "Wide";
+    if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE)
+        readStringBinary(part_type, in);
+
     return part_type == "InMemory"
         ? downloadPartToMemory(part_name, replica_path, in)
         : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, std::move(reservation), in);
 }
diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h
index f17836cf9f3..217434d7e41 100644
--- a/src/Storages/MergeTree/DataPartsExchange.h
+++ b/src/Storages/MergeTree/DataPartsExchange.h
@@ -54,7 +54,6 @@ public:
     /// Downloads a part to tmp_directory. If to_detached - downloads to the `detached` directory.
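     /// The part type ("Wide", "Compact" or "InMemory") is no longer passed in by the caller:
     /// since REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE the sender reports the type in its
     /// response, and replies from older senders are assumed to describe Wide parts.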
MergeTreeData::MutableDataPartPtr fetchPart( const String & part_name, - const String & part_type, const String & replica_path, const String & host, int port, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 2d35b9ff723..523774da233 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -9,7 +9,8 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeWriterSettings & settings_) : storage(storage_) , columns_list(columns_list_) - , settings(settings_) {} + , settings(settings_) + , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity){} IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeData & storage_, @@ -21,7 +22,8 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , columns_list(columns_list_) , skip_indices(skip_indices_) , index_granularity(index_granularity_) - , settings(settings_) {} + , settings(settings_) + , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity) {} Columns IMergeTreeDataPartWriter::releaseIndexColumns() { diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 6ecdcf581c2..f03a442d990 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -65,6 +65,7 @@ protected: MergeTreeIndices skip_indices; MergeTreeIndexGranularity index_granularity; MergeTreeWriterSettings settings; + bool with_final_mark; size_t next_mark = 0; size_t next_index_offset = 0; diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index 3c3cd8190ac..24612b367d5 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -60,7 +60,7 @@ public: const Partitions & partitions, const size_t max_total_size_to_merge) = 0; - virtual ~IMergeSelector() {} + virtual ~IMergeSelector() = default; }; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 049cc212a7b..bbe051a2476 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -245,12 +245,6 @@ MergeTreeData::MergeTreeData( String reason; if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) LOG_WARNING(log, reason + " Settings 'min_bytes_for_wide_part' and 'min_bytes_for_wide_part' will be ignored."); - - if (settings->in_memory_parts_enable_wal) - { - auto disk = makeEmptyReservationOnLargestDisk()->getDisk(); - write_ahead_log = std::make_shared(*this, std::move(disk)); - } } @@ -1121,6 +1115,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } + if (settings->in_memory_parts_enable_wal) + { + auto disk = makeEmptyReservationOnLargestDisk()->getDisk(); + write_ahead_log = std::make_shared(*this, std::move(disk)); + } + calculateColumnSizesImpl(); LOG_DEBUG(log, "Loaded data parts (" << data_parts_indexes.size() << " items)"); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1e5f80d44b0..ff56b407998 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -231,12 +231,14 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge( { /// Check predicate only for first part in each partition. 
if (!prev_part) + { /* Parts can be merged with themselves for TTL needs for example. * So we have to check if this part is currently being inserted with quorum and so on and so forth. * Obviously we have to check it manually only for the first part * of each partition because it will be automatically checked for a pair of parts. */ if (!can_merge_callback(nullptr, part, nullptr)) continue; + } const String & partition_id = part->info.partition_id; if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr))) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 3d0d67e1ed6..39e9757d81c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -27,21 +27,22 @@ void MergeTreeDataPartWriterInMemory::write( Block result_block; if (permutation) { - for (const auto & it : columns_list) + for (const auto & col : columns_list) { - if (primary_key_block.has(it.name)) - result_block.insert(primary_key_block.getByName(it.name)); + if (primary_key_block.has(col.name)) + result_block.insert(primary_key_block.getByName(col.name)); else { - auto column = block.getByName(it.name); - column.column = column.column->permute(*permutation, 0); - result_block.insert(column); + auto permuted = block.getByName(col.name); + permuted.column = permuted.column->permute(*permutation, 0); + result_block.insert(permuted); } } } else { - result_block = block; + for (const auto & col : columns_list) + result_block.insert(block.getByName(col.name)); } part->block = std::move(result_block); @@ -55,7 +56,8 @@ void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Bl return; index_granularity.appendMark(rows); - index_granularity.appendMark(0); + if (with_final_mark) + index_granularity.appendMark(0); size_t primary_columns_num = primary_index_block.columns(); index_columns.resize(primary_columns_num); @@ -64,7 +66,8 @@ void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Bl const auto & primary_column = *primary_index_block.getByPosition(i).column; index_columns[i] = primary_column.cloneEmpty(); index_columns[i]->insertFrom(primary_column, 0); - index_columns[i]->insertFrom(primary_column, rows - 1); + if (with_final_mark) + index_columns[i]->insertFrom(primary_column, rows - 1); } } @@ -78,31 +81,9 @@ static MergeTreeDataPartChecksum createUncompressedChecksum(size_t size, SipHash void MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) { - UNUSED(checksums); SipHash hash; part->block.updateHash(hash); checksums.files["data.bin"] = createUncompressedChecksum(part->block.bytes(), hash); } -void MergeTreeDataPartWriterInMemory::finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) -{ - UNUSED(checksums); - if (index_columns.empty()) - return; - - SipHash hash; - size_t index_size = 0; - size_t rows = index_columns[0]->size(); - for (size_t i = 0; i < rows; ++i) - { - for (const auto & col : index_columns) - { - col->updateHashWithValue(i, hash); - index_size += col->byteSize(); - } - } - - checksums.files["primary.idx"] = createUncompressedChecksum(index_size, hash); -} - } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h index c9b57e5e4b6..425066a802e 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.h @@ -19,7 +19,6 @@ public: void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) override; void calculateAndSerializePrimaryIndex(const Block & primary_index_block) override; - void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) override; private: DataPartInMemoryPtr part; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 3ada9973477..0543e6420ee 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -80,7 +80,6 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , marks_file_extension(marks_file_extension_) , default_codec(default_codec_) , compute_granularity(index_granularity.empty()) - , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity) { if (settings.blocks_are_granules_size && !index_granularity.empty()) throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 149de2d9f30..bb54b964793 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -82,8 +82,8 @@ public: void initSkipIndices() final; void initPrimaryIndex() final; - virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) final; - virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums) final; + void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums) final; + void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums) final; void setWrittenOffsetColumns(WrittenOffsetColumns * written_offset_columns_) { @@ -100,7 +100,6 @@ protected: CompressionCodecPtr default_codec; bool compute_granularity; - bool with_final_mark; bool need_finish_last_granule; /// Number of marsk in data from which skip indices have to start diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c8aa15e62f3..580c95b34dd 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -165,8 +165,6 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( const unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read) const { - LOG_DEBUG(log, "readFromParts size: " << parts.size()); - size_t part_index = 0; /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it. 
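The write-ahead log that the next hunks make recoverable is an append-only sequence of records; restore() below reads each record back as a one-byte format version (UInt8), the part name as a length-prefixed string, and the part's rows as one block in Native format. A standalone dump tool over the same helpers might look like the following sketch (illustrative only, not part of the patch; it assumes ClickHouse's IO and DataStreams headers and linking against the server libraries):

    #include <iostream>
    #include <IO/ReadBufferFromFile.h>
    #include <IO/ReadHelpers.h>
    #include <DataStreams/NativeBlockInputStream.h>

    /// Walks a wal.bin file and prints one line per logged in-memory part.
    int main(int argc, char ** argv)
    {
        using namespace DB;

        if (argc < 2)
            return 1;

        ReadBufferFromFile in(argv[1]);          /// e.g. ".../wal_table/wal.bin"
        NativeBlockInputStream block_in(in, 0);  /// revision 0: plain Native format

        while (!in.eof())
        {
            UInt8 version;
            String part_name;

            readIntBinary(version, in);          /// only format version 0 exists so far
            readStringBinary(part_name, in);     /// e.g. "all_1_1_0"

            Block block = block_in.read();       /// the rows of the logged part
            std::cout << part_name << ": " << block.rows() << " rows, "
                      << block.columns() << " columns\n";
        }
    }

Truncating such a file mid-record, as the updated integration test does at the end of this patch, makes the read of the last record fail with CANNOT_READ_ALL_DATA, which is exactly the case the new restore() treats as a recoverable corruption.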
diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index e5c0c370ae2..02c45dcfb64 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -10,16 +10,18 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_FORMAT_VERSION; + extern const int CANNOT_READ_ALL_DATA; } MergeTreeWriteAheadLog::MergeTreeWriteAheadLog( const MergeTreeData & storage_, const DiskPtr & disk_, - const String & name) + const String & name_) : storage(storage_) , disk(disk_) - , path(storage.getRelativeDataPath() + name) + , name(name_) + , path(storage.getRelativeDataPath() + name_) { init(); } @@ -29,7 +31,7 @@ void MergeTreeWriteAheadLog::init() out = disk->writeFile(path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append); block_out = std::make_unique(*out, 0, storage.getSampleBlock()); min_block_number = std::numeric_limits::max(); - max_block_number = std::numeric_limits::min(); + max_block_number = -1; } void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name) @@ -55,7 +57,7 @@ void MergeTreeWriteAheadLog::rotate() + toString(min_block_number) + "_" + toString(max_block_number) + WAL_FILE_EXTENSION; - Poco::File(path).renameTo(storage.getFullPathOnDisk(disk) + new_name); + disk->replaceFile(path, storage.getRelativeDataPath() + new_name); init(); } @@ -69,29 +71,61 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() while (!in->eof()) { + MergeTreeData::MutableDataPartPtr part; UInt8 version; String part_name; - readIntBinary(version, *in); - if (version != 0) - throw Exception("Unknown WAL format version: " + toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION); + Block block; - readStringBinary(part_name, *in); - auto part = storage.createPart( - part_name, - MergeTreeDataPartType::IN_MEMORY, - MergeTreePartInfo::fromPartName(part_name, storage.format_version), - storage.reserveSpace(0)->getDisk(), - part_name); + try + { + readIntBinary(version, *in); + if (version != 0) + throw Exception("Unknown WAL format version: " + toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION); - auto block = block_in.read(); - part->minmax_idx.update(block, storage.minmax_idx_columns); - part->partition.create(storage, block, 0); + readStringBinary(part_name, *in); + + part = storage.createPart( + part_name, + MergeTreeDataPartType::IN_MEMORY, + MergeTreePartInfo::fromPartName(part_name, storage.format_version), + storage.reserveSpace(0)->getDisk(), + part_name); + + block = block_in.read(); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION) + { + LOG_WARNING(&Logger::get(storage.getLogName() + " (WriteAheadLog)"), + "WAL file '" << path << "' is broken. " << e.displayText()); + + /// If file is broken, do not write new parts to it. + /// But if it contains any part rotate and save them. 
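+                /// max_block_number is advanced only after a record has been restored in
+                /// full, so -1 here means the file never held a single complete part.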
+ if (max_block_number == -1) + Poco::File(path).remove(); + else if (name == DEFAULT_WAL_FILE) + rotate(); + + break; + } + throw; + } MergedBlockOutputStream part_out(part, block.getNamesAndTypesList(), {}, nullptr); + + part->minmax_idx.update(block, storage.minmax_idx_columns); + if (storage.partition_key_expr) + part->partition.create(storage, block, 0); + if (storage.hasSortingKey()) + storage.sorting_key_expr->execute(block); + part_out.writePrefix(); part_out.write(block); part_out.writeSuffixAndFinalizePart(part); + min_block_number = std::min(min_block_number, part->info.min_block); + max_block_number = std::max(max_block_number, part->info.max_block); result.push_back(std::move(part)); } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 7a0e5759624..22665048f56 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -15,10 +15,11 @@ class MergeTreeWriteAheadLog public: constexpr static auto WAL_FILE_NAME = "wal"; constexpr static auto WAL_FILE_EXTENSION = ".bin"; + constexpr static auto DEFAULT_WAL_FILE = "wal.bin"; constexpr static size_t MAX_WAL_BYTES = 1024 * 1024 * 1024; MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, - const String & name = String(WAL_FILE_NAME) + WAL_FILE_EXTENSION); + const String & name = DEFAULT_WAL_FILE); void write(const Block & block, const String & part_name); std::vector restore(); @@ -29,13 +30,14 @@ private: const MergeTreeData & storage; DiskPtr disk; + String name; String path; std::unique_ptr out; std::unique_ptr block_out; Int64 min_block_number = std::numeric_limits::max(); - Int64 max_block_number = std::numeric_limits::min(); + Int64 max_block_number = -1; mutable std::mutex write_mutex; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9b138af30bf..73034ef2e7e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1409,8 +1409,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry) try { String part_name = entry.actual_new_part_name.empty() ? 
entry.new_part_name : entry.actual_new_part_name; - String part_type = entry.new_part_type.toString(); - if (!fetchPart(part_name, part_type, zookeeper_path + "/replicas/" + replica, false, entry.quorum)) + if (!fetchPart(part_name, zookeeper_path + "/replicas/" + replica, false, entry.quorum)) return false; } catch (Exception & e) @@ -1754,7 +1753,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (interserver_scheme != address.scheme) throw Exception("Interserver schemas are different '" + interserver_scheme + "' != '" + address.scheme + "', can't fetch part from " + address.host, ErrorCodes::LOGICAL_ERROR); - part_desc->res_part = fetcher.fetchPart(part_desc->found_new_part_name, "Wide", source_replica_path, // TODO: fix part type + part_desc->res_part = fetcher.fetchPart(part_desc->found_new_part_name, source_replica_path, address.host, address.replication_port, timeouts, user, password, interserver_scheme, false, TMP_PREFIX + "fetch_"); /// TODO: check columns_version of fetched part @@ -2766,7 +2765,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) } -bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & part_type, +bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const String & source_replica_path, bool to_detached, size_t quorum) { const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); @@ -2872,7 +2871,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin ErrorCodes::LOGICAL_ERROR); return fetcher.fetchPart( - part_name, part_type, source_replica_path, + part_name, source_replica_path, address.host, address.replication_port, timeouts, user_password.first, user_password.second, interserver_scheme, to_detached); }; @@ -4384,7 +4383,7 @@ void StorageReplicatedMergeTree::fetchPartition(const ASTPtr & partition, const { try { - fetchPart(part, "Wide", best_replica_path, true, 0); // TODO: fix part type + fetchPart(part, best_replica_path, true, 0); } catch (const DB::Exception & e) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 728ac792dab..70fb48e9b35 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -456,7 +456,7 @@ private: * If quorum != 0, then the node for tracking the quorum is updated. * Returns false if part is already fetching right now. */ - bool fetchPart(const String & part_name, const String & part_type, const String & replica_path, bool to_detached, size_t quorum); + bool fetchPart(const String & part_name, const String & replica_path, bool to_detached, size_t quorum); /// Required only to avoid races between executeLogEntry and fetchPartition std::unordered_set currently_fetching_parts; diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 3ba8ae3f9fd..5d738126f07 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -13,7 +13,7 @@ class PartitionManager: Can act as a context manager: - with pm as PartitionManager(): + with PartitionManager() as pm: pm.partition_instances(instance1, instance2) ... # At exit all partitions are removed automatically. 
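A side note on the docstring fix above: the old line `with pm as PartitionManager():` does not even parse, since a call is not a valid assignment target, while the corrected `with PartitionManager() as pm:` constructs the manager and guarantees that all partitions are removed at scope exit. The same cleanup guarantee is what C++ expresses with RAII; a minimal self-contained sketch (Node, partition and heal are illustrative stand-ins, not ClickHouse API):

#include <iostream>

struct Node { const char * name; };

void partition(const Node & a, const Node & b) { std::cout << "partition " << a.name << " <-> " << b.name << "\n"; }
void heal(const Node & a, const Node & b) { std::cout << "heal " << a.name << " <-> " << b.name << "\n"; }

/// The constructor applies the partition and the destructor removes it on any
/// scope exit, including exceptions; the C++ analogue of __enter__/__exit__.
struct PartitionGuard
{
    PartitionGuard(const Node & a_, const Node & b_) : a(a_), b(b_) { partition(a, b); }
    ~PartitionGuard() { heal(a, b); }
    const Node & a;
    const Node & b;
};

int main()
{
    Node node9{"node9"};
    Node node10{"node10"};
    PartitionGuard guard(node9, node10);  /// like `with PartitionManager() as pm:`
    /// ... test body; cleanup runs automatically at the closing brace
}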
diff --git a/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml b/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml new file mode 100644 index 00000000000..bc2dae31ad6 --- /dev/null +++ b/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml @@ -0,0 +1,6 @@ + + 1 + 2 + +
diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 8cca8aa1072..ba8b4b6b725 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -8,6 +8,7 @@ import struct from helpers.test_tools import TSV from helpers.test_tools import assert_eq_with_retry from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) @@ -69,8 +70,8 @@ node6 = cluster.add_instance('node6', config_dir='configs', main_configs=['confi settings_in_memory = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 256} -node9 = cluster.add_instance('node9', config_dir="configs", with_zookeeper=True, stay_alive=True) -node10 = cluster.add_instance('node10', config_dir="configs", with_zookeeper=True, stay_alive=True) +node9 = cluster.add_instance('node9', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) +node10 = cluster.add_instance('node10', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) @pytest.fixture(scope="module") def start_cluster(): @@ -317,37 +318,65 @@ def test_in_memory(start_cluster): "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) def test_in_memory_wal(start_cluster): - node9.query("SYSTEM STOP MERGES") - node10.query("SYSTEM STOP MERGES") + # Merges are disabled in config for i in range(5): insert_random_data('wal_table', node9, 50) node10.query("SYSTEM SYNC REPLICA wal_table", timeout=20) - assert node9.query("SELECT count() FROM wal_table") == "250\n" - assert node10.query("SELECT count() FROM wal_table") == "250\n" + def check(node, rows, parts): + assert node.query("SELECT count() FROM wal_table") == "{}\n".format(rows) + assert node.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == "{}\n".format(parts) - assert node9.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == '5\n' - assert node10.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == '5\n' + check(node9, 250, 5) + check(node10, 250, 5) # WAL works at inserts node9.restart_clickhouse(kill=True) - time.sleep(5) - assert node9.query("SELECT count() FROM wal_table") == "250\n" + check(node9, 250, 5) # WAL works at fetches node10.restart_clickhouse(kill=True) - time.sleep(5) - assert node10.query("SELECT count() FROM wal_table") == "250\n" + check(node10, 250, 5) - node9.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0") insert_random_data('wal_table', node9, 50) - assert node9.query("SELECT count() FROM wal_table") == "300\n" + node10.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + + # Disable replication + with PartitionManager() as pm: + pm.partition_instances(node9, node10) + check(node9, 300, 6) + + wal_file = os.path.join(node9.path, "database/data/default/wal_table/wal.bin") + # Corrupt wal file + open(wal_file, 'r+b').truncate(os.path.getsize(wal_file) - 10) + node9.restart_clickhouse(kill=True) + + # The broken part is lost, but the others are restored successfully + check(node9, 250, 5) + # WAL with blocks from 0 to 4 + broken_wal_file = os.path.join(node9.path, "database/data/default/wal_table/wal_0_4.bin") + assert os.path.exists(broken_wal_file) + + # Fetch lost part from replica + node9.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + check(node9, 300, 6) + + # Check that new data is written to a new wal, while the old one still exists for restoring + assert os.path.getsize(wal_file) > 0 + assert os.path.getsize(broken_wal_file) # Data is lost without WAL - node9.restart_clickhouse(kill=True) - time.sleep(5) - assert node9.query("SELECT count() FROM wal_table") == "250\n" + node9.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0") + with PartitionManager() as pm: + pm.partition_instances(node9, node10) + + insert_random_data('wal_table', node9, 50) + check(node9, 350, 7) + + node9.restart_clickhouse(kill=True) + check(node9, 300, 6) + def test_polymorphic_parts_index(start_cluster): node1.query('''
diff --git a/tests/queries/0_stateless/01130_in_memory_parts.reference b/tests/queries/0_stateless/01130_in_memory_parts.reference index dbf39a0b48f..ae32d3ea7a3 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts.reference @@ -1,3 +1,4 @@ +InMemory 2 Simple selects 0 0 1 1 @@ -11,6 +12,8 @@ Simple selects 54 0 34 0 +20 +10 Mutations and Alters 66 1 1 @@ -25,3 +28,8 @@ Mutations and Alters [7,49] 1 1 2 1 +1 [1,1] +2 [] +4 [4,16] +5 [] +7 [7,49]
diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql index 4c09eb19937..b704fbdf081 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts.sql @@ -1,9 +1,10 @@ DROP TABLE IF EXISTS in_memory; CREATE TABLE in_memory (a UInt32, b UInt32) ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_compact_part = 0; + SETTINGS min_rows_for_compact_part = 1000; INSERT INTO in_memory SELECT number, number % 3 FROM numbers(100); +SELECT DISTINCT part_type, marks FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; SELECT 'Simple selects'; @@ -12,6 +13,8 @@ SELECT * FROM in_memory ORDER BY a LIMIT 5 OFFSET 50; SELECT count() FROM in_memory WHERE b = 0 SETTINGS max_block_size = 10; -- Check index SELECT count() FROM in_memory WHERE a > 100 SETTINGS max_rows_to_read = 0, force_primary_key = 1; +SELECT count() FROM in_memory WHERE a >= 10 AND a < 30 SETTINGS force_primary_key = 1; +SELECT DISTINCT blockSize() FROM in_memory SETTINGS max_block_size = 10; SELECT 'Mutations and Alters'; SET mutations_sync = 1; @@ -29,5 +32,8 @@ SELECT arr FROM in_memory ORDER BY a LIMIT 5; ALTER TABLE in_memory MODIFY COLUMN b String; ALTER TABLE in_memory RENAME COLUMN b to str; SELECT DISTINCT str, length(str) FROM in_memory ORDER BY str; +ALTER TABLE in_memory DROP COLUMN str; + +SELECT * FROM in_memory ORDER BY a LIMIT 5; DROP TABLE in_memory;
From e8262ccaf67ed975cb8b07b198adeaa7dc47ab2c Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 15 May 2020 03:53:12 +0300 Subject: [PATCH 0127/2229] in-memory parts: add perf test --- tests/performance/polymorphic_parts.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/performance/polymorphic_parts.xml b/tests/performance/polymorphic_parts.xml index a8e305953d0..46e308848da 100644 --- a/tests/performance/polymorphic_parts.xml
+++ b/tests/performance/polymorphic_parts.xml @@ -13,6 +13,13 @@ SAMPLE BY intHash32(UserID) SETTINGS min_bytes_for_wide_part = '10M' + + CREATE TABLE hits_memory AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_compact_part = '1M', min_bytes_for_wide_part = '10M', in_memory_parts_enable_wal = 1 + CREATE TABLE hits_buffer AS hits_10m_single ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) @@ -28,6 +35,11 @@ INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_memory(UserID) VALUES (rand()) + INSERT INTO hits_memory(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_memory(UserID) SELECT rand() FROM numbers(1000) + INSERT INTO hits_memory(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_buffer(UserID) VALUES (rand()) INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) @@ -35,5 +47,6 @@ DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_memory DROP TABLE IF EXISTS hits_buffer From 13224c22ab7c7078e4a41457a72bd971792d1dc9 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Fri, 15 May 2020 05:02:57 +0300 Subject: [PATCH 0128/2229] Stylecheck fix --- .../AggregateFunctionDistinct.cpp | 3 +- .../AggregateFunctionDistinct.h | 36 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 369b4a5f7df..b01bd2226c7 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -24,7 +24,8 @@ namespace DB ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); DataTypes nested_arguments; - for (const auto & type : arguments) { + for (const auto & type : arguments) + { nested_arguments.push_back(type); } diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index bab78aa88bf..5580cc3b4df 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -11,11 +11,13 @@ namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct AggregateFunctionDistinctData { +struct AggregateFunctionDistinctData +{ using Key = UInt128; HashSet< @@ -36,7 +38,8 @@ struct AggregateFunctionDistinctData { * Adding -Distinct suffix to aggregate function **/ -class AggregateFunctionDistinct final : public IAggregateFunctionHelper { +class AggregateFunctionDistinct final : public IAggregateFunctionHelper +{ private: AggregateFunctionPtr nested_func; mutable AggregateFunctionDistinctData storage; @@ -50,11 +53,13 @@ public: throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - String getName() const override { + String getName() const override + { return nested_func->getName() + "Distinct"; } - DataTypePtr getReturnType() const override { + DataTypePtr getReturnType() const override + { return nested_func->getReturnType(); } @@ -77,11 +82,13 @@ public: return nested_func->alignOfData(); } - bool hasTrivialDestructor() const override { + bool hasTrivialDestructor() const 
override + { return nested_func->hasTrivialDestructor(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { SipHash hash; columns[0]->updateHashWithValue(row_num, hash); @@ -92,23 +99,28 @@ public: nested_func->add(place, columns, row_num, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + { nested_func->merge(place, rhs, arena); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + { nested_func->deserialize(place, buf, arena); } - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + { nested_func->insertResultInto(place, to); } - bool allocatesMemoryInArena() const override { + bool allocatesMemoryInArena() const override + { return nested_func->allocatesMemoryInArena(); } }; From 7c6322c5b03232a0dfd603a54b2e0037bf817122 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Sat, 16 May 2020 02:06:25 +0300 Subject: [PATCH 0129/2229] Add support for many columns --- src/AggregateFunctions/AggregateFunctionDistinct.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 5580cc3b4df..b87183f15d6 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -42,12 +42,13 @@ class AggregateFunctionDistinct final : public IAggregateFunctionHelper(arguments, {}) - , nested_func(nested) + , nested_func(nested), num_arguments(arguments.size()) { if (arguments.empty()) throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -90,7 +91,8 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { SipHash hash; - columns[0]->updateHashWithValue(row_num, hash); + for (size_t i = 0; i < num_arguments; ++i) + columns[i]->updateHashWithValue(row_num, hash); UInt128 key; hash.get128(key.low, key.high); From 8c6f687010a925955c63f201bf52a15baa699d08 Mon Sep 17 00:00:00 2001 From: bobrovskij artemij Date: Sat, 16 May 2020 01:53:09 +0300 Subject: [PATCH 0130/2229] build/style fix --- src/Storages/StorageMongoDB.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 68a3c3eb1da..2f27042b162 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -28,7 +28,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; extern const int MONGODB_CANNOT_AUTHENTICATE; } @@ -79,7 +78,7 @@ Pipes StorageMongoDB::read( Block sample_block; for (const String & column_name : column_names) { - auto column_data = getColumn(column_name); + auto column_data = 
getColumns().getPhysical(column_name); sample_block.insert({ column_data.type, column_data.name }); } From fa38cf780c0071e1d05e4e79cf0face02628516e Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Sat, 16 May 2020 03:02:55 +0300 Subject: [PATCH 0131/2229] Add tests for -Distinct combinator --- tests/queries/0_stateless/01259_combinator_distinct.reference | 4 ++++ tests/queries/0_stateless/01259_combinator_distinct.sql | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/01259_combinator_distinct.reference create mode 100644 tests/queries/0_stateless/01259_combinator_distinct.sql diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference new file mode 100644 index 00000000000..34d13676466 --- /dev/null +++ b/tests/queries/0_stateless/01259_combinator_distinct.reference @@ -0,0 +1,4 @@ +499500 +78 +[0,1,2,3,4,5,6,7,8,9,10,11,12] +5.669227916063075e-17 diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql new file mode 100644 index 00000000000..e3c4bb114a3 --- /dev/null +++ b/tests/queries/0_stateless/01259_combinator_distinct.sql @@ -0,0 +1,4 @@ +SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers LIMIT 1000); +SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); +SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); +SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000); \ No newline at end of file From aeb195950c65f7e4d9ab8ec374c599e8e1466442 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Sat, 16 May 2020 03:15:44 +0300 Subject: [PATCH 0132/2229] Checkstyle fix --- .../AggregateFunctionDistinct.cpp | 61 +++++++++---------- .../AggregateFunctionDistinct.h | 3 +- 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index b01bd2226c7..820c2f0f72c 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -6,42 +6,41 @@ namespace DB { - namespace ErrorCodes - { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - } +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} - class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator - { - public: - String getName() const override { return "Distinct"; } +class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator +{ +public: + String getName() const override { return "Distinct"; } - DataTypes transformArguments(const DataTypes & arguments) const override + DataTypes transformArguments(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + DataTypes nested_arguments; + for (const auto & type : arguments) { - if (arguments.empty()) - throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - DataTypes nested_arguments; - for (const auto & type : arguments) - { - nested_arguments.push_back(type); - } - - return nested_arguments; 
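/// A note on -Distinct's design, for the hunk being restyled here: transformArguments()
/// passes the argument types through unchanged, because the nested function still sees
/// the original values. De-duplication happens per row in add(): the row's columns are
/// SipHash-ed into a UInt128 key, and the row reaches the nested function only when
/// inserting that key into the shared HashSet succeeds.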
+ nested_arguments.push_back(type); } - AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override - { - return std::make_shared(nested_function, arguments); - } - }; - - void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory) - { - factory.registerCombinator(std::make_shared()); + return nested_arguments; } + AggregateFunctionPtr transformAggregateFunction( + const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + { + return std::make_shared(nested_function, arguments); + } +}; + +void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory) +{ + factory.registerCombinator(std::make_shared()); +} + } diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index b87183f15d6..cc4c52ea5ff 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -28,7 +28,8 @@ struct AggregateFunctionDistinctData > data; std::mutex mutex; - bool ALWAYS_INLINE TryToInsert(const Key& key) { + bool ALWAYS_INLINE TryToInsert(const Key& key) + { std::lock_guard lock(mutex); return data.insert(key).second; } From f4369381c97fca9394bd9f0673a5bb91b0983d29 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 16 May 2020 18:00:33 +0300 Subject: [PATCH 0133/2229] Fix build --- src/AggregateFunctions/AggregateFunctionDistinct.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index cc4c52ea5ff..e7ccbc62c57 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -117,7 +117,7 @@ public: nested_func->deserialize(place, buf, arena); } - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to) const override { nested_func->insertResultInto(place, to); } From 0b51e25a6991196f4fcedda0475a7f1428eaf6bf Mon Sep 17 00:00:00 2001 From: potya Date: Tue, 19 May 2020 02:53:41 +0300 Subject: [PATCH 0134/2229] Support numeric parameters in VARCHAR, VARBINARY, INT... data types (e.g. VARCHAR(255)) and ignore them completely. 
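For reference, the six create* functions added below share one validate-then-discard body. A minimal sketch of how the same checks could be factored into a single templated helper (the helper and the lambda registration are illustrative, not the patch's code, and a later commit in this series reverts these functions anyway):

template <typename DataType>
static DataTypePtr createIgnoringWidth(const String & family_name, const ASTPtr & arguments)
{
    if (arguments)
    {
        if (arguments->children.size() > 1)
            throw Exception(family_name + " data type family must not have more than one argument - display width",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// The single argument must be a positive integer literal; its value is then ignored.
        const auto * argument = arguments->children[0]->as<ASTLiteral>();
        if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get<UInt64>() == 0)
            throw Exception(family_name + " data type family may have only a number (positive integer) as its argument",
                ErrorCodes::UNEXPECTED_AST_STRUCTURE);
    }
    return std::make_shared<DataType>();
}

/// Registration would then be one line per type, e.g.:
///   factory.registerDataType("Int32", [](const ASTPtr & args) { return createIgnoringWidth<DataTypeInt32>("INT32", args); });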
--- src/DataTypes/DataTypeString.cpp | 29 +++++++- src/DataTypes/DataTypesNumber.cpp | 119 ++++++++++++++++++++++++++++-- 2 files changed, 138 insertions(+), 10 deletions(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index c1afa8b90ea..358afc6c8f2 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -15,6 +15,9 @@ #include #include +#include +#include + #include #include #include @@ -27,6 +30,14 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + + void DataTypeString::serializeBinary(const Field & field, WriteBuffer & ostr) const { const String & s = get(field); @@ -366,12 +377,24 @@ bool DataTypeString::equals(const IDataType & rhs) const return typeid(rhs) == typeid(*this); } +static DataTypePtr create(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 1) + throw Exception("String data type family mustnt have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + throw Exception("FixedString data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + + return std::make_shared(); +} + void registerDataTypeString(DataTypeFactory & factory) { - auto creator = static_cast([] { return DataTypePtr(std::make_shared()); }); - - factory.registerSimpleDataType("String", creator); + factory.registerDataType("String", create); /// These synonyms are added for compatibility. diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 4da767ae359..f260daa15b4 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -2,9 +2,115 @@ #include +#include +#include + + namespace DB { +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNEXPECTED_AST_STRUCTURE; +} + +static DataTypePtr createForInt8(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 1) + throw Exception("INT8 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + throw Exception("INT8 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + + return std::make_shared(); +} + +static DataTypePtr createForInt16(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 1) + throw Exception("INT16 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + throw Exception("INT16 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + + return std::make_shared(); +} + +static DataTypePtr createForInt32(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 1) + throw Exception("INT32 data type family must not have more than one argument - 
display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + throw Exception("INT32 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + + return std::make_shared(); +} + +static DataTypePtr createForInt64(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 1) + throw Exception("INT64 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) + throw Exception("INT64 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + + return std::make_shared(); +} + +static DataTypePtr createForFloat32(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() > 2) + throw Exception("FLOAT32 data type family must not have more than two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + else if (arguments->children.size() == 1) { + const auto * argument = arguments->children[0]->as(); + if (!argument || argument->value.getType() != Field::Types::UInt64) + throw Exception("FLOAT32 data type family may have a non negative number as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } else if (arguments->children.size() == 2) { + const auto * beforePoint = arguments->children[0]->as(); + const auto * afterPoint = arguments->children[1]->as(); + if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || + !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) + throw Exception("FLOAT32 data type family may have a non negative number as its arguments", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + } + + return std::make_shared(); +} + +static DataTypePtr createForFloat64(const ASTPtr & arguments) +{ + if (arguments) { + if (arguments->children.size() != 2) + throw Exception("FLOAT64 data type family must have only two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + else { + const auto * beforePoint = arguments->children[0]->as(); + const auto * afterPoint = arguments->children[1]->as(); + if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || + !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) + throw Exception("FLOAT64 data type family may have a non negative number as its arguments", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + } + } + + return std::make_shared(); +} + + + void registerDataTypeNumbers(DataTypeFactory & factory) { factory.registerSimpleDataType("UInt8", [] { return DataTypePtr(std::make_shared()); }); @@ -12,13 +118,12 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerSimpleDataType("UInt32", [] { return DataTypePtr(std::make_shared()); }); factory.registerSimpleDataType("UInt64", [] { return DataTypePtr(std::make_shared()); }); - factory.registerSimpleDataType("Int8", [] { return DataTypePtr(std::make_shared()); }); - factory.registerSimpleDataType("Int16", [] { return DataTypePtr(std::make_shared()); }); - factory.registerSimpleDataType("Int32", [] { return 
DataTypePtr(std::make_shared()); }); - factory.registerSimpleDataType("Int64", [] { return DataTypePtr(std::make_shared()); }); - - factory.registerSimpleDataType("Float32", [] { return DataTypePtr(std::make_shared()); }); - factory.registerSimpleDataType("Float64", [] { return DataTypePtr(std::make_shared()); }); + factory.registerDataType("Int8", createForInt8); + factory.registerDataType("Int16", createForInt16); + factory.registerDataType("Int32", createForInt32); + factory.registerDataType("Int64", createForInt64); + factory.registerDataType("Float32", createForFloat32); + factory.registerDataType("Float64", createForFloat64); /// These synonyms are added for compatibility. From f13862872770cad328434824d2050ccec0b3ddc4 Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 01:18:49 +0300 Subject: [PATCH 0135/2229] Add cast_keep_nullable setting --- src/Core/Settings.h | 1 + src/Functions/FunctionsConversion.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 61fcf658ba8..8697e155024 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -428,6 +428,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ + M(SettingBool, cast_keep_nullable, true, "Cast operator keep Nullable for new data type", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 64708f45598..207e7759683 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -41,6 +41,7 @@ #include #include #include +#include namespace DB @@ -2406,6 +2407,11 @@ protected: " Instead there is a column with the following structure: " + column->dumpStructure(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + Settings set; + if (set.cast_keep_nullable) + if (arguments.back().type->isNullable()) { + return makeNullable(DataTypeFactory::instance().get(type_col->getValue())); + } return DataTypeFactory::instance().get(type_col->getValue()); } From 88709d0f01a88e66906fa8944ca433b3fa2f5fe3 Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 01:25:13 +0300 Subject: [PATCH 0136/2229] Back same as master --- src/DataTypes/DataTypeString.cpp | 40 +++------- src/DataTypes/DataTypesNumber.cpp | 128 +++--------------------------- 2 files changed, 22 insertions(+), 146 deletions(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 358afc6c8f2..cdeb6fe1012 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -15,9 +15,6 @@ #include #include -#include -#include - #include #include #include @@ -30,14 +27,6 @@ namespace DB { - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNEXPECTED_AST_STRUCTURE; -} - - void DataTypeString::serializeBinary(const Field & field, WriteBuffer & ostr) const { const String & s = get(field); @@ -104,8 +93,8 @@ void DataTypeString::serializeBinaryBulk(const IColumn & column, WriteBuffer & o return; size_t end = limit && offset + limit < size - ? offset + limit - : size; + ? 
offset + limit + : size; if (offset == 0) { @@ -377,38 +366,29 @@ bool DataTypeString::equals(const IDataType & rhs) const return typeid(rhs) == typeid(*this); } -static DataTypePtr create(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("String data type family mustnt have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("FixedString data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - void registerDataTypeString(DataTypeFactory & factory) { - factory.registerDataType("String", create); + auto creator = static_cast([] { return DataTypePtr(std::make_shared()); }); + + factory.registerSimpleDataType("String", creator); /// These synonyms are added for compatibility. factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("MEDIUMTEXT", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("LONG", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("LONGTEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BLOB", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("MEDIUMBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("BYTEA", "String", DataTypeFactory::CaseInsensitive); /// PostgreSQL } -} +} \ No newline at end of file diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index f260daa15b4..cd28b4fdad8 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -2,115 +2,9 @@ #include -#include -#include - - namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNEXPECTED_AST_STRUCTURE; -} - -static DataTypePtr createForInt8(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT8 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT8 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt16(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT16 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - 
const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT16 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt32(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT32 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT32 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt64(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT64 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT64 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForFloat32(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 2) - throw Exception("FLOAT32 data type family must not have more than two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - else if (arguments->children.size() == 1) { - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT32 data type family may have a non negative number as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } else if (arguments->children.size() == 2) { - const auto * beforePoint = arguments->children[0]->as(); - const auto * afterPoint = arguments->children[1]->as(); - if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || - !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT32 data type family may have a non negative number as its arguments", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - } - - return std::make_shared(); -} - -static DataTypePtr createForFloat64(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() != 2) - throw Exception("FLOAT64 data type family must have only two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - else { - const auto * beforePoint = arguments->children[0]->as(); - const auto * afterPoint = arguments->children[1]->as(); - if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || - !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT64 data type family may have a non negative number as its arguments", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - } - - return std::make_shared(); -} - - - void registerDataTypeNumbers(DataTypeFactory & factory) { factory.registerSimpleDataType("UInt8", [] { return 
DataTypePtr(std::make_shared()); }); @@ -118,27 +12,29 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerSimpleDataType("UInt32", [] { return DataTypePtr(std::make_shared()); }); factory.registerSimpleDataType("UInt64", [] { return DataTypePtr(std::make_shared()); }); - factory.registerDataType("Int8", createForInt8); - factory.registerDataType("Int16", createForInt16); - factory.registerDataType("Int32", createForInt32); - factory.registerDataType("Int64", createForInt64); - factory.registerDataType("Float32", createForFloat32); - factory.registerDataType("Float64", createForFloat64); + factory.registerSimpleDataType("Int8", [] { return DataTypePtr(std::make_shared()); }); + factory.registerSimpleDataType("Int16", [] { return DataTypePtr(std::make_shared()); }); + factory.registerSimpleDataType("Int32", [] { return DataTypePtr(std::make_shared()); }); + factory.registerSimpleDataType("Int64", [] { return DataTypePtr(std::make_shared()); }); + + factory.registerSimpleDataType("Float32", [] { return DataTypePtr(std::make_shared()); }); + factory.registerSimpleDataType("Float64", [] { return DataTypePtr(std::make_shared()); }); /// These synonyms are added for compatibility. factory.registerAlias("TINYINT", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("BOOL", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("BOOLEAN", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); /// MySQL + factory.registerAlias("BYTE", "Int8", DataTypeFactory::CaseInsensitive); /// MS Access factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT2", "Int16", DataTypeFactory::CaseInsensitive); factory.registerAlias("INT", "Int32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT4", "Int32", DataTypeFactory::CaseInsensitive); factory.registerAlias("INTEGER", "Int32", DataTypeFactory::CaseInsensitive); factory.registerAlias("BIGINT", "Int64", DataTypeFactory::CaseInsensitive); factory.registerAlias("FLOAT", "Float32", DataTypeFactory::CaseInsensitive); + factory.registerAlias("REAL", "Float32", DataTypeFactory::CaseInsensitive); + factory.registerAlias("SINGLE", "Float32", DataTypeFactory::CaseInsensitive); /// MS Access factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); } -} +} \ No newline at end of file From f3c1cdb69617231b0d85caf1b66293de68c6eeeb Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 01:41:54 +0300 Subject: [PATCH 0137/2229] one more back to master --- src/DataTypes/DataTypeString.cpp | 9 +++------ src/DataTypes/DataTypesNumber.cpp | 7 +++---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index cdeb6fe1012..d0db66b202b 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -93,8 +93,8 @@ void DataTypeString::serializeBinaryBulk(const IColumn & column, WriteBuffer & o return; size_t end = limit && offset + limit < size - ? offset + limit - : size; + ? offset + limit + : size; if (offset == 0) { @@ -376,19 +376,16 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. 
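/// (For example, with the aliases below `CREATE TABLE t (s VARCHAR, b BLOB)` resolves
/// both columns to String, and registration is case-insensitive, so `varchar` works too.)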
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("VARCHAR2", "String", DataTypeFactory::CaseInsensitive); /// Oracle factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("MEDIUMTEXT", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("LONG", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("LONGTEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("CLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("MEDIUMBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); - factory.registerAlias("BYTEA", "String", DataTypeFactory::CaseInsensitive); /// PostgreSQL } } \ No newline at end of file diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index cd28b4fdad8..82a1f35c297 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -25,15 +25,14 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("TINYINT", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("BOOL", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("BOOLEAN", "Int8", DataTypeFactory::CaseInsensitive); - factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); /// MySQL - factory.registerAlias("BYTE", "Int8", DataTypeFactory::CaseInsensitive); /// MS Access + factory.registerAlias("INT1", "Int8", DataTypeFactory::CaseInsensitive); factory.registerAlias("SMALLINT", "Int16", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INT2", "Int16", DataTypeFactory::CaseInsensitive); factory.registerAlias("INT", "Int32", DataTypeFactory::CaseInsensitive); + factory.registerAlias("INT4", "Int32", DataTypeFactory::CaseInsensitive); factory.registerAlias("INTEGER", "Int32", DataTypeFactory::CaseInsensitive); factory.registerAlias("BIGINT", "Int64", DataTypeFactory::CaseInsensitive); factory.registerAlias("FLOAT", "Float32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("REAL", "Float32", DataTypeFactory::CaseInsensitive); - factory.registerAlias("SINGLE", "Float32", DataTypeFactory::CaseInsensitive); /// MS Access factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); } From cee9c1517e5621e00c1efba830d798f1877612b4 Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 01:51:06 +0300 Subject: [PATCH 0138/2229] cast_keep_nullable set to false --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8697e155024..5e17e915cbe 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -428,7 +428,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. 
Will be removed after 2020-10-20", 0) \ - M(SettingBool, cast_keep_nullable, true, "Cast operator keep Nullable for new data type", 0) \ + M(SettingBool, cast_keep_nullable, false, "Cast operator keep Nullable for new data type", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
From d8bb98bbcf401e962ceb92351abc006f1ce330de Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 04:22:32 +0300 Subject: [PATCH 0139/2229] Add NULL and NOT NULL modifiers for data types in create query --- src/Core/Settings.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 22 +++++++++++++++++++++ src/Parsers/ASTColumnDeclaration.cpp | 10 ++++++++++ src/Parsers/ASTColumnDeclaration.h | 2 ++ src/Parsers/ParserCreateQuery.h | 21 ++++++++++++++++++++ 5 files changed, 56 insertions(+)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5e17e915cbe..501cd61e70c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -429,6 +429,7 @@ struct Settings : public SettingsCollection M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ M(SettingBool, cast_keep_nullable, false, "Cast operator keep Nullable for new data type", 0) \ + M(SettingBool, data_type_default_nullable, false, "Data types without NULL or NOT NULL will make Nullable", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 20c9dc839f4..d4609477c1e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -273,10 +274,13 @@ ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns_ast, const Context & context) { + Settings set; + /// First, deduce implicit types. /** all default_expressions as a single expression list, * mixed with conversion-columns for each explicitly specified type */ + ASTPtr default_expr_list = std::make_shared(); NamesAndTypesList column_names_and_types; @@ -285,9 +289,27 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; + if (col_decl.isNULL && col_decl.isNotNULL) + throw Exception{"Can't use NOT NULL and NULL together", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); + + if (col_decl.isNULL) { + if (column_type->isNullable()) + throw Exception{"Can't use NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + else { + column_type = makeNullable(column_type); + } + } else if (col_decl.isNotNULL) { + if (column_type->isNullable()) + throw Exception{"Can't use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + } + + if (set.data_type_default_nullable && !column_type->isNullable()) + column_type = makeNullable(column_type); + column_names_and_types.emplace_back(col_decl.name, column_type); } else
diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index b281315f555..c43a4b554a4 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -16,6 +16,16 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->type); } + if (isNULL) + { + res->isNULL = isNULL; + } + + if (isNotNULL) + { + res->isNotNULL = isNotNULL; + } + if (default_expression) { res->default_expression = default_expression->clone();
diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index ad23e0669bc..4816a433991 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -13,6 +13,8 @@ class ASTColumnDeclaration : public IAST public: String name; ASTPtr type; + bool isNULL; + bool isNotNULL; String default_specifier; ASTPtr default_expression; ASTPtr comment;
diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 19410a78dd2..7cf2497b3fb 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -115,6 +117,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E NameParser name_parser; ParserIdentifierWithOptionalParameters type_parser; ParserKeyword s_default{"DEFAULT"}; + ParserKeyword s_null{"NULL"}; + ParserKeyword s_not_null{"NOT NULL"}; ParserKeyword s_materialized{"MATERIALIZED"}; ParserKeyword s_alias{"ALIAS"}; ParserKeyword s_comment{"COMMENT"}; @@ -135,6 +139,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E */ ASTPtr type; String default_specifier; + bool isNull = false; + bool isNotNull = false; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -163,6 +169,13 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type + if (s_null.checkWithoutMoving(pos, expected)) { + if (s_null.check(pos, expected)) + isNull = true; + } else if (s_not_null.checkWithoutMoving(pos, expected)) { + if (s_not_null.check(pos, expected)) + isNotNull = true; + } if (s_comment.ignore(pos, expected)) { @@ -193,6 +206,14 @@ column_declaration->children.push_back(std::move(type)); } + if (isNull) { + column_declaration->isNULL = isNull; + } + + if (isNotNull) { + column_declaration->isNotNULL = isNotNull; + } + if (default_expression) { column_declaration->default_specifier = default_specifier;
From a21cc0e76e488bb6414b80614a2621bbdfb10c71 Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 04:26:19 +0300 Subject: [PATCH 0140/2229] Add NULL and NOT NULL modifiers for data types in create query --- src/Core/Settings.h | 1 - src/Functions/FunctionsConversion.h | 6 ------ 2 files changed, 7 deletions(-)
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 501cd61e70c..d0e2f459e8e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -428,7 +428,6 @@ struct Settings : public SettingsCollection M(SettingUInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \ M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ - M(SettingBool, cast_keep_nullable, false, "Cast operator keep Nullable for new data type", 0) \ M(SettingBool, data_type_default_nullable, false, "Data types without NULL or NOT NULL will make Nullable", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 207e7759683..64708f45598 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -41,7 +41,6 @@ #include #include #include -#include namespace DB @@ -2407,11 +2406,6 @@ protected: " Instead there is a column with the following structure: " + column->dumpStructure(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - Settings set; - if (set.cast_keep_nullable) - if (arguments.back().type->isNullable()) { - return makeNullable(DataTypeFactory::instance().get(type_col->getValue())); - } return DataTypeFactory::instance().get(type_col->getValue()); }
From a055e3308713e4c13f997b4a771015021b5a5ab7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 07:28:55 +0300 Subject: [PATCH 0141/2229] Add libevent & AMQP-CPP libraries --- .gitmodules | 6 + CMakeLists.txt | 2 + cmake/find/amqpcpp.cmake | 20 + cmake/find/libevent.cmake | 22 + contrib/AMQP-CPP | 1 + contrib/CMakeLists.txt | 8 + contrib/amqpcpp-cmake/CMakeLists.txt | 44 ++ contrib/libevent | 1 + contrib/libevent-cmake/CMakeLists.txt | 42 ++ contrib/libevent-cmake/evconfig-private.h | 39 ++ contrib/libevent-cmake/event-config.h | 516 ++++++++++++++++++++++ src/CMakeLists.txt | 7 + 12 files changed, 708 insertions(+) create mode 100644 cmake/find/amqpcpp.cmake create mode 100644 cmake/find/libevent.cmake create mode 160000 contrib/AMQP-CPP create mode 100644 contrib/amqpcpp-cmake/CMakeLists.txt create mode 160000 contrib/libevent create mode 100644 contrib/libevent-cmake/CMakeLists.txt create mode 100644 contrib/libevent-cmake/evconfig-private.h create mode 100644 contrib/libevent-cmake/event-config.h
diff --git a/.gitmodules b/.gitmodules index f7a16b84d37..bc4654e3b61 100644 --- a/.gitmodules +++ b/.gitmodules @@ -157,3 +157,9 @@ [submodule "contrib/openldap"] path = contrib/openldap url = https://github.com/openldap/openldap.git +[submodule "contrib/AMQP-CPP"] + path = contrib/AMQP-CPP + url = https://github.com/CopernicaMarketingSoftware/AMQP-CPP.git +[submodule "contrib/libevent"] + path = contrib/libevent + url = https://github.com/libevent/libevent.git diff --git a/CMakeLists.txt
b/CMakeLists.txt index 53dfd1df1cb..5e9a642c903 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -345,6 +345,8 @@ include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) include (cmake/find/rdkafka.cmake) +include (cmake/find/libevent.cmake) +include (cmake/find/amqpcpp.cmake) include (cmake/find/capnp.cmake) include (cmake/find/llvm.cmake) include (cmake/find/opencl.cmake) diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake new file mode 100644 index 00000000000..147824ff395 --- /dev/null +++ b/cmake/find/amqpcpp.cmake @@ -0,0 +1,20 @@ +SET(ENABLE_AMQPCPP 1) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") + message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive") + set (ENABLE_AMQPCPP 0) +endif () + +if (ENABLE_AMQPCPP) + + set (USE_AMQPCPP 1) + set (AMQPCPP_LIBRARY AMQP-CPP) + + set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include") + + list (APPEND AMQPCPP_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include" + "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP") + +endif() + +message (STATUS "Using AMQP-CPP=${USE_AMQPCPP}: ${AMQPCPP_INCLUDE_DIR} : ${AMQPCPP_LIBRARY}") diff --git a/cmake/find/libevent.cmake b/cmake/find/libevent.cmake new file mode 100644 index 00000000000..2f714b43475 --- /dev/null +++ b/cmake/find/libevent.cmake @@ -0,0 +1,22 @@ +SET(ENABLE_LIBEVENT 1) +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libevent/CMakeLists.txt") + message (WARNING "submodule contrib/libevent is missing. to fix try run: + \n git submodule update --init --recursive") + + set (ENABLE_LIBEVENT 0) +endif () + +if (ENABLE_LIBEVENT) + + set (USE_LIBEVENT 1) + set (LIBEVENT_LIBRARY LIBEVENT) + + set (LIBEVENT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libevent") + + list (APPEND LIBEVENT_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/libevent/include/event2" + "${ClickHouse_SOURCE_DIR}/contrib/libevent/include") + +endif() + +message (STATUS "Using libevent=${USE_LIBEVENT}: ${LIBEVENT_INCLUDE_DIR} : ${LIBEVENT_LIBRARY}") diff --git a/contrib/AMQP-CPP b/contrib/AMQP-CPP new file mode 160000 index 00000000000..1c08399ab0a --- /dev/null +++ b/contrib/AMQP-CPP @@ -0,0 +1 @@ +Subproject commit 1c08399ab0ab9e4042ef8e2bbe9e208e5dcbc13b diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1031285eac7..ea90f7129f2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -317,3 +317,11 @@ endif() if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() + +if (USE_AMQPCPP) + add_subdirectory (amqpcpp-cmake) +endif() + +if (USE_LIBEVENT) + add_subdirectory(libevent-cmake) +endif() diff --git a/contrib/amqpcpp-cmake/CMakeLists.txt b/contrib/amqpcpp-cmake/CMakeLists.txt new file mode 100644 index 00000000000..eae3122e216 --- /dev/null +++ b/contrib/amqpcpp-cmake/CMakeLists.txt @@ -0,0 +1,44 @@ +set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP) + +set (SRCS + ${LIBRARY_DIR}/src/array.cpp + ${LIBRARY_DIR}/src/channel.cpp + ${LIBRARY_DIR}/src/channelimpl.cpp + ${LIBRARY_DIR}/src/connectionimpl.cpp + ${LIBRARY_DIR}/src/deferredcancel.cpp + ${LIBRARY_DIR}/src/deferredconfirm.cpp + ${LIBRARY_DIR}/src/deferredconsumer.cpp + ${LIBRARY_DIR}/src/deferredextreceiver.cpp + ${LIBRARY_DIR}/src/deferredget.cpp + ${LIBRARY_DIR}/src/deferredpublisher.cpp + ${LIBRARY_DIR}/src/deferredreceiver.cpp + ${LIBRARY_DIR}/src/field.cpp + ${LIBRARY_DIR}/src/flags.cpp + ${LIBRARY_DIR}/src/linux_tcp/openssl.cpp + 
${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp
+    ${LIBRARY_DIR}/src/receivedframe.cpp
+    ${LIBRARY_DIR}/src/table.cpp
+    ${LIBRARY_DIR}/src/watchable.cpp
+)
+
+add_library(amqp-cpp ${SRCS})
+
+target_compile_options (amqp-cpp
+    PUBLIC
+    -Wno-old-style-cast
+    -Wno-inconsistent-missing-destructor-override
+    -Wno-deprecated
+    -Wno-unused-parameter
+    -Wno-shadow
+    -Wno-tautological-type-limit-compare
+    -Wno-extra-semi
+    # NOTE: all warnings are disabled last because the warning
+    # "conversion function converting 'XXX' to itself will never be used"
+    # doesn't have its own diagnostic flag yet.
+    -w
+)
+
+target_include_directories (amqp-cpp PUBLIC ${LIBRARY_DIR}/include)
+
+target_link_libraries (amqp-cpp PUBLIC libevent ssl)
+
diff --git a/contrib/libevent b/contrib/libevent
new file mode 160000
index 00000000000..eee26deed38
--- /dev/null
+++ b/contrib/libevent
@@ -0,0 +1 @@
+Subproject commit eee26deed38fc7a6b6780b54628b007a2810efcd
diff --git a/contrib/libevent-cmake/CMakeLists.txt b/contrib/libevent-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..f99bc221482
--- /dev/null
+++ b/contrib/libevent-cmake/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libevent)
+
+set(SRCS
+    ${LIBRARY_DIR}/buffer.c
+    ${LIBRARY_DIR}/bufferevent_filter.c
+    ${LIBRARY_DIR}/bufferevent_pair.c
+    ${LIBRARY_DIR}/bufferevent_ratelim.c
+    ${LIBRARY_DIR}/bufferevent_sock.c
+    ${LIBRARY_DIR}/bufferevent.c
+    ${LIBRARY_DIR}/event.c
+    ${LIBRARY_DIR}/evmap.c
+    ${LIBRARY_DIR}/evthread.c
+    ${LIBRARY_DIR}/evutil_rand.c
+    ${LIBRARY_DIR}/evutil_time.c
+    ${LIBRARY_DIR}/evutil.c
+    ${LIBRARY_DIR}/listener.c
+    ${LIBRARY_DIR}/log.c
+    ${LIBRARY_DIR}/signal.c
+    ${LIBRARY_DIR}/strlcpy.c
+    ${LIBRARY_DIR}/watch.c
+)
+
+if (OS_LINUX)
+    list (APPEND SRCS
+        ${LIBRARY_DIR}/epoll.c
+        ${LIBRARY_DIR}/poll.c
+        ${LIBRARY_DIR}/select.c
+    )
+
+elseif (OS_DARWIN)
+    list (APPEND SRCS ${LIBRARY_DIR}/kqueue.c)
+endif ()
+
+add_library(libevent ${SRCS})
+
+target_compile_options (libevent PUBLIC -Wno-reserved-id-macro)
+
+if (OS_LINUX)
+    target_include_directories (libevent PUBLIC linux)
+endif ()
+
+target_include_directories (libevent PUBLIC ${LIBRARY_DIR}/include)
diff --git a/contrib/libevent-cmake/evconfig-private.h b/contrib/libevent-cmake/evconfig-private.h
new file mode 100644
index 00000000000..a39d2b71fbc
--- /dev/null
+++ b/contrib/libevent-cmake/evconfig-private.h
@@ -0,0 +1,39 @@
+#ifndef EVCONFIG_PRIVATE_H_INCLUDED_
+#define EVCONFIG_PRIVATE_H_INCLUDED_
+
+/* Enable extensions on AIX 3, Interix. */
+/* #undef _ALL_SOURCE */
+
+/* Enable GNU extensions on systems that have them. */
+#define _GNU_SOURCE 1
+
+/* Enable threading extensions on Solaris. */
+/* #undef _POSIX_PTHREAD_SEMANTICS */
+
+/* Enable extensions on HP NonStop. */
+/* #undef _TANDEM_SOURCE */
+
+/* Enable general extensions on Solaris. */
+/* #undef __EXTENSIONS__ */
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+/* #undef _FILE_OFFSET_BITS */
+/* Define for large files, on AIX-style hosts. */
+/* #undef _LARGE_FILES */
+
+/* Define to 1 if on MINIX. */
+/* #undef _MINIX */
+
+/* Define to 2 if the system does not provide POSIX.1 features except with
+   this defined. */
+/* #undef _POSIX_1_SOURCE */
+
+/* Define to 1 if you need to in order for `stat' and other things to work.
*/ +/* #undef _POSIX_SOURCE */ + +/* Enable POSIX.2 extensions on QNX for getopt */ +#ifdef __QNX__ +/* #undef __EXT_POSIX2 */ +#endif + +#endif diff --git a/contrib/libevent-cmake/event-config.h b/contrib/libevent-cmake/event-config.h new file mode 100644 index 00000000000..09067412490 --- /dev/null +++ b/contrib/libevent-cmake/event-config.h @@ -0,0 +1,516 @@ +/* event-config.h + * + * This file was generated by cmake when the makefiles were generated. + * + * DO NOT EDIT THIS FILE. + * + * Do not rely on macros in this file existing in later versions. + */ +#ifndef EVENT2_EVENT_CONFIG_H_INCLUDED_ +#define EVENT2_EVENT_CONFIG_H_INCLUDED_ + +/* Numeric representation of the version */ +#define EVENT__NUMERIC_VERSION 0x02020001 +#define EVENT__PACKAGE_VERSION "2.2.0" + +#define EVENT__VERSION_MAJOR 2 +#define EVENT__VERSION_MINOR 2 +#define EVENT__VERSION_PATCH 0 + +/* Version number of package */ +#define EVENT__VERSION "2.2.0-alpha-dev" + +/* Name of package */ +#define EVENT__PACKAGE "libevent" + +/* Define to the address where bug reports for this package should be sent. */ +#define EVENT__PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define EVENT__PACKAGE_NAME "" + +/* Define to the full name and version of this package. */ +#define EVENT__PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define EVENT__PACKAGE_TARNAME "" + +/* Define if libevent should build without support for a debug mode */ +/* #undef EVENT__DISABLE_DEBUG_MODE */ + +/* Define if libevent should not allow replacing the mm functions */ +/* #undef EVENT__DISABLE_MM_REPLACEMENT */ + +/* Define if libevent should not be compiled with thread support */ +/* #undef EVENT__DISABLE_THREAD_SUPPORT */ + +/* Define to 1 if you have the `accept4' function. */ +#define EVENT__HAVE_ACCEPT4 1 + +/* Define to 1 if you have the `arc4random' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the `arc4random_addrandom' function. */ +/* #undef EVENT__HAVE_ARC4RANDOM_ADDRANDOM */ + +/* Define if clock_gettime is available in libc */ +#define EVENT__DNS_USE_CPU_CLOCK_FOR_ID 1 + +/* Define is no secure id variant is available */ +/* #undef EVENT__DNS_USE_GETTIMEOFDAY_FOR_ID */ +/* #undef EVENT__DNS_USE_FTIME_FOR_ID */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +#define EVENT__HAVE_CLOCK_GETTIME 1 + +/* Define to 1 if you have the declaration of `CTL_KERN'. */ +#define EVENT__HAVE_DECL_CTL_KERN 1 + +/* Define to 1 if you have the declaration of `KERN_ARND'. */ +#define EVENT__HAVE_DECL_KERN_ARND 0 + +/* Define to 1 if you have `getrandom' function. */ +#define EVENT__HAVE_GETRANDOM 1 + +/* Define if /dev/poll is available */ +/* #undef EVENT__HAVE_DEVPOLL */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETDB_H 1 + +/* Define to 1 if fd_mask type is defined */ +#define EVENT__HAVE_FD_MASK 1 + +/* Define to 1 if the header file defines TAILQ_FOREACH. */ +#define EVENT__HAVE_TAILQFOREACH 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_DLFCN_H 1 + +/* Define if your system supports the epoll system calls */ +#define EVENT__HAVE_EPOLL 1 + +/* Define to 1 if you have the `epoll_create1' function. */ +#define EVENT__HAVE_EPOLL_CREATE1 1 + +/* Define to 1 if you have the `epoll_ctl' function. 
*/ +#define EVENT__HAVE_EPOLL_CTL 1 + +/* Define to 1 if you have the `eventfd' function. */ +#define EVENT__HAVE_EVENTFD 1 + +/* Define if your system supports event ports */ +/* #undef EVENT__HAVE_EVENT_PORTS */ + +/* Define to 1 if you have the `fcntl' function. */ +#define EVENT__HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define EVENT__HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `getegid' function. */ +#define EVENT__HAVE_GETEGID 1 + +/* Define to 1 if you have the `geteuid' function. */ +#define EVENT__HAVE_GETEUID 1 + +/* TODO: Check for different gethostname argument counts. CheckPrototypeDefinition.cmake can be used. */ +/* Define this if you have any gethostbyname_r() */ +#define EVENT__HAVE_GETHOSTBYNAME_R 1 + +/* Define this if gethostbyname_r takes 3 arguments */ +/* #undef EVENT__HAVE_GETHOSTBYNAME_R_3_ARG */ + +/* Define this if gethostbyname_r takes 5 arguments */ +/* #undef EVENT__HAVE_GETHOSTBYNAME_R_5_ARG */ + +/* Define this if gethostbyname_r takes 6 arguments */ +#define EVENT__HAVE_GETHOSTBYNAME_R_6_ARG 1 + +/* Define to 1 if you have the `getifaddrs' function. */ +#define EVENT__HAVE_GETIFADDRS 1 + +/* Define to 1 if you have the `getnameinfo' function. */ +#define EVENT__HAVE_GETNAMEINFO 1 + +/* Define to 1 if you have the `getprotobynumber' function. */ +#define EVENT__HAVE_GETPROTOBYNUMBER 1 + +/* Define to 1 if you have the `getservbyname' function. */ +#define EVENT__HAVE_GETSERVBYNAME 1 + +/* Define to 1 if you have the `gettimeofday' function. */ +#define EVENT__HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_IFADDRS_H 1 + +/* Define to 1 if you have the `inet_ntop' function. */ +#define EVENT__HAVE_INET_NTOP 1 + +/* Define to 1 if you have the `inet_pton' function. */ +#define EVENT__HAVE_INET_PTON 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `issetugid' function. */ +/* #undef EVENT__HAVE_ISSETUGID */ + +/* Define to 1 if you have the `kqueue' function. */ +/* #undef EVENT__HAVE_KQUEUE */ + +/* Define if the system has zlib */ +#define EVENT__HAVE_LIBZ 1 + +/* Define to 1 if you have the `mach_absolute_time' function. */ +/* #undef EVENT__HAVE_MACH_ABSOLUTE_TIME */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_MACH_MACH_TIME_H */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_MACH_MACH_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mmap' function. */ +#define EVENT__HAVE_MMAP 1 + +/* Define to 1 if you have the `nanosleep' function. */ +#define EVENT__HAVE_NANOSLEEP 1 + +/* Define to 1 if you have the `usleep' function. */ +#define EVENT__HAVE_USLEEP 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_NETINET_IN6_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_NETINET_TCP_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_UN_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_AFUNIX_H */ + +/* Define if the system has openssl */ +#define EVENT__HAVE_OPENSSL 1 + +/* Define to 1 if you have the `pipe' function. */ +#define EVENT__HAVE_PIPE 1 + +/* Define to 1 if you have the `pipe2' function. 
*/ +#define EVENT__HAVE_PIPE2 1 + +/* Define to 1 if you have the `poll' function. */ +#define EVENT__HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_POLL_H 1 + +/* Define to 1 if you have the `port_create' function. */ +/* #undef EVENT__HAVE_PORT_CREATE */ + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_PORT_H */ + +/* Define if we have pthreads on this system */ +#define EVENT__HAVE_PTHREADS 1 + +/* Define to 1 if you have the `putenv' function. */ +#define EVENT__HAVE_PUTENV 1 + +/* Define to 1 if the system has the type `sa_family_t'. */ +#define EVENT__HAVE_SA_FAMILY_T 1 + +/* Define to 1 if you have the `select' function. */ +#define EVENT__HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define EVENT__HAVE_SETENV 1 + +/* Define if F_SETFD is defined in */ +#define EVENT__HAVE_SETFD 1 + +/* Define to 1 if you have the `setrlimit' function. */ +#define EVENT__HAVE_SETRLIMIT 1 + +/* Define to 1 if you have the `sendfile' function. */ +#define EVENT__HAVE_SENDFILE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define EVENT__HAVE_SIGACTION 1 + +/* Define to 1 if you have the `signal' function. */ +#define EVENT__HAVE_SIGNAL 1 + +/* Define to 1 if you have the `strsignal' function. */ +#define EVENT__HAVE_STRSIGNAL 1 + +/* Define to 1 if you have the `splice' function. */ +#define EVENT__HAVE_SPLICE 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDDEF_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_STRING_H 1 + +/* Define to 1 if you have the `strlcpy' function. */ +/* #undef EVENT__HAVE_STRLCPY */ + +/* Define to 1 if you have the `strsep' function. */ +#define EVENT__HAVE_STRSEP 1 + +/* Define to 1 if you have the `strtok_r' function. */ +#define EVENT__HAVE_STRTOK_R 1 + +/* Define to 1 if you have the `strtoll' function. */ +#define EVENT__HAVE_STRTOLL 1 + +/* Define to 1 if you have the `_gmtime64_s' function. */ +/* #undef EVENT__HAVE__GMTIME64_S */ + +/* Define to 1 if you have the `_gmtime64' function. */ +/* #undef EVENT__HAVE__GMTIME64 */ + +/* Define to 1 if the system has the type `struct addrinfo'. */ +#define EVENT__HAVE_STRUCT_ADDRINFO 1 + +/* Define to 1 if the system has the type `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR 1 + +/* Define to 1 if `s6_addr16' is member of `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR16 1 + +/* Define to 1 if `s6_addr32' is member of `struct in6_addr'. */ +#define EVENT__HAVE_STRUCT_IN6_ADDR_S6_ADDR32 1 + +/* Define to 1 if the system has the type `struct sockaddr_in6'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_IN6 1 + +/* Define to 1 if `sin6_len' is member of `struct sockaddr_in6'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN6_SIN6_LEN */ + +/* Define to 1 if `sin_len' is member of `struct sockaddr_in'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */ + +/* Define to 1 if the system has the type `struct sockaddr_un'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_UN 1 + +/* Define to 1 if the system has the type `struct sockaddr_storage'. */ +#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE 1 + +/* Define to 1 if `ss_family' is a member of `struct sockaddr_storage'. 
*/ +#define EVENT__HAVE_STRUCT_SOCKADDR_STORAGE_SS_FAMILY 1 + +/* Define to 1 if `__ss_family' is a member of `struct sockaddr_storage'. */ +/* #undef EVENT__HAVE_STRUCT_SOCKADDR_STORAGE___SS_FAMILY */ + +/* Define to 1 if the system has the type `struct linger'. */ +#define EVENT__HAVE_STRUCT_LINGER 1 + +/* Define to 1 if you have the `sysctl' function. */ +/* #undef EVENT__HAVE_SYSCTL */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_EPOLL_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_EVENTFD_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_SYS_EVENT_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_MMAN_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SENDFILE_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_RANDOM_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef EVENT__HAVE_SYS_SYSCTL_H */ + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TIMERFD_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_UIO_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_ERRNO_H 1 + +/* Define if TAILQ_FOREACH is defined in */ +#define EVENT__HAVE_TAILQFOREACH 1 + +/* Define if timeradd is defined in */ +#define EVENT__HAVE_TIMERADD 1 + +/* Define if timerclear is defined in */ +#define EVENT__HAVE_TIMERCLEAR 1 + +/* Define if timercmp is defined in */ +#define EVENT__HAVE_TIMERCMP 1 + + +/* Define to 1 if you have the `timerfd_create' function. */ +#define EVENT__HAVE_TIMERFD_CREATE 1 + +/* Define if timerisset is defined in */ +#define EVENT__HAVE_TIMERISSET 1 + +/* Define to 1 if the system has the type `uint8_t'. */ +#define EVENT__HAVE_UINT8_T 1 + +/* Define to 1 if the system has the type `uint16_t'. */ +#define EVENT__HAVE_UINT16_T 1 + +/* Define to 1 if the system has the type `uint32_t'. */ +#define EVENT__HAVE_UINT32_T 1 + +/* Define to 1 if the system has the type `uint64_t'. */ +#define EVENT__HAVE_UINT64_T 1 + +/* Define to 1 if the system has the type `uintptr_t'. */ +#define EVENT__HAVE_UINTPTR_T 1 + +/* Define to 1 if you have the `umask' function. */ +#define EVENT__HAVE_UMASK 1 + +/* Define to 1 if you have the header file. */ +#define EVENT__HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define EVENT__HAVE_UNSETENV 1 + +/* Define to 1 if you have the `vasprintf' function. 
*/ +#define EVENT__HAVE_VASPRINTF 1 + +/* Define if kqueue works correctly with pipes */ +/* #undef EVENT__HAVE_WORKING_KQUEUE */ + +#ifdef __USE_UNUSED_DEFINITIONS__ +/* Define to necessary symbol if this constant uses a non-standard name on your system. */ +/* XXX: Hello, this isn't even used, nor is it defined anywhere... - Ellzey */ +#define EVENT__PTHREAD_CREATE_JOINABLE +#endif + +/* The size of `pthread_t', as computed by sizeof. */ +#define EVENT__SIZEOF_PTHREAD_T 8 + +/* The size of a `int', as computed by sizeof. */ +#define EVENT__SIZEOF_INT 4 + +/* The size of a `long', as computed by sizeof. */ +#define EVENT__SIZEOF_LONG 8 + +/* The size of a `long long', as computed by sizeof. */ +#define EVENT__SIZEOF_LONG_LONG 8 + +/* The size of `off_t', as computed by sizeof. */ +#define EVENT__SIZEOF_OFF_T 8 + +#define EVENT__SIZEOF_SSIZE_T 8 + + +/* The size of a `short', as computed by sizeof. */ +#define EVENT__SIZEOF_SHORT 2 + +/* The size of `size_t', as computed by sizeof. */ +#define EVENT__SIZEOF_SIZE_T 8 + +/* Define to 1 if you can safely include both and . */ +/* #undef EVENT__TIME_WITH_SYS_TIME */ + +/* The size of `socklen_t', as computed by sizeof. */ +#define EVENT__SIZEOF_SOCKLEN_T 4 + +/* The size of 'void *', as computer by sizeof */ +#define EVENT__SIZEOF_VOID_P 8 + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. */ +#ifndef __cplusplus +/* why not c++? + * + * and are we really expected to use EVENT__inline everywhere, + * shouldn't we just do: + * ifdef EVENT__inline + * define inline EVENT__inline + * + * - Ellzey + */ + +#define EVENT__inline inline +#endif + +#define EVENT__HAVE___func__ 1 +#define EVENT__HAVE___FUNCTION__ 1 + +/* Define to `unsigned' if does not define. */ +#define EVENT__size_t size_t + +/* Define to unsigned int if you dont have it */ +#define EVENT__socklen_t socklen_t + +/* Define to `int' if does not define. 
*/ +#define EVENT__ssize_t ssize_t + +#endif /* \EVENT2_EVENT_CONFIG_H_INCLUDED_ */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 222a3e486f9..f720b5c1c85 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -72,6 +72,10 @@ if(USE_RDKAFKA) add_headers_and_sources(dbms Storages/Kafka) endif() +if (USE_AMQPCPP) + add_headers_and_sources(dbms Storages/RabbitMQ) +endif() + if (USE_AWS_S3) add_headers_and_sources(dbms Disks/S3) endif() @@ -253,6 +257,9 @@ if (USE_RDKAFKA) endif() endif() +if (USE_AMQPCPP) + dbms_target_link_libraries(PUBLIC amqp-cpp) +endif() if(RE2_INCLUDE_DIR) target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) From 3b75f214c59d1a674f283a9f4675005ef5f04f61 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 08:30:38 +0300 Subject: [PATCH 0142/2229] Register RabbitMQ storage --- src/Core/Settings.h | 1 + src/Core/config_core.h.in | 1 + src/Storages/RabbitMQ/RabbitMQSettings.cpp | 44 ++++ src/Storages/RabbitMQ/RabbitMQSettings.h | 26 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 249 +++++++++++++++++++++ src/Storages/RabbitMQ/StorageRabbitMQ.h | 62 +++++ src/Storages/registerStorages.cpp | 4 + src/Storages/registerStorages.h | 4 + 8 files changed, 391 insertions(+) create mode 100644 src/Storages/RabbitMQ/RabbitMQSettings.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQSettings.h create mode 100644 src/Storages/RabbitMQ/StorageRabbitMQ.cpp create mode 100644 src/Storages/RabbitMQ/StorageRabbitMQ.h diff --git a/src/Core/Settings.h b/src/Core/Settings.h index eda76584f0b..9cd6287a75d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -71,6 +71,7 @@ struct Settings : public SettingsCollection M(SettingMilliseconds, connection_pool_max_wait_ms, 0, "The wait time when the connection pool is full.", 0) \ M(SettingMilliseconds, replace_running_query_max_wait_ms, 5000, "The wait time for running query with the same query_id to finish when setting 'replace_running_query' is active.", 0) \ M(SettingMilliseconds, kafka_max_wait_ms, 5000, "The wait time for reading from Kafka before retry.", 0) \ + M(SettingMilliseconds, rabbitmq_max_wait_ms, 5000, "The wait time for reading from RabbitMQ before retry.", 0) \ M(SettingUInt64, poll_interval, DBMS_DEFAULT_POLL_INTERVAL, "Block at the query wait loop on the server for the specified number of seconds.", 0) \ M(SettingUInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \ M(SettingUInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \ diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 620c23c21cc..5991c12a1f2 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -5,6 +5,7 @@ #cmakedefine01 USE_ICU #cmakedefine01 USE_MYSQL #cmakedefine01 USE_RDKAFKA +#cmakedefine01 USE_AMQPCPP #cmakedefine01 USE_EMBEDDED_COMPILER #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp new file mode 100644 index 00000000000..ed8d4ad801a --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_SETTING; +} + +IMPLEMENT_SETTINGS_COLLECTION(RabbitMQSettings, LIST_OF_RABBITMQ_SETTINGS) + 
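+/* Descriptive note: loadFromQuery applies the SETTINGS clause of a
+ * CREATE TABLE ... ENGINE = RabbitMQ(...) statement to this collection,
+ * reporting unknown settings as BAD_ARGUMENTS for the engine; when the
+ * statement has no SETTINGS clause, an empty settings AST is attached to
+ * the storage definition so that later code can rely on its presence.
+ */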
+void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def)
+{
+    if (storage_def.settings)
+    {
+        try
+        {
+            applyChanges(storage_def.settings->changes);
+        }
+        catch (Exception & e)
+        {
+            if (e.code() == ErrorCodes::UNKNOWN_SETTING)
+                throw Exception(e.message() + " for storage " + storage_def.engine->name, ErrorCodes::BAD_ARGUMENTS);
+            else
+                e.rethrow();
+        }
+    }
+    else
+    {
+        auto settings_ast = std::make_shared();
+        settings_ast->is_standalone = false;
+        storage_def.set(storage_def.settings, settings_ast);
+    }
+}
+}
+
diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h
new file mode 100644
index 00000000000..0b0f58169fa
--- /dev/null
+++ b/src/Storages/RabbitMQ/RabbitMQSettings.h
@@ -0,0 +1,26 @@
+#pragma once
+#include
+
+namespace DB
+{
+    class ASTStorage;
+
+    struct RabbitMQSettings : public SettingsCollection
+    {
+
+#define LIST_OF_RABBITMQ_SETTINGS(M) \
+    M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \
+    M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \
+    M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. Needed to bind queues to it.", 0) \
+    M(SettingString, rabbitmq_format, "", "The message format.", 0) \
+    M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \
+    M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers).", 0) \
+    M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \
+    M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \
+    M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \
+
+    DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS)
+
+    void loadFromQuery(ASTStorage & storage_def);
+    };
+}
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
new file mode 100644
index 00000000000..98e7e97e2e1
--- /dev/null
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp
@@ -0,0 +1,249 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NOT_IMPLEMENTED;
+    extern const int LOGICAL_ERROR;
+    extern const int BAD_ARGUMENTS;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+StorageRabbitMQ::StorageRabbitMQ(
+        const StorageID & table_id_,
+        Context & context_,
+        const ColumnsDescription & columns_,
+        const String & host_port_,
+        const String & routing_key_,
+        const String & exchange_name_,
+        const String & format_name_,
+        char row_delimiter_,
+        size_t num_consumers_,
+        bool bind_by_id_,
+        size_t num_queues_,
+        bool hash_exchange_)
+        : IStorage(table_id_)
+        , global_context(context_.getGlobalContext())
+        , rabbitmq_context(Context(global_context))
+        , routing_key(global_context.getMacros()->expand(routing_key_))
+        , exchange_name(exchange_name_)
+        , format_name(global_context.getMacros()->expand(format_name_))
+        , row_delimiter(row_delimiter_)
+        , num_consumers(num_consumers_)
+        , bind_by_id(bind_by_id_)
+        , num_queues(num_queues_)
+        ,
hash_exchange(hash_exchange_)
+        , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")"))
+        , semaphore(0, num_consumers_)
+        , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672))
+{
+}
+
+
+void registerStorageRabbitMQ(StorageFactory & factory)
+{
+    auto creator_fn = [](const StorageFactory::Arguments & args)
+    {
+        ASTs & engine_args = args.engine_args;
+        size_t args_count = engine_args.size();
+        bool has_settings = args.storage_def->settings;
+
+        RabbitMQSettings rabbitmq_settings;
+        if (has_settings)
+        {
+            rabbitmq_settings.loadFromQuery(*args.storage_def);
+        }
+
+        String host_port = rabbitmq_settings.rabbitmq_host_port;
+        if (args_count >= 1)
+        {
+            const auto * ast = engine_args[0]->as();
+            if (ast && ast->value.getType() == Field::Types::String)
+            {
+                host_port = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception(String("RabbitMQ host:port must be a string"), ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        String routing_key = rabbitmq_settings.rabbitmq_routing_key.value;
+        if (args_count >= 2)
+        {
+            const auto * ast = engine_args[1]->as();
+            if (ast && ast->value.getType() == Field::Types::String)
+            {
+                routing_key = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception(String("RabbitMQ routing key must be a string"), ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        String exchange = rabbitmq_settings.rabbitmq_exchange_name.value;
+        if (args_count >= 3)
+        {
+            engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context);
+
+            const auto * ast = engine_args[2]->as();
+            if (ast && ast->value.getType() == Field::Types::String)
+            {
+                exchange = safeGet(ast->value);
+            }
+        }
+
+        String format = rabbitmq_settings.rabbitmq_format.value;
+        if (args_count >= 4)
+        {
+            engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context);
+
+            const auto * ast = engine_args[3]->as();
+            if (ast && ast->value.getType() == Field::Types::String)
+            {
+                format = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        char row_delimiter = rabbitmq_settings.rabbitmq_row_delimiter;
+        if (args_count >= 5)
+        {
+            engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context);
+
+            const auto * ast = engine_args[4]->as();
+            String arg;
+            if (ast && ast->value.getType() == Field::Types::String)
+            {
+                arg = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS);
+            }
+            if (arg.size() > 1)
+            {
+                throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS);
+            }
+            else if (arg.empty())
+            {
+                row_delimiter = '\0';
+            }
+            else
+            {
+                row_delimiter = arg[0];
+            }
+        }
+
+        size_t bind_by_id = static_cast(rabbitmq_settings.rabbitmq_bind_by_id);
+        if (args_count >= 6)
+        {
+            const auto * ast = engine_args[5]->as();
+            if (ast && ast->value.getType() == Field::Types::UInt64)
+            {
+                bind_by_id = static_cast(safeGet(ast->value));
+            }
+            else
+            {
+                throw Exception("Bind by id flag must be a boolean", ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers;
+        if (args_count >= 7)
+        {
+            const auto * ast = engine_args[6]->as();
+            if (ast && ast->value.getType() == Field::Types::UInt64)
+            {
+                num_consumers = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception("Number of consumers must be a positive integer", ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
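+        /* Positional engine arguments override the corresponding SETTINGS values and
+         * are expected in this order: host:port, routing_key, exchange_name, format,
+         * row_delimiter, bind_by_id, num_consumers, num_queues, hash_exchange.
+         * For example (illustrative DDL, not taken from the patch):
+         *   ENGINE = RabbitMQ('localhost:5672', 'clickhouse', 'clickhouse-exchange', 'JSONEachRow')
+         */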
+        UInt64 num_queues = rabbitmq_settings.rabbitmq_num_queues;
+        if (args_count >= 8)
+        {
+            const auto * ast = engine_args[7]->as();
+            if (ast && ast->value.getType() == Field::Types::UInt64)
+            {
+                num_queues = safeGet(ast->value);
+            }
+            else
+            {
+                throw Exception("Number of queues must be a positive integer", ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        size_t hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange);
+        if (args_count >= 9)
+        {
+            const auto * ast = engine_args[8]->as();
+            if (ast && ast->value.getType() == Field::Types::UInt64)
+            {
+                hash_exchange = static_cast(safeGet(ast->value));
+            }
+            else
+            {
+                throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS);
+            }
+        }
+
+        return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange,
+                format, row_delimiter, num_consumers, bind_by_id, num_queues, hash_exchange);
+    };
+
+    factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
+
+}
+
+
+NamesAndTypesList StorageRabbitMQ::getVirtuals() const
+{
+    return NamesAndTypesList{
+            {"_exchange", std::make_shared()},
+            {"_routingKey", std::make_shared()}
+    };
+}
+
+}
+
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h
new file mode 100644
index 00000000000..37b8c2b1078
--- /dev/null
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h
@@ -0,0 +1,62 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+using ChannelPtr = std::shared_ptr;
+
+class StorageRabbitMQ final: public ext::shared_ptr_helper, public IStorage
+{
+    friend struct ext::shared_ptr_helper;
+
+public:
+    std::string getName() const override { return "RabbitMQ"; }
+    bool supportsSettings() const override { return true; }
+
+protected:
+    StorageRabbitMQ(
+            const StorageID & table_id_,
+            Context & context_,
+            const ColumnsDescription & columns_,
+            const String & host_port_,
+            const String & routing_key_, const String & exchange_name_,
+            const String & format_name_, char row_delimiter_,
+            size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange_);
+
+private:
+    Context global_context;
+    Context rabbitmq_context;
+
+    String routing_key;
+    const String exchange_name;
+
+    const String format_name;
+    char row_delimiter;
+    size_t num_consumers;
+    size_t num_created_consumers = 0;
+
+    bool bind_by_id;
+    size_t num_queues;
+    const bool hash_exchange;
+
+    Poco::Logger * log;
+    std::pair parsed_address;
+
+    Poco::Semaphore semaphore;
+    std::mutex mutex;
+
+    size_t consumer_id = 0;
+
+    BackgroundSchedulePool::TaskHolder task;
+    std::atomic stream_cancelled{false};
+};
+
+}
diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp
index 5ad26b70803..c349a4e5c8f 100644
--- a/src/Storages/registerStorages.cpp
+++ b/src/Storages/registerStorages.cpp
@@ -46,6 +46,10 @@ void registerStorages()
     #if USE_RDKAFKA
     registerStorageKafka(factory);
     #endif
+
+    #if USE_AMQPCPP
+    registerStorageRabbitMQ(factory);
+    #endif
 }
 }
diff --git a/src/Storages/registerStorages.h b/src/Storages/registerStorages.h
index c9874551073..2823f5c2d2c 100644
--- a/src/Storages/registerStorages.h
+++ b/src/Storages/registerStorages.h
@@ -47,6 +47,10 @@ void registerStorageMySQL(StorageFactory & factory);
 void registerStorageKafka(StorageFactory & factory);
 #endif
+
+#if USE_AMQPCPP
+void registerStorageRabbitMQ(StorageFactory & factory);
+#endif
+
 void registerStorages();
 }
From 41b99edc044ae0fc820d99a9e8e69a59a27ca76c Mon Sep 17 00:00:00 2001
From: kssenii
Date: Wed, 20 May 2020 09:22:12 +0300 Subject: [PATCH 0143/2229] Add base for RabbitMQ integration tests --- .../compose/docker_compose_rabbitmq.yml | 12 ++ tests/integration/helpers/cluster.py | 24 +++- .../test_storage_rabbitmq/__init__.py | 0 .../configs/log_conf.xml | 11 ++ .../configs/rabbitmq.xml | 5 + .../test_storage_rabbitmq/configs/users.xml | 25 ++++ .../integration/test_storage_rabbitmq/test.py | 123 ++++++++++++++++++ .../test_rabbitmq_json.reference | 50 +++++++ 8 files changed, 247 insertions(+), 3 deletions(-) create mode 100644 docker/test/integration/compose/docker_compose_rabbitmq.yml create mode 100644 tests/integration/test_storage_rabbitmq/__init__.py create mode 100644 tests/integration/test_storage_rabbitmq/configs/log_conf.xml create mode 100644 tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml create mode 100644 tests/integration/test_storage_rabbitmq/configs/users.xml create mode 100644 tests/integration/test_storage_rabbitmq/test.py create mode 100644 tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference diff --git a/docker/test/integration/compose/docker_compose_rabbitmq.yml b/docker/test/integration/compose/docker_compose_rabbitmq.yml new file mode 100644 index 00000000000..7ebee3c0ea5 --- /dev/null +++ b/docker/test/integration/compose/docker_compose_rabbitmq.yml @@ -0,0 +1,12 @@ +version: '2.2' + +services: + rabbitmq1: + image: rabbitmq:3-management + hostname: rabbitmq1 + ports: + - "5672:5672" + - "15672:15672" + environment: + RABBITMQ_DEFAULT_USER: "root" + RABBITMQ_DEFAULT_PASS: "clickhouse" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 53c36ff8924..6d9ca1b7861 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -108,12 +108,14 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_rabbitmq_cmd = [] self.pre_zookeeper_commands = [] self.instances = {} self.with_zookeeper = False self.with_mysql = False self.with_postgres = False self.with_kafka = False + self.with_rabbitmq = False self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False @@ -143,7 +145,7 @@ class ClickHouseCluster: return cmd def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None, - with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, + with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -167,7 +169,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {}, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, + self.zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables or {}, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, @@ -231,6 +233,13 @@ class ClickHouseCluster: self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_kafka.yml')] cmds.append(self.base_kafka_cmd) + if with_rabbitmq and 
not self.with_rabbitmq: + self.with_rabbitmq = True + self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')]) + self.base_rabbitmq_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_rabbitmq.yml')] + cmds.append(self.base_rabbitmq_cmd) + if with_hdfs and not self.with_hdfs: self.with_hdfs = True self.base_cmd.extend(['--file', p.join(DOCKER_COMPOSE_DIR, 'docker_compose_hdfs.yml')]) @@ -482,6 +491,10 @@ class ClickHouseCluster: self.kafka_docker_id = self.get_instance_docker_id('kafka1') self.wait_schema_registry_to_start(120) + if self.with_rabbitmq and self.base_rabbitmq_cmd: + subprocess_check_call(self.base_rabbitmq_cmd + common_opts + ['--renew-anon-volumes']) + self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1') + if self.with_hdfs and self.base_hdfs_cmd: subprocess_check_call(self.base_hdfs_cmd + common_opts) self.wait_hdfs_to_start(120) @@ -621,7 +634,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, with_minio, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", @@ -648,6 +661,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka + self.with_rabbitmq = with_rabbitmq self.with_mongo = with_mongo self.with_redis = with_redis self.with_minio = with_minio @@ -993,6 +1007,9 @@ class ClickHouseInstance: depends_on.append("kafka1") depends_on.append("schema-registry") + if self.with_rabbitmq: + depends_on.append("rabbitmq1") + if self.with_zookeeper: depends_on.append("zoo1") depends_on.append("zoo2") @@ -1072,3 +1089,4 @@ class ClickHouseKiller(object): def __exit__(self, exc_type, exc_val, exc_tb): self.clickhouse_node.restore_clickhouse() + diff --git a/tests/integration/test_storage_rabbitmq/__init__.py b/tests/integration/test_storage_rabbitmq/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_storage_rabbitmq/configs/log_conf.xml b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml new file mode 100644 index 00000000000..f9d15e572aa --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/log_conf.xml @@ -0,0 +1,11 @@ + + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml new file mode 100644 index 00000000000..33a8a43fb1a --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/rabbitmq.xml @@ -0,0 +1,5 @@ + + + earliest + + diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml new file mode 100644 index 00000000000..246e6b069ef --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/users.xml @@ -0,0 +1,25 @@ + + + + + + + + + + + + + ::/0 + + default + default + + + + + + + + diff --git a/tests/integration/test_storage_rabbitmq/test.py 
b/tests/integration/test_storage_rabbitmq/test.py
new file mode 100644
index 00000000000..475b89f6c60
--- /dev/null
+++ b/tests/integration/test_storage_rabbitmq/test.py
@@ -0,0 +1,123 @@
+import os.path as p
+import random
+import threading
+import time
+import pytest
+
+from random import randrange
+import pika
+from sys import getdefaultencoding
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import TSV
+from helpers.client import QueryRuntimeException
+from helpers.network import PartitionManager
+
+import json
+import subprocess
+
+from google.protobuf.internal.encoder import _VarintBytes
+
+cluster = ClickHouseCluster(__file__)
+instance = cluster.add_instance('instance',
+                                config_dir='configs',
+                                main_configs=['configs/rabbitmq.xml','configs/log_conf.xml'],
+                                with_rabbitmq=True)
+rabbitmq_id = ''
+
+
+# Helpers
+
+def check_rabbitmq_is_available():
+    p = subprocess.Popen(('docker',
+                          'exec',
+                          '-i',
+                          rabbitmq_id,
+                          'rabbitmqctl',
+                          'await_startup'),
+                         stdout=subprocess.PIPE)
+    p.communicate()
+    return p.returncode == 0
+
+
+def enable_consistent_hash_plugin():
+    p = subprocess.Popen(('docker',
+                          'exec',
+                          '-i',
+                          rabbitmq_id,
+                          "rabbitmq-plugins", "enable", "rabbitmq_consistent_hash_exchange"),
+                         stdout=subprocess.PIPE)
+    p.communicate()
+    return p.returncode == 0
+
+
+def wait_rabbitmq_is_available(max_retries=50):
+    retries = 0
+    while True:
+        if check_rabbitmq_is_available():
+            break
+        else:
+            retries += 1
+            if retries > max_retries:
+                # raising a plain string is a TypeError in Python; raise a real exception
+                raise Exception("RabbitMQ is not available")
+            print("Waiting for RabbitMQ to start up")
+            time.sleep(1)
+
+
+def wait_rabbitmq_plugin_enabled(max_retries=50):
+    retries = 0
+    while True:
+        if enable_consistent_hash_plugin():
+            break
+        else:
+            retries += 1
+            if retries > max_retries:
+                raise Exception("RabbitMQ plugin is not available")
+            print("Waiting for plugin")
+            time.sleep(1)
+
+
+def rabbitmq_check_result(result, check=False, ref_file='test_rabbitmq_json.reference'):
+    fpath = p.join(p.dirname(__file__), ref_file)
+    with open(fpath) as reference:
+        if check:
+            assert TSV(result) == TSV(reference)
+        else:
+            return TSV(result) == TSV(reference)
+
+
+# Fixtures
+
+@pytest.fixture(scope="module")
+def rabbitmq_cluster():
+    try:
+        global rabbitmq_id
+        cluster.start()
+        rabbitmq_id = instance.cluster.rabbitmq_docker_id
+        print("rabbitmq_id is {}".format(rabbitmq_id))
+        instance.query('CREATE DATABASE test')
+
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+@pytest.fixture(autouse=True)
+def rabbitmq_setup_teardown():
+    wait_rabbitmq_is_available()
+    wait_rabbitmq_plugin_enabled()
+    print("RabbitMQ is available - running test")
+    yield  # run test
+    instance.query('DROP TABLE IF EXISTS test.rabbitmq')
+
+
+# Tests
+
+
+
+if __name__ == '__main__':
+    cluster.start()
+    raw_input("Cluster created, press any key to destroy...")
+    cluster.shutdown()
+
diff --git a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference
new file mode 100644
index 00000000000..959bb2aad74
--- /dev/null
+++ b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference
@@ -0,0 +1,50 @@
+0 0
+1 1
+2 2
+3 3
+4 4
+5 5
+6 6
+7 7
+8 8
+9 9
+10 10
+11 11
+12 12
+13 13
+14 14
+15 15
+16 16
+17 17
+18 18
+19 19
+20 20
+21 21
+22 22
+23 23
+24 24
+25 25
+26 26
+27 27
+28 28
+29 29
+30 30
+31 31
+32 32
+33 33
+34 34
+35 35
+36 36
+37 37
+38 38
+39 39
+40 40
+41 41
+42 42
+43 43
+44 44
+45 45
+46 46
+47 47
+48 48
+49 49
From aeffab3fdb07f7d7ca60f3e9791181a8657c15d2 Mon Sep 17
00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 09:40:49 +0000 Subject: [PATCH 0144/2229] Enable SELECT and CREATE MV queries with engine RabbitMQ --- src/Storages/RabbitMQ/Buffer_fwd.h | 11 + .../RabbitMQ/RabbitMQBlockInputStream.cpp | 156 ++++++++++ .../RabbitMQ/RabbitMQBlockInputStream.h | 41 +++ src/Storages/RabbitMQ/RabbitMQHandler.cpp | 32 +++ src/Storages/RabbitMQ/RabbitMQHandler.h | 28 ++ src/Storages/RabbitMQ/RabbitMQSettings.cpp | 2 - src/Storages/RabbitMQ/RabbitMQSettings.h | 3 +- .../ReadBufferFromRabbitMQConsumer.cpp | 268 ++++++++++++++++++ .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 81 ++++++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 232 ++++++++++++++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 38 ++- 11 files changed, 883 insertions(+), 9 deletions(-) create mode 100644 src/Storages/RabbitMQ/Buffer_fwd.h create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockInputStream.h create mode 100644 src/Storages/RabbitMQ/RabbitMQHandler.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQHandler.h create mode 100644 src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp create mode 100644 src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h diff --git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h new file mode 100644 index 00000000000..f0ef010c518 --- /dev/null +++ b/src/Storages/RabbitMQ/Buffer_fwd.h @@ -0,0 +1,11 @@ +#pragma once + +#include + +namespace DB +{ + +class ReadBufferFromRabbitMQConsumer; +using ConsumerBufferPtr = std::shared_ptr; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp new file mode 100644 index 00000000000..89ea490e842 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -0,0 +1,156 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +RabbitMQBlockInputStream::RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, const Context & context_, const Names & columns, Poco::Logger * log_) + : storage(storage_) + , context(context_) + , column_names(columns) + , log(log_) + , non_virtual_header(storage.getSampleBlockNonMaterialized()) + , virtual_header(storage.getSampleBlockForColumns({"_exchange", "_routingKey"})) +{ +} + + +RabbitMQBlockInputStream::~RabbitMQBlockInputStream() +{ + if (!claimed) + return; + + storage.pushReadBuffer(buffer); +} + + +Block RabbitMQBlockInputStream::getHeader() const +{ + return storage.getSampleBlockForColumns(column_names); +} + + +void RabbitMQBlockInputStream::readPrefixImpl() +{ + auto timeout = std::chrono::milliseconds(context.getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); + + buffer = storage.popReadBuffer(timeout); + claimed = !!buffer; + + if (!buffer || finished) + return; + + buffer->subscribeConsumer(); +} + + +Block RabbitMQBlockInputStream::readImpl() +{ + if (!buffer || finished) + return Block(); + + finished = true; + + MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); + MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); + + auto input_format = FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, 1); + + InputPort port(input_format->getPort().getHeader(), input_format.get()); + connect(input_format->getPort(), port); + port.setNeeded(); + + auto read_rabbitmq_message = [&] + { + size_t new_rows = 0; + + while (true) + { + auto status = input_format->prepare(); + + switch 
(status) + { + case IProcessor::Status::Ready: + input_format->work(); + break; + + case IProcessor::Status::Finished: + input_format->resetParser(); + return new_rows; + + case IProcessor::Status::PortFull: + { + auto chunk = port.pull(); + + auto chunk_rows = chunk.getNumRows(); + new_rows += chunk_rows; + + auto columns = chunk.detachColumns(); + + for (size_t i = 0, s = columns.size(); i < s; ++i) + { + result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size()); + } + break; + } + case IProcessor::Status::NeedData: + case IProcessor::Status::Async: + case IProcessor::Status::Wait: + case IProcessor::Status::ExpandPipeline: + throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); + } + } + }; + + size_t total_rows = 0; + + while (true) + { + if (buffer->eof()) + break; + + auto new_rows = read_rabbitmq_message(); + + auto _exchange = storage.getExchangeName(); + auto _routingKey = storage.getRoutingKey(); + + for (size_t i = 0; i < new_rows; ++i) + { + virtual_columns[0]->insert(_exchange); + virtual_columns[1]->insert(_routingKey); + } + + total_rows = total_rows + new_rows; + buffer->allowNext(); + + if (!new_rows || !checkTimeLimit()) + break; + } + + if (total_rows == 0) + return Block(); + + auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); + auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); + + LOG_DEBUG(log, "Total amount of rows is " + std::to_string(result_block.rows())); + + for (const auto & column : virtual_block.getColumnsWithTypeAndName()) + { + result_block.insert(column); + } + + return ConvertingBlockInputStream( + std::make_shared(result_block), + getHeader(), + ConvertingBlockInputStream::MatchColumnsMode::Name) + .read(); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h new file mode 100644 index 00000000000..c82fd68a680 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +class RabbitMQBlockInputStream : public IBlockInputStream +{ + +public: + RabbitMQBlockInputStream( + StorageRabbitMQ & storage_, + const Context & context_, + const Names & columns, + Poco::Logger * log_); + + ~RabbitMQBlockInputStream() override; + + String getName() const override { return storage.getName(); } + Block getHeader() const override; + + void readPrefixImpl() override; + Block readImpl() override; + //void readSuffixImpl() override; + +private: + StorageRabbitMQ & storage; + Context context; + Names column_names; + Poco::Logger * log; + bool finished = false, claimed = false; + const Block non_virtual_header, virtual_header; + + ConsumerBufferPtr buffer; +}; + +} diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp new file mode 100644 index 00000000000..b18d6bf2cfb --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -0,0 +1,32 @@ +#include +#include + +namespace DB +{ + +RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : + LibEventHandler(evbase_), + evbase(evbase_), + log(log_) +{ +} + + +void RabbitMQHandler::onError(AMQP::TcpConnection * /*connection*/, const char * message) +{ + LOG_ERROR(log, "Library error report: " << message); + stop(); +} + + +void RabbitMQHandler::startNonBlock() +{ + event_base_loop(evbase, EVLOOP_NONBLOCK); +} + +void RabbitMQHandler::stop() +{ + 
event_base_loopbreak(evbase);
+}
+
+}
diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h
new file mode 100644
index 00000000000..94a559cad38
--- /dev/null
+++ b/src/Storages/RabbitMQ/RabbitMQHandler.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+class RabbitMQHandler : public AMQP::LibEventHandler
+{
+
+public:
+    RabbitMQHandler(event_base * evbase_, Poco::Logger * log_);
+
+    void onError(AMQP::TcpConnection * connection, const char * message) override;
+    void startNonBlock();
+    void stop();
+
+private:
+    event_base * evbase;
+    Poco::Logger * log;
+};
+
+}
diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.cpp b/src/Storages/RabbitMQ/RabbitMQSettings.cpp
index ed8d4ad801a..efb73396515 100644
--- a/src/Storages/RabbitMQ/RabbitMQSettings.cpp
+++ b/src/Storages/RabbitMQ/RabbitMQSettings.cpp
@@ -5,7 +5,6 @@
 #include
 #include
-
 namespace DB
 {
@@ -41,4 +40,3 @@ void RabbitMQSettings::loadFromQuery(ASTStorage & storage_def)
     }
 }
 }
-
diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h
index 0b0f58169fa..f4c62756703 100644
--- a/src/Storages/RabbitMQ/RabbitMQSettings.h
+++ b/src/Storages/RabbitMQ/RabbitMQSettings.h
@@ -1,4 +1,5 @@
 #pragma once
+
 #include

 namespace DB
@@ -14,7 +15,7 @@ namespace DB
     M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. Needed to bind queues to it.", 0) \
     M(SettingString, rabbitmq_format, "", "The message format.", 0) \
     M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \
-    M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers).", 0) \
+    M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers * num_queues).", 0) \
     M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \
     M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \
     M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \
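Before the consumer implementation itself, a minimal, self-contained sketch of the AMQP-CPP/libevent flow it is built on (declare an exchange, declare an exclusive queue, bind it, consume). This is illustrative only and not part of the patch; the host, port, credentials and names mirror the docker-compose test setup above and are assumptions:

    #include <amqpcpp.h>
    #include <amqpcpp/libevent.h>
    #include <event2/event.h>
    #include <cstdint>
    #include <iostream>
    #include <string>

    int main()
    {
        event_base * evbase = event_base_new();
        AMQP::LibEventHandler handler(evbase);

        // Credentials and address match docker_compose_rabbitmq.yml above (illustrative).
        AMQP::TcpConnection connection(&handler, AMQP::Address("amqp://root:clickhouse@localhost:5672/"));
        AMQP::TcpChannel channel(&connection);

        channel.declareExchange("clickhouse-exchange", AMQP::fanout);
        channel.declareQueue(AMQP::exclusive)
            .onSuccess([&](const std::string & queue, uint32_t /* messages */, uint32_t /* consumers */)
            {
                // Bind the freshly declared exclusive queue and start consuming from it.
                channel.bindQueue("clickhouse-exchange", queue, "logs");
                channel.consume(queue)
                    .onReceived([&](const AMQP::Message & message, uint64_t delivery_tag, bool /* redelivered */)
                    {
                        std::cout << std::string(message.body(), message.bodySize()) << '\n';
                        channel.ack(delivery_tag);
                    });
            });

        event_base_dispatch(evbase);  // blocks, running all AMQP callbacks on this loop
        event_base_free(evbase);
    }

The class below wraps this same flow, but drives the loop manually with EVLOOP_NONBLOCK (see RabbitMQHandler above) so that ClickHouse can interleave event processing with its own reads.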
+     * because when num_consumers > 1 the input streams run asynchronously; if they shared the same connection,
+     * they would also share the same event loop. Then, as soon as one stream's consumer started the event loop,
+     * it would run all callbacks on the connection - including the callbacks of other streams' consumers -
+     * so the same code would run concurrently, which led to occasional segfaults.
+     */
+    while (!connection.ready())
+    {
+        event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE);
+    }
+
+    consumer_channel = std::make_shared(&connection);
+
+    messages.clear();
+    current = messages.begin();
+
+    /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added.
+     * By default there is one queue per consumer.
+     */
+    for (size_t queue_id = 0; queue_id < num_queues; ++queue_id)
+    {
+        initQueueBindings(queue_id);
+    }
+}
+
+
+ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer()
+{
+    connection.close();
+
+    messages.clear();
+    current = messages.begin();
+    BufferBase::set(nullptr, 0, 0);
+}
+
+
+void ReadBufferFromRabbitMQConsumer::initExchange()
+{
+    /* There are 5 different types of exchanges, and the type has to be set as a parameter while publishing the message,
+     * so for uniformity this parameter is always set to the fanout type. In the current implementation, the exchange
+     * to which messages are published is bound to an exchange of the needed type, which then distributes messages according to its type.
+     */
+    consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message)
+    {
+        exchange_declared = false;
+        LOG_ERROR(log, "Failed to declare fanout exchange: " << message);
+    });
+
+    if (hash_exchange)
+    {
+        current_exchange_name = exchange_name + "_hash";
+        consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * message)
+        {
+            exchange_declared = false;
+            LOG_ERROR(log, "Failed to declare consistent-hash exchange: " << message);
+        });
+
+        consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message)
+        {
+            exchange_declared = false;
+            LOG_ERROR(log, "Failed to bind exchanges: " << message);
+        });
+    }
+    else
+    {
+        current_exchange_name = exchange_name + "_direct";
+        consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * message)
+        {
+            exchange_declared = false;
+            LOG_ERROR(log, "Failed to declare direct exchange: " << message);
+        });
+
+        consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message)
+        {
+            exchange_declared = false;
+            LOG_ERROR(log, "Failed to bind exchanges: " << message);
+        });
+    }
+}
+
+
+void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
+{
+    if (!exchange_declared)
+    {
+        initExchange();
+        exchange_declared = true;
+    }
+
+    bool bindings_ok = false, bindings_error = false;
+
+    consumer_channel->declareQueue(AMQP::exclusive)
+    .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */)
+    {
+        queues.emplace_back(queue_name_);
+
+        String binding_key = routing_key;
+
+        if (bind_by_id && !hash_exchange)
+        {
+            if (queues.size() == 1)
+            {
+                binding_key = routing_key + "_" + std::to_string(channel_id);
+            }
+            else
+            {
+                binding_key = routing_key + "_" + std::to_string(channel_id + queue_id);
+            }
+        }
+
+        LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key);
+
+        consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key)
+        .onSuccess([&]
+        {
+            bindings_ok = true;
+        })
+        .onError([&](const char * message)
+        {
+            bindings_error = true;
+            LOG_ERROR(log, "Failed to create queue binding: " << message);
+        });
+    })
+    .onError([&](const char * message)
+    {
+        bindings_error = true;
+        LOG_ERROR(log, "Failed to declare queue on the channel: " << message);
+    });
+
+    while (!bindings_ok && !bindings_error)
+    {
+        startNonBlockEventLoop();
+    }
+}
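+
+/* Illustration of the binding scheme (not part of the implementation): with bind_by_id and num_queues = 2,
+ * the consumer with channel_id N binds its queues with keys "<routing_key>_N" and "<routing_key>_(N+1)",
+ * so a publisher shards messages across consumers by appending an id to the routing key
+ * (e.g. routing key "clickhouse_3") - this is exactly what the integration tests below do via pika.
+ */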
+
+
+void ReadBufferFromRabbitMQConsumer::subscribeConsumer()
+{
+    if (subscribed)
+        return;
+
+    LOG_TRACE(log, "Subscribing to " + std::to_string(queues.size()) + " queues");
+
+    for (auto & queue : queues)
+    {
+        subscribe(queue);
+    }
+
+    subscribed = true;
+}
+
+
+void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)
+{
+    bool consumer_ok = false, consumer_error = false;
+
+    consumer_channel->consume(queue_name, AMQP::noack)
+    .onSuccess([&](const std::string & consumer)
+    {
+        if (consumerTag.empty())
+            consumerTag = consumer;
+
+        consumer_ok = true;
+
+        LOG_TRACE(log, "Consumer " + consumerTag + " is subscribed to queue " + queue_name);
+    })
+    .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */)
+    {
+        size_t message_size = message.bodySize();
+
+        if (message_size && message.body() != nullptr)
+        {
+            String message_received = std::string(message.body(), message.body() + message_size);
+
+            if (row_delimiter != '\0')
+                message_received += row_delimiter;
+
+            //LOG_TRACE(log, "Consumer " + consumerTag + " received the message " + message_received);
+
+            received.push_back(message_received);
+        }
+    })
+    .onError([&](const char * message)
+    {
+        consumer_error = true;
+        LOG_ERROR(log, "Consumer failed: " << message);
+    });
+
+    while (!consumer_ok && !consumer_error)
+    {
+        startNonBlockEventLoop();
+    }
+}
+
+
+void ReadBufferFromRabbitMQConsumer::startNonBlockEventLoop()
+{
+    eventHandler.startNonBlock();
+}
+
+
+bool ReadBufferFromRabbitMQConsumer::nextImpl()
+{
+    if (stopped || !allowed)
+        return false;
+
+    if (current == messages.end())
+    {
+        if (received.empty())
+        {
+            /* Run the onReceived callbacks to save the messages that have been received by now
+             */
+            startNonBlockEventLoop();
+        }
+
+        if (received.empty())
+        {
+            LOG_TRACE(log, "Stalled");
+            return false;
+        }
+
+        messages.clear();
+        messages.swap(received);
+        current = messages.begin();
+    }
+
+    auto new_position = const_cast(current->data());
+    BufferBase::set(new_position, current->size(), 0);
+
+    ++current;
+    allowed = false;
+
+    return true;
+}
+
+}
diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
new file mode 100644
index 00000000000..7592fb53bfc
--- /dev/null
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
@@ -0,0 +1,81 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace Poco
+{
+    class Logger;
+}
+
+namespace DB
+{
+
+using ChannelPtr = std::shared_ptr;
+
+class ReadBufferFromRabbitMQConsumer : public ReadBuffer
+{
+
+public:
+    ReadBufferFromRabbitMQConsumer(
+            std::pair & parsed_address,
+            const String & exchange_name_,
+            const String & routing_key_,
+            const size_t channel_id_,
+            Poco::Logger * log_,
+            char row_delimiter_,
+            const bool bind_by_id_,
+            const bool hash_exchange_,
+            const size_t num_queues_,
+            const std::atomic & stopped_);
+
+    ~ReadBufferFromRabbitMQConsumer() override;
+
+    void allowNext() { allowed = true; } // Allow reading the next message.
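+    // nextImpl() serves one message per call and then waits until allowNext() is called again;
+    // RabbitMQBlockInputStream calls allowNext() after each message has been processed.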
+ void subscribeConsumer(); + +private: + using Messages = std::vector; + using Queues = std::vector; + + event_base * evbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + ChannelPtr consumer_channel; + + const String & exchange_name; + const String & routing_key; + const size_t channel_id; + const bool bind_by_id; + const bool hash_exchange; + + Poco::Logger * log; + char row_delimiter; + bool stalled = false; + bool allowed = true; + const std::atomic & stopped; + + std::atomic exchange_declared = false; + const size_t num_queues; + String consumerTag; // ID for the consumer + Queues queues; + bool subscribed = false; + String current_exchange_name; + + Messages received; + Messages messages; + Messages::iterator current; + + bool nextImpl() override; + + void initExchange(); + void initQueueBindings(const size_t queue_id); + void subscribe(const String & queue_name); + void startNonBlockEventLoop(); + +}; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 98e7e97e2e1..7e7da953d80 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -12,6 +13,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -26,11 +30,9 @@ #include #include #include - -#include -#include #include + namespace DB { @@ -42,6 +44,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } + StorageRabbitMQ::StorageRabbitMQ( const StorageID & table_id_, Context & context_, @@ -70,6 +73,228 @@ StorageRabbitMQ::StorageRabbitMQ( , semaphore(0, num_consumers_) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) { + rabbitmq_context.makeQueryContext(); + + setColumns(columns_); + task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); + task->deactivate(); + + /// Enable a different routing algorithm. 
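+    /// With bind_by_id each queue gets its own binding key '<routing_key>_<id>'
+    /// (see ReadBufferFromRabbitMQConsumer::initQueueBindings) instead of all queues sharing one key.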
+    bind_by_id = num_consumers > 1 || num_queues > 1 || bind_by_id;
+}
+
+
+Pipes StorageRabbitMQ::read(
+        const Names & column_names,
+        const SelectQueryInfo & /* query_info */,
+        const Context & context,
+        QueryProcessingStage::Enum /* processed_stage */,
+        size_t /* max_block_size */,
+        unsigned /* num_streams */)
+{
+    if (num_created_consumers == 0)
+        return {};
+
+    Pipes pipes;
+    pipes.reserve(num_created_consumers);
+
+    for (size_t i = 0; i < num_created_consumers; ++i)
+    {
+        pipes.emplace_back(std::make_shared(std::make_shared(
+                *this, context, column_names, log)));
+    }
+
+    LOG_DEBUG(log, "Starting reading " << pipes.size() << " streams");
+    return pipes;
+}
+
+
+void StorageRabbitMQ::startup()
+{
+    for (size_t i = 0; i < num_consumers; ++i)
+    {
+        try
+        {
+            pushReadBuffer(createReadBuffer());
+            ++num_created_consumers;
+        }
+        catch (const AMQP::Exception &)
+        {
+            tryLogCurrentException(log);
+        }
+    }
+
+    task->activateAndSchedule();
+}
+
+
+void StorageRabbitMQ::shutdown()
+{
+    stream_cancelled = true;
+
+    for (size_t i = 0; i < num_created_consumers; ++i)
+    {
+        auto buffer = popReadBuffer();
+    }
+
+    task->deactivate();
+}
+
+
+void StorageRabbitMQ::pushReadBuffer(ConsumerBufferPtr buffer)
+{
+    std::lock_guard lock(mutex);
+    buffers.push_back(buffer);
+    semaphore.set();
+}
+
+
+ConsumerBufferPtr StorageRabbitMQ::popReadBuffer()
+{
+    return popReadBuffer(std::chrono::milliseconds::zero());
+}
+
+
+ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeout)
+{
+    // Wait for the first free buffer
+    if (timeout == std::chrono::milliseconds::zero())
+        semaphore.wait();
+    else
+    {
+        if (!semaphore.tryWait(timeout.count()))
+            return nullptr;
+    }
+
+    // Take the first available buffer from the list
+    std::lock_guard lock(mutex);
+    auto buffer = buffers.back();
+    buffers.pop_back();
+
+    return buffer;
+}
+
+
+ConsumerBufferPtr StorageRabbitMQ::createReadBuffer()
+{
+    if (update_channel_id)
+        next_channel_id += num_queues;
+    update_channel_id = true;
+
+    return std::make_shared(parsed_address, exchange_name, routing_key, next_channel_id,
+            log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled);
+}
+
+
+bool StorageRabbitMQ::checkDependencies(const StorageID & table_id)
+{
+    // Check if all dependencies are attached
+    auto dependencies = DatabaseCatalog::instance().getDependencies(table_id);
+    if (dependencies.empty())
+        return true;
+
+    // Check if the dependencies are ready
+    for (const auto & db_tab : dependencies)
+    {
+        auto table = DatabaseCatalog::instance().tryGetTable(db_tab);
+        if (!table)
+            return false;
+
+        // If it is a materialized view, check its target table
+        auto * materialized_view = dynamic_cast(table.get());
+        if (materialized_view && !materialized_view->tryGetTargetTable())
+            return false;
+
+        // Check all its dependencies
+        if (!checkDependencies(db_tab))
+            return false;
+    }
+
+    return true;
+}
+
+
+void StorageRabbitMQ::threadFunc()
+{
+    try
+    {
+        auto table_id = getStorageID();
+        // Check if at least one direct dependency is attached
+        size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size();
+
+        if (dependencies_count)
+        {
+            // Keep streaming as long as there are attached views and streaming is not cancelled
+            while (!stream_cancelled && num_created_consumers > 0)
+            {
+                if (!checkDependencies(table_id))
+                    break;
+
+                LOG_DEBUG(log, "Started streaming to " << dependencies_count << " attached views");
+
+                if (!streamToViews())
+                    break;
+            }
+        }
+    }
+    catch (...)
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + /// Wait for attached views + if (!stream_cancelled) + task->scheduleAfter(500); +} + + +bool StorageRabbitMQ::streamToViews() +{ + auto table_id = getStorageID(); + auto table = DatabaseCatalog::instance().getTable(table_id); + if (!table) + throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + + InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); + auto block_io = interpreter.execute(); + + // Create a stream for each consumer and join them in a union stream + BlockInputStreams streams; + streams.reserve(num_created_consumers); + + for (size_t i = 0; i < num_created_consumers; ++i) + { + auto stream = std::make_shared(*this, rabbitmq_context, block_io.out->getHeader().getNames(), log); + streams.emplace_back(stream); + + // Limit read batch to maximum block size to allow DDL + IBlockInputStream::LocalLimits limits; + const Settings & settings = global_context.getSettingsRef(); + limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; + limits.timeout_overflow_mode = OverflowMode::BREAK; + stream->setLimits(limits); + } + + // Join multiple streams if necessary + BlockInputStreamPtr in; + if (streams.size() > 1) + in = std::make_shared(streams, nullptr, streams.size()); + else + in = streams[0]; + + std::atomic stub = {false}; + copyData(*in, *block_io.out, &stub); + + // Check whether the limits were applied during query execution + bool limits_applied = false; + const BlockStreamProfileInfo & info = in->getProfileInfo(); + limits_applied = info.hasAppliedLimit(); + + return limits_applied; } @@ -246,4 +471,3 @@ NamesAndTypesList StorageRabbitMQ::getVirtuals() const } } - diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 37b8c2b1078..8a3a48135b8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include @@ -6,8 +7,11 @@ #include #include #include +#include +#include #include + namespace DB { @@ -19,8 +23,30 @@ class StorageRabbitMQ final: public ext::shared_ptr_helper, pub public: std::string getName() const override { return "RabbitMQ"; } + bool supportsSettings() const override { return true; } + void startup() override; + void shutdown() override; + + Pipes read( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + void pushReadBuffer(ConsumerBufferPtr buf); + ConsumerBufferPtr popReadBuffer(); + ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + + const String & getExchangeName() const { return exchange_name; } + const String & getRoutingKey() const { return routing_key; } + + const String & getFormatName() const { return format_name; } + NamesAndTypesList getVirtuals() const override; + protected: StorageRabbitMQ( const StorageID & table_id_, @@ -31,6 +57,7 @@ protected: const String & format_name_, char row_delimiter_, size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange); + private: Context global_context; Context rabbitmq_context; @@ -42,7 +69,6 @@ private: char row_delimiter; size_t num_consumers; size_t num_created_consumers = 0; - bool bind_by_id; size_t 
num_queues; const bool hash_exchange; @@ -52,11 +78,19 @@ private: Poco::Semaphore semaphore; std::mutex mutex; + std::vector buffers; /// available buffers for RabbitMQ consumers - size_t consumer_id = 0; + size_t next_channel_id = 0; + bool update_channel_id = false; BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; + + ConsumerBufferPtr createReadBuffer(); + + void threadFunc(); + bool streamToViews(); + bool checkDependencies(const StorageID & table_id); }; } From 1760f01f74ee8d7fdc7ac39526ce1d041ff2fa2c Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 09:42:56 +0000 Subject: [PATCH 0145/2229] Add tests for RabbitMQ read-only part --- .../integration/test_storage_rabbitmq/test.py | 726 ++++++++++++++++++ 1 file changed, 726 insertions(+) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 475b89f6c60..815a84c1999 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -114,6 +114,732 @@ def rabbitmq_setup_teardown(): # Tests +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'new', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(25): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + messages = [] + for i in range(25, 50): + messages.append(json.dumps({'key': i, 'value': i})) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message) + + result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=False) + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_from_old_syntax_table(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ('rabbitmq1:5672', 'old', 'clickhouse-exchange', 'JSONEachRow', '\\n'); + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append(json.dumps({'key': i, 'value': i})) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='old', body=message) + + result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_select_empty(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + 
rabbitmq_routing_key = 'empty', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + assert int(instance.query('SELECT count() FROM test.rabbitmq')) == 0 + + +@pytest.mark.timeout(180) +def test_rabbitmq_json_without_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'json', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'JSONEachRow' + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = '' + for i in range(25): + messages += json.dumps({'key': i, 'value': i}) + '\n' + + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + messages = '' + for i in range(25, 50): + messages += json.dumps({'key': i, 'value': i}) + '\n' + all_messages = [messages] + for message in all_messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='json', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'csv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'CSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}, {i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='csv', body=message) + + result = '' + while True: + result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True) + if rabbitmq_check_result(result): + break + + + connection.close() + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(180) +def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'tsv', + rabbitmq_exchange_name = 'clickhouse-exchange', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for i in range(50): + messages.append('{i}\t{i}'.format(i=i)) + + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key='tsv', body=message) + + 
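+    # Note: a single SELECT returns only the rows consumed so far; the CSV and JSON tests above
+    # poll in a loop instead, which is less sensitive to timing.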
    result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
+
+    connection.close()
+    rabbitmq_check_result(result, True)
+
+
+@pytest.mark.timeout(180)
+def test_rabbitmq_materialized_view(rabbitmq_cluster):
+    instance.query('''
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
+            ENGINE = RabbitMQ
+            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                     rabbitmq_routing_key = 'mv',
+                     rabbitmq_format = 'JSONEachRow',
+                     rabbitmq_row_delimiter = '\\n';
+        CREATE TABLE test.view (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM test.rabbitmq;
+    ''')
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+    connection = pika.BlockingConnection(parameters)
+    channel = connection.channel()
+
+    messages = []
+    for i in range(50):
+        messages.append(json.dumps({'key': i, 'value': i}))
+    for message in messages:
+        channel.basic_publish(exchange='clickhouse-exchange', routing_key='mv', body=message)
+
+    while True:
+        result = instance.query('SELECT * FROM test.view')
+        if rabbitmq_check_result(result):
+            break
+
+    instance.query('''
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+    ''')
+
+    connection.close()
+    rabbitmq_check_result(result, True)
+
+
+@pytest.mark.timeout(180)
+def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster):
+    instance.query('''
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
+            ENGINE = RabbitMQ
+            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                     rabbitmq_routing_key = 'mvsq',
+                     rabbitmq_format = 'JSONEachRow',
+                     rabbitmq_row_delimiter = '\\n';
+        CREATE TABLE test.view (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM (SELECT * FROM test.rabbitmq);
+    ''')
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+    connection = pika.BlockingConnection(parameters)
+    channel = connection.channel()
+
+    messages = []
+    for i in range(50):
+        messages.append(json.dumps({'key': i, 'value': i}))
+    for message in messages:
+        channel.basic_publish(exchange='clickhouse-exchange', routing_key='mvsq', body=message)
+
+    while True:
+        result = instance.query('SELECT * FROM test.view')
+        if rabbitmq_check_result(result):
+            break
+
+    instance.query('''
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+    ''')
+
+    connection.close()
+    rabbitmq_check_result(result, True)
+
+
+@pytest.mark.timeout(180)
+def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
+    instance.query('''
+        DROP TABLE IF EXISTS test.view1;
+        DROP TABLE IF EXISTS test.view2;
+        DROP TABLE IF EXISTS test.consumer1;
+        DROP TABLE IF EXISTS test.consumer2;
+        CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
+            ENGINE = RabbitMQ
+            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                     rabbitmq_routing_key = 'mmv',
+                     rabbitmq_format = 'JSONEachRow',
+                     rabbitmq_row_delimiter = '\\n';
+        CREATE TABLE test.view1 (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE TABLE test.view2 (key UInt64, value UInt64)
+            ENGINE = MergeTree()
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS
+            SELECT * FROM test.rabbitmq;
+        CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS
+            SELECT * FROM test.rabbitmq;
+    ''')
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+    connection = pika.BlockingConnection(parameters)
+    channel = connection.channel()
+
+    messages = []
+    for i in range(50):
+        messages.append(json.dumps({'key': i, 'value': i}))
+    for message in messages:
+        channel.basic_publish(exchange='clickhouse-exchange', routing_key='mmv', body=message)
+
+    while True:
+        result1 = instance.query('SELECT * FROM test.view1')
+        result2 = instance.query('SELECT * FROM test.view2')
+        if rabbitmq_check_result(result1) and rabbitmq_check_result(result2):
+            break
+
+    instance.query('''
+        DROP TABLE test.consumer1;
+        DROP TABLE test.consumer2;
+        DROP TABLE test.view1;
+        DROP TABLE test.view2;
+    ''')
+
+    rabbitmq_check_result(result1, True)
+    rabbitmq_check_result(result2, True)
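+
+# The tests above all repeat the same pika publishing boilerplate. A minimal helper could factor it
+# out - the sketch below is hypothetical (it is not defined or used anywhere in this file) and
+# assumes the same `parameters` object that each test builds from pika.ConnectionParameters:
+#
+#   def publish_json(parameters, routing_key, rows):
+#       connection = pika.BlockingConnection(parameters)
+#       channel = connection.channel()
+#       channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout')
+#       for row in rows:
+#           channel.basic_publish(exchange='clickhouse-exchange', routing_key=routing_key, body=json.dumps(row))
+#       connection.close()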
+
+
+@pytest.mark.timeout(240)
+def test_rabbitmq_big_message(rabbitmq_cluster):
+    # Create batches of messages of size ~100Kb
+    rabbitmq_messages = 1000
+    batch_messages = 1000
+    messages = [json.dumps({'key': i, 'value': 'x' * 100}) * batch_messages for i in range(rabbitmq_messages)]
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+    connection = pika.BlockingConnection(parameters)
+    channel = connection.channel()
+
+    instance.query('''
+        DROP TABLE IF EXISTS test.view;
+        DROP TABLE IF EXISTS test.consumer;
+        CREATE TABLE test.rabbitmq (key UInt64, value String)
+            ENGINE = RabbitMQ
+            SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                     rabbitmq_routing_key = 'big',
+                     rabbitmq_format = 'JSONEachRow';
+        CREATE TABLE test.view (key UInt64, value String)
+            ENGINE = MergeTree
+            ORDER BY key;
+        CREATE MATERIALIZED VIEW test.consumer TO test.view AS
+            SELECT * FROM test.rabbitmq;
+    ''')
+
+    for message in messages:
+        channel.basic_publish(exchange='clickhouse-exchange', routing_key='big', body=message)
+
+    while True:
+        result = instance.query('SELECT count() FROM test.view')
+        if int(result) == batch_messages * rabbitmq_messages:
+            break
+
+    connection.close()
+    instance.query('''
+        DROP TABLE test.consumer;
+        DROP TABLE test.view;
+    ''')
+
+    assert int(result) == rabbitmq_messages * batch_messages, 'ClickHouse lost some messages: {}'.format(result)
+
+
+@pytest.mark.timeout(320)
+def test_rabbitmq_sharding_between_tables(rabbitmq_cluster):
+
+    NUMBER_OF_CONCURRENT_CONSUMERS = 10
+
+    instance.query('''
+        DROP TABLE IF EXISTS test.destination;
+        CREATE TABLE test.destination(key UInt64, value UInt64,
+            _consumed_by LowCardinality(String))
+            ENGINE = MergeTree()
+            ORDER BY key;
+    ''')
+
+    for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS):
+        table_name = 'rabbitmq_consumer{}'.format(consumer_id)
+        print("Setting up {}".format(table_name))
+
+        instance.query('''
+            DROP TABLE IF EXISTS test.{0};
+            DROP TABLE IF EXISTS test.{0}_mv;
+            CREATE TABLE test.{0} (key UInt64, value UInt64)
+                ENGINE = RabbitMQ
+                SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
+                         rabbitmq_hash_exchange = 1,
+                         rabbitmq_format = 'JSONEachRow',
+                         rabbitmq_row_delimiter = '\\n';
+            CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS
+                SELECT key, value, '{0}' as _consumed_by FROM test.{0};
+        '''.format(table_name))
+
+    i = [0]
+    messages_num = 1000
+
+    credentials = pika.PlainCredentials('root', 'clickhouse')
+    parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials)
+
+    def produce():
+        # init connection here because otherwise
python rabbitmq client fails sometimes + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'topic_' + str(randrange(0, NUMBER_OF_CONCURRENT_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + time.sleep(1) + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): + print("dropping rabbitmq_consumer{}".format(consumer_id)) + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + '''.format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): + + NUM_CHANNELS = 5 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'clickhouse', + rabbitmq_num_consumers = 5, + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_CHANNELS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): + + NUM_QUEUES = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 
4, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_QUEUES)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster): + + NUM_CONSUMERS = 10 + NUM_QUEUES = 2 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_queues = 2, + rabbitmq_num_consumers = 10, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + DROP TABLE IF EXISTS test.view; + DROP TABLE IF EXISTS test.consumer; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM test.rabbitmq; + ''') + + time.sleep(1) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_QUEUES * NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_read_only_combo(rabbitmq_cluster): + 
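+    """Fan-in check: one RabbitMQ table with 4 consumer channels feeds 5 materialized views;
+    every view must end up with every published message."""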
+ NUM_MV = 5; + NUM_CONSUMERS = 4 + + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_routing_key = 'clickhouse', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + ''') + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + print("Setting up {}".format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.{0} AS + SELECT * FROM test.rabbitmq; + '''.format(table_name)) + + time.sleep(2) + + i = [0] + messages_num = 10000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + def produce(): + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = 'clickhouse_' + str(randrange(0, NUM_CONSUMERS)) + for message in messages: + channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 20 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = 0 + for view in range(NUM_MV): + result += int(instance.query('SELECT count() FROM test.view{0}'.format(view))) + if int(result) == messages_num * threads_num * NUM_MV: + break + time.sleep(1) + + for thread in threads: + thread.join() + + for mv_id in range(NUM_MV): + table_name = 'view{}'.format(mv_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + '''.format(table_name)) + + + assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) if __name__ == '__main__': From 4fb6492b08538d42089eca6b2b3322c843869b92 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 20 May 2020 15:02:02 +0300 Subject: [PATCH 0146/2229] in-memory parts: delay merges --- .../MergeTree/MergeTreeBlockOutputStream.cpp | 22 +++++++++++++++---- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++++++++++ src/Storages/MergeTree/MergeTreeData.h | 19 ++++++++++++++++ .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- .../MergeTreeDataPartWriterInMemory.cpp | 7 +++--- .../MergeTree/MergeTreeWriteAheadLog.cpp | 5 ++++- src/Storages/StorageMergeTree.cpp | 7 ++++-- 7 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 4f9500f973e..67ba2ba8d3e 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -31,11 +32,24 @@ void MergeTreeBlockOutputStream::write(const Block & block) PartLog::addNewPart(storage.global_context, part, watch.elapsed()); - if (isInMemoryPart(part) && storage.getSettings()->in_memory_parts_insert_sync) + if (auto * part_in_memory = dynamic_cast(part.get())) { - if (!part->waitUntilMerged(in_memory_parts_timeout)) - throw Exception("Timeout exceeded while waiting to write part 
" - + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); + storage.in_memory_merges_throttler.add(part_in_memory->block.bytes(), part_in_memory->rows_count); + + auto settings = storage.getSettings(); + if (settings->in_memory_parts_insert_sync) + { + if (!part->waitUntilMerged(in_memory_parts_timeout)) + throw Exception("Timeout exceeded while waiting to write part " + + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); + } + else if (storage.merging_mutating_task_handle && !storage.in_memory_merges_throttler.needDelayMerge()) + { + storage.in_memory_merges_throttler.reset(); + storage.merging_mutating_task_handle->wake(); + } + + return; } /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index bbe051a2476..2de2bb8656b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -142,6 +142,7 @@ MergeTreeData::MergeTreeData( , data_parts_by_info(data_parts_indexes.get()) , data_parts_by_state_and_info(data_parts_indexes.get()) , parts_mover(this) + , in_memory_merges_throttler(storage_settings.get()->min_bytes_for_compact_part, storage_settings.get()->min_rows_for_compact_part) { if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); @@ -3673,4 +3674,24 @@ NamesAndTypesList MergeTreeData::getVirtuals() const NameAndTypePair("_sample_factor", std::make_shared()), }; } + +bool MergeTreeData::MergesThrottler::needDelayMerge() const +{ + std::lock_guard lock(mutex); + return (!max_bytes || have_bytes < max_bytes) && (!max_rows || have_rows < max_rows); +} + +void MergeTreeData::MergesThrottler::add(size_t bytes, size_t rows) +{ + std::lock_guard lock(mutex); + have_bytes += bytes; + have_rows += rows; +} + +void MergeTreeData::MergesThrottler::reset() +{ + have_bytes = 0; + have_rows = 0; +} + } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index ad83c2bf0d7..56e9ab1da7a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -921,6 +921,25 @@ protected: bool areBackgroundMovesNeeded() const; + struct MergesThrottler + { + mutable std::mutex mutex; + size_t have_bytes = 0; + size_t have_rows = 0; + + size_t max_bytes; + size_t max_rows; + + MergesThrottler(size_t max_bytes_, size_t max_rows_) + : max_bytes(max_bytes_), max_rows(max_rows_) {} + + bool needDelayMerge() const; + void add(size_t bytes, size_t rows); + void reset(); + }; + + MergesThrottler in_memory_merges_throttler; + private: /// RAII Wrapper for atomic work with currently moving parts /// Acuire them in constructor and remove them in destructor diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index ff56b407998..1420e7d1bff 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -998,7 +998,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor commands_for_part.emplace_back(command); } - if (!isStorageTouchedByMutations(storage_from_source_part, commands_for_part, context_for_reading)) + if (source_part->isStoredOnDisk() && !isStorageTouchedByMutations(storage_from_source_part, commands_for_part, context_for_reading)) { LOG_TRACE(log, "Part " << source_part->name << " doesn't change 
up to mutation version " << future_part.part_info.mutation); return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 39e9757d81c..88c53107acf 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -45,6 +45,9 @@ void MergeTreeDataPartWriterInMemory::write( result_block.insert(block.getByName(col.name)); } + index_granularity.appendMark(result_block.rows()); + if (with_final_mark) + index_granularity.appendMark(0); part->block = std::move(result_block); block_written = true; } @@ -55,10 +58,6 @@ void MergeTreeDataPartWriterInMemory::calculateAndSerializePrimaryIndex(const Bl if (!rows) return; - index_granularity.appendMark(rows); - if (with_final_mark) - index_granularity.appendMark(0); - size_t primary_columns_num = primary_index_block.columns(); index_columns.resize(primary_columns_num); for (size_t i = 0; i < primary_columns_num; ++i) diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 02c45dcfb64..5ceedbe1f55 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -11,6 +11,7 @@ namespace ErrorCodes { extern const int UNKNOWN_FORMAT_VERSION; extern const int CANNOT_READ_ALL_DATA; + extern const int BAD_DATA_PART_NAME; } @@ -95,7 +96,9 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() } catch (const Exception & e) { - if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION) + if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA + || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION + || e.code() == ErrorCodes::BAD_DATA_PART_NAME) { LOG_WARNING(&Logger::get(storage.getLogName() + " (WriteAheadLog)"), "WAL file '" << path << "' is broken. " << e.displayText()); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 37044a9780b..ad2e0317882 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -822,8 +822,11 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::mergeMutateTask() ///TODO: read deduplicate option from table config if (merge(false /*aggressive*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/)) - return BackgroundProcessingPoolTaskResult::SUCCESS; - + { + return in_memory_merges_throttler.needDelayMerge() + ? 
BackgroundProcessingPoolTaskResult::NOTHING_TO_DO + : BackgroundProcessingPoolTaskResult::SUCCESS; + } if (tryMutatePart()) return BackgroundProcessingPoolTaskResult::SUCCESS; From fe8d285e11061efa82e7f92aabd2b5d976fc36fe Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 12:52:16 +0000 Subject: [PATCH 0147/2229] Fix libevent build --- contrib/libevent-cmake/{ => linux}/evconfig-private.h | 0 contrib/libevent-cmake/{ => linux/event2}/event-config.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename contrib/libevent-cmake/{ => linux}/evconfig-private.h (100%) rename contrib/libevent-cmake/{ => linux/event2}/event-config.h (100%) diff --git a/contrib/libevent-cmake/evconfig-private.h b/contrib/libevent-cmake/linux/evconfig-private.h similarity index 100% rename from contrib/libevent-cmake/evconfig-private.h rename to contrib/libevent-cmake/linux/evconfig-private.h diff --git a/contrib/libevent-cmake/event-config.h b/contrib/libevent-cmake/linux/event2/event-config.h similarity index 100% rename from contrib/libevent-cmake/event-config.h rename to contrib/libevent-cmake/linux/event2/event-config.h From 8a9064cef2b82fdb214359bb784f0c697919b810 Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 20 May 2020 21:23:35 +0300 Subject: [PATCH 0148/2229] at start --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +--- src/Parsers/ASTColumnDeclaration.cpp | 14 ++++++++++++ src/Parsers/ASTColumnDeclaration.h | 4 ++-- src/Parsers/ParserCreateQuery.h | 24 +++++++++++++-------- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d4609477c1e..ec83c3f9c7f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -274,8 +274,6 @@ ASTPtr InterpreterCreateQuery::formatConstraints(const ConstraintsDescription & ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns_ast, const Context & context) { - Settings set; - /// First, deduce implicit types. /** all default_expressions as a single expression list, @@ -307,7 +305,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; } - if (set.data_type_default_nullable && !column_type->isNullable()) + if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNotNULL) column_type = makeNullable(column_type); column_names_and_types.emplace_back(col_decl.name, column_type); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c43a4b554a4..a6c4b819fdf 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -19,11 +19,13 @@ ASTPtr ASTColumnDeclaration::clone() const if (isNULL) { res->isNULL = isNULL; + res->children.push_back(res->isNULL); } if (isNULL) { res->isNotNULL = isNotNULL; + res->children.push_back(res->isNotNULL); } if (default_expression) @@ -69,6 +71,18 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta type->formatImpl(settings, state, frame); } + if (isNULL) + { + settings.ostr << ' '; + isNULL->formatImpl(settings, state, frame); + } + + if (isNotNULL) + { + settings.ostr << ' '; + isNotNULL->formatImpl(settings, state, frame); + } + if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? 
hilite_none : "") << ' '; diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 4816a433991..d3c50d453d5 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -13,8 +13,8 @@ class ASTColumnDeclaration : public IAST public: String name; ASTPtr type; - bool isNULL; - bool isNotNULL; + ASTPtr isNULL; + ASTPtr isNotNULL; String default_specifier; ASTPtr default_expression; ASTPtr comment; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 7cf2497b3fb..66a2005334d 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -128,6 +128,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserStringLiteral string_literal_parser; ParserCodec codec_parser; ParserExpression expression_parser; + ParserIdentifier null_parser; + ParserIdentifier not_null_parser; /// mandatory column name ASTPtr name; @@ -139,8 +141,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E */ ASTPtr type; String default_specifier; - bool isNull = false; - bool isNotNull = false; + ASTPtr isNull; + ASTPtr isNotNull; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -169,12 +171,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - if (s_null.checkWithoutMoving(pos, expected)) { - if (s_null.check(pos, expected)) - isNull = true; - } else if (s_not_null.checkWithoutMoving(pos, expected)) { - if (s_not_null.check(pos, expected)) - isNotNull = true; + Pos pos_before_null = pos; + + if (s_null.check(pos, expected)) { + if (!null_parser.parse(pos_before_null, isNull, expected)) + return false; + } else if (s_not_null.check(pos, expected)) { + if (!not_null_parser.parse(pos_before_null, isNotNull, expected)) + return false; } if (s_comment.ignore(pos, expected)) @@ -208,10 +212,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (isNull) { column_declaration->isNULL = isNull; + column_declaration->children.push_back(std::move(isNull)); } - if (isNull) { + if (isNotNull) { column_declaration->isNotNULL = isNotNull; + column_declaration->children.push_back(std::move(isNotNull)); } if (default_expression) From c3569882bbcc33c047d9d9b9424bf06b9a50a3bf Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 20 May 2020 18:24:40 +0000 Subject: [PATCH 0149/2229] Update version of docker_compose_rabbitmq.yml --- docker/test/integration/compose/docker_compose_rabbitmq.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/compose/docker_compose_rabbitmq.yml b/docker/test/integration/compose/docker_compose_rabbitmq.yml index 7ebee3c0ea5..1e9c3777505 100644 --- a/docker/test/integration/compose/docker_compose_rabbitmq.yml +++ b/docker/test/integration/compose/docker_compose_rabbitmq.yml @@ -1,4 +1,4 @@ -version: '2.2' +version: '2.3' services: rabbitmq1: From 4b30b3168cd8675f50c2741bb89c90bac6b08428 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 19 May 2020 20:48:28 +0300 Subject: [PATCH 0150/2229] try fix build --- .gitmodules | 11 +++- cmake/find_cassandra.cmake | 8 ++- contrib/CMakeLists.txt | 1 + contrib/cassandra | 2 +- contrib/libuv | 1 + src/CMakeLists.txt | 5 ++ .../CassandraBlockInputStream.cpp | 60 +++++++++---------- .../CassandraDictionarySource.cpp | 51 ++++++++-------- src/Dictionaries/ya.make | 4 +- src/Functions/ya.make | 2 +- 
.../external_sources.py | 2 +- 11 files changed, 83 insertions(+), 64 deletions(-) create mode 160000 contrib/libuv diff --git a/.gitmodules b/.gitmodules index f5d4b8340fa..b3a624aaae9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -95,9 +95,6 @@ [submodule "contrib/rapidjson"] path = contrib/rapidjson url = https://github.com/Tencent/rapidjson -[submodule "contrib/cassandra"] - path = contrib/cassandra - url = https://github.com/datastax/cpp-driver.git [submodule "contrib/fastops"] path = contrib/fastops url = https://github.com/ClickHouse-Extras/fastops @@ -160,3 +157,11 @@ [submodule "contrib/openldap"] path = contrib/openldap url = https://github.com/openldap/openldap.git +[submodule "contrib/cassandra"] + path = contrib/cassandra + url = https://github.com/tavplubix/cpp-driver.git + branch = ch-tmp +[submodule "contrib/libuv"] + path = contrib/libuv + url = https://github.com/libuv/libuv.git + branch = v1.x diff --git a/cmake/find_cassandra.cmake b/cmake/find_cassandra.cmake index 7f7346ce545..951cfc88b11 100644 --- a/cmake/find_cassandra.cmake +++ b/cmake/find_cassandra.cmake @@ -1,10 +1,14 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) - if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") + message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message (WARNING "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") else() + set(LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") - set (CASSANDRA_LIBRARY cassandra) + set (LIBUV_LIBRARY uv_a) + set (CASSANDRA_LIBRARY cassandra_static) set (USE_CASSANDRA 1) set(CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 76da288991b..ce187038e2a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -319,5 +319,6 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) + add_subdirectory(libuv) add_subdirectory(cassandra) endif() diff --git a/contrib/cassandra b/contrib/cassandra index fd9b73d4acf..5c0f2a62bdc 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit fd9b73d4acfd85293ab304be64e2e1e2109e521d +Subproject commit 5c0f2a62bdc63dcc390d771c9afaa9dc34eb8e5b diff --git a/contrib/libuv b/contrib/libuv new file mode 160000 index 00000000000..cc51217a317 --- /dev/null +++ b/contrib/libuv @@ -0,0 +1 @@ +Subproject commit cc51217a317e96510fbb284721d5e6bc2af31e33 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 222a3e486f9..d713cec8b8a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -356,6 +356,11 @@ if (USE_OPENCL) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS}) endif () +if (USE_CASSANDRA) + dbms_target_link_libraries(PRIVATE ${CASSANDRA_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${CASS_INCLUDE_DIR}) +endif() + dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index f76c9dd93f6..73028e8d2b2 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -22,14 +22,14 
@@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession *session, - const std::string &query_str, + CassSession *session_, + const std::string &query_str_, const DB::Block &sample_block, - const size_t max_block_size) - : session{session} - , statement{cass_statement_new(query_str.c_str(), 0)} - , query_str{query_str} - , max_block_size{max_block_size} + const size_t max_block_size_) + : session(session_) + , statement(cass_statement_new(query_str_.c_str(), 0)) + , query_str(query_str_) + , max_block_size(max_block_size_) { cass_statement_set_paging_size(statement, max_block_size); this->has_more_pages = cass_true; @@ -51,77 +51,77 @@ namespace { switch (type) { - case ValueType::UInt8: + case ValueType::vtUInt8: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt16: + case ValueType::vtUInt16: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt32: + case ValueType::vtUInt32: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt64: + case ValueType::vtUInt64: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int8: + case ValueType::vtInt8: { cass_int8_t _value; cass_value_get_int8(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int16: + case ValueType::vtInt16: { cass_int16_t _value; cass_value_get_int16(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int32: + case ValueType::vtInt32: { cass_int32_t _value; cass_value_get_int32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int64: + case ValueType::vtInt64: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Float32: + case ValueType::vtFloat32: { cass_float_t _value; cass_value_get_float(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Float64: + case ValueType::vtFloat64: { cass_double_t _value; cass_value_get_double(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::String: + case ValueType::vtString: { const char* _value; size_t _value_length; @@ -129,21 +129,21 @@ namespace static_cast(column).insertData(_value, _value_length); break; } - case ValueType::Date: + case ValueType::vtDate: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME break; } - case ValueType::DateTime: + case ValueType::vtDateTime: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UUID: + case ValueType::vtUUID: { CassUuid _value; cass_value_get_uuid(value, &_value); @@ -166,7 +166,7 @@ namespace MutableColumns columns(description.sample_block.columns()); CassFuture* query_future = cass_session_execute(session, statement); - const CassResult* result = cass_future_get_result(query_future); + const CassResult* result_tmp = cass_future_get_result(query_future); if (result == nullptr) { const char* error_message; @@ -176,12 +176,12 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result); + const CassRow* row 
= cass_result_first_row(result_tmp); const CassValue* map = cass_row_get_column(row, 0); - CassIterator* iterator = cass_iterator_from_map(map); - while (cass_iterator_next(iterator)) { - const CassValue* _key = cass_iterator_get_map_key(iterator); - const CassValue* _value = cass_iterator_get_map_value(iterator); + CassIterator* iterator_tmp = cass_iterator_from_map(map); + while (cass_iterator_next(iterator_tmp)) { + const CassValue* _key = cass_iterator_get_map_key(iterator_tmp); + const CassValue* _value = cass_iterator_get_map_value(iterator_tmp); auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { @@ -194,13 +194,13 @@ namespace } } - has_more_pages = cass_result_has_more_pages(result); + has_more_pages = cass_result_has_more_pages(result_tmp); if (has_more_pages) { - cass_statement_set_paging_state(statement, result); + cass_statement_set_paging_state(statement, result_tmp); } - cass_result_free(result); + cass_result_free(result_tmp); return description.sample_block.cloneWithColumns(std::move(columns)); } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index cfd21510e69..c51ae2877d8 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -15,7 +15,8 @@ namespace DB const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - const Context & /* context */) -> DictionarySourcePtr { + const Context & /* context */, + bool /*check_config*/) -> DictionarySourcePtr { #if USE_CASSANDRA return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else @@ -49,42 +50,42 @@ namespace ErrorCodes static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure &dict_struct, - const std::string &host, - UInt16 port, - const std::string &user, - const std::string &password, - const std::string &method, - const std::string &db, - const DB::Block &sample_block) - : dict_struct{dict_struct} - , host{host} - , port{port} - , user{user} - , password{password} - , method{method} - , db{db} - , sample_block{sample_block} - , cluster{cass_cluster_new()} - , session{cass_session_new()} + const DB::DictionaryStructure & dict_struct_, + const std::string & host_, + UInt16 port_, + const std::string & user_, + const std::string & password_, + const std::string & method_, + const std::string & db_, + const DB::Block & sample_block_) + : dict_struct(dict_struct_) + , host(host_) + , port(port_) + , user(user_) + , password(password_) + , method(method_) + , db(db_) + , sample_block(sample_block_) + , cluster(cass_cluster_new()) + , session(cass_session_new()) { cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); } CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure &dict_struct, - const Poco::Util::AbstractConfiguration &config, - const std::string &config_prefix, - DB::Block &sample_block) + const DB::DictionaryStructure & dict_struct_, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DB::Block & sample_block_) : CassandraDictionarySource( - dict_struct, + dict_struct_, config.getString(config_prefix + ".host"), config.getUInt(config_prefix + ".port"), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + 
".password", ""), config.getString(config_prefix + ".method", ""), config.getString(config_prefix + ".db", ""), - sample_block) + sample_block_) { } diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index e47b55d5254..22703a3924d 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -16,6 +16,8 @@ SRCS( CacheDictionary_generate1.cpp CacheDictionary_generate2.cpp CacheDictionary_generate3.cpp + CassandraBlockInputStream.cpp + CassandraDictionarySource.cpp ClickHouseDictionarySource.cpp ComplexKeyCacheDictionary.cpp ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -24,8 +26,8 @@ SRCS( ComplexKeyCacheDictionary_generate3.cpp ComplexKeyCacheDictionary_setAttributeValue.cpp ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp - ComplexKeyHashedDictionary.cpp ComplexKeyDirectDictionary.cpp + ComplexKeyHashedDictionary.cpp DictionaryBlockInputStreamBase.cpp DictionaryFactory.cpp DictionarySourceFactory.cpp diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 8e53ffe493d..da9435148b6 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -281,10 +281,10 @@ SRCS( rand64.cpp randConstant.cpp rand.cpp + randomFixedString.cpp randomPrintableASCII.cpp randomString.cpp randomStringUTF8.cpp - randomFixedString.cpp regexpQuoteMeta.cpp registerFunctionsArithmetic.cpp registerFunctionsComparison.cpp diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 117eb7b7e6f..c90725f2f6a 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -2,7 +2,7 @@ import warnings import pymysql.cursors import pymongo -import cassandra +import cassandra.cluster import redis import aerospike from tzlocal import get_localzone From 6d8749b100eaf9144d24291ece74c8c644a9e991 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 May 2020 01:16:08 +0300 Subject: [PATCH 0151/2229] in-memory parts: faster computation of checksums --- src/Columns/ColumnAggregateFunction.cpp | 7 +++++++ src/Columns/ColumnAggregateFunction.h | 2 ++ src/Columns/ColumnArray.cpp | 6 ++++++ src/Columns/ColumnArray.h | 1 + src/Columns/ColumnConst.h | 5 +++++ src/Columns/ColumnDecimal.cpp | 6 ++++++ src/Columns/ColumnDecimal.h | 1 + src/Columns/ColumnFixedString.cpp | 6 ++++++ src/Columns/ColumnFixedString.h | 2 ++ src/Columns/ColumnFunction.h | 5 +++++ src/Columns/ColumnLowCardinality.cpp | 6 ++++++ src/Columns/ColumnLowCardinality.h | 2 ++ src/Columns/ColumnNullable.cpp | 6 ++++++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnString.h | 6 ++++++ src/Columns/ColumnTuple.cpp | 6 ++++++ src/Columns/ColumnTuple.h | 1 + src/Columns/ColumnVector.cpp | 6 ++++++ src/Columns/ColumnVector.h | 2 ++ src/Columns/IColumn.h | 2 ++ src/Columns/IColumnDummy.h | 4 ++++ src/Columns/IColumnUnique.h | 5 +++++ src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h | 2 +- 25 files changed, 92 insertions(+), 3 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 2f3a766b8f5..86cc3047230 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -309,6 +309,13 @@ void ColumnAggregateFunction::updateWeakHash32(WeakHash32 & hash) 
const } } +void ColumnAggregateFunction::updateHashFast(SipHash & hash) const +{ + /// Fallback to per-element hashing, as there is no faster way + for (size_t i = 0; i < size(); ++i) + updateHashWithValue(i, hash); +} + /// The returned size is less than real size. The reason is that some parts of /// aggregate function data may be allocated on shared arenas. These arenas are /// used for several blocks, and also may be updated concurrently from other diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index f257351a4d0..dfae7f6f774 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -162,6 +162,8 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; + size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 7dba8e857cc..a67e37c6258 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -250,6 +250,12 @@ void ColumnArray::updateWeakHash32(WeakHash32 & hash) const } } +void ColumnArray::updateHashFast(SipHash & hash) const +{ + offsets->updateHashFast(hash); + data->updateHashFast(hash); +} + void ColumnArray::insert(const Field & x) { const Array & array = DB::get(x); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 15a1d1bd91a..fcf9ea3fc12 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -62,6 +62,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 560d4d63a10..799bdc8b657 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -165,6 +165,11 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override + { + data->updateHashFast(hash); + } + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 5396389294a..1b32f1457c8 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -89,6 +89,12 @@ void ColumnDecimal::updateWeakHash32(WeakHash32 & hash) const } } +template +void ColumnDecimal::updateHashFast(SipHash & hash) const +{ + hash.update(reinterpret_cast(data.data()), size() * sizeof(data[0])); +} + template void ColumnDecimal::getPermutation(bool reverse, size_t limit, int , IColumn::Permutation & res) const { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 62e414a676b..d5aa9c9a2a6 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -106,6 +106,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; int compareAt(size_t n, 
size_t m, const IColumn & rhs_, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 57ae4cbdedf..d8bb31d8d11 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -124,6 +124,12 @@ void ColumnFixedString::updateWeakHash32(WeakHash32 & hash) const } } +void ColumnFixedString::updateHashFast(SipHash & hash) const +{ + hash.update(n); + hash.update(reinterpret_cast(chars.data()), size() * n); +} + template struct ColumnFixedString::less { diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 74c4f3c74f2..b04660ee0dd 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -110,6 +110,8 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; + int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override { const ColumnFixedString & rhs = assert_cast(rhs_); diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 1bde48559fe..bb4bccadae3 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -106,6 +106,11 @@ public: throw Exception("updateWeakHash32 is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + void updateHashFast(SipHash &) const override + { + throw Exception("updateHashFast is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void popBack(size_t) override { throw Exception("popBack is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index e87b3b4cbf6..c804ae9e1bf 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -257,6 +257,12 @@ void ColumnLowCardinality::updateWeakHash32(WeakHash32 & hash) const idx.updateWeakHash(hash, dict_hash); } +void ColumnLowCardinality::updateHashFast(SipHash & hash) const +{ + idx.getPositions()->updateHashFast(hash); + getDictionary().getNestedColumn()->updateHashFast(hash); +} + void ColumnLowCardinality::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e641cc177f3..996940aaa2d 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -92,6 +92,8 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash &) const override; + ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override { return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint)); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 55ce1401073..fe9e81605dc 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -64,6 +64,12 @@ void ColumnNullable::updateWeakHash32(WeakHash32 & hash) const hash_data[row] = old_hash_data[row]; } +void ColumnNullable::updateHashFast(SipHash & hash) const +{ + null_map->updateHashFast(hash); + nested_column->updateHashFast(hash); +} + MutableColumnPtr ColumnNullable::cloneResized(size_t new_size) const { MutableColumnPtr new_nested_col = getNestedColumn().cloneResized(new_size); diff --git a/src/Columns/ColumnNullable.h 
b/src/Columns/ColumnNullable.h index 5443d8b0187..4dfc0007e94 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -85,6 +85,7 @@ public: ColumnPtr replicate(const Offsets & replicate_offsets) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; void getExtremes(Field & min, Field & max) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 32116880014..53686b7f744 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -190,6 +190,12 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override + { + hash.update(reinterpret_cast(offsets.data()), size() * sizeof(offsets[0])); + hash.update(reinterpret_cast(chars.data()), size() * sizeof(chars[0])); + } + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 59552c67f14..b20e01caf76 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -194,6 +194,12 @@ void ColumnTuple::updateWeakHash32(WeakHash32 & hash) const column->updateWeakHash32(hash); } +void ColumnTuple::updateHashFast(SipHash & hash) const +{ + for (const auto & column : columns) + column->updateHashFast(hash); +} + void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length) { const size_t tuple_size = columns.size(); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 3533b602a1b..fde539e60da 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -62,6 +62,7 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override; void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 9b128fcffec..6a0693d646d 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -84,6 +84,12 @@ void ColumnVector::updateWeakHash32(WeakHash32 & hash) const } } +template +void ColumnVector::updateHashFast(SipHash & hash) const +{ + hash.update(reinterpret_cast(data.data()), size() * sizeof(data[0])); +} + template struct ColumnVector::less { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 3551efe890c..b86ee5d8093 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -162,6 +162,8 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; + void updateHashFast(SipHash & hash) const override; + size_t byteSize() const override { return data.size() * sizeof(data[0]); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 4af593bb658..7cba4b9918d 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -206,6 +206,8 @@ public: /// WeakHash32 must have the same size as column. 
virtual void updateWeakHash32(WeakHash32 & hash) const = 0; + virtual void updateHashFast(SipHash & hash) const = 0; + /** Removes elements that don't match the filter. * Is used in WHERE and HAVING operations. * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 00604fb87d0..0b9682a0fba 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -69,6 +69,10 @@ public: { } + void updateHashFast(SipHash & /*hash*/) const override + { + } + void insertFrom(const IColumn &, size_t) override { ++s; diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index af5d9878a3b..ee9342c1fdf 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -141,6 +141,11 @@ public: { throw Exception("Method updateWeakHash32 is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); } + + void updateHashFast(SipHash &) const override + { + throw Exception("Method updateHashFast is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); + } }; using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 88c53107acf..b31a0cbe51a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -81,7 +81,8 @@ static MergeTreeDataPartChecksum createUncompressedChecksum(size_t size, SipHash void MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) { SipHash hash; - part->block.updateHash(hash); + for (const auto & column : part->block) + column.column->updateHashFast(hash); checksums.files["data.bin"] = createUncompressedChecksum(part->block.bytes(), hash); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 0543e6420ee..db3d2cecf37 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -21,7 +21,7 @@ void MergeTreeDataPartWriterOnDisk::Stream::finalize() marks.next(); } -void MergeTreeDataPartWriterOnDisk::Stream::sync() +void MergeTreeDataPartWriterOnDisk::Stream::sync() const { plain_file->sync(); marks_file->sync(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bb54b964793..145db10c62c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -54,7 +54,7 @@ public: void finalize(); - void sync(); + void sync() const; void addToChecksums(IMergeTreeDataPart::Checksums & checksums); }; From f40fadc3d64381d5b85cefc047b996315ffe262c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 21 May 2020 18:29:18 +0300 Subject: [PATCH 0152/2229] fix segfault --- .../CassandraBlockInputStream.cpp | 21 ++++++++++--------- src/Dictionaries/CassandraBlockInputStream.h | 2 +- .../helpers/docker_compose_cassandra.yml | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 73028e8d2b2..471f6df3e28 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -40,7 +40,8 @@ 
CassandraBlockInputStream::CassandraBlockInputStream( CassandraBlockInputStream::~CassandraBlockInputStream() { if (iterator != nullptr) cass_iterator_free(iterator); - cass_result_free(result); + if (result) + cass_result_free(result); } namespace @@ -166,7 +167,7 @@ namespace MutableColumns columns(description.sample_block.columns()); CassFuture* query_future = cass_session_execute(session, statement); - const CassResult* result_tmp = cass_future_get_result(query_future); + result = cass_future_get_result(query_future); if (result == nullptr) { const char* error_message; @@ -176,12 +177,12 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result_tmp); + const CassRow* row = cass_result_first_row(result); const CassValue* map = cass_row_get_column(row, 0); - CassIterator* iterator_tmp = cass_iterator_from_map(map); - while (cass_iterator_next(iterator_tmp)) { - const CassValue* _key = cass_iterator_get_map_key(iterator_tmp); - const CassValue* _value = cass_iterator_get_map_value(iterator_tmp); + iterator = cass_iterator_from_map(map); + while (cass_iterator_next(iterator)) { + const CassValue* _key = cass_iterator_get_map_key(iterator); + const CassValue* _value = cass_iterator_get_map_value(iterator); auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { @@ -194,13 +195,13 @@ namespace } } - has_more_pages = cass_result_has_more_pages(result_tmp); + has_more_pages = cass_result_has_more_pages(result); if (has_more_pages) { - cass_statement_set_paging_state(statement, result_tmp); + cass_statement_set_paging_state(statement, result); } - cass_result_free(result_tmp); + cass_result_free(result); return description.sample_block.cloneWithColumns(std::move(columns)); } diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 2b7c3b68744..8af63745f17 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -31,7 +31,7 @@ namespace DB String query_str; const size_t max_block_size; ExternalResultDescription description; - const CassResult * result; + const CassResult * result = nullptr; cass_bool_t has_more_pages; CassIterator * iterator = nullptr; }; diff --git a/tests/integration/helpers/docker_compose_cassandra.yml b/tests/integration/helpers/docker_compose_cassandra.yml index bb6a0221c54..6bbedcc1130 100644 --- a/tests/integration/helpers/docker_compose_cassandra.yml +++ b/tests/integration/helpers/docker_compose_cassandra.yml @@ -1,4 +1,4 @@ -version: '2.2' +version: '2.3' services: cassandra1: image: cassandra From e6ca09e1343ad154abfd70c3e30ceb46a868f257 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 22 May 2020 17:19:33 +0300 Subject: [PATCH 0153/2229] fix build --- CMakeLists.txt | 2 +- .../cassandra.cmake} | 13 +- contrib/CMakeLists.txt | 2 +- contrib/cassandra | 2 +- contrib/libuv-cmake/CMakeLists.txt | 591 ++++++++++++++++++ src/CMakeLists.txt | 4 +- .../CassandraBlockInputStream.cpp | 96 +-- .../CassandraDictionarySource.cpp | 4 +- 8 files changed, 655 insertions(+), 59 deletions(-) rename cmake/{find_cassandra.cmake => find/cassandra.cmake} (63%) create mode 100644 contrib/libuv-cmake/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d774f9c9f5..54a88404579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,7 +364,7 @@ include (cmake/find/fastops.cmake) 
include (cmake/find/orc.cmake) include (cmake/find/avro.cmake) include (cmake/find/msgpack.cmake) -include (cmake/find_cassandra.cmake) +include (cmake/find/cassandra.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/cmake/find_cassandra.cmake b/cmake/find/cassandra.cmake similarity index 63% rename from cmake/find_cassandra.cmake rename to cmake/find/cassandra.cmake index 951cfc88b11..b1d76702cfa 100644 --- a/cmake/find_cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -4,13 +4,18 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message (WARNING "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") else() - set(LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") + set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") - set (LIBUV_LIBRARY uv_a) - set (CASSANDRA_LIBRARY cassandra_static) + if (USE_STATIC_LIBRARIES) + set (LIBUV_LIBRARY uv_a) + set (CASSANDRA_LIBRARY cassandra_static) + else() + set (LIBUV_LIBRARY uv) + set (CASSANDRA_LIBRARY cassandra) + endif() set (USE_CASSANDRA 1) - set(CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ce187038e2a..9c42993dda7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -319,6 +319,6 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) - add_subdirectory(libuv) + add_subdirectory(libuv-cmake) add_subdirectory(cassandra) endif() diff --git a/contrib/cassandra b/contrib/cassandra index 5c0f2a62bdc..bc593f2644a 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 5c0f2a62bdc63dcc390d771c9afaa9dc34eb8e5b +Subproject commit bc593f2644a6c50c4057459e242e214a6af70969 diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt new file mode 100644 index 00000000000..b84ce217f3b --- /dev/null +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -0,0 +1,591 @@ +cmake_minimum_required(VERSION 3.4) +project(libuv LANGUAGES C) + +cmake_policy(SET CMP0057 NEW) # Enable IN_LIST operator +cmake_policy(SET CMP0064 NEW) # Support if (TEST) operator + +#list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") + +include(CMakePackageConfigHelpers) +include(CMakeDependentOption) +include(CheckCCompilerFlag) +include(GNUInstallDirs) +include(CTest) + +set(CMAKE_C_VISIBILITY_PRESET hidden) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS ON) +set(CMAKE_C_STANDARD 90) + +#cmake_dependent_option(LIBUV_BUILD_TESTS +# "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON +# "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +#cmake_dependent_option(LIBUV_BUILD_BENCH +# "Build the benchmarks when building unit tests and we are the root project" ON +# "LIBUV_BUILD_TESTS" OFF) + +# Qemu Build +option(QEMU "build for qemu" OFF) +if(QEMU) + add_definitions(-D__QEMU__=1) +endif() + +# Compiler check +string(CONCAT is-msvc $, + $ +>) + +check_c_compiler_flag(/W4 UV_LINT_W4) +check_c_compiler_flag(/wd4100 UV_LINT_NO_UNUSED_PARAMETER_MSVC) +check_c_compiler_flag(/wd4127 UV_LINT_NO_CONDITIONAL_CONSTANT_MSVC) +check_c_compiler_flag(/wd4201 UV_LINT_NO_NONSTANDARD_MSVC) +check_c_compiler_flag(/wd4206 
UV_LINT_NO_NONSTANDARD_EMPTY_TU_MSVC) +check_c_compiler_flag(/wd4210 UV_LINT_NO_NONSTANDARD_FILE_SCOPE_MSVC) +check_c_compiler_flag(/wd4232 UV_LINT_NO_NONSTANDARD_NONSTATIC_DLIMPORT_MSVC) +check_c_compiler_flag(/wd4456 UV_LINT_NO_HIDES_LOCAL) +check_c_compiler_flag(/wd4457 UV_LINT_NO_HIDES_PARAM) +check_c_compiler_flag(/wd4459 UV_LINT_NO_HIDES_GLOBAL) +check_c_compiler_flag(/wd4706 UV_LINT_NO_CONDITIONAL_ASSIGNMENT_MSVC) +check_c_compiler_flag(/wd4996 UV_LINT_NO_UNSAFE_MSVC) + +check_c_compiler_flag(-Wall UV_LINT_WALL) # DO NOT use this under MSVC + +# TODO: Place these into its own function +check_c_compiler_flag(-Wno-unused-parameter UV_LINT_NO_UNUSED_PARAMETER) +check_c_compiler_flag(-Wstrict-prototypes UV_LINT_STRICT_PROTOTYPES) +check_c_compiler_flag(-Wextra UV_LINT_EXTRA) + +set(lint-no-unused-parameter $<$:-Wno-unused-parameter>) +set(lint-strict-prototypes $<$:-Wstrict-prototypes>) +set(lint-extra $<$:-Wextra>) +set(lint-w4 $<$:/W4>) +set(lint-no-unused-parameter-msvc $<$:/wd4100>) +set(lint-no-conditional-constant-msvc $<$:/wd4127>) +set(lint-no-nonstandard-msvc $<$:/wd4201>) +set(lint-no-nonstandard-empty-tu-msvc $<$:/wd4206>) +set(lint-no-nonstandard-file-scope-msvc $<$:/wd4210>) +set(lint-no-nonstandard-nonstatic-dlimport-msvc $<$:/wd4232>) +set(lint-no-hides-local-msvc $<$:/wd4456>) +set(lint-no-hides-param-msvc $<$:/wd4457>) +set(lint-no-hides-global-msvc $<$:/wd4459>) +set(lint-no-conditional-assignment-msvc $<$:/wd4706>) +set(lint-no-unsafe-msvc $<$:/wd4996>) +# Unfortunately, this one is complicated because MSVC and clang-cl support -Wall +# but using it is like calling -Weverything +string(CONCAT lint-default $< + $,$>:-Wall +>) + +list(APPEND uv_cflags ${lint-strict-prototypes} ${lint-extra} ${lint-default} ${lint-w4}) +list(APPEND uv_cflags ${lint-no-unused-parameter}) +list(APPEND uv_cflags ${lint-no-unused-parameter-msvc}) +list(APPEND uv_cflags ${lint-no-conditional-constant-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-empty-tu-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-file-scope-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-nonstatic-dlimport-msvc}) +list(APPEND uv_cflags ${lint-no-hides-local-msvc}) +list(APPEND uv_cflags ${lint-no-hides-param-msvc}) +list(APPEND uv_cflags ${lint-no-hides-global-msvc}) +list(APPEND uv_cflags ${lint-no-conditional-assignment-msvc}) +list(APPEND uv_cflags ${lint-no-unsafe-msvc}) + +set(uv_sources + src/fs-poll.c + src/idna.c + src/inet.c + src/random.c + src/strscpy.c + src/threadpool.c + src/timer.c + src/uv-common.c + src/uv-data-getter-setters.c + src/version.c) + +#if(WIN32) +# list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) +# list(APPEND uv_libraries +# psapi +# iphlpapi +# userenv +# ws2_32) +# list(APPEND uv_sources +# src/win/async.c +# src/win/core.c +# src/win/detect-wakeup.c +# src/win/dl.c +# src/win/error.c +# src/win/fs.c +# src/win/fs-event.c +# src/win/getaddrinfo.c +# src/win/getnameinfo.c +# src/win/handle.c +# src/win/loop-watcher.c +# src/win/pipe.c +# src/win/thread.c +# src/win/poll.c +# src/win/process.c +# src/win/process-stdio.c +# src/win/signal.c +# src/win/snprintf.c +# src/win/stream.c +# src/win/tcp.c +# src/win/tty.c +# src/win/udp.c +# src/win/util.c +# src/win/winapi.c +# src/win/winsock.c) +# list(APPEND uv_test_libraries ws2_32) +# list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) +#else() +list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) +if(NOT CMAKE_SYSTEM_NAME MATCHES 
"Android|OS390") + # TODO: This should be replaced with find_package(Threads) if possible + # Android has pthread as part of its c library, not as a separate + # libpthread.so. + list(APPEND uv_libraries pthread) +endif() +list(APPEND uv_sources + src/unix/async.c + src/unix/core.c + src/unix/dl.c + src/unix/fs.c + src/unix/getaddrinfo.c + src/unix/getnameinfo.c + src/unix/loop-watcher.c + src/unix/loop.c + src/unix/pipe.c + src/unix/poll.c + src/unix/process.c + src/unix/random-devurandom.c + src/unix/signal.c + src/unix/stream.c + src/unix/tcp.c + src/unix/thread.c + src/unix/tty.c + src/unix/udp.c) +list(APPEND uv_test_sources test/runner-unix.c) +#endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "AIX") + list(APPEND uv_defines + _ALL_SOURCE + _LINUX_SOURCE_COMPAT + _THREAD_SAFE + _XOPEN_SOURCE=500 + HAVE_SYS_AHAFS_EVPRODS_H) + list(APPEND uv_libraries perfstat) + list(APPEND uv_sources + src/unix/aix.c + src/unix/aix-common.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Android") + list(APPEND uv_libraries dl) + list(APPEND uv_sources + src/unix/android-ifaddrs.c + src/unix/linux-core.c + src/unix/linux-inotify.c + src/unix/linux-syscalls.c + src/unix/procfs-exepath.c + src/unix/pthread-fixes.c + src/unix/random-getentropy.c + src/unix/random-getrandom.c + src/unix/random-sysctl-linux.c + src/unix/sysinfo-loadavg.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS390") + list(APPEND uv_sources src/unix/proctitle.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD") + list(APPEND uv_sources src/unix/freebsd.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") + list(APPEND uv_sources src/unix/posix-hrtime.c src/unix/bsd-proctitle.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") + list(APPEND uv_sources src/unix/bsd-ifaddrs.c src/unix/kqueue.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + list(APPEND uv_sources src/unix/random-getrandom.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") + list(APPEND uv_sources src/unix/random-getentropy.c) +endif() + +if(APPLE) + list(APPEND uv_defines _DARWIN_UNLIMITED_SELECT=1 _DARWIN_USE_64_BIT_INODE=1) + list(APPEND uv_sources + src/unix/darwin-proctitle.c + src/unix/darwin.c + src/unix/fsevents.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) + list(APPEND uv_libraries dl rt) + list(APPEND uv_sources + src/unix/linux-core.c + src/unix/linux-inotify.c + src/unix/linux-syscalls.c + src/unix/procfs-exepath.c + src/unix/random-getrandom.c + src/unix/random-sysctl-linux.c + src/unix/sysinfo-loadavg.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") + list(APPEND uv_sources src/unix/netbsd.c) + list(APPEND uv_libraries kvm) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") + list(APPEND uv_sources src/unix/openbsd.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OS390") + list(APPEND uv_defines PATH_MAX=255) + list(APPEND uv_defines _AE_BIMODAL) + list(APPEND uv_defines _ALL_SOURCE) + list(APPEND uv_defines _ISOC99_SOURCE) + list(APPEND uv_defines _LARGE_TIME_API) + list(APPEND uv_defines _OPEN_MSGQ_EXT) + list(APPEND uv_defines _OPEN_SYS_FILE_EXT) + list(APPEND uv_defines _OPEN_SYS_IF_EXT) + list(APPEND uv_defines _OPEN_SYS_SOCK_EXT3) + list(APPEND uv_defines _OPEN_SYS_SOCK_IPV6) + list(APPEND uv_defines _UNIX03_SOURCE) + list(APPEND uv_defines _UNIX03_THREADS) + list(APPEND uv_defines _UNIX03_WITHDRAWN) + list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) + list(APPEND 
uv_sources + src/unix/pthread-fixes.c + src/unix/os390.c + src/unix/os390-syscalls.c) + list(APPEND uv_cflags -Wc,DLL -Wc,exportall -Wc,xplink) + list(APPEND uv_libraries -Wl,xplink) + list(APPEND uv_test_libraries -Wl,xplink) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OS400") + list(APPEND uv_defines + _ALL_SOURCE + _LINUX_SOURCE_COMPAT + _THREAD_SAFE + _XOPEN_SOURCE=500) + list(APPEND uv_sources + src/unix/aix-common.c + src/unix/ibmi.c + src/unix/no-fsevents.c + src/unix/no-proctitle.c + src/unix/posix-poll.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + list(APPEND uv_defines __EXTENSIONS__ _XOPEN_SOURCE=500) + list(APPEND uv_libraries kstat nsl sendfile socket) + list(APPEND uv_sources src/unix/no-proctitle.c src/unix/sunos.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|Linux|NetBSD|OpenBSD") + list(APPEND uv_test_libraries util) +endif() + +set(uv_sources_tmp "") +foreach(file ${uv_sources}) + list(APPEND uv_sources_tmp "${LIBUV_ROOT_DIR}/${file}") +endforeach(file) +set(uv_sources "${uv_sources_tmp}") + +add_library(uv SHARED ${uv_sources}) +target_compile_definitions(uv + INTERFACE + USING_UV_SHARED=1 + PRIVATE + BUILDING_UV_SHARED=1 + ${uv_defines}) +target_compile_options(uv PRIVATE ${uv_cflags}) +target_include_directories(uv + PUBLIC + $ + $ + PRIVATE + $) +target_link_libraries(uv ${uv_libraries}) + +add_library(uv_a STATIC ${uv_sources}) +target_compile_definitions(uv_a PRIVATE ${uv_defines}) +target_compile_options(uv_a PRIVATE ${uv_cflags}) +target_include_directories(uv_a + PUBLIC + $ + $ + PRIVATE + $) +target_link_libraries(uv_a ${uv_libraries}) + +#if(LIBUV_BUILD_TESTS) +# # Small hack: use ${uv_test_sources} now to get the runner skeleton, +# # before the actual tests are added. +# add_executable( +# uv_run_benchmarks_a +# ${uv_test_sources} +# test/benchmark-async-pummel.c +# test/benchmark-async.c +# test/benchmark-fs-stat.c +# test/benchmark-getaddrinfo.c +# test/benchmark-loop-count.c +# test/benchmark-million-async.c +# test/benchmark-million-timers.c +# test/benchmark-multi-accept.c +# test/benchmark-ping-pongs.c +# test/benchmark-ping-udp.c +# test/benchmark-pound.c +# test/benchmark-pump.c +# test/benchmark-sizes.c +# test/benchmark-spawn.c +# test/benchmark-tcp-write-batch.c +# test/benchmark-thread.c +# test/benchmark-udp-pummel.c +# test/blackhole-server.c +# test/dns-server.c +# test/echo-server.c +# test/run-benchmarks.c +# test/runner.c) +# target_compile_definitions(uv_run_benchmarks_a PRIVATE ${uv_defines}) +# target_compile_options(uv_run_benchmarks_a PRIVATE ${uv_cflags}) +# target_link_libraries(uv_run_benchmarks_a uv_a ${uv_test_libraries}) +# +# list(APPEND uv_test_sources +# test/blackhole-server.c +# test/echo-server.c +# test/run-tests.c +# test/runner.c +# test/test-active.c +# test/test-async-null-cb.c +# test/test-async.c +# test/test-barrier.c +# test/test-callback-order.c +# test/test-callback-stack.c +# test/test-close-fd.c +# test/test-close-order.c +# test/test-condvar.c +# test/test-connect-unspecified.c +# test/test-connection-fail.c +# test/test-cwd-and-chdir.c +# test/test-default-loop-close.c +# test/test-delayed-accept.c +# test/test-dlerror.c +# test/test-eintr-handling.c +# test/test-embed.c +# test/test-emfile.c +# test/test-env-vars.c +# test/test-error.c +# test/test-fail-always.c +# test/test-fork.c +# test/test-fs-copyfile.c +# test/test-fs-event.c +# test/test-fs-poll.c +# test/test-fs.c +# test/test-fs-readdir.c +# test/test-fs-fd-hash.c +# test/test-fs-open-flags.c +# 
test/test-get-currentexe.c +# test/test-get-loadavg.c +# test/test-get-memory.c +# test/test-get-passwd.c +# test/test-getaddrinfo.c +# test/test-gethostname.c +# test/test-getnameinfo.c +# test/test-getsockname.c +# test/test-getters-setters.c +# test/test-gettimeofday.c +# test/test-handle-fileno.c +# test/test-homedir.c +# test/test-hrtime.c +# test/test-idle.c +# test/test-idna.c +# test/test-ip4-addr.c +# test/test-ip6-addr.c +# test/test-ipc-heavy-traffic-deadlock-bug.c +# test/test-ipc-send-recv.c +# test/test-ipc.c +# test/test-loop-alive.c +# test/test-loop-close.c +# test/test-loop-configure.c +# test/test-loop-handles.c +# test/test-loop-stop.c +# test/test-loop-time.c +# test/test-multiple-listen.c +# test/test-mutexes.c +# test/test-osx-select.c +# test/test-pass-always.c +# test/test-ping-pong.c +# test/test-pipe-bind-error.c +# test/test-pipe-close-stdout-read-stdin.c +# test/test-pipe-connect-error.c +# test/test-pipe-connect-multiple.c +# test/test-pipe-connect-prepare.c +# test/test-pipe-getsockname.c +# test/test-pipe-pending-instances.c +# test/test-pipe-sendmsg.c +# test/test-pipe-server-close.c +# test/test-pipe-set-fchmod.c +# test/test-pipe-set-non-blocking.c +# test/test-platform-output.c +# test/test-poll-close-doesnt-corrupt-stack.c +# test/test-poll-close.c +# test/test-poll-closesocket.c +# test/test-poll-oob.c +# test/test-poll.c +# test/test-process-priority.c +# test/test-process-title-threadsafe.c +# test/test-process-title.c +# test/test-queue-foreach-delete.c +# test/test-random.c +# test/test-ref.c +# test/test-run-nowait.c +# test/test-run-once.c +# test/test-semaphore.c +# test/test-shutdown-close.c +# test/test-shutdown-eof.c +# test/test-shutdown-twice.c +# test/test-signal-multiple-loops.c +# test/test-signal-pending-on-close.c +# test/test-signal.c +# test/test-socket-buffer-size.c +# test/test-spawn.c +# test/test-stdio-over-pipes.c +# test/test-strscpy.c +# test/test-tcp-alloc-cb-fail.c +# test/test-tcp-bind-error.c +# test/test-tcp-bind6-error.c +# test/test-tcp-close-accept.c +# test/test-tcp-close-while-connecting.c +# test/test-tcp-close.c +# test/test-tcp-close-reset.c +# test/test-tcp-connect-error-after-write.c +# test/test-tcp-connect-error.c +# test/test-tcp-connect-timeout.c +# test/test-tcp-connect6-error.c +# test/test-tcp-create-socket-early.c +# test/test-tcp-flags.c +# test/test-tcp-oob.c +# test/test-tcp-open.c +# test/test-tcp-read-stop.c +# test/test-tcp-shutdown-after-write.c +# test/test-tcp-try-write.c +# test/test-tcp-try-write-error.c +# test/test-tcp-unexpected-read.c +# test/test-tcp-write-after-connect.c +# test/test-tcp-write-fail.c +# test/test-tcp-write-queue-order.c +# test/test-tcp-write-to-half-open-connection.c +# test/test-tcp-writealot.c +# test/test-thread-equal.c +# test/test-thread.c +# test/test-threadpool-cancel.c +# test/test-threadpool.c +# test/test-timer-again.c +# test/test-timer-from-check.c +# test/test-timer.c +# test/test-tmpdir.c +# test/test-tty-duplicate-key.c +# test/test-tty-escape-sequence-processing.c +# test/test-tty.c +# test/test-udp-alloc-cb-fail.c +# test/test-udp-bind.c +# test/test-udp-connect.c +# test/test-udp-create-socket-early.c +# test/test-udp-dgram-too-big.c +# test/test-udp-ipv6.c +# test/test-udp-multicast-interface.c +# test/test-udp-multicast-interface6.c +# test/test-udp-multicast-join.c +# test/test-udp-multicast-join6.c +# test/test-udp-multicast-ttl.c +# test/test-udp-open.c +# test/test-udp-options.c +# test/test-udp-send-and-recv.c +# test/test-udp-send-hang-loop.c 
+# test/test-udp-send-immediate.c +# test/test-udp-send-unreachable.c +# test/test-udp-try-send.c +# test/test-uname.c +# test/test-walk-handles.c +# test/test-watcher-cross-stop.c) +# +# add_executable(uv_run_tests ${uv_test_sources} uv_win_longpath.manifest) +# target_compile_definitions(uv_run_tests +# PRIVATE ${uv_defines} USING_UV_SHARED=1) +# target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) +# target_link_libraries(uv_run_tests uv ${uv_test_libraries}) +# add_test(NAME uv_test +# COMMAND uv_run_tests +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +# if(CMAKE_SYSTEM_NAME STREQUAL "OS390") +# set_tests_properties(uv_test PROPERTIES ENVIRONMENT +# "LIBPATH=${CMAKE_BINARY_DIR}:$ENV{LIBPATH}") +# endif() +# add_executable(uv_run_tests_a ${uv_test_sources} uv_win_longpath.manifest) +# target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) +# target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) +# if(QEMU) +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries} -static) +# else() +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) +# endif() +# add_test(NAME uv_test_a +# COMMAND uv_run_tests_a +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +#endif() + +if(UNIX) + # Now for some gibbering horrors from beyond the stars... + foreach(lib IN LISTS uv_libraries) + list(APPEND LIBS "-l${lib}") + endforeach() + string(REPLACE ";" " " LIBS "${LIBS}") + # Consider setting project version via project() call? + file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT) + string(REGEX MATCH "([0-9]+)[.][0-9]+[.][0-9]+" PACKAGE_VERSION "${configure_ac}") + set(UV_VERSION_MAJOR "${CMAKE_MATCH_1}") + # The version in the filename is mirroring the behaviour of autotools. + set_target_properties(uv PROPERTIES + VERSION ${UV_VERSION_MAJOR}.0.0 + SOVERSION ${UV_VERSION_MAJOR}) + set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) + set(prefix ${CMAKE_INSTALL_PREFIX}) + configure_file(${LIBUV_ROOT_DIR}/libuv.pc.in ${LIBUV_ROOT_DIR}/libuv.pc @ONLY) + + install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR}) + install(FILES ${PROJECT_BINARY_DIR}/libuv.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + +#if(WIN32) +# install(DIRECTORY include/ DESTINATION include) +# install(FILES LICENSE DESTINATION .) 
+# install(TARGETS uv uv_a +# RUNTIME DESTINATION lib/$ +# ARCHIVE DESTINATION lib/$) +#endif() +# +#message(STATUS "summary of build options: +# Install prefix: ${CMAKE_INSTALL_PREFIX} +# Target system: ${CMAKE_SYSTEM_NAME} +# Compiler: +# C compiler: ${CMAKE_C_COMPILER} +# CFLAGS: ${CMAKE_C_FLAGS_${_build_type}} ${CMAKE_C_FLAGS} +#") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d713cec8b8a..7c6b4006021 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -357,8 +357,8 @@ if (USE_OPENCL) endif () if (USE_CASSANDRA) - dbms_target_link_libraries(PRIVATE ${CASSANDRA_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${CASS_INCLUDE_DIR}) + dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) endif() dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 471f6df3e28..bf43adcdc59 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession *session_, + CassSession * session_, const std::string &query_str_, const DB::Block &sample_block, const size_t max_block_size_) @@ -48,108 +48,108 @@ namespace { using ValueType = ExternalResultDescription::ValueType; - void insertValue(IColumn & column, const ValueType type, const CassValue * value) + void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { switch (type) { case ValueType::vtUInt8: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt16: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt32: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt64: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt8: { - cass_int8_t _value; - cass_value_get_int8(value, &_value); - static_cast(column).insertValue(_value); + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt16: { - cass_int16_t _value; - cass_value_get_int16(value, &_value); - static_cast(column).insertValue(_value); + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt32: { - cass_int32_t _value; - cass_value_get_int32(value, &_value); - static_cast(column).insertValue(_value); + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt64: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + 
cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtFloat32: { - cass_float_t _value; - cass_value_get_float(value, &_value); - static_cast(column).insertValue(_value); + cass_float_t value; + cass_value_get_float(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtFloat64: { - cass_double_t _value; - cass_value_get_double(value, &_value); - static_cast(column).insertValue(_value); + cass_double_t value; + cass_value_get_double(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtString: { - const char* _value; - size_t _value_length; - cass_value_get_string(value, &_value, &_value_length); - static_cast(column).insertData(_value, _value_length); + const char * value; + size_t value_length; + cass_value_get_string(cass_value, &value, &value_length); + static_cast(column).insertData(value, value_length); break; } case ValueType::vtDate: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(UInt32{cass_date_from_epoch(value)}); // FIXME break; } case ValueType::vtDateTime: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUUID: { - CassUuid _value; - cass_value_get_uuid(value, &_value); + CassUuid value; + cass_value_get_uuid(cass_value, &value); std::array uuid_str; - cass_uuid_string(_value, uuid_str.data()); + cass_uuid_string(value, uuid_str.data()); static_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } @@ -181,9 +181,9 @@ namespace const CassValue* map = cass_row_get_column(row, 0); iterator = cass_iterator_from_map(map); while (cass_iterator_next(iterator)) { - const CassValue* _key = cass_iterator_get_map_key(iterator); - const CassValue* _value = cass_iterator_get_map_value(iterator); - auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; + const CassValue* cass_key = cass_iterator_get_map_key(iterator); + const CassValue* cass_value = cass_iterator_get_map_value(iterator); + auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { ColumnNullable & column_nullable = static_cast(*columns[idx]); diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index c51ae2877d8..3a4c7e2f2b7 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -11,7 +11,7 @@ namespace DB void registerDictionarySourceCassandra(DictionarySourceFactory & factory) { - auto createTableSource = [=](const DictionaryStructure & dict_struct, + auto create_table_source = [=](const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, @@ -28,7 +28,7 @@ namespace DB ErrorCodes::SUPPORT_IS_DISABLED}; #endif }; - factory.registerSource("cassandra", createTableSource); + factory.registerSource("cassandra", create_table_source); } } From 7e5de33e93b350a5551900e633c2f6e983180a1e Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 22 May 2020 
17:35:56 +0300 Subject: [PATCH 0154/2229] trigger CI --- src/DataTypes/DataTypeString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index d0db66b202b..c1afa8b90ea 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -388,4 +388,4 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); } -} \ No newline at end of file +} From 05938e562cb9215c8e33b8a6687183449683808d Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 11:07:24 +0300 Subject: [PATCH 0155/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 35 +--- .../SSDComplexKeyCacheDictionary.cpp | 182 +++++++----------- .../SSDComplexKeyCacheDictionary.h | 12 -- 3 files changed, 79 insertions(+), 150 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index a065b367101..ad6b5cb4ea8 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -263,7 +263,6 @@ size_t SSDCachePartition::appendBlock( Index cache_index; cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); - // Poco::Logger::get("wr").information(" block mem: " + std::to_string(current_memory_block_id) + " wb: " + std::to_string(write_buffer_size)); if (current_memory_block_id >= write_buffer_size) throw DB::Exception("lel " + std::to_string(current_memory_block_id) + " " + std::to_string(write_buffer_size) + " " + std::to_string(index), ErrorCodes::LOGICAL_ERROR); @@ -338,7 +337,6 @@ size_t SSDCachePartition::appendBlock( if (!flushed) { - // Poco::Logger::get("wr").information(" set: " + std::to_string(cache_index.getBlockId()) + " " + std::to_string(cache_index.getAddressInBlock())); key_to_index.set(ids[index], cache_index); ids_buffer.push_back(ids[index]); ++index; @@ -349,7 +347,6 @@ size_t SSDCachePartition::appendBlock( init_write_buffer(); } } - // Poco::Logger::get("wr").information("exit"); return ids.size() - begin; } @@ -362,7 +359,6 @@ void SSDCachePartition::flush() if (ids.empty()) return; Poco::Logger::get("paritiiton").information("flushing to SSD."); - // Poco::Logger::get("paritiiton").information("@@@@@@@@@@@@@@@@@@@@ FLUSH!!! " + std::to_string(file_id) + " block: " + std::to_string(current_file_block_id)); AIOContext aio_context{1}; @@ -426,7 +422,6 @@ void SSDCachePartition::flush() if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
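// The rebase below maps a buffer-relative block id onto the on-disk ring: the file keeps at most max_size blocks, hence the current_file_block_id % max_size term.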
{ index.setInMemory(false); - // Poco::Logger::get("pt").information("block: " + std::to_string(index.getBlockId()) + " " + std::to_string(current_file_block_id) + " "); index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); } key_to_index.set(id, index); @@ -571,7 +566,6 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice Memory read_buffer(block_size * read_buffer_size, BUFFER_ALIGNMENT); - // TODO: merge requests std::vector requests; std::vector pointers; std::vector> blocks_to_indices; @@ -601,10 +595,6 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice request.aio_fildes = fd; request.aio_buf = reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size); request.aio_nbytes = block_size; - // Poco::Logger::get("RR").information("block found" + std::to_string(index_to_out[i].first.getBlockId()) + " max_size" + std::to_string(max_size)); - // if (index_to_out[i].first.getBlockId() > max_size) { - // throw DB::Exception("kek", ErrorCodes::LOGICAL_ERROR); - // } request.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #endif @@ -619,16 +609,12 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice std::vector processed(requests.size(), false); std::vector events(requests.size()); for (auto & event : events) - event.res = -1; // TODO: remove + event.res = -1; size_t to_push = 0; size_t to_pop = 0; while (to_pop < requests.size()) { - // Poco::Logger::get("RR").information( - // "push = " + std::to_string(to_push) + " pop=" + std::to_string(to_pop) + - // "bi = " + std::to_string(blocks_to_indices.size()) + " req = " + std::to_string(requests.size())); - /// get io tasks from previous iteration int popped = 0; while (to_pop < to_push && (popped = io_getevents(aio_context.ctx, to_push - to_pop, to_push - to_pop, &events[to_pop], nullptr)) <= 0) { @@ -681,13 +667,11 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice throwFromErrno("io_submit: Failed to submit a request for asynchronous IO", ErrorCodes::CANNOT_IO_SUBMIT); } to_push += pushed; - // Poco::Logger::get("RR").information("fin iter"); } } void SSDCachePartition::clearOldestBlocks() { - // Poco::Logger::get("GC").information("GC clear -----------------"); // write_buffer_size, because we need to erase the whole buffer. 
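Both the lookup path above and this eviction pass re-read whole blocks of the cache file with asynchronous IO before touching the in-memory index. On Linux the patches drive the kernel-native interface (io_submit/io_getevents) through ClickHouse's AIOContext wrapper; the following is only a minimal, self-contained sketch of the same submit-then-wait pattern, written against portable POSIX AIO, with a hypothetical function name and simplified error handling:

``` cpp
#include <aio.h>
#include <cerrno>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>

// Read one block of `block_size` bytes at `offset` and wait for completion.
// POSIX AIO sketch only; the real code batches many requests per submission.
std::vector<char> readBlock(int fd, size_t block_size, off_t offset)
{
    std::vector<char> buffer(block_size);

    aiocb request{};
    request.aio_fildes = fd;
    request.aio_buf = buffer.data();
    request.aio_nbytes = block_size;
    request.aio_offset = offset;

    if (aio_read(&request) < 0)
        throw std::runtime_error(std::string("aio_read: ") + std::strerror(errno));

    // Block until the request leaves the EINPROGRESS state.
    const aiocb * pending[1] = {&request};
    while (aio_error(&request) == EINPROGRESS)
        aio_suspend(pending, 1, nullptr);

    if (aio_return(&request) != static_cast<ssize_t>(block_size))
        throw std::runtime_error("short or failed AIO read");

    return buffer;
}
```

The real implementation additionally coalesces lookups that land in the same block into a single request (the blocks_to_indices bookkeeping above) and verifies a per-block checksum before trusting the data.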
Memory read_buffer_memory(block_size * write_buffer_size, BUFFER_ALIGNMENT); @@ -708,8 +692,6 @@ void SSDCachePartition::clearOldestBlocks() request.aio_data = 0; #endif - // Poco::Logger::get("GC").information("GC offset=" + std::to_string(request.aio_offset)); - { iocb* request_ptr = &request; io_event event{}; @@ -738,7 +720,6 @@ void SSDCachePartition::clearOldestBlocks() std::vector keys; keys.reserve(write_buffer_size); - // TODO: писать кол-во значений for (size_t i = 0; i < write_buffer_size; ++i) { ReadBufferFromMemory read_buffer(read_buffer_memory.data() + i * block_size, block_size); @@ -753,7 +734,6 @@ void SSDCachePartition::clearOldestBlocks() uint32_t keys_in_current_block = 0; readBinary(keys_in_current_block, read_buffer); - // Poco::Logger::get("GC").information("keys in block: " + std::to_string(keys_in_current_block) + " offset=" + std::to_string(read_buffer.offset())); for (uint32_t j = 0; j < keys_in_current_block; ++j) { @@ -804,7 +784,6 @@ void SSDCachePartition::clearOldestBlocks() const size_t start_block = current_file_block_id % max_size; const size_t finish_block = start_block + write_buffer_size; - Poco::Logger::get("partition gc").information("erasing keys start = " + std::to_string(start_block) + " end = " + std::to_string(finish_block)); for (const auto& key : keys) { Index index; @@ -883,7 +862,7 @@ PaddedPODArray SSDCachePartition::getCachedIds(const std std::unique_lock lock(rw_lock); // Begin and end iterators can be changed. PaddedPODArray array; for (const auto & key : key_to_index.keys()) - array.push_back(key); // TODO: exclude default + array.push_back(key); return array; } @@ -1185,7 +1164,7 @@ void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector std::rethrow_exception(last_update_exception); } - // Set key + /// Set key std::get>(new_keys.values).push_back(id); std::uniform_int_distribution distribution{lifetime.min_sec, lifetime.max_sec}; @@ -1193,7 +1172,7 @@ void SSDCacheStorage::update(DictionarySourcePtr & source_ptr, const std::vector metadata.back().setExpiresAt(now + std::chrono::seconds(distribution(rnd_engine))); metadata.back().setDefault(); - /// inform caller that the cell has not been found + /// Inform caller that the cell has not been found on_id_not_found(id); } @@ -1306,11 +1285,7 @@ SSDCacheDictionary::SSDCacheDictionary( const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ const auto null_value = std::get(null_values[index]); /* NOLINT */ \ - getItemsNumberImpl( /* NOLINT */ \ - index, /* NOLINT */ \ - ids, /* NOLINT */ \ - out, /* NOLINT */ \ - [&](const size_t) { return null_value; }); /* NOLINT */ \ + getItemsNumberImpl(index, ids, out, [&](const size_t) { return null_value; }); /* NOLINT */ \ } DECLARE(UInt8) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 0a97c59f524..df636baa19e 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -92,8 +92,6 @@ namespace constexpr UInt8 HAS_NOT_FOUND = 2; - //constexpr UInt16 MAX_KEY_SIZE = std::numeric_limits::max(); - const std::string BIN_FILE_EXT = ".bin"; const std::string IND_FILE_EXT = ".idx"; @@ -287,7 +285,6 @@ size_t SSDComplexKeyCachePartition::append( for (size_t index = begin; index < keys.size();) { - //Poco::Logger::get("test").information("wb off: " + 
std::to_string(write_buffer->offset())); Index cache_index; cache_index.setInMemory(true); cache_index.setBlockId(current_memory_block_id); @@ -304,8 +301,6 @@ size_t SSDComplexKeyCachePartition::append( writeBinary(metadata[index].data, *write_buffer); } - //Poco::Logger::get("test key").information("wb off: " + std::to_string(write_buffer->offset())); - for (const auto & attribute : new_attributes) { if (flushed) @@ -322,7 +317,7 @@ size_t SSDComplexKeyCachePartition::append( } \ else \ { \ - const auto & values = std::get>(attribute.values); \ + const auto & values = std::get>(attribute.values); /* NOLINT */ \ writeBinary(values[index], *write_buffer); \ } \ } \ @@ -372,7 +367,6 @@ size_t SSDComplexKeyCachePartition::append( { init_write_buffer(); } - //Poco::Logger::get("test final").information("wb off: " + std::to_string(write_buffer->offset())); } return keys.size() - begin; } @@ -406,8 +400,6 @@ void SSDComplexKeyCachePartition::flush() write_request.aio_offset = (current_file_block_id % max_size) * block_size; #endif - //Poco::Logger::get("try:").information("offset: " + std::to_string(write_request.aio_offset) + " nbytes: " + std::to_string(write_request.aio_nbytes)); - while (io_submit(aio_context.ctx, 1, &write_request_ptr) < 0) { if (errno != EINTR) @@ -443,20 +435,18 @@ void SSDComplexKeyCachePartition::flush() throwFromErrnoWithPath("Cannot fsync " + path + BIN_FILE_EXT, path + BIN_FILE_EXT, ErrorCodes::CANNOT_FSYNC); /// commit changes in index - for (size_t row = 0; row < keys_buffer.size(); ++row) + for (auto & key : keys_buffer) { Index index; - //Poco::Logger::get("get:").information("sz = " + std::to_string(keys_buffer[row].size())); - if (key_to_index.getKeyAndValue(keys_buffer[row], index)) + if (key_to_index.getKeyAndValue(key, index)) { if (index.inMemory()) // Row can be inserted in the buffer twice, so we need to move to ssd only the last index. 
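The index-commit step that this flush performs (the loop continuing below) can be condensed into a self-contained sketch; the types here are hypothetical, simplified stand-ins for the partition's real Index and key pool:

``` cpp
#include <cstdint>
#include <unordered_map>
#include <vector>

// Simplified stand-in for the partition's Index: while a row sits in the
// write buffer, block_id is relative to that buffer; after a flush it must
// point at a block of the cache file instead.
struct Index
{
    bool in_memory = true;
    uint64_t block_id = 0;
};

// Rebase every buffered index onto the blocks that were just written out.
// A key may have been appended twice while buffered, so only the entry
// currently stored in the map (the latest one) is rewritten; stale indices
// simply stop being referenced.
void commitFlushedIndices(
    std::unordered_map<uint64_t, Index> & key_to_index,
    const std::vector<uint64_t> & keys_buffer,
    uint64_t current_file_block_id,
    uint64_t max_size_in_blocks)
{
    for (const auto key : keys_buffer)
    {
        const auto it = key_to_index.find(key);
        if (it == key_to_index.end())
            continue;

        Index & index = it->second;
        if (index.in_memory)
        {
            index.in_memory = false;
            // The cache file is used as a ring of max_size_in_blocks blocks.
            index.block_id = (current_file_block_id % max_size_in_blocks) + index.block_id;
        }
    }
}
```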
{ index.setInMemory(false); index.setBlockId((current_file_block_id % max_size) + index.getBlockId()); } - key_to_index.set(keys_buffer[row], index); + key_to_index.set(key, index); } - //Poco::Logger::get("get:").information("finish"); } current_file_block_id += write_buffer_size; @@ -652,7 +642,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray processed(requests.size(), false); std::vector events(requests.size()); for (auto & event : events) - event.res = -1; // TODO: remove + event.res = -1; size_t to_push = 0; size_t to_pop = 0; @@ -714,7 +704,6 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray erasing keys <"); for (const auto& key : keys) { - //Poco::Logger::get("ClearOldestBlocks").information("ktest: null=" + std::to_string(key.isNull())); - //Poco::Logger::get("ClearOldestBlocks").information("ktest: data=" + std::to_string(reinterpret_cast(key.fullData()))); - //Poco::Logger::get("ClearOldestBlocks").information("ktest: sz=" + std::to_string(key.size()) + " fz=" + std::to_string(key.fullSize())); Index index; if (key_to_index.get(key, index)) { - //Poco::Logger::get("ClearOldestBlocks").information("erase"); size_t block_id = index.getBlockId(); if (start_block <= block_id && block_id < finish_block) key_to_index.erase(key); } - //Poco::Logger::get("ClearOldestBlocks").information("finish"); } } @@ -1048,6 +1026,67 @@ void SSDComplexKeyCacheStorage::has( hit_count.fetch_add(n - count_not_found, std::memory_order_release); } +namespace +{ +SSDComplexKeyCachePartition::Attributes createAttributesFromBlock( + const Block & block, const size_t begin_column, const std::vector & structure) +{ + SSDComplexKeyCachePartition::Attributes attributes; + + const auto columns = block.getColumns(); + for (size_t i = 0; i < structure.size(); ++i) + { + const auto & column = columns[i + begin_column]; + switch (structure[i]) + { +#define DISPATCH(TYPE) \ + case AttributeUnderlyingType::ut##TYPE: \ + { \ + SSDComplexKeyCachePartition::Attribute::Container values(column->size()); \ + memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ + attributes.emplace_back(); \ + attributes.back().type = structure[i]; \ + attributes.back().values = std::move(values); \ + } \ + break; + + DISPATCH(UInt8) + DISPATCH(UInt16) + DISPATCH(UInt32) + DISPATCH(UInt64) + DISPATCH(UInt128) + DISPATCH(Int8) + DISPATCH(Int16) + DISPATCH(Int32) + DISPATCH(Int64) + DISPATCH(Decimal32) + DISPATCH(Decimal64) + DISPATCH(Decimal128) + DISPATCH(Float32) + DISPATCH(Float64) +#undef DISPATCH + + case AttributeUnderlyingType::utString: + { + attributes.emplace_back(); + SSDComplexKeyCachePartition::Attribute::Container values(column->size()); + for (size_t j = 0; j < column->size(); ++j) + { + const auto ref = column->getDataAt(j); + values[j].resize(ref.size); + memcpy(values[j].data(), ref.data, ref.size); + } + attributes.back().type = structure[i]; + attributes.back().values = std::move(values); + } + break; + } + } + + return attributes; +} +} // namespace + template void SSDComplexKeyCacheStorage::update( DictionarySourcePtr & source_ptr, @@ -1202,7 +1241,7 @@ void SSDComplexKeyCacheStorage::update( if (update_error_count) { - /// TODO: юзать старые значения. + /// TODO: use old values. /// We don't have expired data for that `id` so all we can do is to rethrow `last_exception`. 
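Elsewhere in these update() paths, every stored cell (fetched or default) gets an expiration stamped from a uniform draw over the dictionary's lifetime range, so entries cached by one update do not all expire in the same instant. A minimal sketch of that policy, with illustrative names:

``` cpp
#include <chrono>
#include <cstdint>
#include <random>

// Pick a cell's deadline uniformly from [min_sec, max_sec] seconds after
// `now`. The jitter spreads future re-fetches out in time instead of
// creating one synchronized expiration wave per update.
std::chrono::system_clock::time_point pickExpiresAt(
    std::mt19937_64 & rnd_engine,
    std::chrono::system_clock::time_point now,
    uint64_t min_sec,
    uint64_t max_sec)
{
    std::uniform_int_distribution<uint64_t> distribution{min_sec, max_sec};
    return now + std::chrono::seconds(distribution(rnd_engine));
}
```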
std::rethrow_exception(last_update_exception); @@ -1267,64 +1306,6 @@ void SSDComplexKeyCacheStorage::collectGarbage() } } -SSDComplexKeyCachePartition::Attributes SSDComplexKeyCacheStorage::createAttributesFromBlock( - const Block & block, const size_t begin_column, const std::vector & structure) -{ - SSDComplexKeyCachePartition::Attributes attributes; - - const auto columns = block.getColumns(); - for (size_t i = 0; i < structure.size(); ++i) - { - const auto & column = columns[i + begin_column]; - switch (structure[i]) - { -#define DISPATCH(TYPE) \ - case AttributeUnderlyingType::ut##TYPE: \ - { \ - SSDComplexKeyCachePartition::Attribute::Container values(column->size()); \ - memcpy(&values[0], column->getRawData().data, sizeof(TYPE) * values.size()); \ - attributes.emplace_back(); \ - attributes.back().type = structure[i]; \ - attributes.back().values = std::move(values); \ - } \ - break; - - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) - DISPATCH(Float32) - DISPATCH(Float64) -#undef DISPATCH - - case AttributeUnderlyingType::utString: - { - attributes.emplace_back(); - SSDComplexKeyCachePartition::Attribute::Container values(column->size()); - for (size_t j = 0; j < column->size(); ++j) - { - const auto ref = column->getDataAt(j); - values[j].resize(ref.size); - memcpy(values[j].data(), ref.data, ref.size); - } - attributes.back().type = structure[i]; - attributes.back().values = std::move(values); - } - break; - } - } - - return attributes; -} - SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( const std::string & name_, const DictionaryStructure & dict_struct_, @@ -1368,13 +1349,8 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ - const auto null_value = std::get(null_values[index]); \ - getItemsNumberImpl( \ - index, \ - key_columns, \ - key_types, \ - out, \ - [&](const size_t) { return null_value; }); \ + const auto null_value = std::get(null_values[index]); /* NOLINT */ \ + getItemsNumberImpl( index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \ } DECLARE(UInt8) @@ -1403,12 +1379,7 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ - getItemsNumberImpl( \ - index, \ - key_columns, \ - key_types, \ - out, \ - [&](const size_t row) { return def[row]; }); \ + getItemsNumberImpl(index, key_columns, key_types, out, [&](const size_t row) { return def[row]; }); /* NOLINT */ \ } DECLARE(UInt8) DECLARE(UInt16) @@ -1436,12 +1407,7 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( { \ const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ - getItemsNumberImpl( \ - index, \ - key_columns, \ - key_types, \ - out, \ - [&](const size_t) { return def; }); \ + getItemsNumberImpl(index, key_columns, key_types, out, [&](const size_t) { return def; }); /* NOLINT */ \ } DECLARE(UInt8) DECLARE(UInt16) @@ -1708,7 +1674,7 @@ 
AttributeValueVariant SSDComplexKeyCacheDictionary::createAttributeNullValueWith { #define DISPATCH(TYPE) \ case AttributeUnderlyingType::ut##TYPE: \ - return createAttributeNullValueWithTypeImpl(null_value); + return createAttributeNullValueWithTypeImpl(null_value); /* NOLINT */ DISPATCH(UInt8) DISPATCH(UInt16) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index b6717d16f65..7809bd1909d 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -200,7 +200,6 @@ public: { UInt16 sz; readBinary(sz, buf); - //Poco::Logger::get("test read key").information("sz " + std::to_string(sz)); char * data = nullptr; if constexpr (std::is_same_v) data = arena.alloc(); @@ -209,7 +208,6 @@ public: memcpy(data, &sz, sizeof(sz)); buf.read(data + sizeof(sz), sz); key = KeyRef(data); - //Poco::Logger::get("test read key").information("ksz = " + std::to_string(key.size())); } void ignoreKey(ReadBuffer & buf) const @@ -478,9 +476,6 @@ public: double getLoadFactor() const; private: - SSDComplexKeyCachePartition::Attributes createAttributesFromBlock( - const Block & block, const size_t begin_column, const std::vector & structure); - void collectGarbage(); const AttributeTypes attributes_structure; @@ -505,9 +500,6 @@ private: mutable size_t update_error_count = 0; mutable std::chrono::system_clock::time_point backoff_end_time; - // stats - //mutable size_t bytes_allocated = 0; - mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; }; @@ -569,10 +561,6 @@ public: return dict_struct.attributes[getAttributeIndex(attribute_name)].injective; } - /*bool hasHierarchy() const { return false; } - - void toParent(const PaddedPODArray &, PaddedPODArray &) const { }*/ - std::exception_ptr getLastException() const override { return storage.getLastException(); } template From e7324ec4d740cc8f579b9ef756837d0e6c7fff35 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 13:41:27 +0300 Subject: [PATCH 0156/2229] docs --- .../external-dicts-dict-layout.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index bdadf97cd11..3bc49c25480 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -54,10 +54,12 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings - [hashed](#dicts-external_dicts_dict_layout-hashed) - [sparse\_hashed](#dicts-external_dicts_dict_layout-sparse_hashed) - [cache](#cache) +- [ssd\_cache](#ssd-cache) - [direct](#direct) - [range\_hashed](#range-hashed) - [complex\_key\_hashed](#complex-key-hashed) - [complex\_key\_cache](#complex-key-cache) +- [complex\_key\_ssd\_cache](#complex-key-ssd-cache) - [complex\_key\_direct](#complex-key-direct) - [ip\_trie](#ip-trie) @@ -296,6 +298,40 @@ Set a large enough cache size. You need to experiment to select the number of ce This type of storage is for use with composite [keys](external-dicts-dict-structure.md). Similar to `cache`. +### ssd\_cache {#ssd-cache} + +Similar to `cache`, but stores data on SSD and index in RAM.
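As an illustration only (the dictionary, database, table, and source settings below are hypothetical, not taken from this patch), a DDL-defined dictionary could opt into this layout as follows; the full parameter set appears in the configuration examples just below:

``` sql
CREATE DICTIONARY hypothetical_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' DB 'default' TABLE 'hypothetical_table'))
LIFETIME(MIN 300 MAX 360)
LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216
    PATH '/var/lib/clickhouse/clickhouse_dictionaries/hypothetical_dict'))
```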
+ +``` xml +<layout> +    <ssd_cache> +        <!-- Size of elementary read block in bytes. Recommended to be equal to SSD's page size. --> +        <block_size>4096</block_size> +        <!-- Max cache file size in bytes. --> +        <file_size>16777216</file_size> +        <!-- Size of RAM buffer in bytes for reading elements from SSD. --> +        <read_buffer_size>131072</read_buffer_size> +        <!-- Size of RAM buffer in bytes for aggregating elements before flushing to SSD. --> +        <write_buffer_size>1048576</write_buffer_size> +        <!-- Path where cache file will be stored. --> +        <path>/var/lib/clickhouse/clickhouse_dictionaries/test_dict</path> +        <!-- Max number of stored keys. --> +        <max_stored_keys>1048576</max_stored_keys> +    </ssd_cache> +</layout> +``` + +or + +``` sql +LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 + PATH /var/lib/clickhouse/clickhouse_dictionaries/test_dict MAX_STORED_KEYS 1048576)) +``` + +### complex\_key\_ssd\_cache {#complex-key-ssd-cache} + +This type of storage is for use with composite [keys](external-dicts-dict-structure.md). Similar to `ssd_cache`. + ### direct {#direct} The dictionary is not stored in memory and directly goes to the source during the processing of a request. From deb0e2f199513689583ba97a5a407524fb8c8806 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 13:42:02 +0300 Subject: [PATCH 0157/2229] parts count --- src/Dictionaries/SSDCacheDictionary.cpp | 1 + src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index ad6b5cb4ea8..8c9d61223df 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1619,6 +1619,7 @@ void registerDictionarySSDCache(DictionaryFactory & factory) const auto max_partitions_count = config.getInt(layout_prefix + ".ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; + max_partitions_count = 1; const auto block_size = config.getInt(layout_prefix + ".ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE); if (block_size <= 0) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 64b40833eb2..df636baa19e 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1733,6 +1733,7 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) const auto max_partitions_count = config.getInt(layout_prefix + ".complex_key_ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; + max_partitions_count = 1; const auto block_size = config.getInt(layout_prefix + ".complex_key_ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE); if (block_size <= 0) From c25850861086a9188c0c34e5357fa9ced1ac34b4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 17:16:52 +0300 Subject: [PATCH 0158/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 1 - src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 8c9d61223df..ad6b5cb4ea8 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1619,7 +1619,6 @@ void registerDictionarySSDCache(DictionaryFactory & factory) const auto max_partitions_count = config.getInt(layout_prefix + ".ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) throw Exception{name + ": dictionary of layout 'ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; - max_partitions_count = 1; const auto block_size = config.getInt(layout_prefix + ".ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE);
if (block_size <= 0) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 64b40833eb2..df636baa19e 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1733,7 +1733,6 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) const auto max_partitions_count = config.getInt(layout_prefix + ".complex_key_ssd_cache.max_partitions_count", DEFAULT_PARTITIONS_COUNT); if (max_partitions_count <= 0) throw Exception{name + ": dictionary of layout 'complex_key_ssd_cache' cannot have 0 (or less) max_partitions_count", ErrorCodes::BAD_ARGUMENTS}; - max_partitions_count = 1; const auto block_size = config.getInt(layout_prefix + ".complex_key_ssd_cache.block_size", DEFAULT_SSD_BLOCK_SIZE); if (block_size <= 0) From 016964caedd534b7cc961a110c25198a6ccdd39c Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 17:19:14 +0300 Subject: [PATCH 0159/2229] style fix --- src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index df636baa19e..0247a896f62 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1085,7 +1085,7 @@ SSDComplexKeyCachePartition::Attributes createAttributesFromBlock( return attributes; } -} // namespace +} template void SSDComplexKeyCacheStorage::update( @@ -1350,8 +1350,8 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ const auto null_value = std::get(null_values[index]); /* NOLINT */ \ - getItemsNumberImpl( index, key_columns, key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \ - } + getItemsNumberImpl( index, key_columns, key_types, out, [&](const size_t) { return null_value; }); \ + } /* NOLINT */ DECLARE(UInt8) DECLARE(UInt16) From bee14177cd29dfda02e5c8d9543e85bae3b2b685 Mon Sep 17 00:00:00 2001 From: potya Date: Sat, 23 May 2020 17:32:47 +0300 Subject: [PATCH 0160/2229] Fix NOT nULL modifier --- src/Interpreters/InterpreterCreateQuery.cpp | 14 +++++----- src/Parsers/ASTColumnDeclaration.cpp | 18 +++++++------ src/Parsers/ASTColumnDeclaration.h | 2 +- src/Parsers/ParserCreateQuery.h | 29 ++++++++++++--------- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ec83c3f9c7f..586f2d0f056 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -287,25 +287,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; - if (col_decl.isNULL && col_decl.isNotNULL) - throw Exception{"Cant use NOT NULL and NULL together", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + if (!col_decl.isNULL && col_decl.isNot) + throw Exception{"Cant use NOT without NULL", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.isNULL) { + if (col_decl.isNot && col_decl.isNULL) { + if (column_type->isNullable()) + throw Exception{"Cant use NOT NULL with 
Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + } else if (col_decl.isNULL && !col_decl.isNot) { if (column_type->isNullable()) throw Exception{"Cant use NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; else { column_type = makeNullable(column_type); } - } else if (col_decl.isNotNULL) { - if (column_type->isNullable()) - throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; } - if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNotNULL) + if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNot && !col_decl.isNULL) column_type = makeNullable(column_type); column_names_and_types.emplace_back(col_decl.name, column_type); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index a6c4b819fdf..40513b45586 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -22,10 +22,10 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->isNULL); } - if (isNULL) + if (isNot) { - res->isNotNULL = isNotNULL; - res->children.push_back(res->isNotNULL); + res->isNot = isNot; + res->children.push_back(res->isNot); } if (default_expression) @@ -71,17 +71,19 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta type->formatImpl(settings, state, frame); } + if (isNot) + { + settings.ostr << ' '; + isNot->formatImpl(settings, state, frame); + } + if (isNULL) { settings.ostr << ' '; isNULL->formatImpl(settings, state, frame); } - if (isNotNULL) - { - settings.ostr << ' '; - isNotNULL->formatImpl(settings, state, frame); - } + if (default_expression) { diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index d3c50d453d5..406a8cebded 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -14,7 +14,7 @@ public: String name; ASTPtr type; ASTPtr isNULL; - ASTPtr isNotNULL; + ASTPtr isNot; String default_specifier; ASTPtr default_expression; ASTPtr comment; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 66a2005334d..8976de8f1bc 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -118,7 +118,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserIdentifierWithOptionalParameters type_parser; ParserKeyword s_default{"DEFAULT"}; ParserKeyword s_null{"NULL"}; - ParserKeyword s_not_null{"NOT NULL"}; + ParserKeyword s_not{"NOT"}; ParserKeyword s_materialized{"MATERIALIZED"}; ParserKeyword s_alias{"ALIAS"}; ParserKeyword s_comment{"COMMENT"}; @@ -129,7 +129,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserCodec codec_parser; ParserExpression expression_parser; ParserIdentifier null_parser; - ParserIdentifier not_null_parser; + ParserCompoundIdentifier not_null_parser; /// mandatory column name ASTPtr name; @@ -142,7 +142,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr type; String default_specifier; ASTPtr isNull; - ASTPtr isNotNull; + ASTPtr isNot; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -171,14 +171,19 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - Pos pos_before_null = pos; + // Pos pos_before_null = pos; - if (s_null.check(pos, expected)) { - if 
(!null_parser.parse(pos_before_null, isNull, expected)) - return false; - } else if (s_not_null.check(pos, expected)) { - if (!not_null_parser.parse(pos_before_null, isNotNull, expected)) + if (s_not.check(pos, expected)) { + if (s_null.check(pos, expected)) { + isNot = std::make_shared("NOT"); + isNull = std::make_shared("NULL"); + } else { return false; + } + } else { + if (s_null.check(pos, expected)) { + isNull = std::make_shared("NULL"); + } } if (s_comment.ignore(pos, expected)) @@ -215,9 +220,9 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(isNull)); } - if (isNotNull) { - column_declaration->isNotNULL = isNotNull; - column_declaration->children.push_back(std::move(isNotNull)); + if (isNot) { + column_declaration->isNot = isNot; + column_declaration->children.push_back(std::move(isNot)); } if (default_expression) From 1b9e2df78e022c1c7ce73b640c660d151fda9bb4 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 19:10:19 +0300 Subject: [PATCH 0161/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 1 + src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index ad6b5cb4ea8..e5023fe32d6 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -633,6 +633,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + ", returned=" + std::to_string(events[i].res) + ", errno=" + std::to_string(errno), ErrorCodes::AIO_READ_ERROR); } + __msan_unpoison(reinterpret_cast(request.aio_buf), request.aio_nbytes); uint64_t checksum = 0; ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); readBinary(checksum, buf_special); diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 0247a896f62..57c9b55143e 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -664,7 +664,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray(request.aio_buf), request.aio_nbytes); uint64_t checksum = 0; ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); readBinary(checksum, buf_special); From 3fb0eab11664b8985aaeef69d738595a659ab13b Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 19:11:20 +0300 Subject: [PATCH 0162/2229] fix --- src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 57c9b55143e..73fe382c853 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1350,8 +1350,8 @@ SSDComplexKeyCacheDictionary::SSDComplexKeyCacheDictionary( const auto index = getAttributeIndex(attribute_name); \ checkAttributeType(name, attribute_name, dict_struct.attributes[index].underlying_type, AttributeUnderlyingType::ut##TYPE); \ const auto null_value = std::get(null_values[index]); /* NOLINT */ \ - getItemsNumberImpl( index, key_columns, key_types, out, [&](const size_t) { return null_value; }); \ - } /* NOLINT */ + getItemsNumberImpl(index, key_columns, 
key_types, out, [&](const size_t) { return null_value; }); /* NOLINT */ \ + } DECLARE(UInt8) DECLARE(UInt16) From 7358410a847d879ec3ce6e0d8c15bf622089b2f2 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 23 May 2020 23:50:49 +0300 Subject: [PATCH 0163/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 2 ++ src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 12 ++---------- src/Dictionaries/SSDComplexKeyCacheDictionary.h | 4 ---- 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index e5023fe32d6..d0830248b3d 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -716,6 +716,8 @@ void SSDCachePartition::clearOldestBlocks() "aio_nbytes=" + std::to_string(request.aio_nbytes) + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR); } + + __msan_unpoison(read_buffer_memory.data(), read_buffer_memory.size()); } std::vector keys; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 73fe382c853..ae3a3f4187f 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -747,6 +747,8 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() "aio_nbytes=" + std::to_string(request.aio_nbytes) + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR); } + + __msan_unpoison(read_buffer_memory.data(), read_buffer_memory.size()); } TemporalComplexKeysPool tmp_keys_pool; @@ -891,11 +893,6 @@ size_t SSDComplexKeyCachePartition::getBytesAllocated() const (keys_buffer_pool ? keys_buffer_pool->size() : 0) + (memory ? memory->size() : 0); } -PaddedPODArray SSDComplexKeyCachePartition::getCachedIds(const std::chrono::system_clock::time_point /* now */) const -{ - throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); -} - void SSDComplexKeyCachePartition::remove() { std::unique_lock lock(rw_lock); @@ -1267,11 +1264,6 @@ void SSDComplexKeyCacheStorage::update( ProfileEvents::increment(ProfileEvents::DictCacheRequests); } -PaddedPODArray SSDComplexKeyCacheStorage::getCachedIds() const -{ - throw DB::Exception("Method not supported.", ErrorCodes::NOT_IMPLEMENTED); -} - double SSDComplexKeyCacheStorage::getLoadFactor() const { double result = 0; diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 7809bd1909d..79f5ddead2f 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -360,8 +360,6 @@ public: size_t getId() const; - PaddedPODArray getCachedIds(const std::chrono::system_clock::time_point now) const; - double getLoadFactor() const; size_t getElementCount() const; @@ -461,8 +459,6 @@ public: PresentIdHandler && on_updated, AbsentIdHandler && on_key_not_found, const DictionaryLifetime lifetime); - PaddedPODArray getCachedIds() const; - std::exception_ptr getLastException() const { return last_update_exception; } const std::string & getPath() const { return path; } From c70401b1e44ce48327f35aa6fcdf77de2861e4dc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 24 May 2020 11:56:34 +0300 Subject: [PATCH 0164/2229] fix other os --- src/Dictionaries/SSDCacheDictionary.cpp | 59 ++++++++++++++----- src/Dictionaries/SSDCacheDictionary.h | 4 ++ .../SSDComplexKeyCacheDictionary.cpp | 4 ++ .../SSDComplexKeyCacheDictionary.h | 4 ++ 
src/Dictionaries/registerDictionaries.h | 2 + src/Functions/FunctionsExternalDictionaries.h | 22 +++++++ 6 files changed, 81 insertions(+), 14 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index d0830248b3d..b172f8bf9bf 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -1,3 +1,5 @@ +#if defined(__linux__) || defined(__FreeBSD__) + #include "SSDCacheDictionary.h" #include @@ -407,7 +409,7 @@ void SSDCachePartition::flush() ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); - if (bytes_written != static_cast(write_request.aio_nbytes)) + if (bytes_written != static_cast(block_size * write_buffer_size)) throw Exception("Not all data was written for asynchronous IO on file " + path + BIN_FILE_EXT + ". returned: " + std::to_string(bytes_written), ErrorCodes::AIO_WRITE_ERROR); if (::fsync(fd) < 0) @@ -574,8 +576,14 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice blocks_to_indices.reserve(index_to_out.size()); for (size_t i = 0; i < index_to_out.size(); ++i) { + #if defined(__FreeBSD__) + const auto back_offset = requests.back().aio.aio_offset; + #else + const auto back_offset = requests.back().aio_offset; + #endif + if (!requests.empty() && - static_cast(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * block_size) + static_cast(back_offset) == index_to_out[i].first.getBlockId() * block_size) { blocks_to_indices.back().push_back(i); continue; @@ -586,9 +594,9 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice request.aio.aio_lio_opcode = LIO_READ; request.aio.aio_fildes = fd; request.aio.aio_buf = reinterpret_cast( - reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (requests.size() % READ_BUFFER_SIZE_BLOCKS)); - request.aio.aio_nbytes = SSD_BLOCK_SIZE; - request.aio.aio_offset = index_to_out[i].first; + reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size)); + request.aio.aio_nbytes = block_size; + request.aio.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #else request.aio_lio_opcode = IOCB_CMD_PREAD; @@ -608,8 +616,13 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice std::vector processed(requests.size(), false); std::vector events(requests.size()); + #if defined(__FreeBSD__) + for (auto & event : events) + event.udata = -1; + #else for (auto & event : events) event.res = -1; + #endif size_t to_push = 0; size_t to_pop = 0; @@ -626,18 +639,34 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice { const auto request_id = events[i].data; const auto & request = requests[request_id]; - if (events[i].res != static_cast(request.aio_nbytes)) + + #if defined(__FreeBSD__) + const auto bytes_written = aio_return(reinterpret_cast(events[i].udata)); + #else + const auto bytes_written = events[i].res; + #endif + + if (bytes_written != static_cast(block_size)) { - throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". 
" + - "request_id= " + std::to_string(request.aio_data) + "/ " + std::to_string(requests.size()) + - ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + - ", returned=" + std::to_string(events[i].res) + ", errno=" + std::to_string(errno), ErrorCodes::AIO_READ_ERROR); + #if defined(__FreeBSD__) + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + "."); + #else + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + + "request_id= " + std::to_string(request.aio_data) + "/ " + std::to_string(requests.size()) + + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + + ", returned=" + std::to_string(events[i].res) + ", errno=" + std::to_string(errno), ErrorCodes::AIO_READ_ERROR); + #endif } - __msan_unpoison(reinterpret_cast(request.aio_buf), request.aio_nbytes); + #if defined(__FreeBSD__) + const auto* buf_ptr = reinterpret_cast(request.aio.aio_buf); + #else + const auto* buf_ptr = reinterpret_cast(request.aio_buf); + #endif + __msan_unpoison(buf_ptr, block_size); uint64_t checksum = 0; - ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); + ReadBufferFromMemory buf_special(buf_ptr, block_size); readBinary(checksum, buf_special); - uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(reinterpret_cast(request.aio_buf) + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); + uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(buf_ptr + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); if (checksum != calculated_checksum) { throw Exception("Cache data corrupted. From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); @@ -647,7 +676,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice { const auto & [file_index, out_index] = index_to_out[idx]; ReadBufferFromMemory buf( - reinterpret_cast(request.aio_buf) + file_index.getAddressInBlock(), + buf_ptr + file_index.getAddressInBlock(), block_size - file_index.getAddressInBlock()); set(out_index, buf); } @@ -1667,3 +1696,5 @@ void registerDictionarySSDCache(DictionaryFactory & factory) } } + +#endif diff --git a/src/Dictionaries/SSDCacheDictionary.h b/src/Dictionaries/SSDCacheDictionary.h index 6352d3a2522..3525d5d4483 100644 --- a/src/Dictionaries/SSDCacheDictionary.h +++ b/src/Dictionaries/SSDCacheDictionary.h @@ -1,5 +1,7 @@ #pragma once +#if defined(__linux__) || defined(__FreeBSD__) + #include "DictionaryStructure.h" #include "IDictionary.h" #include "IDictionarySource.h" @@ -454,3 +456,5 @@ private: }; } + +#endif diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index ae3a3f4187f..d13b9469132 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -1,3 +1,5 @@ +#if defined(__linux__) || defined(__FreeBSD__) + #include "SSDComplexKeyCacheDictionary.h" #include @@ -1770,3 +1772,5 @@ void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory) } } + +#endif diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.h b/src/Dictionaries/SSDComplexKeyCacheDictionary.h index 79f5ddead2f..dcd9deb29ac 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.h +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.h @@ -1,5 +1,7 @@ #pragma once +#if defined(__linux__) || defined(__FreeBSD__) + #include 
"DictionaryStructure.h" #include "IDictionary.h" #include "IDictionarySource.h" @@ -685,3 +687,5 @@ private: }; } + +#endif diff --git a/src/Dictionaries/registerDictionaries.h b/src/Dictionaries/registerDictionaries.h index 05eeccefb8b..bca96159c9d 100644 --- a/src/Dictionaries/registerDictionaries.h +++ b/src/Dictionaries/registerDictionaries.h @@ -25,8 +25,10 @@ void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); +#if defined(__linux__) || defined(__FreeBSD__) void registerDictionarySSDCache(DictionaryFactory & factory); void registerDictionarySSDComplexKeyCache(DictionaryFactory & factory); +#endif void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaryDirect(DictionaryFactory & factory); diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index cb5be77a332..1e8c41cc724 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -29,8 +29,10 @@ #include #include #include +#if defined(__linux__) || defined(__FreeBSD__) #include #include +#endif #include #include #include @@ -175,10 +177,14 @@ private: if (!executeDispatchSimple(block, arguments, result, dict) && !executeDispatchSimple(block, arguments, result, dict) && !executeDispatchSimple(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchSimple(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && @@ -327,10 +333,14 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && @@ -506,10 +516,14 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && @@ -841,10 +855,14 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, 
dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && @@ -1097,10 +1115,14 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && +#if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && +#endif !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && From e76cdbdc43b98be5e70f8c2d7c7f451d31da5637 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 24 May 2020 14:24:45 +0300 Subject: [PATCH 0165/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 24 +++++++++++------------ src/Dictionaries/registerDictionaries.cpp | 2 ++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index b172f8bf9bf..022508b07b5 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -577,13 +577,12 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice for (size_t i = 0; i < index_to_out.size(); ++i) { #if defined(__FreeBSD__) - const auto back_offset = requests.back().aio.aio_offset; + const size_t back_offset = requests.empty() ? -1 : static_cast(requests.back().aio.aio_offset); #else - const auto back_offset = requests.back().aio_offset; + const size_t back_offset = requests.empty() ? -1 : static_cast(requests.back().aio_offset); #endif - if (!requests.empty() && - static_cast(back_offset) == index_to_out[i].first.getBlockId() * block_size) + if (!requests.empty() && back_offset == index_to_out[i].first.getBlockId() * block_size) { blocks_to_indices.back().push_back(i); continue; @@ -616,10 +615,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice std::vector processed(requests.size(), false); std::vector events(requests.size()); - #if defined(__FreeBSD__) - for (auto & event : events) - event.udata = -1; - #else + #if defined(__linux__) for (auto & event : events) event.res = -1; #endif @@ -649,7 +645,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice if (bytes_written != static_cast(block_size)) { #if defined(__FreeBSD__) - throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + "."); + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); #else throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". 
" + "request_id= " + std::to_string(request.aio_data) + "/ " + std::to_string(requests.size()) + @@ -658,7 +654,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice #endif } #if defined(__FreeBSD__) - const auto* buf_ptr = reinterpret_cast(request.aio.aio_buf); + const volatile auto* buf_ptr = reinterpret_cast(request.aio.aio_buf); #else const auto* buf_ptr = reinterpret_cast(request.aio_buf); #endif @@ -739,13 +735,15 @@ void SSDCachePartition::clearOldestBlocks() throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); } +#if defined(__FreeBSD__) + if (event.aio.res != static_cast(request.aio.aio_nbytes)) + throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); +#else if (event.res != static_cast(request.aio_nbytes)) - { throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ". " + "aio_nbytes=" + std::to_string(request.aio_nbytes) + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR); - } - +#endif __msan_unpoison(read_buffer_memory.data(), read_buffer_memory.size()); } diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index e9c47ec034b..58848dafdad 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -32,8 +32,10 @@ void registerDictionaries() registerDictionaryFlat(factory); registerDictionaryHashed(factory); registerDictionaryCache(factory); +#if defined(__linux__) || defined(__FreeBSD__) registerDictionarySSDCache(factory); registerDictionarySSDComplexKeyCache(factory); +#endif registerDictionaryPolygon(factory); registerDictionaryDirect(factory); } From 63ef97309485039d3def06ed84629df0b87829f6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 24 May 2020 19:06:59 +0300 Subject: [PATCH 0166/2229] fix other os --- src/Dictionaries/SSDCacheDictionary.cpp | 2 +- .../SSDComplexKeyCacheDictionary.cpp | 66 ++++++++++++++----- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 022508b07b5..9e246677f05 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -654,7 +654,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice #endif } #if defined(__FreeBSD__) - const volatile auto* buf_ptr = reinterpret_cast(request.aio.aio_buf); + const char* buf_ptr = reinterpret_cast(request.aio.aio_buf); #else const auto* buf_ptr = reinterpret_cast(request.aio_buf); #endif diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index d13b9469132..6a9b9088cab 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -392,8 +392,8 @@ void SSDComplexKeyCachePartition::flush() write_request.aio.aio_lio_opcode = LIO_WRITE; write_request.aio.aio_fildes = fd; write_request.aio.aio_buf = reinterpret_cast(memory->data()); - write_request.aio.aio_nbytes = block_size; - write_request.aio.aio_offset = block_size * current_file_block_id; + write_request.aio.aio_nbytes = block_size * write_buffer_size; + write_request.aio.aio_offset = (current_file_block_id % max_size) * block_size; #else write_request.aio_lio_opcode = IOCB_CMD_PWRITE; write_request.aio_fildes = fd; @@ -609,8 +609,13 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const 
PaddedPODArray(requests.back().aio_offset) == index_to_out[i].first.getBlockId() * block_size) + #if defined(__FreeBSD__) + const size_t back_offset = requests.empty() ? -1 : static_cast(requests.back().aio.aio_offset); + #else + const size_t back_offset = requests.empty() ? -1 : static_cast(requests.back().aio_offset); + #endif + + if (!requests.empty() && back_offset == index_to_out[i].first.getBlockId() * block_size) { blocks_to_indices.back().push_back(i); continue; @@ -621,9 +626,9 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray( - reinterpret_cast(read_buffer.data()) + SSD_BLOCK_SIZE * (requests.size() % READ_BUFFER_SIZE_BLOCKS)); - request.aio.aio_nbytes = SSD_BLOCK_SIZE; - request.aio.aio_offset = index_to_out[i].first; + reinterpret_cast(read_buffer.data()) + block_size * (requests.size() % read_buffer_size)); + request.aio.aio_nbytes = block_size; + request.aio.aio_offset = index_to_out[i].first.getBlockId() * block_size; request.aio_data = requests.size(); #else request.aio_lio_opcode = IOCB_CMD_PREAD; @@ -643,8 +648,11 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray processed(requests.size(), false); std::vector events(requests.size()); + #if defined(__linux__) for (auto & event : events) event.res = -1; + #endif + size_t to_push = 0; size_t to_pop = 0; @@ -662,15 +670,35 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray(request.aio_nbytes)) - throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + - "request_id= " + std::to_string(request.aio_data) + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + - "returned: " + std::to_string(events[i].res), ErrorCodes::AIO_READ_ERROR); - __msan_unpoison(reinterpret_cast(request.aio_buf), request.aio_nbytes); + + #if defined(__FreeBSD__) + const auto bytes_written = aio_return(reinterpret_cast(events[i].udata)); + #else + const auto bytes_written = events[i].res; + #endif + + if (bytes_written != static_cast(block_size)) + { + #if defined(__FreeBSD__) + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); + #else + throw Exception("AIO failed to read file " + path + BIN_FILE_EXT + ". " + + "request_id= " + std::to_string(request.aio_data) + "/ " + std::to_string(requests.size()) + + ", aio_nbytes=" + std::to_string(request.aio_nbytes) + ", aio_offset=" + std::to_string(request.aio_offset) + + ", returned=" + std::to_string(events[i].res) + ", errno=" + std::to_string(errno), ErrorCodes::AIO_READ_ERROR); + #endif + } + #if defined(__FreeBSD__) + const char* buf_ptr = reinterpret_cast(request.aio.aio_buf); + #else + const auto* buf_ptr = reinterpret_cast(request.aio_buf); + #endif + + __msan_unpoison(buf_ptr, block_size); uint64_t checksum = 0; - ReadBufferFromMemory buf_special(reinterpret_cast(request.aio_buf), block_size); + ReadBufferFromMemory buf_special(buf_ptr, block_size); readBinary(checksum, buf_special); - uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(reinterpret_cast(request.aio_buf) + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); + uint64_t calculated_checksum = CityHash_v1_0_2::CityHash64(buf_ptr + BLOCK_CHECKSUM_SIZE, block_size - BLOCK_CHECKSUM_SIZE); if (checksum != calculated_checksum) { throw Exception("Cache data corrupted. 
From block = " + std::to_string(checksum) + " calculated = " + std::to_string(calculated_checksum) + ".", ErrorCodes::CORRUPTED_DATA); @@ -680,7 +708,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray(request.aio_buf) + file_index.getAddressInBlock(), + buf_ptr + file_index.getAddressInBlock(), block_size - file_index.getAddressInBlock()); set(out_index, buf); } @@ -743,13 +771,15 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() throwFromErrno("io_getevents: Failed to get an event for asynchronous IO", ErrorCodes::CANNOT_IO_GETEVENTS); } +#if defined(__FreeBSD__) + if (event.aio.res != static_cast(request.aio.aio_nbytes)) + throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); +#else if (event.res != static_cast(request.aio_nbytes)) - { throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ". " + "aio_nbytes=" + std::to_string(request.aio_nbytes) + ", returned=" + std::to_string(event.res) + ".", ErrorCodes::AIO_READ_ERROR); - } - +#endif __msan_unpoison(read_buffer_memory.data(), read_buffer_memory.size()); } From 3150667aa34553e30ef077575006cd54bf1e33cc Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 24 May 2020 19:57:25 +0300 Subject: [PATCH 0167/2229] ya.make --- src/Dictionaries/ya.make | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index e47b55d5254..c462f3f665c 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -58,6 +58,8 @@ SRCS( RedisBlockInputStream.cpp RedisDictionarySource.cpp registerDictionaries.cpp + SSDCacheDictionary.cpp + SSDComplexKeyCacheDictionary.cpp writeParenthesisedString.cpp XDBCDictionarySource.cpp From 797fa400b4a4f7957a690ec9bcf83e4e2a7ead13 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 24 May 2020 21:59:06 +0300 Subject: [PATCH 0168/2229] fix --- src/Dictionaries/SSDCacheDictionary.cpp | 4 ++-- src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index 9e246677f05..f8e6ca4176f 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -654,7 +654,7 @@ void SSDCachePartition::getValueFromStorage(const PaddedPODArray & indice #endif } #if defined(__FreeBSD__) - const char* buf_ptr = reinterpret_cast(request.aio.aio_buf); + const char* buf_ptr = reinterpret_cast(reinterpret_cast(request.aio.aio_buf)); #else const auto* buf_ptr = reinterpret_cast(request.aio_buf); #endif @@ -736,7 +736,7 @@ void SSDCachePartition::clearOldestBlocks() } #if defined(__FreeBSD__) - if (event.aio.res != static_cast(request.aio.aio_nbytes)) + if (event.aio.udata != static_cast(request.aio.aio_nbytes)) throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); #else if (event.res != static_cast(request.aio_nbytes)) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 6a9b9088cab..69c248a5804 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -689,7 +689,7 @@ void SSDComplexKeyCachePartition::getValueFromStorage(const PaddedPODArray(request.aio.aio_buf); + const char* buf_ptr = reinterpret_cast(reinterpret_cast(request.aio.aio_buf)); #else const auto* buf_ptr = reinterpret_cast(request.aio_buf); #endif @@ -772,7 
+772,7 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() } #if defined(__FreeBSD__) - if (event.aio.res != static_cast(request.aio.aio_nbytes)) + if (event.aio.udata != static_cast(request.aio.aio_nbytes)) throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); #else if (event.res != static_cast(request.aio_nbytes)) From d6f4c66fbc492c1bbbf10b34519b444b2b4396ea Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 25 May 2020 00:00:44 +0300 Subject: [PATCH 0169/2229] fix aio for other os --- src/Dictionaries/SSDCacheDictionary.cpp | 2 +- src/Dictionaries/SSDComplexKeyCacheDictionary.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/SSDCacheDictionary.cpp b/src/Dictionaries/SSDCacheDictionary.cpp index f8e6ca4176f..93ec2a1709a 100644 --- a/src/Dictionaries/SSDCacheDictionary.cpp +++ b/src/Dictionaries/SSDCacheDictionary.cpp @@ -736,7 +736,7 @@ void SSDCachePartition::clearOldestBlocks() } #if defined(__FreeBSD__) - if (event.aio.udata != static_cast(request.aio.aio_nbytes)) + if (aio_return(reinterpret_cast(event.udata)) != static_cast(request.aio.aio_nbytes)) throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); #else if (event.res != static_cast(request.aio_nbytes)) diff --git a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp index 69c248a5804..642882a1975 100644 --- a/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp +++ b/src/Dictionaries/SSDComplexKeyCacheDictionary.cpp @@ -430,7 +430,7 @@ void SSDComplexKeyCachePartition::flush() ProfileEvents::increment(ProfileEvents::WriteBufferAIOWrite); ProfileEvents::increment(ProfileEvents::WriteBufferAIOWriteBytes, bytes_written); - if (bytes_written != static_cast(write_request.aio_nbytes)) + if (bytes_written != static_cast(block_size * write_buffer_size)) throw Exception("Not all data was written for asynchronous IO on file " + path + BIN_FILE_EXT + ". 
returned: " + std::to_string(bytes_written), ErrorCodes::AIO_WRITE_ERROR); if (::fsync(fd) < 0) @@ -772,7 +772,7 @@ void SSDComplexKeyCachePartition::clearOldestBlocks() } #if defined(__FreeBSD__) - if (event.aio.udata != static_cast(request.aio.aio_nbytes)) + if (aio_return(reinterpret_cast(event.udata)) != static_cast(request.aio.aio_nbytes)) throw Exception("GC: AIO failed to read file " + path + BIN_FILE_EXT + ".", ErrorCodes::AIO_READ_ERROR); #else if (event.res != static_cast(request.aio_nbytes)) From 207de9ca9c4ad923a86afc70b59ef096be9c6264 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 25 May 2020 00:21:08 +0300 Subject: [PATCH 0170/2229] fixed direct tests --- src/Functions/FunctionsExternalDictionaries.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 1e8c41cc724..425dcf8eec0 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -175,22 +175,22 @@ private: auto dict = helper.getDictionary(block.getByPosition(arguments[0])); if (!executeDispatchSimple(block, arguments, result, dict) && + !executeDispatchSimple(block, arguments, result, dict) && !executeDispatchSimple(block, arguments, result, dict) && !executeDispatchSimple(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchSimple(block, arguments, result, dict) && #endif !executeDispatchComplex(block, arguments, result, dict) && + !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && - !executeDispatchSimple(block, arguments, result, dict)) + !executeDispatchComplex(block, arguments, result, dict)) throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; } @@ -332,16 +332,17 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && + !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && #endif !executeDispatchComplex(block, arguments, result, dict) && + !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && #endif @@ -515,16 +516,17 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && + !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && #endif !executeDispatchComplex(block, arguments, result, dict) && + !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) 
&& #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && #endif @@ -854,16 +856,17 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && + !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && #endif !executeDispatchComplex(block, arguments, result, dict) && + !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && #endif @@ -1114,16 +1117,17 @@ private: if (!executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && + !executeDispatch(block, arguments, result, dict) && !executeDispatch(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatch(block, arguments, result, dict) && #endif !executeDispatchComplex(block, arguments, result, dict) && + !executeDispatchComplex(block, arguments, result, dict) && !executeDispatchComplex(block, arguments, result, dict) && #if defined(__linux__) || defined(__FreeBSD__) !executeDispatchComplex(block, arguments, result, dict) && #endif - !executeDispatchComplex(block, arguments, result, dict) && #if !defined(ARCADIA_BUILD) !executeDispatchComplex(block, arguments, result, dict) && #endif From a4d74601a69f589bdbcc5a70d5da5b0c63075d44 Mon Sep 17 00:00:00 2001 From: bobrovskij artemij Date: Mon, 25 May 2020 00:42:05 +0300 Subject: [PATCH 0171/2229] build warnings fix --- src/Dictionaries/MongoDBDictionarySource.h | 2 -- src/Storages/StorageMongoDB.cpp | 5 ----- 2 files changed, 7 deletions(-) diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index 190ffb1af91..e73b40be858 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -27,9 +27,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -# if POCO_VERSION < 0x01070800 void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password); -# endif std::unique_ptr createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 2f27042b162..adfdd10db6f 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -1,21 +1,16 @@ #include "StorageMongoDB.h" -#include #include #include #include -#include -#include #include #include #include #include #include #include -#include #include #include -#include #include #include #include From fb08a96582222ac6881efd2a9dd2d7edc7ec8c16 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 14:11:51 +0300 Subject: [PATCH 0172/2229] Add simple test --- .../0_stateless/01269_creare_with_null.reference | 1 + tests/queries/0_stateless/01269_create_with_null.sql | 11 +++++++++++ 2 files changed, 12 insertions(+) create 
mode 100644 tests/queries/0_stateless/01269_creare_with_null.reference create mode 100644 tests/queries/0_stateless/01269_create_with_null.sql diff --git a/tests/queries/0_stateless/01269_creare_with_null.reference b/tests/queries/0_stateless/01269_creare_with_null.reference new file mode 100644 index 00000000000..fa7b52d9ebf --- /dev/null +++ b/tests/queries/0_stateless/01269_creare_with_null.reference @@ -0,0 +1 @@ +Nullable(Int32) Int32 Nullable(Int32) \ No newline at end of file diff --git a/tests/queries/0_stateless/01269_create_with_null.sql b/tests/queries/0_stateless/01269_create_with_null.sql new file mode 100644 index 00000000000..745fc9767a7 --- /dev/null +++ b/tests/queries/0_stateless/01269_create_with_null.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS data_null; + +CREATE TABLE data_null ( + a INT NULL, + b INT NOT NULL, + C Nullable(INT) +); + +INSERT INTO data_null VALUES (1, 2, 3); + +SELECT toTypeName(*) FROM data_null; \ No newline at end of file From 1ae82df3c0561b0f5968b919e124926212b02310 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 14:20:33 +0300 Subject: [PATCH 0173/2229] at start --- src/Interpreters/InterpreterCreateQuery.cpp | 15 +++++++------ src/Parsers/ASTColumnDeclaration.cpp | 20 ++++++++--------- src/Parsers/ASTColumnDeclaration.h | 4 ++-- src/Parsers/ParserCreateQuery.h | 24 ++++++++++----------- 4 files changed, 31 insertions(+), 32 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 586f2d0f056..4c220d43b17 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -71,6 +71,7 @@ namespace ErrorCodes extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int DICTIONARY_ALREADY_EXISTS; + extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE } @@ -287,25 +288,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; - if (!col_decl.isNULL && col_decl.isNot) - throw Exception{"Cant use NOT without NULL", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + if (!col_decl.is_null && col_decl.is_not) + throw Exception{"Cant use NOT without NULL", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.isNot && col_decl.isNULL) { + if (col_decl.is_not && col_decl.is_null) { if (column_type->isNullable()) - throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; - } else if (col_decl.isNULL && !col_decl.isNot) { + throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; + } else if (col_decl.is_null && !col_decl.is_not) { if (column_type->isNullable()) - throw Exception{"Cant use NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + throw Exception{"Cant use NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; else { column_type = makeNullable(column_type); } } - if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNot && !col_decl.isNULL) + if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.is_not && !col_decl.is_null) column_type = makeNullable(column_type); column_names_and_types.emplace_back(col_decl.name, column_type); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp 
index 40513b45586..de5abe28ffb 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -16,16 +16,16 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->type); } - if (isNULL) + if (is_null) { - res->isNULL = isNULL; - res->children.push_back(res->isNULL); + res->is_null = is_null; + res->children.push_back(res->is_null); } - if (isNot) + if (is_not) { - res->isNot = isNot; - res->children.push_back(res->isNot); + res->is_not = is_not; + res->children.push_back(res->is_not); } if (default_expression) @@ -71,16 +71,16 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta type->formatImpl(settings, state, frame); } - if (isNot) + if (is_not) { settings.ostr << ' '; - isNot->formatImpl(settings, state, frame); + is_not->formatImpl(settings, state, frame); } - if (isNULL) + if (is_null) { settings.ostr << ' '; - isNULL->formatImpl(settings, state, frame); + is_null->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 406a8cebded..34afd771de2 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -13,8 +13,8 @@ class ASTColumnDeclaration : public IAST public: String name; ASTPtr type; - ASTPtr isNULL; - ASTPtr isNot; + ASTPtr is_null; + ASTPtr is_not; String default_specifier; ASTPtr default_expression; ASTPtr comment; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 8976de8f1bc..c2b36460397 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,8 +10,6 @@ #include #include -#include - namespace DB { @@ -141,8 +139,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E */ ASTPtr type; String default_specifier; - ASTPtr isNull; - ASTPtr isNot; + ASTPtr is_null; + ASTPtr is_not; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -175,14 +173,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (s_not.check(pos, expected)) { if (s_null.check(pos, expected)) { - isNot = std::make_shared("NOT"); - isNull = std::make_shared("NULL"); + is_not = std::make_shared("NOT"); + is_null = std::make_shared("NULL"); } else { return false; } } else { if (s_null.check(pos, expected)) { - isNull = std::make_shared("NULL"); + is_null = std::make_shared("NULL"); } } @@ -215,14 +213,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(type)); } - if (isNull) { - column_declaration->isNULL = isNull; - column_declaration->children.push_back(std::move(isNull)); + if (is_null) { + column_declaration->is_null = is_null; + column_declaration->children.push_back(std::move(is_null)); } - if (isNot) { - column_declaration->isNot = isNot; - column_declaration->children.push_back(std::move(isNot)); + if (is_not) { + column_declaration->is_not = is_not; + column_declaration->children.push_back(std::move(is_not)); } if (default_expression) From 70fac9c068f352dbe3a7ce96c81adc718770b409 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 14:20:33 +0300 Subject: [PATCH 0174/2229] Fix problems --- src/Interpreters/InterpreterCreateQuery.cpp | 15 +++++++------ src/Parsers/ASTColumnDeclaration.cpp | 20 ++++++++--------- src/Parsers/ASTColumnDeclaration.h | 4 ++-- src/Parsers/ParserCreateQuery.h | 24 ++++++++++----------- 4 files changed, 31 insertions(+), 32 deletions(-) diff --git 
a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 586f2d0f056..4c220d43b17 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -71,6 +71,7 @@ namespace ErrorCodes extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int DICTIONARY_ALREADY_EXISTS; + extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE } @@ -287,25 +288,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; - if (!col_decl.isNULL && col_decl.isNot) - throw Exception{"Cant use NOT without NULL", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + if (!col_decl.is_null && col_decl.is_not) + throw Exception{"Cant use NOT without NULL", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.isNot && col_decl.isNULL) { + if (col_decl.is_not && col_decl.is_null) { if (column_type->isNullable()) - throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; - } else if (col_decl.isNULL && !col_decl.isNot) { + throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; + } else if (col_decl.is_null && !col_decl.is_not) { if (column_type->isNullable()) - throw Exception{"Cant use NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + throw Exception{"Cant use NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; else { column_type = makeNullable(column_type); } } - if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNot && !col_decl.isNULL) + if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.is_not && !col_decl.is_null) column_type = makeNullable(column_type); column_names_and_types.emplace_back(col_decl.name, column_type); diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 40513b45586..de5abe28ffb 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -16,16 +16,16 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->type); } - if (isNULL) + if (is_null) { - res->isNULL = isNULL; - res->children.push_back(res->isNULL); + res->is_null = is_null; + res->children.push_back(res->is_null); } - if (isNot) + if (is_not) { - res->isNot = isNot; - res->children.push_back(res->isNot); + res->is_not = is_not; + res->children.push_back(res->is_not); } if (default_expression) @@ -71,16 +71,16 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta type->formatImpl(settings, state, frame); } - if (isNot) + if (is_not) { settings.ostr << ' '; - isNot->formatImpl(settings, state, frame); + is_not->formatImpl(settings, state, frame); } - if (isNULL) + if (is_null) { settings.ostr << ' '; - isNULL->formatImpl(settings, state, frame); + is_null->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 406a8cebded..34afd771de2 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -13,8 +13,8 @@ class ASTColumnDeclaration : public IAST public: String name; ASTPtr type; - ASTPtr isNULL; - ASTPtr isNot; + ASTPtr is_null; + ASTPtr is_not; String default_specifier; ASTPtr default_expression; 
ASTPtr comment; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 8976de8f1bc..c2b36460397 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,8 +10,6 @@ #include #include -#include - namespace DB { @@ -141,8 +139,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E */ ASTPtr type; String default_specifier; - ASTPtr isNull; - ASTPtr isNot; + ASTPtr is_null; + ASTPtr is_not; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr codec_expression; @@ -175,14 +173,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (s_not.check(pos, expected)) { if (s_null.check(pos, expected)) { - isNot = std::make_shared("NOT"); - isNull = std::make_shared("NULL"); + is_not = std::make_shared("NOT"); + is_null = std::make_shared("NULL"); } else { return false; } } else { if (s_null.check(pos, expected)) { - isNull = std::make_shared("NULL"); + is_null = std::make_shared("NULL"); } } @@ -215,14 +213,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(type)); } - if (isNull) { - column_declaration->isNULL = isNull; - column_declaration->children.push_back(std::move(isNull)); + if (is_null) { + column_declaration->is_null = is_null; + column_declaration->children.push_back(std::move(is_null)); } - if (isNot) { - column_declaration->isNot = isNot; - column_declaration->children.push_back(std::move(isNot)); + if (is_not) { + column_declaration->is_not = is_not; + column_declaration->children.push_back(std::move(is_not)); } if (default_expression) From 927de75a477415ed87c5e6b2780ea63d3311bd52 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 14:53:12 +0300 Subject: [PATCH 0175/2229] Fix problems --- src/DataTypes/DataTypesNumber.cpp | 116 ++++++------------------------ 1 file changed, 20 insertions(+), 96 deletions(-) diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index f260daa15b4..3fbf81791dd 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -15,101 +15,25 @@ namespace ErrorCodes extern const int UNEXPECTED_AST_STRUCTURE; } -static DataTypePtr createForInt8(const ASTPtr & arguments) +template +static DataTypePtr createNumericDataType(const ASTPtr & arguments) { - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT8 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT8 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt16(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT16 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT16 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt32(const ASTPtr & 
arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT32 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT32 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForInt64(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 1) - throw Exception("INT64 data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64 || argument->value.get() == 0) - throw Exception("INT64 data type family may have only a number (positive integer) as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - - return std::make_shared(); -} - -static DataTypePtr createForFloat32(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() > 2) - throw Exception("FLOAT32 data type family must not have more than two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - else if (arguments->children.size() == 1) { - const auto * argument = arguments->children[0]->as(); - if (!argument || argument->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT32 data type family may have a non negative number as its argument", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } else if (arguments->children.size() == 2) { - const auto * beforePoint = arguments->children[0]->as(); - const auto * afterPoint = arguments->children[1]->as(); - if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || - !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT32 data type family may have a non negative number as its arguments", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + if (arguments) + { + if (std::is_integral_v) + { + if (arguments->children.size() > 1) + throw Exception(String(TypeName::get()) + " data type family must not have more than one argument - display width", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + else + { + if (arguments->children.size() > 2) + throw Exception(String(TypeName::get()) + " data type family must not have more than two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } } - - return std::make_shared(); + return std::make_shared>(); } -static DataTypePtr createForFloat64(const ASTPtr & arguments) -{ - if (arguments) { - if (arguments->children.size() != 2) - throw Exception("FLOAT64 data type family must have only two arguments - total number of digits and number of digits following the decimal point", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - else { - const auto * beforePoint = arguments->children[0]->as(); - const auto * afterPoint = arguments->children[1]->as(); - if (!beforePoint || beforePoint->value.getType() != Field::Types::UInt64 || - !afterPoint|| afterPoint->value.getType() != Field::Types::UInt64) - throw Exception("FLOAT64 data type family may have a non negative number as its arguments", 
ErrorCodes::UNEXPECTED_AST_STRUCTURE); - } - } - - return std::make_shared(); -} - - void registerDataTypeNumbers(DataTypeFactory & factory) { @@ -118,12 +42,12 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerSimpleDataType("UInt32", [] { return DataTypePtr(std::make_shared()); }); factory.registerSimpleDataType("UInt64", [] { return DataTypePtr(std::make_shared()); }); - factory.registerDataType("Int8", createForInt8); - factory.registerDataType("Int16", createForInt16); - factory.registerDataType("Int32", createForInt32); - factory.registerDataType("Int64", createForInt64); - factory.registerDataType("Float32", createForFloat32); - factory.registerDataType("Float64", createForFloat64); + factory.registerDataType("Int8", createNumericDataType); + factory.registerDataType("Int16", createNumericDataType); + factory.registerDataType("Int32", createNumericDataType); + factory.registerDataType("Int64", createNumericDataType); + factory.registerDataType("Float32", createNumericDataType); + factory.registerDataType("Float64", createNumericDataType); /// These synonyms are added for compatibility. From 7135b8491c5b66788a6dd8c3e0536e175a78a616 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Mon, 25 May 2020 15:12:50 +0300 Subject: [PATCH 0176/2229] Base memory data storage --- .../AggregateFunctionDistinct.h | 52 +++++++++++++------ 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index e7ccbc62c57..7e86364ab0d 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -25,13 +25,15 @@ struct AggregateFunctionDistinctData UInt128TrivialHash, HashTableGrower<3>, HashTableAllocatorWithStackMemory - > data; + > set; std::mutex mutex; - bool ALWAYS_INLINE TryToInsert(const Key& key) + bool ALWAYS_INLINE tryToInsert(const Key& key) { std::lock_guard lock(mutex); - return data.insert(key).second; + bool a = set.insert(key).second; + if (a) std::cerr << key.high << ' ' << key.low << ' ' << a << std::endl; + return a; } }; @@ -39,18 +41,30 @@ struct AggregateFunctionDistinctData * Adding -Distinct suffix to aggregate function **/ -class AggregateFunctionDistinct final : public IAggregateFunctionHelper +class AggregateFunctionDistinct final : public IAggregateFunctionDataHelper { private: AggregateFunctionPtr nested_func; size_t num_arguments; - mutable AggregateFunctionDistinctData storage; + size_t prefix_size; + + AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept + { + return place + prefix_size; + } + + ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr place) const noexcept + { + return place + prefix_size; + } public: AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments) - : IAggregateFunctionHelper(arguments, {}) + : IAggregateFunctionDataHelper(arguments, {}) , nested_func(nested), num_arguments(arguments.size()) { + prefix_size = 640'000'000; + if (arguments.empty()) throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } @@ -67,16 +81,19 @@ public: void create(AggregateDataPtr place) const override { - nested_func->create(place); + new (place) AggregateFunctionDistinctData; + nested_func->create(getNestedPlace(place)); } - void destroy(AggregateDataPtr place) const noexcept override { - nested_func->destroy(place); + void destroy(AggregateDataPtr 
place) const noexcept override + { + data(place).~AggregateFunctionDistinctData(); + nested_func->destroy(getNestedPlace(place)); } size_t sizeOfData() const override { - return nested_func->sizeOfData(); + return prefix_size + nested_func->sizeOfData(); } size_t alignOfData() const override @@ -92,34 +109,35 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { SipHash hash; - for (size_t i = 0; i < num_arguments; ++i) + for (size_t i = 0; i < num_arguments; ++i) { columns[i]->updateHashWithValue(row_num, hash); + } UInt128 key; hash.get128(key.low, key.high); - if (storage.TryToInsert(key)) - nested_func->add(place, columns, row_num, arena); + if (this->data(place).tryToInsert(key)) + nested_func->add(getNestedPlace(place), columns, row_num, arena); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(place, rhs, arena); + nested_func->merge(getNestedPlace(place), rhs, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - nested_func->serialize(place, buf); + nested_func->serialize(getNestedPlace(place), buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(place, buf, arena); + nested_func->deserialize(getNestedPlace(place), buf, arena); } void insertResultInto(AggregateDataPtr place, IColumn & to) const override { - nested_func->insertResultInto(place, to); + nested_func->insertResultInto(getNestedPlace(place), to); } bool allocatesMemoryInArena() const override From f206d74b63f00b2037a82257291bd721decce8ff Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Mon, 25 May 2020 17:02:55 +0300 Subject: [PATCH 0177/2229] fix align of data && add test --- src/AggregateFunctions/AggregateFunctionDistinct.h | 7 ++----- .../0_stateless/01259_combinator_distinct.reference | 1 + tests/queries/0_stateless/01259_combinator_distinct.sql | 1 + 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 7e86364ab0d..57e17ffb13c 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -30,10 +30,7 @@ struct AggregateFunctionDistinctData bool ALWAYS_INLINE tryToInsert(const Key& key) { - std::lock_guard lock(mutex); - bool a = set.insert(key).second; - if (a) std::cerr << key.high << ' ' << key.low << ' ' << a << std::endl; - return a; + return set.insert(key).second; } }; @@ -63,7 +60,7 @@ public: : IAggregateFunctionDataHelper(arguments, {}) , nested_func(nested), num_arguments(arguments.size()) { - prefix_size = 640'000'000; + prefix_size = sizeof(AggregateFunctionDistinctData); if (arguments.empty()) throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference index 34d13676466..739d225ad67 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct.reference @@ -1,4 +1,5 @@ 499500 78 [0,1,2,3,4,5,6,7,8,9,10,11,12] +[0,1,2,3,4,5,6,7,8,9,10,11,12] 5.669227916063075e-17 diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql index 
e3c4bb114a3..3f07dc443dd 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.sql +++ b/tests/queries/0_stateless/01259_combinator_distinct.sql @@ -1,4 +1,5 @@ SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers LIMIT 1000); SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); +SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 1000); SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000); \ No newline at end of file From aea97ba9e47a732b10f9b9fec62e7d5f320a8b58 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 20:10:44 +0300 Subject: [PATCH 0178/2229] Fix types and add complex test --- src/DataTypes/DataTypesNumber.cpp | 4 +- .../01268_data_numeric_parameters.reference | 3 ++ .../01268_data_numeric_parameters.sql | 42 +++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01268_data_numeric_parameters.reference create mode 100644 tests/queries/0_stateless/01268_data_numeric_parameters.sql diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 3fbf81791dd..7fe6f64f6c6 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -46,8 +46,8 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerDataType("Int16", createNumericDataType); factory.registerDataType("Int32", createNumericDataType); factory.registerDataType("Int64", createNumericDataType); - factory.registerDataType("Float32", createNumericDataType); - factory.registerDataType("Float64", createNumericDataType); + factory.registerDataType("Float32", createNumericDataType); + factory.registerDataType("Float64", createNumericDataType); /// These synonyms are added for compatibility. 
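The refactoring above collapses six nearly identical createForXxx creators into a single createNumericDataType template: std::is_integral_v selects the one-argument limit (display width) for the integer families and the two-argument limit (total digits and digits after the decimal point) for the floating-point ones. A minimal self-contained sketch of the same pattern, with simplified stand-ins for the factory and AST machinery (IDataType, DataTypeNumber, and TypeArguments here are illustrative placeholders, not the real ClickHouse classes):

// Sketch only: mirrors the createNumericDataType refactoring above with
// placeholder types instead of the real DataTypeFactory/AST machinery.
#include <cstdint>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

struct IDataType { virtual ~IDataType() = default; };
template <typename T> struct DataTypeNumber : IDataType {};

using TypeArguments = std::vector<int>; // stand-in for the parsed ASTPtr argument list

template <typename T>
std::shared_ptr<IDataType> createNumericDataType(const TypeArguments & arguments, const std::string & family)
{
    if (std::is_integral_v<T>)
    {
        // Integer families take at most one argument - the display width, e.g. INT(32).
        if (arguments.size() > 1)
            throw std::invalid_argument(family + " must not have more than one argument");
    }
    else
    {
        // Floating-point families take at most two - digits and decimals, e.g. DOUBLE(4, 18).
        if (arguments.size() > 2)
            throw std::invalid_argument(family + " must not have more than two arguments");
    }
    return std::make_shared<DataTypeNumber<T>>();
}

int main()
{
    auto i8 = createNumericDataType<int8_t>({8}, "Int8");         // TINYINT(8)
    auto f64 = createNumericDataType<double>({4, 18}, "Float64"); // DOUBLE(4, 18)
    try
    {
        createNumericDataType<int32_t>({10, 2}, "Int32");         // too many arguments
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}

The MySQL-style width and precision arguments (TINYINT(8), DOUBLE(4, 18)) are validated but otherwise ignored, which is exactly what the 01268_data_numeric_parameters test in the next patch checks: FLOAT(15, 22) still maps to plain Float32.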
diff --git a/tests/queries/0_stateless/01268_data_numeric_parameters.reference b/tests/queries/0_stateless/01268_data_numeric_parameters.reference new file mode 100644 index 00000000000..fd76fd4ef54 --- /dev/null +++ b/tests/queries/0_stateless/01268_data_numeric_parameters.reference @@ -0,0 +1,3 @@ +Int8 Int8 Int16 Int16 Int32 Int32 Int64 Int64 +Float32 Float32 Float32 Float64 Float64 Float64 +String String diff --git a/tests/queries/0_stateless/01268_data_numeric_parameters.sql b/tests/queries/0_stateless/01268_data_numeric_parameters.sql new file mode 100644 index 00000000000..eceba51e7f5 --- /dev/null +++ b/tests/queries/0_stateless/01268_data_numeric_parameters.sql @@ -0,0 +1,42 @@ +DROP TABLE IF EXISTS ints; +DROP TABLE IF EXISTS floats; +DROP TABLE IF EXISTS strings; + +CREATE TABLE ints ( + a TINYINT, + b TINYINT(8), + c SMALLINT, + d SMALLINT(16), + e INT, + f INT(32), + g BIGINT, + h BIGINT(64) +) engine=Memory; + +INSERT INTO ints VALUES (1, 8, 11, 16, 21, 32, 41, 64); + +SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d), toTypeName(e), toTypeName(f), toTypeName(g), toTypeName(h) FROM ints; + +CREATE TABLE floats ( + a FLOAT, + b FLOAT(12), + c FLOAT(15, 22), + d DOUBLE, + e DOUBLE(12), + f DOUBLE(4, 18) + +) engine=Memory; + +INSERT INTO floats VALUES (1.1, 1.2, 1.3, 41.1, 41.1, 42.1); + +SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d), toTypeName(e), toTypeName(f) FROM floats; + + +CREATE TABLE strings ( + a VARCHAR, + b VARCHAR(11) +) engine=Memory; + +INSERT INTO strings VALUES ('test', 'string'); + +SELECT toTypeName(a), toTypeName(b) FROM strings; From 2974d81b2ecb947d81973c65da9f3c9b46216087 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 21:58:30 +0300 Subject: [PATCH 0179/2229] Fix errors and add test --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- .../01269_create_with_null.reference | 2 + .../0_stateless/01269_create_with_null.sql | 39 +++++++++++++++++-- 3 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/01269_create_with_null.reference diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 4c220d43b17..81936f3706f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -71,7 +71,7 @@ namespace ErrorCodes extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int DICTIONARY_ALREADY_EXISTS; - extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE + extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; } diff --git a/tests/queries/0_stateless/01269_create_with_null.reference b/tests/queries/0_stateless/01269_create_with_null.reference new file mode 100644 index 00000000000..7ef113393d5 --- /dev/null +++ b/tests/queries/0_stateless/01269_create_with_null.reference @@ -0,0 +1,2 @@ +Nullable(Int32) Int32 Nullable(Int32) Int32 +Nullable(Int32) Int32 Nullable(Int32) Nullable(Int32) diff --git a/tests/queries/0_stateless/01269_create_with_null.sql b/tests/queries/0_stateless/01269_create_with_null.sql index 745fc9767a7..68fa130e0da 100644 --- a/tests/queries/0_stateless/01269_create_with_null.sql +++ b/tests/queries/0_stateless/01269_create_with_null.sql @@ -1,11 +1,42 @@ DROP TABLE IF EXISTS data_null; +DROP TABLE IF EXISTS set_null; CREATE TABLE data_null ( a INT NULL, b INT NOT NULL, - C Nullable(INT) -); + c Nullable(INT), + d INT +) engine=Memory(); + -INSERT INTO data_null VALUES (1, 2, 3); -SELECT toTypeName(*) FROM
data_null; \ No newline at end of file + +INSERT INTO data_null VALUES (1, 2, 3, 4); + +SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d) FROM data_null; + + +CREATE TABLE data_null ( + a Nullable(INT) NULL, + b INT NOT NULL, + c Nullable(INT) +) engine=Memory(); --{serverError 377} + + +CREATE TABLE data_null ( + a INT NULL, + b Nullable(INT) NOT NULL, + c Nullable(INT) +) engine=Memory(); --{serverError 377} + +SET data_type_default_nullable='true'; + +CREATE TABLE set_null ( + a INT NULL, + b INT NOT NULL, + c Nullable(INT), + d INT +) engine=Memory(); + + +INSERT INTO set_null VALUES (1, 2, 3, 4); + +SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d) FROM set_null; From 1fbcdbb58a315c56931ce4daf3499538a89b8622 Mon Sep 17 00:00:00 2001 From: potya Date: Mon, 25 May 2020 22:09:14 +0300 Subject: [PATCH 0180/2229] Fix error --- src/Interpreters/InterpreterCreateQuery.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index f92a751d64b..d4773d007e5 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -71,7 +71,7 @@ namespace ErrorCodes extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int DICTIONARY_ALREADY_EXISTS; - extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; + extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; } @@ -288,25 +288,25 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; - if (!col_decl.isNULL && col_decl.isNot) - throw Exception{"Cant use NOT without NULL", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + if (!col_decl.is_null && col_decl.is_not) + throw Exception{"Can't use NOT without NULL", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.isNot && col_decl.isNULL) { + if (col_decl.is_not && col_decl.is_null) { if (column_type->isNullable()) - throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; - } else if (col_decl.isNULL && !col_decl.isNot) { + throw Exception{"Can't use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; + } else if (col_decl.is_null && !col_decl.is_not) { if (column_type->isNullable()) - throw Exception{"Cant use NULL with Nullable", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + throw Exception{"Can't use NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; else { column_type = makeNullable(column_type); } } - if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.isNot && !col_decl.isNULL) + if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.is_not && !col_decl.is_null) column_type = makeNullable(column_type); column_names_and_types.emplace_back(col_decl.name, column_type); From 58711c924bfd462014b0297a29124051092acc59 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 26 May 2020 13:19:59 +0300 Subject: [PATCH 0181/2229] Fix cmake --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1a0e74fdf25..de479fc669f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -269,6 +269,7 @@ dbms_target_link_libraries ( clickhouse_common_zookeeper clickhouse_dictionaries_embedded
Poco::JSON + Poco::MongoDB string_utils PUBLIC ${Boost_SYSTEM_LIBRARY} From 14c67c6ae63500eb54a9644844e9c42d087324bc Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 26 May 2020 17:34:57 +0000 Subject: [PATCH 0182/2229] Fixes --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 13 ++- .../RabbitMQ/RabbitMQBlockInputStream.h | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 4 +- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 1 - .../ReadBufferFromRabbitMQConsumer.cpp | 82 ++++++++++++------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 5 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 35 +++----- src/Storages/RabbitMQ/StorageRabbitMQ.h | 13 +-- .../integration/test_storage_rabbitmq/test.py | 14 ++-- 10 files changed, 94 insertions(+), 77 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 89ea490e842..d498a36f95b 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -5,6 +5,11 @@ #include #include +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -117,13 +122,13 @@ Block RabbitMQBlockInputStream::readImpl() auto new_rows = read_rabbitmq_message(); - auto _exchange = storage.getExchangeName(); - auto _routingKey = storage.getRoutingKey(); + auto exchange_name = storage.getExchangeName(); + auto routing_key = storage.getRoutingKey(); for (size_t i = 0; i < new_rows; ++i) { - virtual_columns[0]->insert(_exchange); - virtual_columns[1]->insert(_routingKey); + virtual_columns[0]->insert(exchange_name); + virtual_columns[1]->insert(routing_key); } total_rows = total_rows + new_rows; diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h index c82fd68a680..fbdb40bded8 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.h @@ -25,7 +25,7 @@ public: void readPrefixImpl() override; Block readImpl() override; - //void readSuffixImpl() override; + ///void readSuffixImpl() override; private: StorageRabbitMQ & storage; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index b18d6bf2cfb..aa72ab51878 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,14 +12,14 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * /*connection*/, const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) { LOG_ERROR(log, "Library error report: " << message); stop(); } -void RabbitMQHandler::startNonBlock() +void RabbitMQHandler::start() { event_base_loop(evbase, EVLOOP_NONBLOCK); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 94a559cad38..5b8a08be548 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -17,7 +17,7 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void startNonBlock(); + void start(); void stop(); private: diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index f4c62756703..509ed68b8d3 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ 
-15,7 +15,6 @@ namespace DB M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. Needed to bind queues to it.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ - M(SettingUInt64, rabbitmq_bind_by_id, 0, "A flag which indicates that binding should be done in range [0, num_consumers * num_queues).", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index a9f804aaa02..5cdcbccadce 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -1,10 +1,18 @@ #include +#include <chrono> +#include <thread> #include #include #include #include +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 +}; + namespace DB { @@ -38,11 +46,21 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - * it result in asynchronous run of the same code and lead to occasional seg faults. + * it results in an asynchronous run of the same code (because local variables can be updated both by the current thread + * and in callbacks by another thread during the event loop, which blocks only the thread that started the loop). + * So sharing the connection (== sharing the event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection.
*/ - while (!connection.ready()) + + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) { event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for consumer"); } consumer_channel = std::make_shared<AMQP::TcpChannel>(&connection); @@ -85,12 +103,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() if (hash_exchange) { current_exchange_name = exchange_name + "_hash"; - consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * message) + consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * /* message */) { exchange_declared = false; }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) { exchange_declared = false; }); @@ -98,12 +116,12 @@ else { current_exchange_name = exchange_name + "_direct"; - consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * message) + consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * /* message */) { exchange_declared = false; }); - consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */) { exchange_declared = false; }); @@ -113,30 +131,36 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { + /* This variable can be updated from a different thread in case of some error, so it's better to always check + * whether the exchange is in a working state and if not - declare it once again. + */ if (!exchange_declared) { initExchange(); exchange_declared = true; } - bool bindings_ok = false, bindings_error = false; + bool bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); String binding_key = routing_key; - if (bind_by_id && !hash_exchange) + /* Every consumer has at least one unique queue. Bind the queues to the exchange based on the consumer_channel_id + * in case there is one queue per consumer and bind by queue_id in case there is more than 1 queue per consumer.
+ * (queue_id is based on channel_id) + */ + if (bind_by_id || hash_exchange) { if (queues.size() == 1) { - binding_key = routing_key + "_" + std::to_string(channel_id); + binding_key = std::to_string(channel_id); } else { - binding_key = routing_key + "_" + std::to_string(channel_id + queue_id); + binding_key = std::to_string(channel_id + queue_id); } } @@ -145,7 +169,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) .onSuccess([&] { - bindings_ok = true; + bindings_created = true; }) .onError([&](const char * message) { @@ -159,9 +183,14 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to declare queue on the channel: " << message); }); - while (!bindings_ok && !bindings_error) + /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. + * It is important at this moment to make sure that queue bindings are created before any publishing can happen because + * otherwise messages will be routed nowhere. + */ + while (!bindings_created && !bindings_error) { - startNonBlockEventLoop(); + /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events + startEventLoop(); } } @@ -184,17 +213,14 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - bool consumer_ok = false, consumer_error = false; + bool consumer_created = false, consumer_error = false; consumer_channel->consume(queue_name, AMQP::noack) - .onSuccess([&](const std::string & consumer) + .onSuccess([&](const std::string & /* consumer */) { - if (consumerTag == "") - consumerTag = consumer; + consumer_created = true; - consumer_ok = true; - - LOG_TRACE(log, "Consumer " + consumerTag + " is subscribed to queue " + queue_name); + LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name); }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -218,16 +244,16 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) LOG_ERROR(log, "Consumer failed: " << message); }); - while (!consumer_ok && !consumer_error) + while (!consumer_created && !consumer_error) { - startNonBlockEventLoop(); + startEventLoop(); } } -void ReadBufferFromRabbitMQConsumer::startNonBlockEventLoop() +void ReadBufferFromRabbitMQConsumer::startEventLoop() { - eventHandler.startNonBlock(); + eventHandler.start(); } @@ -242,12 +268,12 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { /* Run the onReceived callbacks to save the messages that have been received by now */ - startNonBlockEventLoop(); + startEventLoop(); } if (received.empty()) { - LOG_TRACE(log, "Stalled"); + LOG_TRACE(log, "No more messages to be fetched"); return false; } @@ -256,7 +282,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() current = messages.begin(); } - auto new_position = const_cast(current->data()); + auto * new_position = const_cast(current->data()); BufferBase::set(new_position, current->size(), 0); ++current; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 7592fb53bfc..5e4318246a6 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -59,9 +59,8 
@@ private: bool allowed = true; const std::atomic & stopped; - std::atomic exchange_declared = false; + bool exchange_declared = false; const size_t num_queues; - String consumerTag; // ID for the consumer Queues queues; bool subscribed = false; String current_exchange_name; @@ -75,7 +74,7 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startNonBlockEventLoop(); + void startEventLoop(); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7e7da953d80..cfabb5412ba 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -33,6 +33,11 @@ #include +enum + { + RESCHEDULE_WAIT = 500 + }; + namespace DB { @@ -55,7 +60,6 @@ StorageRabbitMQ::StorageRabbitMQ( const String & format_name_, char row_delimiter_, size_t num_consumers_, - bool bind_by_id_, size_t num_queues_, bool hash_exchange_) : IStorage(table_id_) @@ -66,7 +70,6 @@ StorageRabbitMQ::StorageRabbitMQ( , format_name(global_context.getMacros()->expand(format_name_)) , row_delimiter(row_delimiter_) , num_consumers(num_consumers_) - , bind_by_id(bind_by_id_) , num_queues(num_queues_) , hash_exchange(hash_exchange_) , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) @@ -79,8 +82,7 @@ StorageRabbitMQ::StorageRabbitMQ( task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); - /// Enable a different routing algorithm. - bind_by_id = num_consumers > 1 || num_queues > 1 || bind_by_id; + bind_by_id = num_consumers > 1 || num_queues > 1; } @@ -181,7 +183,8 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() next_channel_id += num_queues; update_channel_id = true; - return std::make_shared(parsed_address, exchange_name, routing_key, next_channel_id, + return std::make_shared( + parsed_address, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -244,7 +247,7 @@ void StorageRabbitMQ::threadFunc() /// Wait for attached views if (!stream_cancelled) - task->scheduleAfter(500); + task->scheduleAfter(RESCHEDULE_WAIT); } @@ -397,13 +400,13 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - size_t bind_by_id = static_cast(rabbitmq_settings.rabbitmq_bind_by_id); + bool hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); if (args_count >= 6) { const auto * ast = engine_args[5]->as(); if (ast && ast->value.getType() == Field::Types::UInt64) { - bind_by_id = static_cast(safeGet(ast->value)); + hash_exchange = static_cast(safeGet(ast->value)); } else { @@ -439,22 +442,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - size_t hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); - if (args_count >= 9) - { - const auto * ast = engine_args[8]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) - { - hash_exchange = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); - } - } - return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange, - format, row_delimiter, num_consumers, bind_by_id, num_queues, hash_exchange); + format, row_delimiter, num_consumers, num_queues, hash_exchange); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git 
a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 8a3a48135b8..b334b48a301 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -53,10 +53,13 @@ protected: Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, const String & exchange_name_, - const String & format_name_, char row_delimiter_, - size_t num_consumers_, bool bind_by_id_, size_t num_queues_, bool hash_exchange); - + const String & routing_key_, + const String & exchange_name_, + const String & format_name_, + char row_delimiter_, + size_t num_consumers_, + size_t num_queues_, + bool hash_exchange); private: Context global_context; @@ -80,7 +83,7 @@ private: std::mutex mutex; std::vector buffers; /// available buffers for RabbitMQ consumers - size_t next_channel_id = 0; + size_t next_channel_id = 1; /// Must >= 1 because it is used as a binding key, which has to be > 0 bool update_channel_id = false; BackgroundSchedulePool::TaskHolder task; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 815a84c1999..821c5a19e68 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -528,7 +528,7 @@ def test_rabbitmq_sharding_between_tables(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'topic_' + str(randrange(0, NUMBER_OF_CONCURRENT_CONSUMERS)) + key = str(randrange(1, NUMBER_OF_CONCURRENT_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -576,7 +576,6 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'clickhouse', rabbitmq_num_consumers = 5, rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; @@ -605,7 +604,7 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_CHANNELS)) + key = str(randrange(1, NUM_CHANNELS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -641,7 +640,6 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_queues = 4, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; DROP TABLE IF EXISTS test.view; @@ -669,7 +667,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_QUEUES)) + key = str(randrange(1, NUM_QUEUES)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -707,7 +705,6 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_queues = 2, rabbitmq_num_consumers = 10, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; DROP 
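Starting next_channel_id at 1 (see the comment in StorageRabbitMQ.h above) means the numeric binding keys created across all consumers form the contiguous range [1, num_consumers * num_queues], which the updated tests now target with randrange(1, ...). A hypothetical helper that reproduces the arithmetic of createReadBuffer() and initQueueBindings():

    #include <cstddef>
    #include <string>
    #include <vector>

    /// Illustration only: enumerates the binding keys the engine will create.
    std::vector<std::string> bindingKeys(size_t num_consumers, size_t num_queues)
    {
        std::vector<std::string> keys;
        size_t next_channel_id = 1; /// used as a binding key, so it has to be > 0
        for (size_t consumer = 0; consumer < num_consumers; ++consumer)
        {
            for (size_t queue_id = 0; queue_id < num_queues; ++queue_id)
                keys.push_back(std::to_string(next_channel_id + queue_id));
            next_channel_id += num_queues; /// as in StorageRabbitMQ::createReadBuffer()
        }
        return keys;
    }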
TABLE IF EXISTS test.view; @@ -735,7 +732,7 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_QUEUES * NUM_CONSUMERS)) + key = str(randrange(1, NUM_QUEUES * NUM_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() @@ -772,7 +769,6 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 4, - rabbitmq_routing_key = 'clickhouse', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; ''') @@ -807,7 +803,7 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = 'clickhouse_' + str(randrange(0, NUM_CONSUMERS)) + key = str(randrange(1, NUM_CONSUMERS)) for message in messages: channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) connection.close() From 310d5225280029b9018c6caf0a2570bb70999e99 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 26 May 2020 22:21:18 +0300 Subject: [PATCH 0183/2229] minimal implementation --- contrib/cassandra | 2 +- .../CassandraBlockInputStream.cpp | 124 ++++++++++++------ src/Dictionaries/CassandraBlockInputStream.h | 5 + .../CassandraDictionarySource.cpp | 74 ++++++++--- src/Dictionaries/CassandraDictionarySource.h | 52 ++++---- src/Dictionaries/ExternalQueryBuilder.cpp | 43 +++--- src/Dictionaries/ExternalQueryBuilder.h | 4 +- tests/integration/helpers/cluster.py | 15 ++- .../helpers/docker_compose_cassandra.yml | 2 +- .../external_sources.py | 50 ++++++- .../test.py | 9 +- 11 files changed, 276 insertions(+), 104 deletions(-) diff --git a/contrib/cassandra b/contrib/cassandra index bc593f2644a..9606ff1f70b 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit bc593f2644a6c50c4057459e242e214a6af70969 +Subproject commit 9606ff1f70bd3fc5d395df32e626923c012ffb5f diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index bf43adcdc59..473a42549a8 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -50,76 +50,77 @@ namespace void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { + /// Cassandra does not support unsigned integers switch (type) { case ValueType::vtUInt8: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt16: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt32: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt64: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case 
ValueType::vtInt8: { cass_int8_t value; cass_value_get_int8(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt16: { cass_int16_t value; cass_value_get_int16(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt32: { cass_int32_t value; cass_value_get_int32(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt64: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtFloat32: { cass_float_t value; cass_value_get_float(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtFloat64: { cass_double_t value; cass_value_get_double(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtString: @@ -127,21 +128,21 @@ namespace const char * value; size_t value_length; cass_value_get_string(cass_value, &value, &value_length); - static_cast(column).insertData(value, value_length); + assert_cast(column).insertData(value, value_length); break; } case ValueType::vtDate: { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(UInt32{cass_date_from_epoch(value)}); // FIXME + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); break; } case ValueType::vtDateTime: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value / 1000)); break; } case ValueType::vtUUID: @@ -150,7 +151,7 @@ namespace cass_value_get_uuid(cass_value, &value); std::array uuid_str; cass_uuid_string(value, uuid_str.data()); - static_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); + assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } } @@ -161,10 +162,10 @@ namespace Block CassandraBlockInputStream::readImpl() { - if (has_more_pages) + if (!has_more_pages) return {}; - MutableColumns columns(description.sample_block.columns()); + MutableColumns columns = description.sample_block.cloneEmptyColumns(); CassFuture* query_future = cass_session_execute(session, statement); result = cass_future_get_result(query_future); @@ -177,23 +178,49 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result); - const CassValue* map = cass_row_get_column(row, 0); - iterator = cass_iterator_from_map(map); - while (cass_iterator_next(iterator)) { - const CassValue* cass_key = cass_iterator_get_map_key(iterator); - const CassValue* cass_value = cass_iterator_get_map_value(iterator); - auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; - for (const auto &[value, idx]: pair_values) { - if (description.types[idx].second) { - ColumnNullable & column_nullable = static_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + [[maybe_unused]] size_t row_count = 0; + assert(cass_result_column_count(result) == columns.size()); + CassIterator * rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + while 
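One unit conversion above is easy to miss: Cassandra's timestamp type counts milliseconds since the Unix epoch, while ClickHouse DateTime stores whole seconds, hence the division by 1000 in the vtDateTime branch. A worked one-liner with illustrative values:

    int64_t value = 1590000000123;                      /// cass_int64_t from cass_value_get_int64, in ms
    auto seconds = static_cast<uint32_t>(value / 1000); /// 1590000000 == 2020-05-20 18:40:00 UTC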
(cass_iterator_next(rows_iter)) + { + const CassRow * row = cass_iterator_get_row(rows_iter); + for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) + { + const CassValue * val = cass_row_get_column(row, col_idx); + if (cass_value_is_null(val)) + columns[col_idx]->insertDefault(); + else if (description.types[col_idx].second) + { + ColumnNullable & column_nullable = static_cast(*columns[col_idx]); + insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); + column_nullable.getNullMapData().emplace_back(0); } + else + insertValue(*columns[col_idx], description.types[col_idx].first, val); + } + ++row_count; + } + assert(cass_result_row_count(result) == row_count); + cass_iterator_free(rows_iter); + + //const CassRow* row = cass_result_first_row(result); + //const CassValue* map = cass_row_get_column(row, 0); + //iterator = cass_iterator_from_map(map); + //while (cass_iterator_next(iterator)) { + // const CassValue* cass_key = cass_iterator_get_map_key(iterator); + // const CassValue* cass_value = cass_iterator_get_map_value(iterator); + // auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; + // for (const auto &[value, idx]: pair_values) { + // if (description.types[idx].second) { + // ColumnNullable & column_nullable = static_cast(*columns[idx]); + // insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + // column_nullable.getNullMapData().emplace_back(0); + // } else { + // insertValue(*columns[idx], description.types[idx].first, value); + // } + // } + //} has_more_pages = cass_result_has_more_pages(result); @@ -207,5 +234,28 @@ } +void cassandraCheck(CassError code) +{ + if (code != CASS_OK) + throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code), + ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + +void cassandraWaitAndCheck(CassFuture * future) +{ + auto code = cass_future_error_code(future); /// Waits if not ready + if (code == CASS_OK) + { + cass_future_free(future); + return; + } + const char * message; + size_t message_len; + cass_future_error_message(future, &message, &message_len); + String full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message; + cass_future_free(future); /// Frees message + throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 8af63745f17..c8476bd2c15 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -8,6 +8,11 @@ namespace DB { + +void cassandraCheck(CassError error); +void cassandraWaitAndCheck(CassFuture * future); + + /// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining class CassandraBlockInputStream final : public IBlockInputStream { diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 3a4c7e2f2b7..c165ecea212 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,6 +1,8 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" +#include namespace DB { @@ -51,25 +53,32 @@
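The two helpers added here wrap the DataStax driver's dual error model: synchronous cass_* calls return a CassError directly, while asynchronous calls return a CassFuture that has to be awaited and freed on every path. Typical call sites, mirroring the constructor changes just below (host, port and keyspace values are illustrative):

    cassandraCheck(cass_cluster_set_contact_points(cluster, "localhost"));
    cassandraCheck(cass_cluster_set_port(cluster, 9042));
    cassandraWaitAndCheck(cass_session_connect_keyspace(session, cluster, "test"));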
static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( const DB::DictionaryStructure & dict_struct_, - const std::string & host_, + const String & host_, UInt16 port_, - const std::string & user_, - const std::string & password_, - const std::string & method_, - const std::string & db_, + const String & user_, + const String & password_, + //const std::string & method_, + const String & db_, + const String & table_, const DB::Block & sample_block_) - : dict_struct(dict_struct_) + : log(&Logger::get("CassandraDictionarySource")) + , dict_struct(dict_struct_) , host(host_) , port(port_) , user(user_) , password(password_) - , method(method_) + //, method(method_) , db(db_) + , table(table_) , sample_block(sample_block_) - , cluster(cass_cluster_new()) + , cluster(cass_cluster_new()) //FIXME will not be freed in case of exception , session(cass_session_new()) { - cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); + cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); + if (port) + cassandraCheck(cass_cluster_set_port(cluster, port)); + cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); + cassandraWaitAndCheck(cass_session_connect_keyspace(session, cluster, db.c_str())); } CassandraDictionarySource::CassandraDictionarySource( @@ -80,11 +89,12 @@ CassandraDictionarySource::CassandraDictionarySource( : CassandraDictionarySource( dict_struct_, config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port"), + config.getUInt(config_prefix + ".port", 0), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), - config.getString(config_prefix + ".method", ""), - config.getString(config_prefix + ".db", ""), + //config.getString(config_prefix + ".method", ""), + config.getString(config_prefix + ".keyspace", ""), + config.getString(config_prefix + ".column_family"), sample_block_) { } @@ -95,8 +105,9 @@ CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySo other.port, other.user, other.password, - other.method, + //other.method, other.db, + other.table, other.sample_block} { } @@ -106,18 +117,45 @@ CassandraDictionarySource::~CassandraDictionarySource() { cass_cluster_free(cluster); } -std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { - return host + (port != 0 ? ":" + std::to_string(port) : ""); -} +//std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { +// return host + (port != 0 ? ":" + std::to_string(port) : ""); +//} -BlockInputStreamPtr CassandraDictionarySource::loadAll() { - return std::make_shared(nullptr, "", sample_block, max_block_size); +BlockInputStreamPtr CassandraDictionarySource::loadAll() +{ + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadAllQuery(); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading all using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? 
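All three load paths above share one rewrite of the generated CQL: strip the trailing semicolon that the query builder appends, then add ALLOW FILTERING so that Cassandra accepts predicates that are not restricted to the partition key. Spelled out on a toy query:

    String query = "SELECT \"id\", \"value\" FROM \"tbl\";"; /// as produced by ExternalQueryBuilder
    query.pop_back();                                        /// drop the trailing ';'
    query += " ALLOW FILTERING;";
    /// -> SELECT "id", "value" FROM "tbl" ALLOW FILTERING;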
" " : " " + user + '@') + */ host + ':' + DB::toString(port); } +BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) +{ + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadIdsQuery(ids); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading ids using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); +} + +BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) +{ + //FIXME split conditions on partition key and clustering key + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading keys using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); +} + } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 2bdd476951a..400481d0a95 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -1,25 +1,29 @@ #pragma once +#if !defined(ARCADIA_BUILD) #include -#include +#endif #if USE_CASSANDRA -# include "DictionaryStructure.h" -# include "IDictionarySource.h" -# include +#include "DictionaryStructure.h" +#include "IDictionarySource.h" +#include +#include +#include namespace DB { class CassandraDictionarySource final : public IDictionarySource { CassandraDictionarySource( const DictionaryStructure & dict_struct, - const std::string & host, + const String & host, UInt16 port, - const std::string & user, - const std::string & password, - const std::string & method, - const std::string & db, + const String & user, + const String & password, + //const std::string & method, + const String & db, + const String & table, const Block & sample_block); public: @@ -44,15 +48,15 @@ public: DictionarySourcePtr clone() const override { return std::make_unique(*this); } - BlockInputStreamPtr loadIds(const std::vector & /* ids */) override - { - throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - } + BlockInputStreamPtr loadIds(const std::vector & ids) override; + //{ + // throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + //} - BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override - { - throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - } + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + //{ + // throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + //} BlockInputStreamPtr loadUpdatedAll() override { @@ -62,15 +66,17 @@ public: std::string toString() const override; private: - static std::string toConnectionString(const std::string & host, const UInt16 port); + //static std::string toConnectionString(const std::string & host, const UInt16 port); + Poco::Logger * log; const DictionaryStructure dict_struct; - const std::string host; + const String host; const UInt16 port; - const std::string user; - const std::string password; - const std::string method; - const std::string db; + const String user; + const String password; + //const 
std::string method; + const String db; + const String table; Block sample_block; CassCluster * cluster; diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 529fb3d60fa..d55d77ea9a5 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -63,6 +63,13 @@ void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out) std::string ExternalQueryBuilder::composeLoadAllQuery() const { WriteBufferFromOwnString out; + composeLoadAllQuery(out); + writeChar(';', out); + return out.str(); +} + +void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const +{ writeString("SELECT ", out); if (dict_struct.id) @@ -149,24 +156,26 @@ std::string ExternalQueryBuilder::composeLoadAllQuery() const writeString(" WHERE ", out); writeString(where, out); } - - writeChar(';', out); - - return out.str(); } std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_field, const std::string & time_point) const { - std::string out = composeLoadAllQuery(); - std::string update_query; + WriteBufferFromOwnString out; + composeLoadAllQuery(out); if (!where.empty()) - update_query = " AND " + update_field + " >= '" + time_point + "'"; + writeString(" AND ", out); else - update_query = " WHERE " + update_field + " >= '" + time_point + "'"; + writeString(" WHERE ", out); - return out.insert(out.size() - 1, update_query); /// This is done to insert "update_query" before "out"'s semicolon + writeQuoted(update_field, out); + writeString(" >= '", out); + writeString(time_point, out); + writeChar('\'', out); + + writeChar(';', out); + return out.str(); } @@ -303,7 +312,7 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st } else /* if (method == IN_WITH_TUPLES) */ { - writeString(composeKeyTupleDefinition(), out); + composeKeyTupleDefinition(out); writeString(" IN (", out); first = true; @@ -346,7 +355,7 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons const auto & key_description = (*dict_struct.key)[i]; /// key_i=value_i - writeString(key_description.name, out); + writeQuoted(key_description.name, out); writeString("=", out); key_description.type->serializeAsTextQuoted(*key_columns[i], row, out, format_settings); } @@ -355,26 +364,24 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons } -std::string ExternalQueryBuilder::composeKeyTupleDefinition() const +void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; - std::string result{"("}; + writeChar('(', out); auto first = true; for (const auto & key : *dict_struct.key) { if (!first) - result += ", "; + writeString(", ", out); first = false; - result += key.name; + writeQuoted(key.name, out); } - result += ")"; - - return result; + writeChar(')', out); } diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 93e10f2d6b0..2ffc6a475ee 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -58,11 +58,13 @@ struct ExternalQueryBuilder private: const FormatSettings format_settings; + void composeLoadAllQuery(WriteBuffer & out) const; + /// Expression in form (x = c1 AND y = c2 ...) void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const; /// Expression in form (x, y, ...) 
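The composeUpdateQuery() change above replaces the old trick of splicing text in front of the final semicolon with a second pass over the same WriteBuffer-based builder, quoting the update field on the way. Roughly, with hypothetical field and time values and no static `where` clause configured:

    ExternalQueryBuilder builder{dict_struct, "db", "tbl", "", IdentifierQuotingStyle::DoubleQuotes};
    String query = builder.composeUpdateQuery("updated_at", "2020-05-26 00:00:00");
    /// -> SELECT ... FROM "db"."tbl" WHERE "updated_at" >= '2020-05-26 00:00:00';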
- std::string composeKeyTupleDefinition() const; + void composeKeyTupleDefinition(WriteBuffer & out) const; /// Expression in form (c1, c2, ...) void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const; diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8b97d77f4c9..9e66712fd7e 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -19,6 +19,7 @@ import pprint import psycopg2 import pymongo import pymysql +import cassandra.cluster from dicttoxml import dicttoxml from kazoo.client import KazooClient from kazoo.exceptions import KazooException @@ -448,6 +449,18 @@ class ClickHouseCluster: logging.warning("Can't connect to SchemaRegistry: %s", str(ex)) time.sleep(1) + def wait_cassandra_to_start(self, timeout=15): + cass_client = cassandra.cluster.Cluster(["localhost"], port="9043") + start = time.time() + while time.time() - start < timeout: + try: + cass_client.connect().execute("drop keyspace if exists test;") + logging.info("Connected to Cassandra") + return + except Exception as ex: + logging.warning("Can't connect to Cassandra: %s", str(ex)) + time.sleep(1) + def start(self, destroy_dirs=True): if self.is_up: return @@ -526,7 +539,7 @@ class ClickHouseCluster: if self.with_cassandra and self.base_cassandra_cmd: subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate']) - time.sleep(10) + self.wait_cassandra_to_start() clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate'] logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd))) diff --git a/tests/integration/helpers/docker_compose_cassandra.yml b/tests/integration/helpers/docker_compose_cassandra.yml index 6bbedcc1130..6567a352027 100644 --- a/tests/integration/helpers/docker_compose_cassandra.yml +++ b/tests/integration/helpers/docker_compose_cassandra.yml @@ -4,4 +4,4 @@ services: image: cassandra restart: always ports: - - 6340:6349 + - 9043:9042 diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 493cccc8482..2dad70bc913 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -8,6 +8,7 @@ import aerospike from tzlocal import get_localzone import datetime import os +import uuid class ExternalSource(object): @@ -407,23 +408,70 @@ class SourceHTTPS(SourceHTTPBase): return "https" class SourceCassandra(ExternalSource): + TYPE_MAPPING = { + 'UInt8': 'tinyint', + 'UInt16': 'smallint', + 'UInt32': 'int', + 'UInt64': 'bigint', + 'Int8': 'tinyint', + 'Int16': 'smallint', + 'Int32': 'int', + 'Int64': 'bigint', + 'UUID': 'uuid', + 'Date': 'date', + 'DateTime': 'timestamp', + 'String': 'text', + 'Float32': 'float', + 'Float64': 'double' + } + + def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password): + ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password) + self.structure = dict() + def get_source_str(self, table_name): return ''' {host} {port} + test + {table} '''.format( host=self.docker_hostname, port=self.docker_port, + table=table_name, ) def prepare(self, structure, table_name, cluster): self.client = cassandra.cluster.Cluster([self.internal_hostname],
port=self.internal_port) + self.session = self.client.connect() + self.session.execute("create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};") + self.structure[table_name] = structure + columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] + keys = ['"' + col.name + '"' for col in structure.keys] + # FIXME use partition key + query = 'create table test."{name}" ({columns}, primary key ("{some_col}", {pk}));'.format( + name=table_name, columns=', '.join(columns), some_col=structure.ordinary_fields[0].name, pk=', '.join(keys)) + self.session.execute(query) self.prepared = True + def get_value_to_insert(self, value, type): + if type == 'UUID': + return uuid.UUID(value) + elif type == 'DateTime': + local_datetime = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + return get_localzone().localize(local_datetime) + return value + def load_data(self, data, table_name): - pass + names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()] + columns = ['"' + col[0] + '"' for col in names_and_types] + insert = 'insert into test."{table}" ({columns}) values ({args})'.format( + table=table_name, columns=','.join(columns), args=','.join(['%s']*len(columns))) + for row in data: + values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types] + self.session.execute(insert, values) class SourceRedis(ExternalSource): def __init__( diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 2e1d3ed4c70..98ba191c948 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -8,7 +8,7 @@ from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceEx from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra import math - +import time SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') @@ -119,7 +119,7 @@ LAYOUTS = [ ] SOURCES = [ - SourceCassandra("Cassandra", "localhost", "6340", "cassandra1", "6349", "", ""), + SourceCassandra("Cassandra", "localhost", "9043", "cassandra1", "9042", "", ""), SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMongoURI("MongoDB_URI", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), @@ -134,7 +134,7 @@ SOURCES = [ DICTIONARIES = [] -# Key-value dictionaries with onle one possible field for key +# Key-value dictionaries with only one possible field for key SOURCES_KV = [ SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), @@ -212,6 +212,7 @@ def get_dictionaries(fold, total_folds, all_dicts): return all_dicts[fold * chunk_len : (fold + 1) * chunk_len] +#@pytest.mark.timeout(3000) @pytest.mark.parametrize("fold", list(range(10))) def test_simple_dictionaries(started_cluster, fold): fields = FIELDS["simple"] @@ -227,6 +228,8 @@ def test_simple_dictionaries(started_cluster, 
fold): node.query("system reload dictionaries") + #time.sleep(3000) + queries_with_answers = [] for dct in simple_dicts: for row in data: From 5e472af425476c7e145d952d9d853b985d7e6e24 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 26 May 2020 20:43:20 +0000 Subject: [PATCH 0184/2229] Fix merge & small fix --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 15 +++++++-------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 6 +++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index aa72ab51878..cde43862ede 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -14,7 +14,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) { - LOG_ERROR(log, "Library error report: " << message); + LOG_ERROR(log, "Library error report: {}", message); stop(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5cdcbccadce..945de989b57 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -46,11 +46,10 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, * then they also will share the same event loop. But it will mean that if one stream's consumer starts event loop, * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - * it result in asynchronous run of the same code (because local variables can be updated both by the current thread - * and in callbacks by another thread during event loop, which is blocking only to the thread that has started the loop). - * So sharing the connection (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. + * as a result local variables can be updated both by the current thread and in callbacks by another thread during + * event loop, which is blocking only to the thread that has started the loop. Therefore sharing the connection + * (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. 
*/ - size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) { @@ -97,7 +96,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) { exchange_declared = false; - LOG_ERROR(log, "Failed to declare fanout exchange: " << message); + LOG_ERROR(log, "Failed to declare fanout exchange: {}", message); }); if (hash_exchange) @@ -174,13 +173,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: " << message); + LOG_ERROR(log, "Failed to create queue binding: {}", message); }); }) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to declare queue on the channel: " << message); + LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. @@ -241,7 +240,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer failed: " << message); + LOG_ERROR(log, "Consumer failed: {}", message); }); while (!consumer_created && !consumer_error) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index cfabb5412ba..fb20569200d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -106,7 +106,7 @@ Pipes StorageRabbitMQ::read( *this, context, column_names, log))); } - LOG_DEBUG(log, "Starting reading " << pipes.size() << " streams"); + LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); return pipes; } @@ -136,7 +136,7 @@ void StorageRabbitMQ::shutdown() for (size_t i = 0; i < num_created_consumers; ++i) { - auto buffer = popReadBuffer(); + popReadBuffer(); } task->deactivate(); @@ -233,7 +233,7 @@ void StorageRabbitMQ::threadFunc() if (!checkDependencies(table_id)) break; - LOG_DEBUG(log, "Started streaming to " << dependencies_count << " attached views"); + LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); if (!streamToViews()) break; From 2919f6710cc3b020f8ede883a9d309c0d918b3ee Mon Sep 17 00:00:00 2001 From: potya Date: Tue, 26 May 2020 23:58:51 +0300 Subject: [PATCH 0185/2229] Add some multiword data types --- src/DataTypes/DataTypeString.cpp | 1 + src/Parsers/ParserCreateQuery.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index efaf844a845..5762d5d7055 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -376,6 +376,7 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. 
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 19410a78dd2..930dc0fec35 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -163,6 +165,36 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type + auto first_word = type->getID(); + + if (boost::algorithm::to_lower_copy(first_word) == "function_double") { + ParserKeyword s_precision{"PRECISION"}; + s_precision.ignore(pos); + } else if (boost::algorithm::to_lower_copy(first_word) == "function_char") { + ParserKeyword s_varying{"VARYING"}; + s_varying.ignore(pos); + } else if (boost::algorithm::to_lower_copy(first_word) == "function_native") { + ParserIdentifierWithOptionalParameters tmp; + ASTPtr second_word; + if (!tmp.parse(pos, second_word, expected)) { + return false; + } + if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_character") { + return false; + } + + type = second_word; + } else if (boost::algorithm::to_lower_copy(first_word) == "function_varying") + { + ParserIdentifierWithOptionalParameters tmp; + ASTPtr second_word; + if (!tmp.parse(pos, second_word, expected)) + return false; + if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_char") + return false; + type = second_word; + } + if (s_comment.ignore(pos, expected)) { From 8d69223aafe29989c7166a1638f460ced450516d Mon Sep 17 00:00:00 2001 From: bobrovskij artemij Date: Wed, 27 May 2020 01:20:25 +0300 Subject: [PATCH 0186/2229] show_privileges test fix, one more build fix --- src/Storages/StorageMongoDB.cpp | 2 +- tests/queries/0_stateless/01271_show_privileges.reference | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index adfdd10db6f..db19e08b990 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes StorageMongoDB::StorageMongoDB( const StorageID & table_id_, const std::string & host_, - short unsigned int port_, + uint16_t port_, const std::string & database_name_, const std::string & collection_name_, const std::string & username_, diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index e85dbd89801..702f6d8cb58 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -103,6 +103,7 @@ INTROSPECTION ['INTROSPECTION FUNCTIONS'] \N ALL FILE [] GLOBAL SOURCES URL [] GLOBAL SOURCES REMOTE [] GLOBAL SOURCES +MONGO [] GLOBAL SOURCES MYSQL [] GLOBAL SOURCES ODBC [] GLOBAL SOURCES JDBC [] GLOBAL SOURCES From 3ee757df8607dce8ecd90dd3df8e82db6fa6391a Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 27 May 2020 15:13:35 +0300 Subject: [PATCH 0187/2229] Fix style errors --- src/DataTypes/DataTypeString.cpp | 4 ++-- src/DataTypes/DataTypesNumber.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff
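Each new branch in the column parser follows the same idea: after the leading type word is parsed, optionally consume (or require) a trailing keyword so that multiword SQL type names resolve through the aliases registered in the data type factories. The primitive doing the work, shown in isolation (`pos` as in the surrounding parser):

    ParserKeyword s_precision{"PRECISION"};
    s_precision.ignore(pos); /// swallow an optional "PRECISION" after DOUBLE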
--git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 358afc6c8f2..749493cee99 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -379,7 +379,8 @@ bool DataTypeString::equals(const IDataType & rhs) const static DataTypePtr create(const ASTPtr & arguments) { - if (arguments) { + if (arguments) + { if (arguments->children.size() > 1) throw Exception("String data type family mustn't have more than one argument - size in characters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -410,5 +411,4 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("MEDIUMBLOB", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("LONGBLOB", "String", DataTypeFactory::CaseInsensitive); } - } diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 7fe6f64f6c6..fd9809cde9d 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -12,7 +12,6 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNEXPECTED_AST_STRUCTURE; } template From d5840688a1b08a2de562f7cddad964b571a7105c Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 27 May 2020 15:22:12 +0300 Subject: [PATCH 0188/2229] Fix style errors --- src/DataTypes/DataTypesNumber.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 7 ++++--- src/Parsers/ASTColumnDeclaration.cpp | 2 -- src/Parsers/ParserCreateQuery.h | 21 +++++++++++---------- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 82a1f35c297..4da767ae359 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -36,4 +36,4 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); } -} \ No newline at end of file +} diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d4773d007e5..06a011e4633 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -298,12 +298,13 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres if (col_decl.is_not && col_decl.is_null) { if (column_type->isNullable()) throw Exception{"Can't use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; - } else if (col_decl.is_null && !col_decl.is_not) { + } + else if (col_decl.is_null && !col_decl.is_not) + { if (column_type->isNullable()) throw Exception{"Can't use NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; - else { + else column_type = makeNullable(column_type); - } } if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.is_not && !col_decl.is_null) diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index de5abe28ffb..2d5bcba611c 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -83,8 +83,6 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta is_null->formatImpl(settings, state, frame); } - - if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ?
hilite_none : "") << ' '; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index c2b36460397..9fae3d60836 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -171,18 +171,17 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E // Pos pos_before_null = pos; - if (s_not.check(pos, expected)) { - if (s_null.check(pos, expected)) { + if (s_not.check(pos, expected)) + if (s_null.check(pos, expected)) + { is_not = std::make_shared("NOT"); is_null = std::make_shared("NULL"); - } else { + } + else return false; - } - } else { - if (s_null.check(pos, expected)) { + else + if (s_null.check(pos, expected)) is_null = std::make_shared("NULL"); - } - } if (s_comment.ignore(pos, expected)) { @@ -213,12 +212,14 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(type)); } - if (is_null) { + if (is_null) + { column_declaration->is_null = is_null; column_declaration->children.push_back(std::move(is_null)); } - if (is_not) { + if (is_not) + { column_declaration->is_not = is_not; column_declaration->children.push_back(std::move(is_not)); } From df13694a033cb07e44ca4bb94533a9d21b724f3e Mon Sep 17 00:00:00 2001 From: potya Date: Wed, 27 May 2020 15:32:39 +0300 Subject: [PATCH 0189/2229] Fix style errors --- src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 06a011e4633..1b6f7d53a54 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -295,7 +295,8 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.is_not && col_decl.is_null) { + if (col_decl.is_not && col_decl.is_null) + { if (column_type->isNullable()) throw Exception{"Can't use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; } From 415c8469a355f076e327d88d48e2d820996f2962 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 16:51:23 +0300 Subject: [PATCH 0190/2229] compatibility with glibc 2.4 --- .gitmodules | 4 +- contrib/cassandra | 2 +- contrib/libuv | 2 +- contrib/libuv-cmake/CMakeLists.txt | 538 ++++++++++------------------- 4 files changed, 196 insertions(+), 350 deletions(-) diff --git a/.gitmodules b/.gitmodules index bbc9f15ffd8..a4c84301fc9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -163,8 +163,8 @@ branch = ch-tmp [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/libuv/libuv.git - branch = v1.x + url = https://github.com/tavplubix/libuv.git + branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git diff --git a/contrib/cassandra b/contrib/cassandra index 9606ff1f70b..58a71947d9d 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 9606ff1f70bd3fc5d395df32e626923c012ffb5f +Subproject commit 58a71947d9dd8412f5aeb38275fa81417ea27ee0 diff --git a/contrib/libuv b/contrib/libuv index cc51217a317..379988fef9b 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit cc51217a317e96510fbb284721d5e6bc2af31e33 +Subproject commit 379988fef9b0c6ac706a624dbac6be8924a3a0da diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index b84ce217f3b..a2869e037ff 100644 ---
a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,96 +1,23 @@ cmake_minimum_required(VERSION 3.4) project(libuv LANGUAGES C) -cmake_policy(SET CMP0057 NEW) # Enable IN_LIST operator -cmake_policy(SET CMP0064 NEW) # Support if (TEST) operator - -#list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") - include(CMakePackageConfigHelpers) include(CMakeDependentOption) -include(CheckCCompilerFlag) include(GNUInstallDirs) include(CTest) -set(CMAKE_C_VISIBILITY_PRESET hidden) -set(CMAKE_C_STANDARD_REQUIRED ON) -set(CMAKE_C_EXTENSIONS ON) -set(CMAKE_C_STANDARD 90) - #cmake_dependent_option(LIBUV_BUILD_TESTS # "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON # "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) -#cmake_dependent_option(LIBUV_BUILD_BENCH -# "Build the benchmarks when building unit tests and we are the root project" ON -# "LIBUV_BUILD_TESTS" OFF) -# Qemu Build -option(QEMU "build for qemu" OFF) -if(QEMU) - add_definitions(-D__QEMU__=1) +if(MSVC) + list(APPEND uv_cflags /W4) +elseif(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") + list(APPEND uv_cflags -fvisibility=hidden --std=gnu89) + list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes) + list(APPEND uv_cflags -Wno-unused-parameter) endif() -# Compiler check -string(CONCAT is-msvc $, - $ ->) - -check_c_compiler_flag(/W4 UV_LINT_W4) -check_c_compiler_flag(/wd4100 UV_LINT_NO_UNUSED_PARAMETER_MSVC) -check_c_compiler_flag(/wd4127 UV_LINT_NO_CONDITIONAL_CONSTANT_MSVC) -check_c_compiler_flag(/wd4201 UV_LINT_NO_NONSTANDARD_MSVC) -check_c_compiler_flag(/wd4206 UV_LINT_NO_NONSTANDARD_EMPTY_TU_MSVC) -check_c_compiler_flag(/wd4210 UV_LINT_NO_NONSTANDARD_FILE_SCOPE_MSVC) -check_c_compiler_flag(/wd4232 UV_LINT_NO_NONSTANDARD_NONSTATIC_DLIMPORT_MSVC) -check_c_compiler_flag(/wd4456 UV_LINT_NO_HIDES_LOCAL) -check_c_compiler_flag(/wd4457 UV_LINT_NO_HIDES_PARAM) -check_c_compiler_flag(/wd4459 UV_LINT_NO_HIDES_GLOBAL) -check_c_compiler_flag(/wd4706 UV_LINT_NO_CONDITIONAL_ASSIGNMENT_MSVC) -check_c_compiler_flag(/wd4996 UV_LINT_NO_UNSAFE_MSVC) - -check_c_compiler_flag(-Wall UV_LINT_WALL) # DO NOT use this under MSVC - -# TODO: Place these into its own function -check_c_compiler_flag(-Wno-unused-parameter UV_LINT_NO_UNUSED_PARAMETER) -check_c_compiler_flag(-Wstrict-prototypes UV_LINT_STRICT_PROTOTYPES) -check_c_compiler_flag(-Wextra UV_LINT_EXTRA) - -set(lint-no-unused-parameter $<$:-Wno-unused-parameter>) -set(lint-strict-prototypes $<$:-Wstrict-prototypes>) -set(lint-extra $<$:-Wextra>) -set(lint-w4 $<$:/W4>) -set(lint-no-unused-parameter-msvc $<$:/wd4100>) -set(lint-no-conditional-constant-msvc $<$:/wd4127>) -set(lint-no-nonstandard-msvc $<$:/wd4201>) -set(lint-no-nonstandard-empty-tu-msvc $<$:/wd4206>) -set(lint-no-nonstandard-file-scope-msvc $<$:/wd4210>) -set(lint-no-nonstandard-nonstatic-dlimport-msvc $<$:/wd4232>) -set(lint-no-hides-local-msvc $<$:/wd4456>) -set(lint-no-hides-param-msvc $<$:/wd4457>) -set(lint-no-hides-global-msvc $<$:/wd4459>) -set(lint-no-conditional-assignment-msvc $<$:/wd4706>) -set(lint-no-unsafe-msvc $<$:/wd4996>) -# Unfortunately, this one is complicated because MSVC and clang-cl support -Wall -# but using it is like calling -Weverything -string(CONCAT lint-default $< - $,$>:-Wall ->) - -list(APPEND uv_cflags ${lint-strict-prototypes} ${lint-extra} ${lint-default} ${lint-w4}) -list(APPEND uv_cflags ${lint-no-unused-parameter}) -list(APPEND uv_cflags ${lint-no-unused-parameter-msvc}) -list(APPEND uv_cflags 
${lint-no-conditional-constant-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-empty-tu-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-file-scope-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-nonstatic-dlimport-msvc}) -list(APPEND uv_cflags ${lint-no-hides-local-msvc}) -list(APPEND uv_cflags ${lint-no-hides-param-msvc}) -list(APPEND uv_cflags ${lint-no-hides-global-msvc}) -list(APPEND uv_cflags ${lint-no-conditional-assignment-msvc}) -list(APPEND uv_cflags ${lint-no-unsafe-msvc}) - set(uv_sources src/fs-poll.c src/idna.c @@ -103,11 +30,172 @@ set(uv_sources src/uv-data-getter-setters.c src/version.c) +set(uv_test_sources + test/blackhole-server.c + test/echo-server.c + test/run-tests.c + test/runner.c + test/test-active.c + test/test-async-null-cb.c + test/test-async.c + test/test-barrier.c + test/test-callback-order.c + test/test-callback-stack.c + test/test-close-fd.c + test/test-close-order.c + test/test-condvar.c + test/test-connect-unspecified.c + test/test-connection-fail.c + test/test-cwd-and-chdir.c + test/test-default-loop-close.c + test/test-delayed-accept.c + test/test-dlerror.c + test/test-eintr-handling.c + test/test-embed.c + test/test-emfile.c + test/test-env-vars.c + test/test-error.c + test/test-fail-always.c + test/test-fork.c + test/test-fs-copyfile.c + test/test-fs-event.c + test/test-fs-poll.c + test/test-fs.c + test/test-fs-readdir.c + test/test-fs-fd-hash.c + test/test-fs-open-flags.c + test/test-get-currentexe.c + test/test-get-loadavg.c + test/test-get-memory.c + test/test-get-passwd.c + test/test-getaddrinfo.c + test/test-gethostname.c + test/test-getnameinfo.c + test/test-getsockname.c + test/test-getters-setters.c + test/test-gettimeofday.c + test/test-handle-fileno.c + test/test-homedir.c + test/test-hrtime.c + test/test-idle.c + test/test-idna.c + test/test-ip4-addr.c + test/test-ip6-addr.c + test/test-ipc-heavy-traffic-deadlock-bug.c + test/test-ipc-send-recv.c + test/test-ipc.c + test/test-loop-alive.c + test/test-loop-close.c + test/test-loop-configure.c + test/test-loop-handles.c + test/test-loop-stop.c + test/test-loop-time.c + test/test-multiple-listen.c + test/test-mutexes.c + test/test-osx-select.c + test/test-pass-always.c + test/test-ping-pong.c + test/test-pipe-bind-error.c + test/test-pipe-close-stdout-read-stdin.c + test/test-pipe-connect-error.c + test/test-pipe-connect-multiple.c + test/test-pipe-connect-prepare.c + test/test-pipe-getsockname.c + test/test-pipe-pending-instances.c + test/test-pipe-sendmsg.c + test/test-pipe-server-close.c + test/test-pipe-set-fchmod.c + test/test-pipe-set-non-blocking.c + test/test-platform-output.c + test/test-poll-close-doesnt-corrupt-stack.c + test/test-poll-close.c + test/test-poll-closesocket.c + test/test-poll-oob.c + test/test-poll.c + test/test-process-priority.c + test/test-process-title-threadsafe.c + test/test-process-title.c + test/test-queue-foreach-delete.c + test/test-random.c + test/test-ref.c + test/test-run-nowait.c + test/test-run-once.c + test/test-semaphore.c + test/test-shutdown-close.c + test/test-shutdown-eof.c + test/test-shutdown-twice.c + test/test-signal-multiple-loops.c + test/test-signal-pending-on-close.c + test/test-signal.c + test/test-socket-buffer-size.c + test/test-spawn.c + test/test-stdio-over-pipes.c + test/test-strscpy.c + test/test-tcp-alloc-cb-fail.c + test/test-tcp-bind-error.c + test/test-tcp-bind6-error.c + test/test-tcp-close-accept.c + test/test-tcp-close-while-connecting.c + test/test-tcp-close.c + 
test/test-tcp-close-reset.c + test/test-tcp-connect-error-after-write.c + test/test-tcp-connect-error.c + test/test-tcp-connect-timeout.c + test/test-tcp-connect6-error.c + test/test-tcp-create-socket-early.c + test/test-tcp-flags.c + test/test-tcp-oob.c + test/test-tcp-open.c + test/test-tcp-read-stop.c + test/test-tcp-shutdown-after-write.c + test/test-tcp-try-write.c + test/test-tcp-try-write-error.c + test/test-tcp-unexpected-read.c + test/test-tcp-write-after-connect.c + test/test-tcp-write-fail.c + test/test-tcp-write-queue-order.c + test/test-tcp-write-to-half-open-connection.c + test/test-tcp-writealot.c + test/test-thread-equal.c + test/test-thread.c + test/test-threadpool-cancel.c + test/test-threadpool.c + test/test-timer-again.c + test/test-timer-from-check.c + test/test-timer.c + test/test-tmpdir.c + test/test-tty-duplicate-key.c + test/test-tty.c + test/test-udp-alloc-cb-fail.c + test/test-udp-bind.c + test/test-udp-connect.c + test/test-udp-create-socket-early.c + test/test-udp-dgram-too-big.c + test/test-udp-ipv6.c + test/test-udp-multicast-interface.c + test/test-udp-multicast-interface6.c + test/test-udp-multicast-join.c + test/test-udp-multicast-join6.c + test/test-udp-multicast-ttl.c + test/test-udp-open.c + test/test-udp-options.c + test/test-udp-send-and-recv.c + test/test-udp-send-hang-loop.c + test/test-udp-send-immediate.c + test/test-udp-send-unreachable.c + test/test-udp-try-send.c + test/test-uname.c + test/test-walk-handles.c + test/test-watcher-cross-stop.c) + #if(WIN32) # list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) # list(APPEND uv_libraries -# psapi +# advapi32 # iphlpapi +# psapi +# shell32 +# user32 # userenv # ws2_32) # list(APPEND uv_sources @@ -140,8 +228,7 @@ set(uv_sources # list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) #else() list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) -if(NOT CMAKE_SYSTEM_NAME MATCHES "Android|OS390") - # TODO: This should be replaced with find_package(Threads) if possible +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") # Android has pthread as part of its c library, not as a separate # libpthread.so. 
list(APPEND uv_libraries pthread) @@ -173,16 +260,13 @@ if(CMAKE_SYSTEM_NAME STREQUAL "AIX") _ALL_SOURCE _LINUX_SOURCE_COMPAT _THREAD_SAFE - _XOPEN_SOURCE=500 - HAVE_SYS_AHAFS_EVPRODS_H) + _XOPEN_SOURCE=500) list(APPEND uv_libraries perfstat) - list(APPEND uv_sources - src/unix/aix.c - src/unix/aix-common.c) + list(APPEND uv_sources src/unix/aix.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "Android") - list(APPEND uv_libraries dl) + list(APPEND uv_libs dl) list(APPEND uv_sources src/unix/android-ifaddrs.c src/unix/linux-core.c @@ -190,13 +274,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android") src/unix/linux-syscalls.c src/unix/procfs-exepath.c src/unix/pthread-fixes.c - src/unix/random-getentropy.c src/unix/random-getrandom.c src/unix/random-sysctl-linux.c src/unix/sysinfo-loadavg.c) endif() -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS390") +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS/390") list(APPEND uv_sources src/unix/proctitle.c) endif() @@ -250,11 +333,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") list(APPEND uv_sources src/unix/openbsd.c) endif() -if(CMAKE_SYSTEM_NAME STREQUAL "OS390") +if(CMAKE_SYSTEM_NAME STREQUAL "OS/390") list(APPEND uv_defines PATH_MAX=255) list(APPEND uv_defines _AE_BIMODAL) list(APPEND uv_defines _ALL_SOURCE) - list(APPEND uv_defines _ISOC99_SOURCE) list(APPEND uv_defines _LARGE_TIME_API) list(APPEND uv_defines _OPEN_MSGQ_EXT) list(APPEND uv_defines _OPEN_SYS_FILE_EXT) @@ -267,25 +349,9 @@ if(CMAKE_SYSTEM_NAME STREQUAL "OS390") list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) list(APPEND uv_sources src/unix/pthread-fixes.c + src/unix/pthread-barrier.c src/unix/os390.c src/unix/os390-syscalls.c) - list(APPEND uv_cflags -Wc,DLL -Wc,exportall -Wc,xplink) - list(APPEND uv_libraries -Wl,xplink) - list(APPEND uv_test_libraries -Wl,xplink) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OS400") - list(APPEND uv_defines - _ALL_SOURCE - _LINUX_SOURCE_COMPAT - _THREAD_SAFE - _XOPEN_SOURCE=500) - list(APPEND uv_sources - src/unix/aix-common.c - src/unix/ibmi.c - src/unix/no-fsevents.c - src/unix/no-proctitle.c - src/unix/posix-poll.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") @@ -304,225 +370,24 @@ foreach(file ${uv_sources}) endforeach(file) set(uv_sources "${uv_sources_tmp}") +list(APPEND uv_defines CLICKHOUSE_GLIBC_COMPATIBILITY) + add_library(uv SHARED ${uv_sources}) target_compile_definitions(uv - INTERFACE - USING_UV_SHARED=1 - PRIVATE - BUILDING_UV_SHARED=1 - ${uv_defines}) + INTERFACE USING_UV_SHARED=1 + PRIVATE ${uv_defines} BUILDING_UV_SHARED=1) target_compile_options(uv PRIVATE ${uv_cflags}) -target_include_directories(uv - PUBLIC - $ - $ - PRIVATE - $) +target_include_directories(uv PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) target_link_libraries(uv ${uv_libraries}) add_library(uv_a STATIC ${uv_sources}) target_compile_definitions(uv_a PRIVATE ${uv_defines}) target_compile_options(uv_a PRIVATE ${uv_cflags}) -target_include_directories(uv_a - PUBLIC - $ - $ - PRIVATE - $) +target_include_directories(uv_a PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) target_link_libraries(uv_a ${uv_libraries}) #if(LIBUV_BUILD_TESTS) -# # Small hack: use ${uv_test_sources} now to get the runner skeleton, -# # before the actual tests are added. 
-# add_executable( -# uv_run_benchmarks_a -# ${uv_test_sources} -# test/benchmark-async-pummel.c -# test/benchmark-async.c -# test/benchmark-fs-stat.c -# test/benchmark-getaddrinfo.c -# test/benchmark-loop-count.c -# test/benchmark-million-async.c -# test/benchmark-million-timers.c -# test/benchmark-multi-accept.c -# test/benchmark-ping-pongs.c -# test/benchmark-ping-udp.c -# test/benchmark-pound.c -# test/benchmark-pump.c -# test/benchmark-sizes.c -# test/benchmark-spawn.c -# test/benchmark-tcp-write-batch.c -# test/benchmark-thread.c -# test/benchmark-udp-pummel.c -# test/blackhole-server.c -# test/dns-server.c -# test/echo-server.c -# test/run-benchmarks.c -# test/runner.c) -# target_compile_definitions(uv_run_benchmarks_a PRIVATE ${uv_defines}) -# target_compile_options(uv_run_benchmarks_a PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_benchmarks_a uv_a ${uv_test_libraries}) -# -# list(APPEND uv_test_sources -# test/blackhole-server.c -# test/echo-server.c -# test/run-tests.c -# test/runner.c -# test/test-active.c -# test/test-async-null-cb.c -# test/test-async.c -# test/test-barrier.c -# test/test-callback-order.c -# test/test-callback-stack.c -# test/test-close-fd.c -# test/test-close-order.c -# test/test-condvar.c -# test/test-connect-unspecified.c -# test/test-connection-fail.c -# test/test-cwd-and-chdir.c -# test/test-default-loop-close.c -# test/test-delayed-accept.c -# test/test-dlerror.c -# test/test-eintr-handling.c -# test/test-embed.c -# test/test-emfile.c -# test/test-env-vars.c -# test/test-error.c -# test/test-fail-always.c -# test/test-fork.c -# test/test-fs-copyfile.c -# test/test-fs-event.c -# test/test-fs-poll.c -# test/test-fs.c -# test/test-fs-readdir.c -# test/test-fs-fd-hash.c -# test/test-fs-open-flags.c -# test/test-get-currentexe.c -# test/test-get-loadavg.c -# test/test-get-memory.c -# test/test-get-passwd.c -# test/test-getaddrinfo.c -# test/test-gethostname.c -# test/test-getnameinfo.c -# test/test-getsockname.c -# test/test-getters-setters.c -# test/test-gettimeofday.c -# test/test-handle-fileno.c -# test/test-homedir.c -# test/test-hrtime.c -# test/test-idle.c -# test/test-idna.c -# test/test-ip4-addr.c -# test/test-ip6-addr.c -# test/test-ipc-heavy-traffic-deadlock-bug.c -# test/test-ipc-send-recv.c -# test/test-ipc.c -# test/test-loop-alive.c -# test/test-loop-close.c -# test/test-loop-configure.c -# test/test-loop-handles.c -# test/test-loop-stop.c -# test/test-loop-time.c -# test/test-multiple-listen.c -# test/test-mutexes.c -# test/test-osx-select.c -# test/test-pass-always.c -# test/test-ping-pong.c -# test/test-pipe-bind-error.c -# test/test-pipe-close-stdout-read-stdin.c -# test/test-pipe-connect-error.c -# test/test-pipe-connect-multiple.c -# test/test-pipe-connect-prepare.c -# test/test-pipe-getsockname.c -# test/test-pipe-pending-instances.c -# test/test-pipe-sendmsg.c -# test/test-pipe-server-close.c -# test/test-pipe-set-fchmod.c -# test/test-pipe-set-non-blocking.c -# test/test-platform-output.c -# test/test-poll-close-doesnt-corrupt-stack.c -# test/test-poll-close.c -# test/test-poll-closesocket.c -# test/test-poll-oob.c -# test/test-poll.c -# test/test-process-priority.c -# test/test-process-title-threadsafe.c -# test/test-process-title.c -# test/test-queue-foreach-delete.c -# test/test-random.c -# test/test-ref.c -# test/test-run-nowait.c -# test/test-run-once.c -# test/test-semaphore.c -# test/test-shutdown-close.c -# test/test-shutdown-eof.c -# test/test-shutdown-twice.c -# test/test-signal-multiple-loops.c -# 
test/test-signal-pending-on-close.c -# test/test-signal.c -# test/test-socket-buffer-size.c -# test/test-spawn.c -# test/test-stdio-over-pipes.c -# test/test-strscpy.c -# test/test-tcp-alloc-cb-fail.c -# test/test-tcp-bind-error.c -# test/test-tcp-bind6-error.c -# test/test-tcp-close-accept.c -# test/test-tcp-close-while-connecting.c -# test/test-tcp-close.c -# test/test-tcp-close-reset.c -# test/test-tcp-connect-error-after-write.c -# test/test-tcp-connect-error.c -# test/test-tcp-connect-timeout.c -# test/test-tcp-connect6-error.c -# test/test-tcp-create-socket-early.c -# test/test-tcp-flags.c -# test/test-tcp-oob.c -# test/test-tcp-open.c -# test/test-tcp-read-stop.c -# test/test-tcp-shutdown-after-write.c -# test/test-tcp-try-write.c -# test/test-tcp-try-write-error.c -# test/test-tcp-unexpected-read.c -# test/test-tcp-write-after-connect.c -# test/test-tcp-write-fail.c -# test/test-tcp-write-queue-order.c -# test/test-tcp-write-to-half-open-connection.c -# test/test-tcp-writealot.c -# test/test-thread-equal.c -# test/test-thread.c -# test/test-threadpool-cancel.c -# test/test-threadpool.c -# test/test-timer-again.c -# test/test-timer-from-check.c -# test/test-timer.c -# test/test-tmpdir.c -# test/test-tty-duplicate-key.c -# test/test-tty-escape-sequence-processing.c -# test/test-tty.c -# test/test-udp-alloc-cb-fail.c -# test/test-udp-bind.c -# test/test-udp-connect.c -# test/test-udp-create-socket-early.c -# test/test-udp-dgram-too-big.c -# test/test-udp-ipv6.c -# test/test-udp-multicast-interface.c -# test/test-udp-multicast-interface6.c -# test/test-udp-multicast-join.c -# test/test-udp-multicast-join6.c -# test/test-udp-multicast-ttl.c -# test/test-udp-open.c -# test/test-udp-options.c -# test/test-udp-send-and-recv.c -# test/test-udp-send-hang-loop.c -# test/test-udp-send-immediate.c -# test/test-udp-send-unreachable.c -# test/test-udp-try-send.c -# test/test-uname.c -# test/test-walk-handles.c -# test/test-watcher-cross-stop.c) -# -# add_executable(uv_run_tests ${uv_test_sources} uv_win_longpath.manifest) +# add_executable(uv_run_tests ${uv_test_sources}) # target_compile_definitions(uv_run_tests # PRIVATE ${uv_defines} USING_UV_SHARED=1) # target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) @@ -530,18 +395,10 @@ target_link_libraries(uv_a ${uv_libraries}) # add_test(NAME uv_test # COMMAND uv_run_tests # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -# if(CMAKE_SYSTEM_NAME STREQUAL "OS390") -# set_tests_properties(uv_test PROPERTIES ENVIRONMENT -# "LIBPATH=${CMAKE_BINARY_DIR}:$ENV{LIBPATH}") -# endif() -# add_executable(uv_run_tests_a ${uv_test_sources} uv_win_longpath.manifest) +# add_executable(uv_run_tests_a ${uv_test_sources}) # target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) # target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) -# if(QEMU) -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries} -static) -# else() -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) -# endif() +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) # add_test(NAME uv_test_a # COMMAND uv_run_tests_a # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) @@ -549,18 +406,15 @@ target_link_libraries(uv_a ${uv_libraries}) if(UNIX) # Now for some gibbering horrors from beyond the stars... - foreach(lib IN LISTS uv_libraries) - list(APPEND LIBS "-l${lib}") - endforeach() - string(REPLACE ";" " " LIBS "${LIBS}") - # Consider setting project version via project() call? 
+ foreach(x ${uv_libraries}) + set(LIBS "${LIBS} -l${x}") + endforeach(x) file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT) - string(REGEX MATCH "([0-9]+)[.][0-9]+[.][0-9]+" PACKAGE_VERSION "${configure_ac}") - set(UV_VERSION_MAJOR "${CMAKE_MATCH_1}") + string(REGEX MATCH [0-9]+[.][0-9]+[.][0-9]+ PACKAGE_VERSION "${configure_ac}") + string(REGEX MATCH ^[0-9]+ UV_VERSION_MAJOR "${PACKAGE_VERSION}") # The version in the filename is mirroring the behaviour of autotools. - set_target_properties(uv PROPERTIES - VERSION ${UV_VERSION_MAJOR}.0.0 - SOVERSION ${UV_VERSION_MAJOR}) + set_target_properties(uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 + SOVERSION ${UV_VERSION_MAJOR}) set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) set(prefix ${CMAKE_INSTALL_PREFIX}) @@ -568,7 +422,7 @@ if(UNIX) install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR}) - install(FILES ${PROJECT_BINARY_DIR}/libuv.pc + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libuv.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) @@ -581,11 +435,3 @@ endif() # RUNTIME DESTINATION lib/$ # ARCHIVE DESTINATION lib/$) #endif() -# -#message(STATUS "summary of build options: -# Install prefix: ${CMAKE_INSTALL_PREFIX} -# Target system: ${CMAKE_SYSTEM_NAME} -# Compiler: -# C compiler: ${CMAKE_C_COMPILER} -# CFLAGS: ${CMAKE_C_FLAGS_${_build_type}} ${CMAKE_C_FLAGS} -#") From f099ce19a7dde2ab9c1d9e37b1f932cb32cf4480 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 16:54:39 +0300 Subject: [PATCH 0191/2229] fix --- src/Dictionaries/CassandraBlockInputStream.h | 2 +- tests/integration/helpers/cluster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index c8476bd2c15..22e4429343d 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -9,7 +9,7 @@ namespace DB { -void cassandraCheck(CassError error); +void cassandraCheck(CassError code); void cassandraWaitAndCheck(CassFuture * future); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 44e08d5bf6a..49a18d14796 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -460,7 +460,7 @@ class ClickHouseCluster: logging.warning("Can't connect to SchemaRegistry: %s", str(ex)) time.sleep(1) - def wait_cassandra_to_start(self, timeout=15): + def wait_cassandra_to_start(self, timeout=30): cass_client = cassandra.cluster.Cluster(["localhost"], port="9043") start = time.time() while time.time() - start < timeout: From 709b4f42c82da439b0b3b2216fd6f56959411dd3 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 27 May 2020 22:11:04 +0300 Subject: [PATCH 0192/2229] Prototype sending crash reports on segfaults --- .gitmodules | 3 + CMakeLists.txt | 1 + base/daemon/BaseDaemon.cpp | 8 +- base/daemon/CMakeLists.txt | 8 +- base/daemon/SentryWriter.cpp | 107 +++++++++++++ base/daemon/SentryWriter.h | 21 +++ cmake/find/sentry.cmake | 19 +++ contrib/CMakeLists.txt | 14 +- contrib/curl-cmake/CMakeLists.txt | 2 + contrib/sentry-native | 1 + programs/server/Server.cpp | 2 + src/Common/StackTrace.cpp | 146 ++++++++++++------ src/Common/StackTrace.h | 22 
++- src/Common/TraceCollector.cpp | 2 +- .../System/StorageSystemStackTrace.cpp | 2 +- utils/check-style/check-include | 1 + 16 files changed, 298 insertions(+), 61 deletions(-) create mode 100644 base/daemon/SentryWriter.cpp create mode 100644 base/daemon/SentryWriter.h create mode 100644 cmake/find/sentry.cmake create mode 160000 contrib/sentry-native diff --git a/.gitmodules b/.gitmodules index 7f5d1307a6e..daa5d12a62c 100644 --- a/.gitmodules +++ b/.gitmodules @@ -160,3 +160,6 @@ [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git +[submodule "contrib/sentry-native"] + path = contrib/sentry-native + url = git@github.com:getsentry/sentry-native.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 53dfd1df1cb..79db4c624ca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -365,6 +365,7 @@ include (cmake/find/fastops.cmake) include (cmake/find/orc.cmake) include (cmake/find/avro.cmake) include (cmake/find/msgpack.cmake) +include (cmake/find/sentry.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 10c7173d5b1..f269c3923e0 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -222,6 +223,7 @@ public: DB::readPODBinary(stack_trace, in); DB::readBinary(thread_num, in); DB::readBinary(query_id, in); + stack_trace.resetFrames(); /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. @@ -247,6 +249,7 @@ private: UInt32 thread_num, const std::string & query_id) const { + SentryWriter::onFault(sig, info, context, stack_trace); LOG_FATAL(log, "########################################"); { @@ -272,7 +275,7 @@ private: std::stringstream bare_stacktrace; bare_stacktrace << "Stack trace:"; for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - bare_stacktrace << ' ' << stack_trace.getFrames()[i]; + bare_stacktrace << ' ' << stack_trace.getFramePointers()[i]; LOG_FATAL(log, bare_stacktrace.str()); } @@ -511,6 +514,8 @@ void debugIncreaseOOMScore() {} void BaseDaemon::initialize(Application & self) { closeFDs(); + SentryWriter::initialize(); + task_manager = std::make_unique(); ServerApplication::initialize(self); @@ -518,7 +523,6 @@ void BaseDaemon::initialize(Application & self) argsToConfig(argv(), config(), PRIO_APPLICATION - 100); bool is_daemon = config().getBool("application.runAsDaemon", false); - if (is_daemon) { /** When creating pid file and looking for config, will search for paths relative to the working path of the program when started. diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 5d9a37dc75e..46fa4a0fe34 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -1,7 +1,13 @@ add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp -) + SentryWriter.cpp) target_include_directories (daemon PUBLIC ..) 
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) + +if (USE_SENTRY) + target_link_libraries (daemon PRIVATE curl) + target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) +# target_include_directories (daemon SYSTEM BEFORE PRIVATE ${SENTRY_INCLUDE_DIR}) +endif () \ No newline at end of file diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp new file mode 100644 index 00000000000..8859adc1c2e --- /dev/null +++ b/base/daemon/SentryWriter.cpp @@ -0,0 +1,107 @@ +#include + +#include +#if !defined(ARCADIA_BUILD) +# include "Common/config_version.h" +#endif + +#include + +namespace { + void setExtras() { + sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); + sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); + sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); + sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION)); + sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); + sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); + sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); + } +} + +void SentryWriter::initialize() { + sentry_options_t * options = sentry_options_new(); + sentry_options_set_release(options, VERSION_STRING); + sentry_options_set_debug(options, 1); + sentry_init(options); + sentry_options_set_dsn(options, "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); + if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { + sentry_options_set_environment(options, "prod"); + } else { + sentry_options_set_environment(options, "test"); + } +} + +void SentryWriter::shutdown() { + sentry_shutdown(); +} + +void SentryWriter::onFault( + int sig, + const siginfo_t & info, + const ucontext_t & context, + const StackTrace & stack_trace + ) +{ + const std::string & error_message = signalToErrorMessage(sig, info, context); + sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); + sentry_set_tag("signal", strsignal(sig)); + sentry_set_tag("server_name", getFQDNOrHostName().c_str()); + sentry_set_extra("signal_number", sentry_value_new_int32(sig)); + setExtras(); + + sentry_value_t frames = sentry_value_new_list(); + + size_t stack_size = stack_trace.getSize(); + if (stack_size > 0) + { + size_t offset = stack_trace.getOffset(); + if (stack_size == 1) + { + offset = 1; + } + char instruction_addr[100]; + for (size_t i = stack_size - 1; i >= offset; --i) + { + const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; + sentry_value_t frame = sentry_value_new_object(); + unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr); + snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr); + sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + + if (current_frame.symbol.has_value()) + { + sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); + } + + if (current_frame.file.has_value()) + { + sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); + } + + if (current_frame.line.has_value()) + { + sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); + } + + sentry_value_append(frames, frame); + } + 
} + + sentry_value_t stacktrace = sentry_value_new_object(); + sentry_value_set_by_key(stacktrace, "frames", frames); + + sentry_value_t thread = sentry_value_new_object(); + sentry_value_set_by_key(thread, "stacktrace", stacktrace); + + sentry_value_t values = sentry_value_new_list(); + sentry_value_append(values, thread); + + sentry_value_t threads = sentry_value_new_object(); + sentry_value_set_by_key(threads, "values", values); + + sentry_value_set_by_key(event, "threads", threads); + + sentry_capture_event(event); + shutdown(); +} diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h new file mode 100644 index 00000000000..6c85ef04dd3 --- /dev/null +++ b/base/daemon/SentryWriter.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +#include + +class SentryWriter +{ +public: + SentryWriter() = delete; + + static void initialize(); + static void shutdown(); + static void onFault( + int sig, + const siginfo_t & info, + const ucontext_t & context, + const StackTrace & stack_trace + ); +}; diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake new file mode 100644 index 00000000000..f94b53ffb00 --- /dev/null +++ b/cmake/find/sentry.cmake @@ -0,0 +1,19 @@ +set (SENTRY_LIBRARY "sentry") +set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include") +if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") + message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive") + return() +endif () + +option (USE_SENTRY "Use Sentry" ON) + +set (BUILD_SHARED_LIBS OFF) +set (SENTRY_PIC OFF) +set (SENTRY_BACKEND "none") +set (SENTRY_TRANSPORT "curl") +set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) +set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) + +message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") + +include_directories("${SENTRY_INCLUDE_DIR}") \ No newline at end of file diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 99f7be2cbb7..1d1d7756de2 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -274,7 +274,7 @@ if (USE_INTERNAL_HDFS3_LIBRARY) add_subdirectory(libhdfs3-cmake) endif () -if (USE_INTERNAL_AWS_S3_LIBRARY) +if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY) set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) @@ -286,12 +286,18 @@ if (USE_INTERNAL_AWS_S3_LIBRARY) set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES}) set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS}) set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH}) + + # The library is large - avoid bloat. + target_compile_options (curl PRIVATE -g0) +endif () + +if (USE_INTERNAL_AWS_S3_LIBRARY) add_subdirectory(aws-s3-cmake) # The library is large - avoid bloat. 
target_compile_options (aws_s3 PRIVATE -g0) target_compile_options (aws_s3_checksums PRIVATE -g0) - target_compile_options (curl PRIVATE -g0) + endif () if (USE_BASE64) @@ -318,4 +324,8 @@ if (USE_FASTOPS) add_subdirectory (fastops-cmake) endif() +if (USE_SENTRY) + add_subdirectory (sentry-native) +endif() + add_subdirectory (fmtlib-cmake) diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index d9805612ffe..d0f6a7773b0 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -1,4 +1,6 @@ set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl) +set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) +set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) set (SRCS ${CURL_DIR}/lib/file.c diff --git a/contrib/sentry-native b/contrib/sentry-native new file mode 160000 index 00000000000..3bfce2d17c1 --- /dev/null +++ b/contrib/sentry-native @@ -0,0 +1 @@ +Subproject commit 3bfce2d17c1b80fbbaae83bb5ef41c1b290d34fb diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c1a520030f4..8383fa2d9bf 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -60,6 +60,8 @@ #include #include #include "MySQLHandlerFactory.h" +#include + #if !defined(ARCADIA_BUILD) # include "config_core.h" diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 6d0b6a0f7d2..5cc8c43a27a 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -190,6 +190,63 @@ static void * getCallerAddress(const ucontext_t & context) #endif } +static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames) +{ +#if defined(__ELF__) && !defined(__FreeBSD__) + + const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); + std::unordered_map dwarfs; + + for (size_t i = 0; i < offset; ++i) { + frames.value()[i].virtual_addr = frame_pointers[i]; + } + + for (size_t i = offset; i < size; ++i) + { + StackTrace::Frame & current_frame = frames.value()[i]; + current_frame.virtual_addr = frame_pointers[i]; + const auto * object = symbol_index.findObject(current_frame.virtual_addr); + uintptr_t virtual_offset = object ? 
uintptr_t(object->address_begin) : 0; + current_frame.physical_addr = reinterpret_cast(uintptr_t(current_frame.virtual_addr) - virtual_offset); + + if (object) + { + current_frame.object = object->name; + if (std::filesystem::exists(current_frame.object.value())) + { + auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; + + DB::Dwarf::LocationInfo location; + if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) { + current_frame.file = location.file.toString(); + current_frame.line = location.line; + } + } + } + else + { + current_frame.object = "?"; + } + + const auto * symbol = symbol_index.findSymbol(current_frame.virtual_addr); + if (symbol) + { + int status = 0; + current_frame.symbol = demangle(symbol->name, status); + } + else + { + current_frame.symbol = "?"; + } + } +# else + for (size_t i = 0; i < size; ++i) { + frames.value()[i].virtual_addr = frame_pointers[i]; + } + UNUSED(offset); +#endif +} + StackTrace::StackTrace() { tryCapture(); @@ -203,7 +260,7 @@ StackTrace::StackTrace(const ucontext_t & signal_context) if (size == 0 && caller_address) { - frames[0] = caller_address; + frame_pointers[0] = caller_address; size = 1; } else @@ -212,7 +269,7 @@ StackTrace::StackTrace(const ucontext_t & signal_context) for (size_t i = 0; i < size; ++i) { - if (frames[i] == caller_address) + if (frame_pointers[i] == caller_address) { offset = i; break; @@ -229,8 +286,8 @@ void StackTrace::tryCapture() { size = 0; #if USE_UNWIND - size = unw_backtrace(frames.data(), capacity); - __msan_unpoison(frames.data(), size * sizeof(frames[0])); + size = unw_backtrace(frame_pointers.data(), capacity); + __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0])); #endif } @@ -244,102 +301,89 @@ size_t StackTrace::getOffset() const return offset; } -const StackTrace::Frames & StackTrace::getFrames() const +const StackTrace::FramePointers & StackTrace::getFramePointers() const { - return frames; + return frame_pointers; } +const StackTrace::Frames & StackTrace::getFrames() const +{ + if (!frames.has_value()) { + frames = {{}}; + symbolize(frame_pointers.data(), offset, size, frames); + } + return frames; +} static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) { if (size == 0) return callback(""); -#if defined(__ELF__) && !defined(__FreeBSD__) - const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); - std::unordered_map dwarfs; - std::stringstream out; for (size_t i = offset; i < size; ++i) { - const void * virtual_addr = frames[i]; - const auto * object = symbol_index.findObject(virtual_addr); - uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0; - const void * physical_addr = reinterpret_cast(uintptr_t(virtual_addr) - virtual_offset); - + const StackTrace::Frame& current_frame = frames.value()[i]; out << i << ". 
"; - if (object) + if (current_frame.file.has_value() && current_frame.line.has_value()) { - if (std::filesystem::exists(object->name)) - { - auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; - - DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) - out << location.file.toString() << ":" << location.line << ": "; - } + out << current_frame.file.value() << ":" << current_frame.line.value() << ": "; } - const auto * symbol = symbol_index.findSymbol(virtual_addr); - if (symbol) + if (current_frame.symbol.has_value()) { - int status = 0; - out << demangle(symbol->name, status); + out << current_frame.symbol.value(); } - else - out << "?"; - out << " @ " << physical_addr; - out << " in " << (object ? object->name : "?"); + out << " @ " << current_frame.physical_addr; + if (current_frame.object.has_value()) { + out << " in " << current_frame.object.value(); + } callback(out.str()); out.str({}); } -#else - std::stringstream out; - - for (size_t i = offset; i < size; ++i) - { - const void * addr = frames[i]; - out << i << ". " << addr; - - callback(out.str()); - out.str({}); - } -#endif } -static std::string toStringImpl(const StackTrace::Frames & frames, size_t offset, size_t size) +static std::string toStringImpl(const void * const * frame_pointers, size_t offset, size_t size) { std::stringstream out; + StackTrace::Frames frames{}; + frames = {{}}; + symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } void StackTrace::toStringEveryLine(std::function callback) const { - toStringEveryLineImpl(frames, offset, size, std::move(callback)); + toStringEveryLineImpl(getFrames(), offset, size, std::move(callback)); } +void StackTrace::resetFrames() { + frames.reset(); +} + + std::string StackTrace::toString() const { /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. 
static SimpleCache func_cached; - return func_cached(frames, offset, size); + return func_cached(frame_pointers.data(), offset, size); } -std::string StackTrace::toString(void ** frames_, size_t offset, size_t size) +std::string StackTrace::toString(void ** frame_pointers, size_t offset, size_t size) { __msan_unpoison(frames_, size * sizeof(*frames_)); - StackTrace::Frames frames_copy{}; + StackTrace::FramePointers frame_pointers_copy{}; for (size_t i = 0; i < size; ++i) - frames_copy[i] = frames_[i]; + frame_pointers_copy[i] = frame_pointers[i]; static SimpleCache func_cached; - return func_cached(frames_copy, offset, size); + return func_cached(frame_pointers_copy.data(), offset, size); } diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 401c8344f2d..27b2c44dd94 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -23,8 +25,18 @@ struct NoCapture class StackTrace { public: + struct Frame + { + const void * virtual_addr = nullptr; + void * physical_addr = nullptr; + std::optional symbol; + std::optional object; + std::optional file; + std::optional line; + }; static constexpr size_t capacity = 32; - using Frames = std::array; + using FramePointers = std::array; + using Frames = std::optional>; /// Tries to capture stack trace StackTrace(); @@ -38,19 +50,23 @@ public: size_t getSize() const; size_t getOffset() const; + const FramePointers & getFramePointers() const; const Frames & getFrames() const; std::string toString() const; - static std::string toString(void ** frames, size_t offset, size_t size); + static std::string toString(void ** frame_pointers, size_t offset, size_t size); void toStringEveryLine(std::function callback) const; + void resetFrames(); + protected: void tryCapture(); size_t size = 0; size_t offset = 0; /// How many frames to skip while displaying. 
- Frames frames{}; + FramePointers frame_pointers{}; + mutable Frames frames{}; }; std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp index 7df06dc7892..f5bdfd2b826 100644 --- a/src/Common/TraceCollector.cpp +++ b/src/Common/TraceCollector.cpp @@ -81,7 +81,7 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac size_t stack_trace_offset = stack_trace.getOffset(); writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out); for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) - writePODBinary(stack_trace.getFrames()[i], out); + writePODBinary(stack_trace.getFramePointers()[i], out); writePODBinary(trace_type, out); writePODBinary(thread_id, out); diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index a8966ad0307..bdce70894d5 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -198,7 +198,7 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte Array arr; arr.reserve(stack_trace_size - stack_trace_offset); for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) - arr.emplace_back(reinterpret_cast(stack_trace->getFrames()[i])); + arr.emplace_back(reinterpret_cast(stack_trace->getFramePointers()[i])); res_columns[0]->insert(tid); res_columns[1]->insertData(query_id_data, query_id_size); diff --git a/utils/check-style/check-include b/utils/check-style/check-include index 211172979bd..35f94d6e706 100755 --- a/utils/check-style/check-include +++ b/utils/check-style/check-include @@ -59,6 +59,7 @@ inc="-I. \ -I./contrib/lz4/lib \ -I./contrib/hyperscan/src \ -I./contrib/simdjson/include \ +-I./contrib/sentry-native/include \ -I./src \ -I${BUILD_DIR}/src" From d8342e5b120a716f861fe9fd9f262a7bc34e5efc Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 27 May 2020 23:05:55 +0300 Subject: [PATCH 0193/2229] in-memory parts: better restore and clear stale wal files --- src/Storages/MergeTree/DataPartsExchange.cpp | 6 +- src/Storages/MergeTree/DataPartsExchange.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 - .../MergeTree/MergeTreeBlockOutputStream.cpp | 9 +- src/Storages/MergeTree/MergeTreeData.cpp | 61 +++++++++++ src/Storages/MergeTree/MergeTreeData.h | 3 + src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../MergeTree/MergeTreeWriteAheadLog.cpp | 24 ++++- .../MergeTree/MergeTreeWriteAheadLog.h | 4 +- .../ReplicatedMergeTreeCleanupThread.cpp | 1 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 7 +- src/Storages/StorageMergeTree.cpp | 19 +++- src/Storages/StorageReplicatedMergeTree.cpp | 18 +++- .../configs/do_not_merge.xml | 2 + .../test_polymorphic_parts/test.py | 100 +++++++++++++----- 15 files changed, 213 insertions(+), 48 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f61d80e63ac..5e0ee324e3a 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -276,13 +276,13 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_SIZE) readStringBinary(part_type, in); - return part_type == "InMemory" ? downloadPartToMemory(part_name, replica_path, in) + return part_type == "InMemory" ? 
downloadPartToMemory(part_name, std::move(reservation), in) : downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix_, std::move(reservation), in); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( const String & part_name, - const String & /* replica_path */, + ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in) { MergeTreeData::DataPart::Checksums checksums; @@ -292,7 +292,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( NativeBlockInputStream block_in(in, 0); auto block = block_in.read(); MergeTreeData::MutableDataPartPtr new_data_part = - std::make_shared(data, part_name, nullptr); + std::make_shared(data, part_name, reservation->getDisk()); new_data_part->is_temp = true; new_data_part->setColumns(block.getNamesAndTypesList()); diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 217434d7e41..1aa87d9f307 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -78,7 +78,7 @@ private: MergeTreeData::MutableDataPartPtr downloadPartToMemory( const String & part_name, - const String & replica_path, + ReservationPtr reservation, PooledReadWriteBufferFromHTTP & in); MergeTreeData & data; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a76251c2512..bb51a4e8e67 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -380,8 +380,6 @@ String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const String IMergeTreeDataPart::getFullPath() const { - // assertOnDisk(); //TODO - if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR); @@ -390,8 +388,6 @@ String IMergeTreeDataPart::getFullPath() const String IMergeTreeDataPart::getFullRelativePath() const { - // assertOnDisk(); //TODO - if (relative_path.empty()) throw Exception("Part relative_path cannot be empty. It's bug.", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 67ba2ba8d3e..1b3f80b4e09 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -49,12 +49,13 @@ void MergeTreeBlockOutputStream::write(const Block & block) storage.merging_mutating_task_handle->wake(); } - return; + continue; } - - /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. - if (storage.merging_mutating_task_handle) + else if (storage.merging_mutating_task_handle) + { + /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. 
storage.merging_mutating_task_handle->wake(); + } } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2de2bb8656b..c6bc54eabe8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -58,6 +58,7 @@ #include #include #include +#include namespace ProfileEvents @@ -1046,6 +1047,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) for (auto & part : parts_from_wal) { + if (getActiveContainingPart(part->info, DataPartState::Committed, part_lock)) + continue; + part->modification_time = time(nullptr); /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later part->state = DataPartState::Committed; @@ -1322,6 +1326,61 @@ void MergeTreeData::clearPartsFromFilesystem(const DataPartsVector & parts_to_re } } +void MergeTreeData::clearOldWriteAheadLogs() +{ + DataPartsVector parts = getDataPartsVector(); + std::vector> all_block_numbers_on_disk; + std::vector> block_numbers_on_disk; + + for (const auto & part : parts) + if (part->isStoredOnDisk()) + all_block_numbers_on_disk.emplace_back(part->info.min_block, part->info.max_block); + + if (all_block_numbers_on_disk.empty()) + return; + + std::sort(all_block_numbers_on_disk.begin(), all_block_numbers_on_disk.end()); + block_numbers_on_disk.push_back(all_block_numbers_on_disk[0]); + for (size_t i = 1; i < all_block_numbers_on_disk.size(); ++i) + { + if (all_block_numbers_on_disk[i].first == all_block_numbers_on_disk[i - 1].second + 1) + block_numbers_on_disk.back().second = all_block_numbers_on_disk[i].second; + else + block_numbers_on_disk.push_back(all_block_numbers_on_disk[i]); + } + + auto is_range_on_disk = [&block_numbers_on_disk](Int64 min_block, Int64 max_block) + { + auto lower = std::upper_bound(block_numbers_on_disk.begin(), block_numbers_on_disk.end(), std::make_pair(min_block, -1L)); + if (lower != block_numbers_on_disk.end() && min_block >= lower->first && max_block <= lower->second) + return true; + + if (lower != block_numbers_on_disk.begin()) + { + --lower; + if (min_block >= lower->first && max_block <= lower->second) + return true; + } + + return false; + }; + + auto disks = getStoragePolicy()->getDisks(); + for (auto disk_it = disks.rbegin(); disk_it != disks.rend(); ++disk_it) + { + auto disk_ptr = *disk_it; + for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) + { + auto min_max_block_number = MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(it->name()); + if (min_max_block_number && is_range_on_disk(min_max_block_number->first, min_max_block_number->second)) + { + LOG_DEBUG(log, "Removing from filesystem outdated WAL file " + it->name()); + disk_ptr->remove(relative_data_path + it->name()); + } + } + } +} + void MergeTreeData::rename(const String & new_table_path, const StorageID & new_table_id) { auto disks = getStoragePolicy()->getDisks(); @@ -1875,6 +1934,7 @@ void MergeTreeData::renameTempPartAndReplace( DataPartPtr covering_part; DataPartsVector covered_parts = getActivePartsToReplace(part_info, part_name, covering_part, lock); + DataPartsVector covered_parts_in_memory; if (covering_part) { @@ -3690,6 +3750,7 @@ void MergeTreeData::MergesThrottler::add(size_t bytes, size_t rows) void MergeTreeData::MergesThrottler::reset() { + std::lock_guard lock(mutex); have_bytes = 0; have_rows = 0; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 56e9ab1da7a..d56f6d57283 100644 --- 
a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -492,6 +492,9 @@ public: void clearOldPartsFromFilesystem(bool force = false); void clearPartsFromFilesystem(const DataPartsVector & parts); + /// Delete WAL files containing parts, that all already stored on disk. + void clearOldWriteAheadLogs(); + /// Delete all directories which names begin with "tmp" /// Set non-negative parameter value to override MergeTreeSettings temporary_directories_lifetime /// Must be called with locked lockStructureForShare(). diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 68e240f9d7e..0012ed89b77 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -35,6 +35,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, min_rows_for_compact_part, 0, "Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(SettingBool, in_memory_parts_enable_wal, 1, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(SettingBool, in_memory_parts_insert_sync, 0, "", 0) \ + M(SettingUInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ \ /** Merge settings. */ \ M(SettingUInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 5ceedbe1f55..e2dcea290de 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -48,7 +49,8 @@ void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name block_out->write(block); block_out->flush(); - if (out->count() > MAX_WAL_BYTES) + auto max_wal_bytes = storage.getSettings()->write_ahead_log_max_bytes; + if (out->count() > max_wal_bytes) rotate(); } @@ -106,7 +108,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() /// If file is broken, do not write new parts to it. /// But if it contains any part rotate and save them. 
if (max_block_number == -1) - Poco::File(path).remove(); + disk->remove(path); else if (name == DEFAULT_WAL_FILE) rotate(); @@ -135,4 +137,22 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() return result; } +std::optional +MergeTreeWriteAheadLog::tryParseMinMaxBlockNumber(const String & filename) +{ + Int64 min_block; + Int64 max_block; + ReadBufferFromString in(filename); + if (!checkString(WAL_FILE_NAME, in) + || !checkChar('_', in) + || !tryReadIntText(min_block, in) + || !checkChar('_', in) + || !tryReadIntText(max_block, in)) + { + return {}; + } + + return std::make_pair(min_block, max_block); +} + } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 22665048f56..3081d51ecac 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -16,7 +16,6 @@ public: constexpr static auto WAL_FILE_NAME = "wal"; constexpr static auto WAL_FILE_EXTENSION = ".bin"; constexpr static auto DEFAULT_WAL_FILE = "wal.bin"; - constexpr static size_t MAX_WAL_BYTES = 1024 * 1024 * 1024; MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, const String & name = DEFAULT_WAL_FILE); @@ -24,6 +23,9 @@ public: void write(const Block & block, const String & part_name); std::vector restore(); + using MinMaxBlockNumber = std::pair; + static std::optional tryParseMinMaxBlockNumber(const String & filename); + private: void init(); void rotate(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index b1164f6621c..cbb0b3f5c0e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -54,6 +54,7 @@ void ReplicatedMergeTreeCleanupThread::run() void ReplicatedMergeTreeCleanupThread::iterate() { storage.clearOldPartsAndRemoveFromZK(); + storage.clearOldWriteAheadLogs(); { /// TODO: Implement tryLockStructureForShare. 
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 80e7e033525..39b7a6968a9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1014,7 +1014,12 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); if (part) - sum_parts_size_in_bytes += part->getBytesOnDisk(); + { + if (auto * part_in_memory = dynamic_cast(part.get())) + sum_parts_size_in_bytes += part_in_memory->block.bytes(); + else + sum_parts_size_in_bytes += part->getBytesOnDisk(); + } } if (merger_mutator.merges_blocker.isCancelled()) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ad2e0317882..73ae4cdd33c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -87,6 +87,7 @@ StorageMergeTree::StorageMergeTree( void StorageMergeTree::startup() { clearOldPartsFromFilesystem(); + clearOldWriteAheadLogs(); /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately @@ -632,8 +633,22 @@ bool StorageMergeTree::merge( new_part = merger_mutator.mergePartsToTemporaryPart( future_part, *merge_entry, table_lock_holder, time(nullptr), merging_tagger->reserved_space, deduplicate, force_ttl); + merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr); + DataPartsVector parts_to_remove_immediately; + for (const auto & part : future_part.parts) + { + part->notifyMerged(); + if (isInMemoryPart(part)) + { + modifyPartState(part, DataPartState::Deleting); + parts_to_remove_immediately.push_back(part); + } + } + + removePartsFinally(parts_to_remove_immediately); + merging_tagger->is_successful = true; write_part_log({}); } @@ -644,9 +659,6 @@ bool StorageMergeTree::merge( throw; } - for (const auto & part : future_part.parts) - part->notifyMerged(); - return true; } @@ -818,6 +830,7 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::mergeMutateTask() clearOldTemporaryDirectories(); } clearOldMutations(); + clearOldWriteAheadLogs(); } ///TODO: read deduplicate option from table config diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ab88b93fe01..2c6131caf63 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -298,6 +298,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// Temporary directories contain untinalized results of Merges or Fetches (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately clearOldTemporaryDirectories(0); + clearOldWriteAheadLogs(); } createNewZooKeeperNodes(); @@ -1050,7 +1051,8 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) for (auto & part_ptr : parts) { ttl_infos.update(part_ptr->ttl_infos); - max_volume_index = std::max(max_volume_index, getStoragePolicy()->getVolumeIndexByDisk(part_ptr->disk)); + if (part_ptr->isStoredOnDisk()) + max_volume_index = std::max(max_volume_index, getStoragePolicy()->getVolumeIndexByDisk(part_ptr->disk)); } ReservationPtr reserved_space = reserveSpacePreferringTTLRules(estimated_space_for_merge, ttl_infos, time(nullptr), max_volume_index); @@ -1091,6 +1093,20 @@ bool 
StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) try { checkPartChecksumsAndCommit(transaction, part); + + DataPartsVector parts_to_remove_immediatly; + for (const auto & part_ptr : parts) + { + part_ptr->notifyMerged(); + if (isInMemoryPart(part_ptr)) + { + modifyPartState(part_ptr, DataPartState::Deleting); + parts_to_remove_immediatly.push_back(part_ptr); + } + } + + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove_immediatly); + removePartsFinally(parts_to_remove_immediatly); } catch (const Exception & e) { diff --git a/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml b/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml index bc2dae31ad6..8b57af4f48e 100644 --- a/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml +++ b/tests/integration/test_polymorphic_parts/configs/do_not_merge.xml @@ -2,5 +2,7 @@ 1 2 + 100 + 0 diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index ba8b4b6b725..362204c307d 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -70,8 +70,11 @@ node6 = cluster.add_instance('node6', config_dir='configs', main_configs=['confi settings_in_memory = {'index_granularity_bytes' : 10485760, 'min_rows_for_wide_part' : 512, 'min_rows_for_compact_part' : 256} -node9 = cluster.add_instance('node9', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) -node10 = cluster.add_instance('node10', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) +node9 = cluster.add_instance('node9', config_dir="configs", with_zookeeper=True) +node10 = cluster.add_instance('node10', config_dir="configs", with_zookeeper=True) + +node11 = cluster.add_instance('node11', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) +node12 = cluster.add_instance('node12', config_dir="configs", main_configs=['configs/do_not_merge.xml'], with_zookeeper=True, stay_alive=True) @pytest.fixture(scope="module") def start_cluster(): @@ -85,7 +88,8 @@ def start_cluster(): create_tables('polymorphic_table_wide', [node3, node4], [settings_wide, settings_compact], "shard2") create_tables_old_format('polymorphic_table', [node5, node6], "shard3") create_tables('in_memory_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") - create_tables('wal_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") + create_tables('wal_table', [node11, node12], [settings_in_memory, settings_in_memory], "shard4") + create_tables('restore_table', [node11, node12], [settings_in_memory, settings_in_memory], "shard5") yield cluster @@ -317,66 +321,106 @@ def test_in_memory(start_cluster): assert TSV(node10.query("SELECT part_type, count() FROM system.parts " \ "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV(expected) + node9.query("SYSTEM START MERGES") + node10.query("SYSTEM START MERGES") + + assert_eq_with_retry(node9, "OPTIMIZE TABLE in_memory_table FINAL SETTINGS optimize_throw_if_noop = 1", "") + node10.query("SYSTEM SYNC REPLICA in_memory_table", timeout=20) + + assert node9.query("SELECT count() FROM in_memory_table") == "1300\n" + assert node10.query("SELECT count() FROM in_memory_table") == "1300\n" + + assert TSV(node9.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 
'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV("Wide\t1\n") + assert TSV(node10.query("SELECT part_type, count() FROM system.parts " \ + "WHERE table = 'in_memory_table' AND active GROUP BY part_type ORDER BY part_type")) == TSV("Wide\t1\n") + def test_in_memory_wal(start_cluster): # Merges are disabled in config for i in range(5): - insert_random_data('wal_table', node9, 50) - node10.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + insert_random_data('wal_table', node11, 50) + node12.query("SYSTEM SYNC REPLICA wal_table", timeout=20) def check(node, rows, parts): node.query("SELECT count() FROM wal_table") == "{}\n".format(rows) node.query("SELECT count() FROM system.parts WHERE table = 'wal_table' AND part_type = 'InMemory'") == "{}\n".format(parts) - check(node9, 250, 5) - check(node10, 250, 5) + check(node11, 250, 5) + check(node12, 250, 5) # WAL works at inserts - node9.restart_clickhouse(kill=True) - check(node9, 250, 5) + node11.restart_clickhouse(kill=True) + check(node11, 250, 5) # WAL works at fetches - node10.restart_clickhouse(kill=True) - check(node10, 250, 5) + node12.restart_clickhouse(kill=True) + check(node12, 250, 5) - insert_random_data('wal_table', node9, 50) - node10.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + insert_random_data('wal_table', node11, 50) + node12.query("SYSTEM SYNC REPLICA wal_table", timeout=20) # Disable replication with PartitionManager() as pm: - pm.partition_instances(node9, node10) - check(node9, 300, 6) + pm.partition_instances(node11, node12) + check(node11, 300, 6) - wal_file = os.path.join(node9.path, "database/data/default/wal_table/wal.bin") + wal_file = os.path.join(node11.path, "database/data/default/wal_table/wal.bin") # Corrupt wal file open(wal_file, 'rw+').truncate(os.path.getsize(wal_file) - 10) - node9.restart_clickhouse(kill=True) + node11.restart_clickhouse(kill=True) # Broken part is lost, but other restored successfully - check(node9, 250, 5) + check(node11, 250, 5) # WAL with blocks from 0 to 4 - broken_wal_file = os.path.join(node9.path, "database/data/default/wal_table/wal_0_4.bin") + broken_wal_file = os.path.join(node11.path, "database/data/default/wal_table/wal_0_4.bin") assert os.path.exists(broken_wal_file) # Fetch lost part from replica - node9.query("SYSTEM SYNC REPLICA wal_table", timeout=20) - check(node9, 300, 6) + node11.query("SYSTEM SYNC REPLICA wal_table", timeout=20) + check(node11, 300, 6) #Check that new data is written to new wal, but old is still exists for restoring assert os.path.getsize(wal_file) > 0 - assert os.path.getsize(broken_wal_file) + assert os.path.exists(broken_wal_file) # Data is lost without WAL - node9.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0") + node11.query("ALTER TABLE wal_table MODIFY SETTING in_memory_parts_enable_wal = 0") with PartitionManager() as pm: - pm.partition_instances(node9, node10) + pm.partition_instances(node11, node12) - insert_random_data('wal_table', node9, 50) - check(node9, 350, 7) + insert_random_data('wal_table', node11, 50) + check(node11, 350, 7) - node9.restart_clickhouse(kill=True) - check(node9, 300, 6) + node11.restart_clickhouse(kill=True) + check(node11, 300, 6) +def test_in_memory_wal_rotate(start_cluster): + # Write every part to single wal + node11.query("ALTER TABLE restore_table MODIFY SETTING write_ahead_log_max_bytes = 10") + for i in range(5): + insert_random_data('restore_table', node11, 50) + + for i in range(5): + wal_file = os.path.join(node11.path, 
"database/data/default/restore_table/wal_{0}_{0}.bin".format(i)) + assert os.path.exists(wal_file) + + for node in [node11, node12]: + node.query("ALTER TABLE restore_table MODIFY SETTING number_of_free_entries_in_pool_to_lower_max_size_of_merge = 0") + node.query("ALTER TABLE restore_table MODIFY SETTING max_bytes_to_merge_at_max_space_in_pool = 10000000") + + assert_eq_with_retry(node11, "OPTIMIZE TABLE restore_table FINAL SETTINGS optimize_throw_if_noop = 1", "") + # Restart to be sure, that clearing stale logs task was ran + node11.restart_clickhouse(kill=True) + + for i in range(5): + wal_file = os.path.join(node11.path, "database/data/default/restore_table/wal_{0}_{0}.bin".format(i)) + assert not os.path.exists(wal_file) + + # New wal file was created and ready to write part to it + wal_file = os.path.join(node11.path, "database/data/default/restore_table/wal.bin") + assert os.path.exists(wal_file) + assert os.path.getsize(wal_file) == 0 def test_polymorphic_parts_index(start_cluster): node1.query(''' From 31123236cb359f1783dcadf8c3062ddb1ca6b8cf Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 27 May 2020 23:15:33 +0300 Subject: [PATCH 0194/2229] Settings for crash report opt-in --- base/daemon/BaseDaemon.cpp | 2 +- base/daemon/SentryWriter.cpp | 164 +++++++++++++++++++++-------------- base/daemon/SentryWriter.h | 4 +- src/Common/config.h.in | 1 + 4 files changed, 105 insertions(+), 66 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index f269c3923e0..a8a79827552 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -514,7 +514,6 @@ void debugIncreaseOOMScore() {} void BaseDaemon::initialize(Application & self) { closeFDs(); - SentryWriter::initialize(); task_manager = std::make_unique(); ServerApplication::initialize(self); @@ -533,6 +532,7 @@ void BaseDaemon::initialize(Application & self) } reloadConfiguration(); + SentryWriter::initialize(config()); /// This must be done before creation of any files (including logs). 
mode_t umask_num = 0027; diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 8859adc1c2e..5c7d6eadd98 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -1,14 +1,21 @@ #include +#include #include #if !defined(ARCADIA_BUILD) # include "Common/config_version.h" #endif +#if USE_SENTRY #include +#endif + namespace { + static bool initialized = false; + void setExtras() { +#if USE_SENTRY sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); @@ -16,24 +23,47 @@ namespace { sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); +#endif } } -void SentryWriter::initialize() { - sentry_options_t * options = sentry_options_new(); - sentry_options_set_release(options, VERSION_STRING); - sentry_options_set_debug(options, 1); - sentry_init(options); - sentry_options_set_dsn(options, "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); - if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { - sentry_options_set_environment(options, "prod"); - } else { - sentry_options_set_environment(options, "test"); +void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { +#if USE_SENTRY + bool enabled = false; + if (config.getBool("send_crash_reports.enabled", false)) + { + if ((strlen(VERSION_OFFICIAL) > 0) || config.getBool("send_crash_reports.debug", false)) + { + enabled = true; + } } + if (enabled) + { + const std::string & endpoint = config.getString( + "send_crash_reports.endpoint", + "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277" + ); + sentry_options_t * options = sentry_options_new(); + sentry_options_set_release(options, VERSION_STRING); + sentry_options_set_debug(options, 1); + sentry_init(options); + sentry_options_set_dsn(options, endpoint.c_str()); + if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { + sentry_options_set_environment(options, "prod"); + } else { + sentry_options_set_environment(options, "test"); + } + initialized = true; + } +#endif } void SentryWriter::shutdown() { - sentry_shutdown(); +#if USE_SENTRY + if (initialized) { + sentry_shutdown(); + } +#endif } void SentryWriter::onFault( @@ -43,65 +73,71 @@ void SentryWriter::onFault( const StackTrace & stack_trace ) { - const std::string & error_message = signalToErrorMessage(sig, info, context); - sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); - sentry_set_tag("signal", strsignal(sig)); - sentry_set_tag("server_name", getFQDNOrHostName().c_str()); - sentry_set_extra("signal_number", sentry_value_new_int32(sig)); - setExtras(); - - sentry_value_t frames = sentry_value_new_list(); - - size_t stack_size = stack_trace.getSize(); - if (stack_size > 0) +#if USE_SENTRY + if (initialized) { - size_t offset = stack_trace.getOffset(); - if (stack_size == 1) + const std::string & error_message = signalToErrorMessage(sig, info, context); + sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); + sentry_set_tag("signal", strsignal(sig)); + sentry_set_tag("server_name", getFQDNOrHostName().c_str()); + 
sentry_set_extra("signal_number", sentry_value_new_int32(sig)); + setExtras(); + + /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/stacktrace/ + sentry_value_t frames = sentry_value_new_list(); + size_t stack_size = stack_trace.getSize(); + if (stack_size > 0) { - offset = 1; + size_t offset = stack_trace.getOffset(); + if (stack_size == 1) + { + offset = 1; + } + char instruction_addr[100]; + for (size_t i = stack_size - 1; i >= offset; --i) + { + const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; + sentry_value_t frame = sentry_value_new_object(); + unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr); + snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr); + sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + + if (current_frame.symbol.has_value()) + { + sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); + } + + if (current_frame.file.has_value()) + { + sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); + } + + if (current_frame.line.has_value()) + { + sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); + } + + sentry_value_append(frames, frame); + } } - char instruction_addr[100]; - for (size_t i = stack_size - 1; i >= offset; --i) - { - const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; - sentry_value_t frame = sentry_value_new_object(); - unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr); - snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr); - sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); - if (current_frame.symbol.has_value()) - { - sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); - } + /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/ + sentry_value_t stacktrace = sentry_value_new_object(); + sentry_value_set_by_key(stacktrace, "frames", frames); - if (current_frame.file.has_value()) - { - sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); - } + sentry_value_t thread = sentry_value_new_object(); + sentry_value_set_by_key(thread, "stacktrace", stacktrace); - if (current_frame.line.has_value()) - { - sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); - } + sentry_value_t values = sentry_value_new_list(); + sentry_value_append(values, thread); - sentry_value_append(frames, frame); - } + sentry_value_t threads = sentry_value_new_object(); + sentry_value_set_by_key(threads, "values", values); + + sentry_value_set_by_key(event, "threads", threads); + + sentry_capture_event(event); + shutdown(); } - - sentry_value_t stacktrace = sentry_value_new_object(); - sentry_value_set_by_key(stacktrace, "frames", frames); - - sentry_value_t thread = sentry_value_new_object(); - sentry_value_set_by_key(thread, "stacktrace", stacktrace); - - sentry_value_t values = sentry_value_new_list(); - sentry_value_append(values, thread); - - sentry_value_t threads = sentry_value_new_object(); - sentry_value_set_by_key(threads, "values", values); - - sentry_value_set_by_key(event, "threads", threads); - - sentry_capture_event(event); - shutdown(); +#endif } diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h index 
6c85ef04dd3..ee45ae4f203 100644 --- a/base/daemon/SentryWriter.h +++ b/base/daemon/SentryWriter.h @@ -3,6 +3,8 @@ #include #include +#include + #include class SentryWriter @@ -10,7 +12,7 @@ class SentryWriter public: SentryWriter() = delete; - static void initialize(); + static void initialize(Poco::Util::LayeredConfiguration & config); static void shutdown(); static void onFault( int sig, diff --git a/src/Common/config.h.in b/src/Common/config.h.in index df2359c1c29..dd6263c3948 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,4 +9,5 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 USE_OPENCL +#cmakedefine01 USE_SENTRY #cmakedefine01 CLICKHOUSE_SPLIT_BINARY From 52e4a0293d622072bbd8d9f09d37bc7257b83174 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 27 May 2020 23:21:53 +0300 Subject: [PATCH 0195/2229] Keep sentry-native in debug mode only under setting --- base/daemon/SentryWriter.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 5c7d6eadd98..7e2a95c8369 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -30,9 +30,10 @@ namespace { void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { #if USE_SENTRY bool enabled = false; + bool debug = config.getBool("send_crash_reports.debug", false); if (config.getBool("send_crash_reports.enabled", false)) { - if ((strlen(VERSION_OFFICIAL) > 0) || config.getBool("send_crash_reports.debug", false)) + if (debug || (strlen(VERSION_OFFICIAL) > 0)) { enabled = true; } @@ -45,7 +46,10 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { ); sentry_options_t * options = sentry_options_new(); sentry_options_set_release(options, VERSION_STRING); - sentry_options_set_debug(options, 1); + if (debug) + { + sentry_options_set_debug(options, 1); + } sentry_init(options); sentry_options_set_dsn(options, endpoint.c_str()); if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { From 905bce4aebb6925f37f33806cfd9642a51cec69c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 23:13:25 +0300 Subject: [PATCH 0196/2229] fix --- contrib/libuv-cmake/CMakeLists.txt | 6 ++++- .../CassandraBlockInputStream.cpp | 21 +++++++++-------- .../CassandraDictionarySource.cpp | 23 ++++++++----------- tests/integration/helpers/cluster.py | 6 ++--- .../external_sources.py | 1 + 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index a2869e037ff..65aeabdd9cb 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -227,7 +227,11 @@ set(uv_test_sources # list(APPEND uv_test_libraries ws2_32) # list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) #else() -list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) + +if(CMAKE_SIZEOF_VOID_P EQUAL 4) + list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) +endif() + if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") # Android has pthread as part of its c library, not as a separate # libpthread.so. 
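The libuv fix above matters only on 32-bit targets: there, off_t defaults to 32 bits and the define widens it to 64, while on 64-bit platforms off_t is already 64-bit, which is exactly what the CMAKE_SIZEOF_VOID_P EQUAL 4 guard encodes. A minimal sketch (not part of the patch) that makes the effect visible when built with and without -D_FILE_OFFSET_BITS=64 on a 32-bit glibc system:

    #include <sys/types.h>
    #include <stdio.h>

    int main(void)
    {
        /* Prints 4 on 32-bit glibc by default and 8 when built with
         * -D_FILE_OFFSET_BITS=64; on 64-bit targets it is always 8. */
        printf("sizeof(off_t) = %zu\n", (size_t) sizeof(off_t));
        return 0;
    }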
diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 473a42549a8..6d8a45508ce 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -1,16 +1,17 @@ -# include -# include -# include -# include -# include -# include -# include +#if !defined(ARCADIA_BUILD) +#include +#endif #if USE_CASSANDRA -# include -# include "CassandraBlockInputStream.h" -# include "CassandraBlockInputStream.h" +#include +#include +#include +#include +#include +#include +#include +#include "CassandraBlockInputStream.h" namespace DB diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 4150fe56f14..2aee5d44b6b 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -13,19 +13,16 @@ namespace DB void registerDictionarySourceCassandra(DictionarySourceFactory & factory) { - auto create_table_source = [=](const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block, - const Context & /* context */, - bool /*check_config*/) -> DictionarySourcePtr { + auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct, + [[maybe_unused]] const Poco::Util::AbstractConfiguration & config, + [[maybe_unused]] const std::string & config_prefix, + [[maybe_unused]] Block & sample_block, + const Context & /* context */, + bool /*check_config*/) -> DictionarySourcePtr + { #if USE_CASSANDRA return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else - (void)dict_struct; - (void)config; - (void)config_prefix; - (void)sample_block; throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif @@ -37,9 +34,9 @@ namespace DB #if USE_CASSANDRA -# include -# include -# include "CassandraBlockInputStream.h" +#include +#include +#include "CassandraBlockInputStream.h" namespace DB { diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 49a18d14796..a539db1d47f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -465,11 +465,11 @@ class ClickHouseCluster: start = time.time() while time.time() - start < timeout: try: - cass_client.connect().execute("drop keyspace if exists test;") - logging.info("Connected to Cassandra %s") + cass_client.connect() + logging.info("Connected to Cassandra") return except Exception as ex: - logging.warning("Can't connect to Minio: %s", str(ex)) + logging.warning("Can't connect to Cassandra: %s", str(ex)) time.sleep(1) def start(self, destroy_dirs=True): diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 2dad70bc913..336f3ddc28b 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -447,6 +447,7 @@ class SourceCassandra(ExternalSource): self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port) self.session = self.client.connect() self.session.execute("create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};") + 
self.session.execute('drop table if exists test."{}"'.format(table_name)) self.structure[table_name] = structure columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] keys = ['"' + col.name + '"' for col in structure.keys] From d74f1357d48c82333dfa063bf9db4c75855ae4fb Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 28 May 2020 01:06:33 +0400 Subject: [PATCH 0197/2229] Add LDAP authentication support --- programs/server/MySQLHandler.cpp | 2 +- programs/server/users.xml | 35 +++ src/Access/AccessControlManager.cpp | 119 ++++++++++ src/Access/AccessControlManager.h | 4 + src/Access/Authentication.cpp | 25 ++- src/Access/Authentication.h | 20 +- src/Access/ContextAccess.cpp | 4 +- src/Access/ContextAccess.h | 4 +- src/Access/ExternalAuthenticators.cpp | 28 +++ src/Access/ExternalAuthenticators.h | 24 ++ src/Access/LDAPClient.cpp | 279 ++++++++++++++++++++++++ src/Access/LDAPClient.h | 55 +++++ src/Access/LDAPParams.h | 63 ++++++ src/Access/UsersConfigAccessStorage.cpp | 20 +- src/Access/ya.make | 2 + src/Common/ErrorCodes.cpp | 1 + src/Core/config_core.h.in | 1 + src/Interpreters/Context.cpp | 2 +- src/Parsers/ASTCreateUserQuery.cpp | 4 + src/Parsers/ASTCreateUserQuery.h | 4 +- src/Parsers/ParserCreateUserQuery.cpp | 11 + src/Parsers/ParserCreateUserQuery.h | 4 +- 22 files changed, 696 insertions(+), 15 deletions(-) create mode 100644 src/Access/ExternalAuthenticators.cpp create mode 100644 src/Access/ExternalAuthenticators.h create mode 100644 src/Access/LDAPClient.cpp create mode 100644 src/Access/LDAPClient.h create mode 100644 src/Access/LDAPParams.h diff --git a/programs/server/MySQLHandler.cpp b/programs/server/MySQLHandler.cpp index a3ac3601e01..afad1a99817 100644 --- a/programs/server/MySQLHandler.cpp +++ b/programs/server/MySQLHandler.cpp @@ -224,7 +224,7 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. auto user = connection_context.getAccessControlManager().read(user_name); const DB::Authentication::Type user_auth_type = user->authentication.getType(); - if (user_auth_type != DB::Authentication::DOUBLE_SHA1_PASSWORD && user_auth_type != DB::Authentication::PLAINTEXT_PASSWORD && user_auth_type != DB::Authentication::NO_PASSWORD) + if (user_auth_type != DB::Authentication::LDAP_PASSWORD && user_auth_type != DB::Authentication::DOUBLE_SHA1_PASSWORD && user_auth_type != DB::Authentication::PLAINTEXT_PASSWORD && user_auth_type != DB::Authentication::NO_PASSWORD) { authPluginSSL(); } diff --git a/programs/server/users.xml b/programs/server/users.xml index 3d95269190b..286c065722e 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -27,6 +27,38 @@ + + + + + @@ -44,6 +76,9 @@ If you want to specify double SHA1, place it in 'password_double_sha1_hex' element. Example: e395796d6546b1b65db9d665cd43f0e858dd4303 + If you want to specify a previously defined LDAP server (see 'ldap_servers' above) for authentication, place its name in the 'server' element inside the 'ldap' element. + Example: my_ldap_server + How to generate a decent password: Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-' The first line will be the password and the second the corresponding SHA256 hash. 
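The 'password_double_sha1_hex' format documented above is the SHA1(SHA1(password)) digest that the patch's Authentication.h describes below for the mysql_native_password plugin. A minimal sketch of producing such a hex digest, assuming OpenSSL's SHA1() is available (illustrative only, not part of the patch):

    #include <openssl/sha.h>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        const char * password = "example";
        unsigned char first[SHA_DIGEST_LENGTH];
        unsigned char second[SHA_DIGEST_LENGTH];

        /// First SHA1 of the password, then SHA1 of that digest.
        SHA1(reinterpret_cast<const unsigned char *>(password), strlen(password), first);
        SHA1(first, SHA_DIGEST_LENGTH, second);

        for (size_t i = 0; i < SHA_DIGEST_LENGTH; ++i)
            printf("%02x", second[i]);
        printf("\n");
        return 0;
    }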
diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 1c1215a0e28..fc659d8bacb 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -9,13 +9,22 @@ #include #include #include +#include #include #include +#include +#include #include namespace DB { + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + namespace { std::vector> createStorages() @@ -32,6 +41,110 @@ namespace constexpr size_t DISK_ACCESS_STORAGE_INDEX = 0; constexpr size_t USERS_CONFIG_ACCESS_STORAGE_INDEX = 1; + + auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const String & ldap_server_name) + { + LDAPServerParams params; + + const String ldap_server_config = "ldap_servers." + ldap_server_name; + + const bool has_host = config.has(ldap_server_config + ".host"); + const bool has_port = config.has(ldap_server_config + ".port"); + const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); + const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); + const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); + const bool has_tls_cert_verify = config.has(ldap_server_config + ".tls_cert_verify"); + const bool has_ca_cert_dir = config.has(ldap_server_config + ".ca_cert_dir"); + const bool has_ca_cert_file = config.has(ldap_server_config + ".ca_cert_file"); + + if (!has_host) + throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); + + params.host = config.getString(ldap_server_config + ".host"); + + if (params.host.empty()) + throw Exception("Empty 'host' entry", ErrorCodes::BAD_ARGUMENTS); + + if (has_auth_dn_prefix) + params.auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + + if (has_auth_dn_suffix) + params.auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + + if (has_enable_tls) + { + String enable_tls_lc_str = config.getString(ldap_server_config + ".enable_tls"); + boost::to_lower(enable_tls_lc_str); + + if (enable_tls_lc_str == "starttls") + params.enable_tls = LDAPServerParams::TLSEnable::YES_STARTTLS; + else if (config.getBool(ldap_server_config + ".enable_tls")) + params.enable_tls = LDAPServerParams::TLSEnable::YES; + else + params.enable_tls = LDAPServerParams::TLSEnable::NO; + } + + if (has_tls_cert_verify) + { + String tls_cert_verify_lc_str = config.getString(ldap_server_config + ".tls_cert_verify"); + boost::to_lower(tls_cert_verify_lc_str); + + if (tls_cert_verify_lc_str == "never") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::NEVER; + else if (tls_cert_verify_lc_str == "allow") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::ALLOW; + else if (tls_cert_verify_lc_str == "try") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::TRY; + else if (tls_cert_verify_lc_str == "demand") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::DEMAND; + else + throw Exception("Bad value for 'tls_cert_verify' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); + } + + if (has_ca_cert_dir) + params.ca_cert_dir = config.getString(ldap_server_config + ".ca_cert_dir"); + + if (has_ca_cert_file) + params.ca_cert_file = config.getString(ldap_server_config + ".ca_cert_file"); + + if (has_port) + { + const auto port = config.getInt64(ldap_server_config + ".port"); + if (port < 0 || port > 65535) + throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS); + + params.port = port; + } + else + params.port 
= (params.enable_tls == LDAPServerParams::TLSEnable::YES ? 636 : 389); + + return params; + } + + void parseAndAddLDAPServers(ExternalAuthenticators & external_authenticators, const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) + { + Poco::Util::AbstractConfiguration::Keys ldap_server_names; + config.keys("ldap_servers", ldap_server_names); + + for (const auto & ldap_server_name : ldap_server_names) + { + try + { + external_authenticators.setLDAPServerParams(ldap_server_name, parseLDAPServer(config, ldap_server_name)); + } + catch (...) + { + tryLogCurrentException(log, "Could not parse LDAP server " + backQuote(ldap_server_name)); + } + } + } + + auto parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) + { + auto external_authenticators = std::make_unique(); + parseAndAddLDAPServers(*external_authenticators, config, log); + return external_authenticators; + } } @@ -100,6 +213,7 @@ void AccessControlManager::setLocalDirectory(const String & directory_path) void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config) { + external_authenticators = parseExternalAuthenticators(users_config, getLogger()); auto & users_config_access_storage = dynamic_cast(getStorageByIndex(USERS_CONFIG_ACCESS_STORAGE_INDEX)); users_config_access_storage.setConfiguration(users_config); } @@ -164,4 +278,9 @@ std::shared_ptr AccessControlManager::getProfileSettings( return settings_profiles_cache->getProfileSettings(profile_name); } +const ExternalAuthenticators & AccessControlManager::getExternalAuthenticators() const +{ + return *external_authenticators; +} + } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 6bcf8d7c504..4ef971dca65 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -36,6 +36,7 @@ class EnabledSettings; class SettingsProfilesCache; class SettingsProfileElements; class ClientInfo; +class ExternalAuthenticators; struct Settings; @@ -82,6 +83,8 @@ public: std::shared_ptr getProfileSettings(const String & profile_name) const; + const ExternalAuthenticators & getExternalAuthenticators() const; + private: class ContextAccessCache; std::unique_ptr context_access_cache; @@ -89,6 +92,7 @@ private: std::unique_ptr row_policy_cache; std::unique_ptr quota_cache; std::unique_ptr settings_profiles_cache; + std::unique_ptr external_authenticators; }; } diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index f435d6e6336..e7e8b10aabc 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include @@ -11,6 +13,15 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +const String & Authentication::getLDAPServerName() const +{ + return ldap_server_name; +} + +void Authentication::setLDAPServerName(const String & server_name) +{ + ldap_server_name = server_name; +} Authentication::Digest Authentication::getPasswordDoubleSHA1() const { @@ -36,12 +47,15 @@ Authentication::Digest Authentication::getPasswordDoubleSHA1() const case DOUBLE_SHA1_PASSWORD: return password_hash; + + case LDAP_PASSWORD: + throw Exception("Cannot get password double SHA1 for user with 'LDAP_PASSWORD' authentication.", ErrorCodes::BAD_ARGUMENTS); } throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); } -bool Authentication::isCorrectPassword(const String & password_) const +bool 
Authentication::isCorrectPassword(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators) const { switch (type) { @@ -71,6 +85,15 @@ bool Authentication::isCorrectPassword(const String & password_) const return encodeSHA1(first_sha1) == password_hash; } + + case LDAP_PASSWORD: + { + auto ldap_server_params = external_authenticators.getLDAPServerParams(ldap_server_name); + ldap_server_params.user = user_; + ldap_server_params.password = password_; + LDAPSimpleAuthClient ldap_client(ldap_server_params); + return ldap_client.check(); + } } throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 3f16dc56de3..2e166a431f9 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +class ExternalAuthenticators; /// Authentication type and encrypted password for checking when an user logins. class Authentication @@ -35,6 +36,9 @@ public: /// SHA1(SHA1(password)). /// This kind of hash is used by the `mysql_native_password` authentication plugin. DOUBLE_SHA1_PASSWORD, + + /// Password is checked by a [remote] LDAP server. Connection will be made at each authentication attempt. + LDAP_PASSWORD, }; using Digest = std::vector; @@ -67,8 +71,13 @@ public: /// Allowed to use for Type::NO_PASSWORD, Type::PLAINTEXT_PASSWORD, Type::DOUBLE_SHA1_PASSWORD. Digest getPasswordDoubleSHA1() const; + /// Sets an external LDAP server name. LDAP server name is used when authentication type is LDAP_PASSWORD. + void setLDAPServerName(const String & server_name); + const String & getLDAPServerName() const; + /// Checks if the provided password is correct. Returns false if not. - bool isCorrectPassword(const String & password) const; + /// User name and external authenticators' info is used only by some specific authentication mechanisms (e.g., LDAP). 
+ bool isCorrectPassword(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators) const; friend bool operator ==(const Authentication & lhs, const Authentication & rhs) { return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash); } friend bool operator !=(const Authentication & lhs, const Authentication & rhs) { return !(lhs == rhs); } @@ -82,6 +91,7 @@ private: Type type = Type::NO_PASSWORD; Digest password_hash; + String ldap_server_name; }; @@ -122,6 +132,9 @@ inline void Authentication::setPassword(const String & password_) case DOUBLE_SHA1_PASSWORD: return setPasswordHashBinary(encodeDoubleSHA1(password_)); + + case LDAP_PASSWORD: + throw Exception("Cannot specify password for the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); } throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); } @@ -145,6 +158,8 @@ inline void Authentication::setPasswordHashHex(const String & hash) inline String Authentication::getPasswordHashHex() const { + if (type == LDAP_PASSWORD) + throw Exception("Cannot get password of a user with the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); String hex; hex.resize(password_hash.size() * 2); boost::algorithm::hex(password_hash.begin(), password_hash.end(), hex.data()); @@ -186,6 +201,9 @@ inline void Authentication::setPasswordHashBinary(const Digest & hash) password_hash = hash; return; } + + case LDAP_PASSWORD: + throw Exception("Cannot specify password for the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); } throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index f973e93c76b..eb1fa0dd894 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -176,12 +176,12 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & } -bool ContextAccess::isCorrectPassword(const String & password) const +bool ContextAccess::isCorrectPassword(const String & password, const ExternalAuthenticators & external_authenticators) const { std::lock_guard lock{mutex}; if (!user) return false; - return user->authentication.isCorrectPassword(password); + return user->authentication.isCorrectPassword(password, user_name, external_authenticators); } bool ContextAccess::isClientHostAllowed() const diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 27bb29a878c..062ab37e414 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -26,6 +26,7 @@ struct QuotaUsage; struct Settings; class SettingsConstraints; class AccessControlManager; +class ExternalAuthenticators; class IAST; using ASTPtr = std::shared_ptr; @@ -62,7 +63,8 @@ public: UserPtr getUser() const; String getUserName() const; - bool isCorrectPassword(const String & password) const; + /// External authenticators may be used by only some of the authentication mechanisms. + bool isCorrectPassword(const String & password, const ExternalAuthenticators & external_authenticators) const; bool isClientHostAllowed() const; /// Returns information about current and enabled roles. 
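To see how the pieces added in this patch fit together: ExternalAuthenticators (defined next) holds the named LDAP server parameters parsed from the 'ldap_servers' config section, and Authentication::isCorrectPassword() consults it when the type is LDAP_PASSWORD. A rough usage sketch, with a hypothetical host and server name, assumed include paths, and no error handling (not part of the patch):

    #include <Access/Authentication.h>
    #include <Access/ExternalAuthenticators.h>
    #include <Access/LDAPParams.h>
    #include <string>

    bool checkLDAPLogin(const std::string & user, const std::string & password)
    {
        DB::ExternalAuthenticators authenticators;

        /// Normally filled by parseAndAddLDAPServers() from the users.xml 'ldap_servers' section.
        DB::LDAPServerParams params;
        params.host = "ldap.example.com";                       // hypothetical server
        params.auth_dn_prefix = "uid=";
        params.auth_dn_suffix = ",ou=users,dc=example,dc=com";
        authenticators.setLDAPServerParams("my_ldap_server", params);

        /// What UsersConfigAccessStorage does for a user configured with an 'ldap' entry.
        DB::Authentication auth{DB::Authentication::LDAP_PASSWORD};
        auth.setLDAPServerName("my_ldap_server");

        /// Binds to the server with the user's credentials via LDAPSimpleAuthClient.
        return auth.isCorrectPassword(password, user, authenticators);
    }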
diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp new file mode 100644 index 00000000000..273048a020e --- /dev/null +++ b/src/Access/ExternalAuthenticators.cpp @@ -0,0 +1,28 @@ +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void ExternalAuthenticators::setLDAPServerParams(const String & server, const LDAPServerParams & params) +{ + std::scoped_lock lock(mutex); + ldap_server_params.erase(server); + ldap_server_params[server] = params; +} + +LDAPServerParams ExternalAuthenticators::getLDAPServerParams(const String & server) const +{ + std::scoped_lock lock(mutex); + auto it = ldap_server_params.find(server); + if (it == ldap_server_params.end()) + throw Exception("LDAP server '" + server + "' is not configured", ErrorCodes::BAD_ARGUMENTS); + return it->second; +} + +} diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h new file mode 100644 index 00000000000..f7707e0719f --- /dev/null +++ b/src/Access/ExternalAuthenticators.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +#include +#include + + +namespace DB +{ + +class ExternalAuthenticators +{ +public: + void setLDAPServerParams(const String & server, const LDAPServerParams & params); + LDAPServerParams getLDAPServerParams(const String & server) const; + +private: + mutable std::mutex mutex; + std::map ldap_server_params; +}; + +} diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp new file mode 100644 index 00000000000..136a7af70c4 --- /dev/null +++ b/src/Access/LDAPClient.cpp @@ -0,0 +1,279 @@ +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; + extern const int LDAP_ERROR; +} + +LDAPClient::LDAPClient(const LDAPServerParams & params_) + : params(params_) +{ +} + +LDAPClient::~LDAPClient() +{ + closeConnection(); +} + +#if USE_LDAP + +namespace +{ + auto escapeForLDAP(const String & src) + { + String dest; + dest.reserve(src.size() * 2); + + for (auto ch : src) + { + switch (ch) + { + case ',': + case '\\': + case '#': + case '+': + case '<': + case '>': + case ';': + case '"': + case '=': + dest += '\\'; + break; + } + dest += ch; + } + + return dest; + } +} + +void LDAPClient::diag(const int rc) +{ + if (rc != LDAP_SUCCESS) + { + String text; + const char * raw_err_str = ldap_err2string(rc); + + if (raw_err_str) + text = raw_err_str; + + if (handle) + { + String message; + char * raw_message = nullptr; + ldap_get_option(handle, LDAP_OPT_DIAGNOSTIC_MESSAGE, &raw_message); + + if (raw_message) + { + message = raw_message; + ldap_memfree(raw_message); + raw_message = nullptr; + } + + if (!message.empty()) + { + if (!text.empty()) + text += ": "; + text += message; + } + } + + throw Exception(text, ErrorCodes::LDAP_ERROR); + } +} + +int LDAPClient::openConnection(const bool graceful_bind_failure) +{ + closeConnection(); + + { + LDAPURLDesc url; + std::memset(&url, 0, sizeof(url)); + + url.lud_scheme = const_cast(params.enable_tls == LDAPServerParams::TLSEnable::YES ? 
"ldaps" : "ldap"); + url.lud_host = const_cast(params.host.c_str()); + url.lud_port = params.port; + url.lud_scope = LDAP_SCOPE_DEFAULT; + + auto * uri = ldap_url_desc2str(&url); + if (!uri) + throw Exception("ldap_url_desc2str() failed", ErrorCodes::LDAP_ERROR); + + SCOPE_EXIT({ ldap_memfree(uri); }); + + diag(ldap_initialize(&handle, uri)); + if (!handle) + throw Exception("ldap_initialize() failed", ErrorCodes::LDAP_ERROR); + } + + { + int value = 0; + switch (params.protocol_version) + { + case LDAPServerParams::ProtocolVersion::V2: value = LDAP_VERSION2; break; + case LDAPServerParams::ProtocolVersion::V3: value = LDAP_VERSION3; break; + } + diag(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value)); + } + + diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); + diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); + + { + ::timeval operation_timeout; + operation_timeout.tv_sec = params.operation_timeout.count(); + operation_timeout.tv_usec = 0; + diag(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout)); + } + + { + ::timeval network_timeout; + network_timeout.tv_sec = params.network_timeout.count(); + network_timeout.tv_usec = 0; + diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); + } + + { + const int search_timeout = params.search_timeout.count(); + diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); + } + + { + const int size_limit = params.search_limit; + diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); + } + + { + int value = 0; + switch (params.tls_cert_verify) + { + case LDAPServerParams::TLSCertVerify::NEVER: value = LDAP_OPT_X_TLS_NEVER; break; + case LDAPServerParams::TLSCertVerify::ALLOW: value = LDAP_OPT_X_TLS_ALLOW; break; + case LDAPServerParams::TLSCertVerify::TRY: value = LDAP_OPT_X_TLS_TRY; break; + case LDAPServerParams::TLSCertVerify::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break; + } + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); + } + + if (!params.ca_cert_dir.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.ca_cert_dir.c_str())); + + if (!params.ca_cert_file.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.ca_cert_file.c_str())); + + if (params.enable_tls == LDAPServerParams::TLSEnable::YES_STARTTLS) + diag(ldap_start_tls_s(handle, nullptr, nullptr)); + + int rc = LDAP_OTHER; + + switch (params.sasl_mechanism) + { + case LDAPServerParams::SASLMechanism::SIMPLE: + { + const String dn = params.auth_dn_prefix + escapeForLDAP(params.user) + params.auth_dn_suffix; + + ::berval cred; + cred.bv_val = const_cast(params.password.c_str()); + cred.bv_len = params.password.size(); + + rc = ldap_sasl_bind_s(handle, dn.c_str(), LDAP_SASL_SIMPLE, &cred, nullptr, nullptr, nullptr); + + if (!graceful_bind_failure) + diag(rc); + + break; + } + } + + return rc; +} + +void LDAPClient::openConnection() +{ + const bool graceful_bind_failure = false; + diag(openConnection(graceful_bind_failure)); +} + +void LDAPClient::closeConnection() noexcept +{ + if (!handle) + return; + + ldap_unbind_ext_s(handle, nullptr, nullptr); + handle = nullptr; +} + +bool LDAPSimpleAuthClient::check() +{ + bool result = false; + + if (params.user.empty()) + throw Exception("LDAP authentication of a user with an empty name is not allowed", ErrorCodes::BAD_ARGUMENTS); + + const bool graceful_bind_failure = true; + const auto rc = openConnection(graceful_bind_failure); + + SCOPE_EXIT({ closeConnection(); }); + + switch (rc) + { + case LDAP_SUCCESS: 
+ { + result = true; + break; + } + + case LDAP_INVALID_CREDENTIALS: + { + result = false; + break; + } + + default: + { + result = false; + diag(rc); + break; + } + } + + return result; +} + +#else // USE_LDAP + +void LDAPClient::diag(const int) +{ + throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +} + +int LDAPClient::openConnection(const bool) +{ + throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +} + +void LDAPClient::openConnection() +{ + throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +} + +void LDAPClient::closeConnection() noexcept +{ +} + +bool LDAPSimpleAuthClient::check() +{ + throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); +} + +#endif // USE_LDAP + +} diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h new file mode 100644 index 00000000000..1187f2c2a21 --- /dev/null +++ b/src/Access/LDAPClient.h @@ -0,0 +1,55 @@ +#pragma once + +#if __has_include("config_core.h") +#include "config_core.h" +#endif + +#include +#include + +#if USE_LDAP +#include +#define MAYBE_NORETURN +#else +#define MAYBE_NORETURN [[noreturn]] +#endif + + +namespace DB +{ + +class LDAPClient +{ +public: + explicit LDAPClient(const LDAPServerParams & params_); + ~LDAPClient(); + + LDAPClient(const LDAPClient &) = delete; + LDAPClient(LDAPClient &&) = delete; + LDAPClient & operator= (const LDAPClient &) = delete; + LDAPClient & operator= (LDAPClient &&) = delete; + +protected: + int openConnection(const bool graceful_bind_failure = false); + MAYBE_NORETURN void openConnection(); + void closeConnection() noexcept; + MAYBE_NORETURN void diag(const int rc); + +protected: + LDAPServerParams params; +#if USE_LDAP + LDAP * handle = nullptr; +#endif +}; + +class LDAPSimpleAuthClient + : private LDAPClient +{ +public: + using LDAPClient::LDAPClient; + bool check(); +}; + +} + +#undef MAYBE_NORETURN diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h new file mode 100644 index 00000000000..ed28526d29d --- /dev/null +++ b/src/Access/LDAPParams.h @@ -0,0 +1,63 @@ +#pragma once + +#include + +#include + + +namespace DB +{ + +struct LDAPServerParams +{ + enum class ProtocolVersion + { + V2, + V3 + }; + + enum class TLSEnable + { + NO, + YES_STARTTLS, + YES + }; + + enum class TLSCertVerify + { + NEVER, + ALLOW, + TRY, + DEMAND + }; + + enum class SASLMechanism + { + SIMPLE + }; + + ProtocolVersion protocol_version = ProtocolVersion::V3; + + String host; + std::uint16_t port = 636; + + TLSEnable enable_tls = TLSEnable::YES; + TLSCertVerify tls_cert_verify = TLSCertVerify::DEMAND; + String ca_cert_dir; + String ca_cert_file; + + SASLMechanism sasl_mechanism = SASLMechanism::SIMPLE; + + String auth_dn_prefix; + String auth_dn_suffix; + + String user; + String password; + + std::chrono::seconds operation_timeout{40}; + std::chrono::seconds network_timeout{30}; + std::chrono::seconds search_timeout{20}; + std::uint32_t search_limit = 100; +}; + +} diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index ce33383548f..2482bca053a 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -55,13 +55,14 @@ namespace bool has_password = config.has(user_config + ".password"); bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); bool 
has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); + bool has_ldap = config.has(user_config + ".ldap"); - if (has_password + has_password_sha256_hex + has_password_double_sha1_hex > 1) - throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex' is used to specify password for user " + user_name + ". Must be only one of them.", + if (has_password + has_password_sha256_hex + has_password_double_sha1_hex + has_ldap > 1) + throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex', 'ldap' is specified for user " + user_name + ". Must be only one of them.", ErrorCodes::BAD_ARGUMENTS); - if (!has_password && !has_password_sha256_hex && !has_password_double_sha1_hex) - throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); + if (!has_password && !has_password_sha256_hex && !has_password_double_sha1_hex && !has_ldap) + throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' or 'ldap' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); if (has_password) { @@ -78,6 +79,17 @@ namespace user->authentication = Authentication{Authentication::DOUBLE_SHA1_PASSWORD}; user->authentication.setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex")); } + else if (has_ldap) + { + bool has_ldap_server = config.has(user_config + ".ldap.server"); + if (!has_ldap_server) + throw Exception("Missing mandatory 'server' in 'ldap', with LDAP server name, for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); + + const auto ldap_server_name = config.getString(user_config + ".ldap.server"); + + user->authentication = Authentication{Authentication::LDAP_PASSWORD}; + user->authentication.setLDAPServerName(ldap_server_name); + } const auto profile_name_config = user_config + ".profile"; if (config.has(profile_name_config)) diff --git a/src/Access/ya.make b/src/Access/ya.make index 970c0714a93..de56306c827 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -18,10 +18,12 @@ SRCS( EnabledRowPolicies.cpp EnabledSettings.cpp ExtendedRoleSet.cpp + ExternalAuthenticators.cpp GrantedAccess.cpp GrantedRoles.cpp IAccessEntity.cpp IAccessStorage.cpp + LDAPClient.cpp MemoryAccessStorage.cpp MultipleAccessStorage.cpp Quota.cpp diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index a8bd41162e3..8ab04f1b933 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -496,6 +496,7 @@ namespace ErrorCodes extern const int OPENCL_ERROR = 522; extern const int UNKNOWN_ROW_POLICY = 523; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524; + extern const int LDAP_ERROR = 525; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/src/Core/config_core.h.in b/src/Core/config_core.h.in index 054ee9a80b7..10f6ef98c6d 100644 --- a/src/Core/config_core.h.in +++ b/src/Core/config_core.h.in @@ -8,3 +8,4 @@ #cmakedefine01 USE_EMBEDDED_COMPILER #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL +#cmakedefine01 USE_LDAP diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index dc94b32a34d..8ab5a79942f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -661,7 +661,7 @@ void Context::setUser(const String & name, const String & password, const Poco:: if (new_user_id) { new_access = 
getAccessControlManager().getContextAccess(*new_user_id, {}, true, settings, current_database, client_info); - if (!new_access->isClientHostAllowed() || !new_access->isCorrectPassword(password)) + if (!new_access->isClientHostAllowed() || !new_access->isCorrectPassword(password, getAccessControlManager().getExternalAuthenticators())) { new_user_id = {}; new_access = nullptr; diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index c8e2a76dfa2..fdf2dfdf81f 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -35,6 +35,10 @@ namespace settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "double_sha1_hash BY " << (settings.hilite ? IAST::hilite_none : "") << quoteString(authentication.getPasswordHashHex()); break; + case Authentication::Type::LDAP_PASSWORD: + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "ldap BY " << (settings.hilite ? IAST::hilite_none : "") + << quoteString(authentication.getLDAPServerName()); + break; } } diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 54dc51d783b..e954adc7b53 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -12,14 +12,14 @@ class ASTExtendedRoleSet; class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP}] BY {'password'|'hash'|'server_name'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] + * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD|LDAP}] BY {'password'|'hash'|'server_name'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
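The extended grammar documented above adds one more IDENTIFIED WITH alternative; for LDAP, the string after BY names a server from the config rather than a password or hash. A small sketch of feeding such a statement through the new parser branch, assuming the usual parseQuery() and serializeAST() helpers from src/Parsers (illustrative only, not part of the patch):

    #include <Parsers/ParserCreateUserQuery.h>
    #include <Parsers/parseQuery.h>
    #include <Parsers/formatAST.h>
    #include <iostream>
    #include <string>

    int main()
    {
        /// 'my_ldap_server' must match a server defined in the 'ldap_servers' config section.
        std::string query = "CREATE USER alice IDENTIFIED WITH ldap BY 'my_ldap_server'";

        DB::ParserCreateUserQuery parser;
        DB::ASTPtr ast = DB::parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0);

        /// Round-trips through the AST formatting added in ASTCreateUserQuery.cpp above.
        std::cout << DB::serializeAST(*ast) << std::endl;
        return 0;
    }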
diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 76a06a0282f..e07d76e6f32 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -125,6 +125,17 @@ namespace return true; } + if (ParserKeyword{"LDAP"}.ignore(pos, expected)) + { + String server_name; + if (!parseByPassword(pos, expected, server_name)) + return false; + + authentication = Authentication{Authentication::LDAP_PASSWORD}; + authentication->setLDAPServerName(server_name); + return true; + } + if (!ParserKeyword{"NO_PASSWORD"}.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index d609894a7ec..e7235279584 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -7,13 +7,13 @@ namespace DB { /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP}] BY {'password'|'hash'|'server_name'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] + * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD|LDAP}] BY {'password'|'hash'|'server_name'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
*/ From 856245d76676b2f25f7959e1be9f5b2d5fe2cf8a Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 28 May 2020 01:40:56 +0400 Subject: [PATCH 0198/2229] Revert merge artefacts --- contrib/cppkafka | 2 +- contrib/grpc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/cppkafka b/contrib/cppkafka index 9b184d881c1..f555ee36aaa 160000 --- a/contrib/cppkafka +++ b/contrib/cppkafka @@ -1 +1 @@ -Subproject commit 9b184d881c15cc50784b28688c7c99d3d764db24 +Subproject commit f555ee36aaa74d17ca0dab3ce472070a610b2966 diff --git a/contrib/grpc b/contrib/grpc index c1d176528fd..8aea4e168e7 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit c1d176528fd8da9dd4066d16554bcd216d29033f +Subproject commit 8aea4e168e78f3eb9828080740fc8cb73d53bf79 From d4fd018715297bc97d714a0d22a6416d6a5e6081 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Thu, 28 May 2020 02:54:14 +0400 Subject: [PATCH 0199/2229] Fix linking errors in parser-related test executables --- src/Access/Authentication.cpp | 10 ---------- src/Access/Authentication.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index e7e8b10aabc..9c8956e9253 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -13,16 +13,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -const String & Authentication::getLDAPServerName() const -{ - return ldap_server_name; -} - -void Authentication::setLDAPServerName(const String & server_name) -{ - ldap_server_name = server_name; -} - Authentication::Digest Authentication::getPasswordDoubleSHA1() const { switch (type) diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 2e166a431f9..903b8046a5d 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -208,4 +208,14 @@ inline void Authentication::setPasswordHashBinary(const Digest & hash) throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); } +inline const String & Authentication::getLDAPServerName() const +{ + return ldap_server_name; +} + +inline void Authentication::setLDAPServerName(const String & server_name) +{ + ldap_server_name = server_name; +} + } From e51e828d5c8cfcdc4489fd719ffa6338a9b7caf9 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Mon, 25 May 2020 02:55:00 +0300 Subject: [PATCH 0200/2229] init --- src/Common/SharedBlockRowRef.h | 30 +++++++ src/Interpreters/InterpreterSelectQuery.cpp | 1 + .../OptimizedPartialSortingTransform.cpp | 83 +++++++++++++++++++ .../OptimizedPartialSortingTransform.h | 34 ++++++++ src/Processors/ya.make | 1 + 5 files changed, 149 insertions(+) create mode 100644 src/Processors/Transforms/OptimizedPartialSortingTransform.cpp create mode 100644 src/Processors/Transforms/OptimizedPartialSortingTransform.h diff --git a/src/Common/SharedBlockRowRef.h b/src/Common/SharedBlockRowRef.h index 193f7e4dd05..e9fd076da07 100644 --- a/src/Common/SharedBlockRowRef.h +++ b/src/Common/SharedBlockRowRef.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -86,4 +87,33 @@ struct SharedBlockRowRef } }; +struct SharedBlockRowWithSortDescriptionRef : SharedBlockRowRef +{ + SortDescription * description = nullptr; + + void set(SharedBlockPtr & shared_block_, ColumnRawPtrs * columns_, size_t row_num_) = delete; + + bool operator< (const SharedBlockRowRef & other) const + { + size_t size = columns->size(); + for (size_t i = 0; i < size; ++i) + { 
+ int res = (*description)[i].direction * (*columns)[i]->compareAt(row_num, other.row_num, *(*other.columns)[i], 1); + if (res < 0) + return true; + else if (res > 0) + return false; + } + return false; + } + + void set(SharedBlockPtr & shared_block_, ColumnRawPtrs * columns_, size_t row_num_, SortDescription * description_) + { + shared_block = shared_block_; + columns = columns_; + row_num = row_num_; + description = description_; + } +}; + } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 4f717eda706..3bccc7ba2d5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -84,6 +84,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp b/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp new file mode 100644 index 00000000000..e753052a2b3 --- /dev/null +++ b/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp @@ -0,0 +1,83 @@ +#include +#include +#include + +namespace DB +{ + +OptimizedPartialSortingTransform::OptimizedPartialSortingTransform( + const Block & header_, SortDescription & description_, UInt64 limit_) + : ISimpleTransform(header_, header_, false) + , description(description_), limit(limit_) + , threshold_shared_block(nullptr) +{ +} + +static ColumnRawPtrs extractColumns(const Block & block, const SortDescription& description) +{ + size_t size = description.size(); + ColumnRawPtrs res; + res.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const IColumn * column = !description[i].column_name.empty() + ? block.getByName(description[i].column_name).column.get() + : block.safeGetByPosition(description[i].column_number).column.get(); + res.emplace_back(column); + } + + return res; +} + +void OptimizedPartialSortingTransform::transform(Chunk & chunk) +{ + if (read_rows) + read_rows->add(chunk.getNumRows()); + + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + chunk.clear(); + + SharedBlockPtr shared_block = new detail::SharedBlock(std::move(block)); + UInt64 rows_num = shared_block->rows(); + + + if (threshold_shared_block) { + SharedBlockRowWithSortDescriptionRef row; + IColumn::Filter filter(rows_num); + ColumnRawPtrs shared_block_columns = extractColumns(*shared_block, description); + size_t filtered_count = 0; + + for (UInt64 i = 0; i < rows_num; ++i) { + row.set(shared_block, &shared_block_columns, i, &description); + + if (threshold_row < row) + { + ++filtered_count; + filter[i] = 1; + } + } + + if (filtered_count) + { + for (auto & column : shared_block->getColumns()) + { + column = column->filter(filter, filtered_count); + } + } + } + + sortBlock(*shared_block, description, limit); + + if (!threshold_shared_block && limit && limit < rows_num) + { + Block threshold_block = shared_block->cloneWithColumns(shared_block->getColumns()); + threshold_shared_block = new detail::SharedBlock(std::move(threshold_block)); + threshold_block_columns = extractColumns(*threshold_shared_block, description); + threshold_row.set(threshold_shared_block, &threshold_block_columns, limit - 1, &description); + } + + chunk.setColumns(shared_block->getColumns(), shared_block->rows()); +} + +} diff --git a/src/Processors/Transforms/OptimizedPartialSortingTransform.h b/src/Processors/Transforms/OptimizedPartialSortingTransform.h new file mode 100644 index 00000000000..20e72bd836f --- /dev/null +++ 
b/src/Processors/Transforms/OptimizedPartialSortingTransform.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include +#include + + +namespace DB +{ +class OptimizedPartialSortingTransform : public ISimpleTransform +{ +public: + OptimizedPartialSortingTransform( + const Block & header_, + SortDescription & description_, + UInt64 limit_ = 0); + + String getName() const override { return "OptimizedPartialSortingTransform"; } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { read_rows.swap(counter); } + +protected: + void transform(Chunk & chunk) override; + +private: + SortDescription description; + UInt64 limit; + RowsBeforeLimitCounterPtr read_rows; + SharedBlockRowWithSortDescriptionRef threshold_row; + SharedBlockPtr threshold_shared_block; + ColumnRawPtrs threshold_block_columns; +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 62320f1c147..5952341527b 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -129,6 +129,7 @@ SRCS( Transforms/MergeSortingTransform.cpp Transforms/MergingAggregatedMemoryEfficientTransform.cpp Transforms/MergingAggregatedTransform.cpp + Transforms/OptimizedPartialSortingTransform.cpp Transforms/PartialSortingTransform.cpp Transforms/ReverseTransform.cpp Transforms/RollupTransform.cpp From 5f013a365063495e904d566a3d97508b7fc1aaa9 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Mon, 25 May 2020 03:32:08 +0300 Subject: [PATCH 0201/2229] fix --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 3bccc7ba2d5..db4e8eed99f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2109,7 +2109,7 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting if (stream_type != QueryPipeline::StreamType::Main) return nullptr; - return std::make_shared(header, output_order_descr, limit); + return std::make_shared(header, output_order_descr, limit); }); pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr From e24e6257c7715ad2fb1d94eb0ef38de4eb1fd989 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Wed, 27 May 2020 14:52:45 +0300 Subject: [PATCH 0202/2229] fix --- src/Common/SharedBlockRowRef.h | 29 ------- src/Interpreters/InterpreterSelectQuery.cpp | 3 +- .../OptimizedPartialSortingTransform.cpp | 83 ------------------- .../OptimizedPartialSortingTransform.h | 34 -------- .../Transforms/PartialSortingTransform.cpp | 66 +++++++++++++++ .../Transforms/PartialSortingTransform.h | 2 + 6 files changed, 69 insertions(+), 148 deletions(-) delete mode 100644 src/Processors/Transforms/OptimizedPartialSortingTransform.cpp delete mode 100644 src/Processors/Transforms/OptimizedPartialSortingTransform.h diff --git a/src/Common/SharedBlockRowRef.h b/src/Common/SharedBlockRowRef.h index e9fd076da07..957d66243dd 100644 --- a/src/Common/SharedBlockRowRef.h +++ b/src/Common/SharedBlockRowRef.h @@ -87,33 +87,4 @@ struct SharedBlockRowRef } }; -struct SharedBlockRowWithSortDescriptionRef : SharedBlockRowRef -{ - SortDescription * description = nullptr; - - void set(SharedBlockPtr & shared_block_, ColumnRawPtrs * columns_, size_t row_num_) = delete; - - bool operator< (const SharedBlockRowRef & other) const - { - size_t size = columns->size(); - for (size_t i = 0; i < size; ++i) - { - int res = (*description)[i].direction * 
(*columns)[i]->compareAt(row_num, other.row_num, *(*other.columns)[i], 1); - if (res < 0) - return true; - else if (res > 0) - return false; - } - return false; - } - - void set(SharedBlockPtr & shared_block_, ColumnRawPtrs * columns_, size_t row_num_, SortDescription * description_) - { - shared_block = shared_block_; - columns = columns_; - row_num = row_num_; - description = description_; - } -}; - } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index db4e8eed99f..4f717eda706 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -84,7 +84,6 @@ #include #include #include -#include #include #include #include @@ -2109,7 +2108,7 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting if (stream_type != QueryPipeline::StreamType::Main) return nullptr; - return std::make_shared(header, output_order_descr, limit); + return std::make_shared(header, output_order_descr, limit); }); pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr diff --git a/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp b/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp deleted file mode 100644 index e753052a2b3..00000000000 --- a/src/Processors/Transforms/OptimizedPartialSortingTransform.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -OptimizedPartialSortingTransform::OptimizedPartialSortingTransform( - const Block & header_, SortDescription & description_, UInt64 limit_) - : ISimpleTransform(header_, header_, false) - , description(description_), limit(limit_) - , threshold_shared_block(nullptr) -{ -} - -static ColumnRawPtrs extractColumns(const Block & block, const SortDescription& description) -{ - size_t size = description.size(); - ColumnRawPtrs res; - res.reserve(size); - - for (size_t i = 0; i < size; ++i) - { - const IColumn * column = !description[i].column_name.empty() - ? 
block.getByName(description[i].column_name).column.get() - : block.safeGetByPosition(description[i].column_number).column.get(); - res.emplace_back(column); - } - - return res; -} - -void OptimizedPartialSortingTransform::transform(Chunk & chunk) -{ - if (read_rows) - read_rows->add(chunk.getNumRows()); - - auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); - chunk.clear(); - - SharedBlockPtr shared_block = new detail::SharedBlock(std::move(block)); - UInt64 rows_num = shared_block->rows(); - - - if (threshold_shared_block) { - SharedBlockRowWithSortDescriptionRef row; - IColumn::Filter filter(rows_num); - ColumnRawPtrs shared_block_columns = extractColumns(*shared_block, description); - size_t filtered_count = 0; - - for (UInt64 i = 0; i < rows_num; ++i) { - row.set(shared_block, &shared_block_columns, i, &description); - - if (threshold_row < row) - { - ++filtered_count; - filter[i] = 1; - } - } - - if (filtered_count) - { - for (auto & column : shared_block->getColumns()) - { - column = column->filter(filter, filtered_count); - } - } - } - - sortBlock(*shared_block, description, limit); - - if (!threshold_shared_block && limit && limit < rows_num) - { - Block threshold_block = shared_block->cloneWithColumns(shared_block->getColumns()); - threshold_shared_block = new detail::SharedBlock(std::move(threshold_block)); - threshold_block_columns = extractColumns(*threshold_shared_block, description); - threshold_row.set(threshold_shared_block, &threshold_block_columns, limit - 1, &description); - } - - chunk.setColumns(shared_block->getColumns(), shared_block->rows()); -} - -} diff --git a/src/Processors/Transforms/OptimizedPartialSortingTransform.h b/src/Processors/Transforms/OptimizedPartialSortingTransform.h deleted file mode 100644 index 20e72bd836f..00000000000 --- a/src/Processors/Transforms/OptimizedPartialSortingTransform.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include -#include -#include -#include - - -namespace DB -{ -class OptimizedPartialSortingTransform : public ISimpleTransform -{ -public: - OptimizedPartialSortingTransform( - const Block & header_, - SortDescription & description_, - UInt64 limit_ = 0); - - String getName() const override { return "OptimizedPartialSortingTransform"; } - - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { read_rows.swap(counter); } - -protected: - void transform(Chunk & chunk) override; - -private: - SortDescription description; - UInt64 limit; - RowsBeforeLimitCounterPtr read_rows; - SharedBlockRowWithSortDescriptionRef threshold_row; - SharedBlockPtr threshold_shared_block; - ColumnRawPtrs threshold_block_columns; -}; - -} diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 018614f0165..062064c0fd6 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -11,6 +12,38 @@ PartialSortingTransform::PartialSortingTransform( { } +static ColumnRawPtrs extractColumns(const Block & block, const SortDescription & description) +{ + size_t size = description.size(); + ColumnRawPtrs res; + res.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const IColumn * column = !description[i].column_name.empty() + ? 
block.getByName(description[i].column_name).column.get() + : block.safeGetByPosition(description[i].column_number).column.get(); + res.emplace_back(column); + } + + return res; +} + +bool less(const ColumnRawPtrs & lhs, UInt64 lhs_row_num, + const ColumnRawPtrs & rhs, UInt64 rhs_row_num, const SortDescription & description) +{ + size_t size = description.size(); + for (size_t i = 0; i < size; ++i) + { + int res = description[i].direction * lhs[i]->compareAt(lhs_row_num, rhs_row_num, *rhs[i], 1); + if (res < 0) + return true; + else if (res > 0) + return false; + } + return false; +} + void PartialSortingTransform::transform(Chunk & chunk) { if (read_rows) @@ -19,7 +52,40 @@ void PartialSortingTransform::transform(Chunk & chunk) auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); chunk.clear(); + UInt64 rows_num = block.rows(); + + if (!threshold_block_columns.empty()) + { + IColumn::Filter filter(rows_num, 0); + ColumnRawPtrs block_columns = extractColumns(block, description); + size_t filtered_count = 0; + + for (UInt64 i = 0; i < rows_num; ++i) + { + if (less(threshold_block_columns, limit - 1, block_columns, i, description)) + { + ++filtered_count; + filter[i] = 1; + } + } + + if (filtered_count) + { + for (auto & column : block.getColumns()) + { + column = column->filter(filter, filtered_count); + } + } + } + sortBlock(block, description, limit); + + if (threshold_block_columns.empty() && limit && limit < rows_num) + { + threshold_block = block.cloneWithColumns(block.getColumns()); + threshold_block_columns = extractColumns(threshold_block, description); + } + chunk.setColumns(block.getColumns(), block.rows()); } diff --git a/src/Processors/Transforms/PartialSortingTransform.h b/src/Processors/Transforms/PartialSortingTransform.h index 47ac90c6904..d6749e4dfad 100644 --- a/src/Processors/Transforms/PartialSortingTransform.h +++ b/src/Processors/Transforms/PartialSortingTransform.h @@ -29,6 +29,8 @@ private: SortDescription description; UInt64 limit; RowsBeforeLimitCounterPtr read_rows; + Block threshold_block; + ColumnRawPtrs threshold_block_columns; }; } From c301471c53bf298230c195f27ed0c2232f1928c5 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Wed, 27 May 2020 14:56:01 +0300 Subject: [PATCH 0203/2229] fix --- src/Common/SharedBlockRowRef.h | 1 - src/Processors/ya.make | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Common/SharedBlockRowRef.h b/src/Common/SharedBlockRowRef.h index 957d66243dd..193f7e4dd05 100644 --- a/src/Common/SharedBlockRowRef.h +++ b/src/Common/SharedBlockRowRef.h @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 5952341527b..62320f1c147 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -129,7 +129,6 @@ SRCS( Transforms/MergeSortingTransform.cpp Transforms/MergingAggregatedMemoryEfficientTransform.cpp Transforms/MergingAggregatedTransform.cpp - Transforms/OptimizedPartialSortingTransform.cpp Transforms/PartialSortingTransform.cpp Transforms/ReverseTransform.cpp Transforms/RollupTransform.cpp From fedf1f5dd2707826a0276e037b15e95236b4f6ce Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Wed, 27 May 2020 15:57:14 +0300 Subject: [PATCH 0204/2229] more relaxation --- src/Processors/Transforms/PartialSortingTransform.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 
062064c0fd6..ad45159f77e 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -52,12 +52,13 @@ void PartialSortingTransform::transform(Chunk & chunk) auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); chunk.clear(); + ColumnRawPtrs block_columns; UInt64 rows_num = block.rows(); if (!threshold_block_columns.empty()) { IColumn::Filter filter(rows_num, 0); - ColumnRawPtrs block_columns = extractColumns(block, description); + block_columns = extractColumns(block, description); size_t filtered_count = 0; for (UInt64 i = 0; i < rows_num; ++i) @@ -80,7 +81,8 @@ void PartialSortingTransform::transform(Chunk & chunk) sortBlock(block, description, limit); - if (threshold_block_columns.empty() && limit && limit < rows_num) + if (limit && limit < rows_num && + (threshold_block_columns.empty() || less(block_columns, limit - 1, threshold_block_columns, limit - 1, description))) { threshold_block = block.cloneWithColumns(block.getColumns()); threshold_block_columns = extractColumns(threshold_block, description); From c38caafe935fdd54349eefe2478a7b57fede591b Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Wed, 27 May 2020 18:38:29 +0300 Subject: [PATCH 0205/2229] fix tests --- src/Processors/Transforms/PartialSortingTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index ad45159f77e..f68a415117b 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -81,7 +81,7 @@ void PartialSortingTransform::transform(Chunk & chunk) sortBlock(block, description, limit); - if (limit && limit < rows_num && + if (limit && limit < block.rows() && (threshold_block_columns.empty() || less(block_columns, limit - 1, threshold_block_columns, limit - 1, description))) { threshold_block = block.cloneWithColumns(block.getColumns()); From 4769ce727176007239387909a30a1e5a7d11453a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 May 2020 02:59:13 +0300 Subject: [PATCH 0206/2229] in-memory parts: several fixes --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataPartInMemory.h | 2 +- .../MergeTree/MergeTreeReaderInMemory.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 13 +++++++----- .../01130_in_memory_parts_default.reference | 1 + .../01130_in_memory_parts_default.sql | 20 +++++++++++++++++++ 6 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_default.reference create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_default.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c6bc54eabe8..d9b6b5e8780 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1351,7 +1351,7 @@ void MergeTreeData::clearOldWriteAheadLogs() auto is_range_on_disk = [&block_numbers_on_disk](Int64 min_block, Int64 max_block) { - auto lower = std::upper_bound(block_numbers_on_disk.begin(), block_numbers_on_disk.end(), std::make_pair(min_block, -1L)); + auto lower = std::lower_bound(block_numbers_on_disk.begin(), block_numbers_on_disk.end(), std::make_pair(min_block, -1L)); if (lower != block_numbers_on_disk.end() && min_block >= lower->first && max_block <= lower->second) return true; diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 29c01805529..b264ff73436 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -39,7 +39,7 @@ public: bool isStoredOnDisk() const override { return false; } - bool hasColumnFiles(const String & /* column_name */, const IDataType & /* type */) const override { return true; } + bool hasColumnFiles(const String & column_name, const IDataType & /* type */) const override { return !!getColumnPosition(column_name); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 671b36dfe86..5e4c3e49e3b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -49,7 +49,7 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool /* continue_read if (!part_in_memory->block.has(name)) continue; - const auto block_column = part_in_memory->block.getByName(name).column; + const auto & block_column = part_in_memory->block.getByName(name).column; if (total_rows_read == 0 && part_rows <= max_rows_to_read) { res_columns[i] = block_column; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 73ae4cdd33c..c97319456ce 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -637,13 +637,16 @@ bool StorageMergeTree::merge( merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr); DataPartsVector parts_to_remove_immediately; - for (const auto & part : future_part.parts) { - part->notifyMerged(); - if (isInMemoryPart(part)) + auto lock = lockParts(); + for (const auto & part : future_part.parts) { - modifyPartState(part, DataPartState::Deleting); - parts_to_remove_immediately.push_back(part); + part->notifyMerged(); + if (isInMemoryPart(part)) + { + modifyPartState(part, DataPartState::Deleting); + parts_to_remove_immediately.push_back(part); + } } } diff --git a/tests/queries/0_stateless/01130_in_memory_parts_default.reference b/tests/queries/0_stateless/01130_in_memory_parts_default.reference new file mode 100644 index 00000000000..c51afdb6658 --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_default.reference @@ -0,0 +1 @@ +0 bbb_aaa diff --git a/tests/queries/0_stateless/01130_in_memory_parts_default.sql b/tests/queries/0_stateless/01130_in_memory_parts_default.sql new file mode 100644 index 00000000000..61e20c84f3d --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_default.sql @@ -0,0 +1,20 @@ +-- Test 01266_default_prewhere_reqq, but with in-memory parts +DROP TABLE IF EXISTS t1; + +CREATE TABLE t1 +( + date Date, + s1 String, + s2 String +) ENGINE = MergeTree() PARTITION BY toYYYYMMDD(date) ORDER BY (date, s1) +SETTINGS index_granularity = 8192, min_rows_for_compact_part = 1000, min_rows_for_wide_part = 1000; + + +set max_threads=1; + +insert into t1 (date, s1,s2) values(today()-1,'aaa','bbb'); +alter table t1 add column s3 String DEFAULT concat(s2,'_',s1); +-- insert into t1 (date, s1,s2) values(today(),'aaa2','bbb2'); +select ignore(date), s3 from t1 where s2='bbb'; + +DROP TABLE t1; From c91ca6fd4b16e670d82e6ca26a6c80fd2e53d111 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Fri, 29 May 2020 04:50:02 +0300 Subject: [PATCH 0207/2229] fix optimization, add comments 
and performance test
---
 .../Transforms/PartialSortingTransform.cpp    | 19 +++++++++++++++----
 .../Transforms/PartialSortingTransform.h      |  4 ++++
 tests/performance/order_by_with_limit.xml     |  9 +++++++++
 3 files changed, 28 insertions(+), 4 deletions(-)
 create mode 100644 tests/performance/order_by_with_limit.xml

diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp
index f68a415117b..23cf80439ef 100644
--- a/src/Processors/Transforms/PartialSortingTransform.cpp
+++ b/src/Processors/Transforms/PartialSortingTransform.cpp
@@ -55,9 +55,12 @@ void PartialSortingTransform::transform(Chunk & chunk)
     ColumnRawPtrs block_columns;
     UInt64 rows_num = block.rows();
 
+    /** If we've saved columns from previous blocks, we can filter out all rows of the current block
+      * that are unnecessary for sortBlock(...), because they obviously won't be in the top LIMIT rows.
+      */
     if (!threshold_block_columns.empty())
     {
-        IColumn::Filter filter(rows_num, 0);
+        IColumn::Filter filter(rows_num, 1);
         block_columns = extractColumns(block, description);
         size_t filtered_count = 0;
 
@@ -66,7 +69,7 @@ void PartialSortingTransform::transform(Chunk & chunk)
             if (less(threshold_block_columns, limit - 1, block_columns, i, description))
             {
                 ++filtered_count;
-                filter[i] = 1;
+                filter[i] = 0;
             }
         }
 
@@ -74,14 +77,22 @@ void PartialSortingTransform::transform(Chunk & chunk)
         {
             for (auto & column : block.getColumns())
             {
-                column = column->filter(filter, filtered_count);
+                column = column->filter(filter, rows_num - filtered_count);
             }
         }
     }
 
     sortBlock(block, description, limit);
 
-    if (limit && limit < block.rows() &&
+    if (!threshold_block_columns.empty())
+    {
+        block_columns = extractColumns(block, description);
+    }
+
+    /** If this is the first processed block, or the (limit - 1)'th row of the current block
+      * is less than the current threshold row, we can update the threshold.
+      */
+    if (limit && limit <= block.rows() &&
         (threshold_block_columns.empty() || less(block_columns, limit - 1, threshold_block_columns, limit - 1, description)))
     {
         threshold_block = block.cloneWithColumns(block.getColumns());
diff --git a/src/Processors/Transforms/PartialSortingTransform.h b/src/Processors/Transforms/PartialSortingTransform.h
index d6749e4dfad..13d3775796b 100644
--- a/src/Processors/Transforms/PartialSortingTransform.h
+++ b/src/Processors/Transforms/PartialSortingTransform.h
@@ -29,6 +29,10 @@ private:
    SortDescription description;
    UInt64 limit;
    RowsBeforeLimitCounterPtr read_rows;
+    /** threshold_block is used to keep columns from a previously processed block.
+      * threshold_block_columns contains pointers to the columns of threshold_block which are used for comparison.
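+      * The block must outlive the raw pointers: threshold_block owns the columns,
+      * while threshold_block_columns is what less() actually reads.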
+ * That's all for PartialSort optimization + */ Block threshold_block; ColumnRawPtrs threshold_block_columns; }; diff --git a/tests/performance/order_by_with_limit.xml b/tests/performance/order_by_with_limit.xml new file mode 100644 index 00000000000..b45f42071de --- /dev/null +++ b/tests/performance/order_by_with_limit.xml @@ -0,0 +1,9 @@ + + + sorting + comparison + + + SELECT rand64() AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 500 + + From f6a220916991d379d6bce7a320304e982184ded5 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 30 Mar 2020 18:36:02 +0200 Subject: [PATCH 0208/2229] Add target specific macros --- dbms/src/Functions/DynamicTarget/Target.h | 106 ++++++++++++++++++++++ src/Functions/IFunctionImpl.h | 2 +- 2 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Functions/DynamicTarget/Target.h diff --git a/dbms/src/Functions/DynamicTarget/Target.h b/dbms/src/Functions/DynamicTarget/Target.h new file mode 100644 index 00000000000..5e3032ded3e --- /dev/null +++ b/dbms/src/Functions/DynamicTarget/Target.h @@ -0,0 +1,106 @@ +#pragma once + +namespace DB::DynamicTarget +{ + +enum class TargetArch : int { + Scalar, + SSE4, + AVX, + AVX2, + AVX512, +}; + +#if defined(__GNUC__) +// TODO: There are lots of different AVX512 :( +# define BEGIN_AVX512_SPECIFIC_CODE \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native\")") +# define BEGIN_AVX2_SPECIFIC_CODE \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native\")") +# define BEGIN_AVX_SPECIFIC_CODE \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native\")") +# define BEGIN_SSE4_SPECIFIC_CODE \ + _Pragma("GCC push_options") \ + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") +# define END_TARGET_SPECIFIC_CODE \ + _Pragma("GCC pop_options") +#elif defined(__clang__) +// TODO: There are lots of different AVX512 :( +# define BEGIN_AVX512_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX2_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx\"))))") +# define BEGIN_SSE4_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx\"))))") +# define END_TARGET_SPECIFIC_CODE \ + _Pragma("clang attribute pop") +#else +# error "Only CLANG and GCC compilers are supported" +#endif + +#define DECLARE_DEFAULT_CODE (...) \ +namespace TargetSpecific::Default { \ + __VA_ARGS__ \ +} + +#define DECLARE_SSE4_SPECIFIC_CODE (...) \ +BEGIN_SSE4_SPECIFIC_CODE \ +namespace TargetSpecific::SSE4 { \ + __VA_ARGS__ \ +} \ +END_TARGET_SPECIFIC_CODE + +#define DECLARE_AVX_SPECIFIC_CODE (...) \ +BEGIN_AVX_SPECIFIC_CODE \ +namespace TargetSpecific::AVX { \ + __VA_ARGS__ \ +} \ +END_TARGET_SPECIFIC_CODE + +#define DECLARE_AVX2_SPECIFIC_CODE (...) \ +BEGIN_AVX2_SPECIFIC_CODE \ +namespace TargetSpecific::AVX2 { \ + __VA_ARGS__ \ +} \ +END_TARGET_SPECIFIC_CODE + +#define DECLARE_AVX512_SPECIFIC_CODE (...) \ +BEGIN_AVX512_SPECIFIC_CODE \ +namespace TargetSpecific::AVX512 { \ + __VA_ARGS__ \ +} \ +END_TARGET_SPECIFIC_CODE + +#define DYNAMIC_CODE (...) 
\ +DECLARE_DEFAULT_CODE (__VA_ARGS__) \ +DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX512_SPECIFIC_CODE(__VA_ARGS__) + +DECLARE_DEFAULT_CODE( + constexpr auto BuildArch = TargetArch::Scalar; +) // DECLARE_DEFAULT_CODE + +DECLARE_SSE4_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::SSE4; +) // DECLARE_SSE4_SPECIFIC_CODE + +DECLARE_AVX_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX; +) // DECLARE_AVX_SPECIFIC_CODE + +DECLARE_AVX2_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX2; +) // DECLARE_AVX2_SPECIFIC_CODE + +DECLARE_AVX512_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX512; +) // DECLARE_AVX512_SPECIFIC_CODE + +} // namespace DB::DynamicTarget \ No newline at end of file diff --git a/src/Functions/IFunctionImpl.h b/src/Functions/IFunctionImpl.h index 116363705de..4da3e9ca056 100644 --- a/src/Functions/IFunctionImpl.h +++ b/src/Functions/IFunctionImpl.h @@ -5,7 +5,7 @@ /// In order to implement a new function you can choose one of two options: /// * Implement interface for IFunction (old function interface, which is planned to be removed sometimes) /// * Implement three interfaces for IExecutableFunctionImpl, IFunctionBaseImpl and IFunctionOverloadResolverImpl -/// Generally saying, IFunction represents a union of tree new interfaces. However, it can't be used for all cases. +/// Generally saying, IFunction represents a union of three new interfaces. However, it can't be used for all cases. /// Examples: /// * Function properties may depend on arguments type (e.g. toUInt32(UInt8) is globally monotonic, toUInt32(UInt64) - only on intervals) /// * In implementation of lambda functions DataTypeFunction needs an functional object with known arguments and return type From 37d13d4bce5f4260979e38617457b9136ca1a15f Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 2 Apr 2020 15:48:14 +0200 Subject: [PATCH 0209/2229] Compilable --- .../Functions/DynamicTarget/CMakeLists.txt | 11 + .../DynamicTarget/DynamicFunctionAdaptors.h | 263 ++++++++++++++++++ dbms/src/Functions/DynamicTarget/Selector.h | 39 +++ dbms/src/Functions/DynamicTarget/Target.cpp | 12 + dbms/src/Functions/DynamicTarget/Target.h | 19 +- src/Compression/CompressionFactory.cpp | 2 +- src/Functions/CMakeLists.txt | 3 + src/Functions/FunctionStartsEndsWith.h | 103 ++++--- src/Functions/IFunctionImpl.h | 2 +- 9 files changed, 408 insertions(+), 46 deletions(-) create mode 100644 dbms/src/Functions/DynamicTarget/CMakeLists.txt create mode 100644 dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h create mode 100644 dbms/src/Functions/DynamicTarget/Selector.h create mode 100644 dbms/src/Functions/DynamicTarget/Target.cpp diff --git a/dbms/src/Functions/DynamicTarget/CMakeLists.txt b/dbms/src/Functions/DynamicTarget/CMakeLists.txt new file mode 100644 index 00000000000..154e47ab5f6 --- /dev/null +++ b/dbms/src/Functions/DynamicTarget/CMakeLists.txt @@ -0,0 +1,11 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) +add_headers_and_sources(clickhouse_functions_dynamic_target .) 
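+# add_headers_and_sources() populates clickhouse_functions_dynamic_target_sources
+# and clickhouse_functions_dynamic_target_headers for the add_library() call below;
+# a dedicated library keeps per-target compile options (such as the -g0 used for
+# release builds) local to these files.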
+add_library(clickhouse_functions_dynamic_target + ${clickhouse_functions_dynamic_target_sources} + ${clickhouse_functions_dynamic_target_headers}) +target_link_libraries(clickhouse_functions_dynamic_target PRIVATE dbms) + +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") + # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. + target_compile_options(clickhouse_functions_dynamic_target PRIVATE "-g0") +endif () diff --git a/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h b/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h new file mode 100644 index 00000000000..123faa859e9 --- /dev/null +++ b/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h @@ -0,0 +1,263 @@ +#pragma once +#include + +namespace DB +{ + +/// Adaptors are implement user interfaces from IFunction.h via developer interfaces from IFunctionImpl.h +/// Typically, you don't need to change this classes. + +class ExecutableFunctionAdaptor final : public IExecutableFunction +{ +public: + explicit ExecutableFunctionAdaptor(ExecutableFunctionImplPtr impl_) : impl(std::move(impl_)) {} + + String getName() const final { return impl->getName(); } + + void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) final; + + void createLowCardinalityResultCache(size_t cache_size) override; + +private: + ExecutableFunctionImplPtr impl; + + /// Cache is created by function createLowCardinalityResultCache() + ExecutableFunctionLowCardinalityResultCachePtr low_cardinality_result_cache; + + bool defaultImplementationForConstantArguments( + Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); + + bool defaultImplementationForNulls( + Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); + + void executeWithoutLowCardinalityColumns( + Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); +}; + +class FunctionBaseAdaptor final : public IFunctionBase +{ +public: + explicit FunctionBaseAdaptor(FunctionBaseImplPtr impl_) : impl(std::move(impl_)) {} + + String getName() const final { return impl->getName(); } + + const DataTypes & getArgumentTypes() const final { return impl->getArgumentTypes(); } + const DataTypePtr & getReturnType() const final { return impl->getReturnType(); } + + ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const final + { + return std::make_shared(impl->prepare(sample_block, arguments, result)); + } + +#if USE_EMBEDDED_COMPILER + + bool isCompilable() const final { return impl->isCompilable(); } + + llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override + { + return impl->compile(builder, std::move(values)); + } + +#endif + + bool isStateful() const final { return impl->isStateful(); } + bool isSuitableForConstantFolding() const final { return impl->isSuitableForConstantFolding(); } + + ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments) const final + { + return impl->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments); + } + + bool isInjective(const Block & sample_block) final { return impl->isInjective(sample_block); } + bool isDeterministic() const final { return impl->isDeterministic(); } + bool 
isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); } + bool hasInformationAboutMonotonicity() const final { return impl->hasInformationAboutMonotonicity(); } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const final + { + return impl->getMonotonicityForRange(type, left, right); + } + + const IFunctionBaseImpl * getImpl() const { return impl.get(); } + +private: + FunctionBaseImplPtr impl; +}; + + +class FunctionOverloadResolverAdaptor final : public IFunctionOverloadResolver +{ +public: + explicit FunctionOverloadResolverAdaptor(FunctionOverloadResolverImplPtr impl_) : impl(std::move(impl_)) {} + + String getName() const final { return impl->getName(); } + + bool isDeterministic() const final { return impl->isDeterministic(); } + + bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); } + + bool isStateful() const final { return impl->isStateful(); } + + bool isVariadic() const final { return impl->isVariadic(); } + + size_t getNumberOfArguments() const final { return impl->getNumberOfArguments(); } + + void checkNumberOfArguments(size_t number_of_arguments) const final; + + FunctionBaseImplPtr buildImpl(const ColumnsWithTypeAndName & arguments) const + { + return impl->build(arguments, getReturnType(arguments)); + } + + FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const final + { + return std::make_shared(buildImpl(arguments)); + } + + void getLambdaArgumentTypes(DataTypes & arguments) const final + { + checkNumberOfArguments(arguments.size()); + impl->getLambdaArgumentTypes(arguments); + } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return impl->getArgumentsThatAreAlwaysConstant(); } + + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const final + { + return impl->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); + } + +private: + FunctionOverloadResolverImplPtr impl; + + DataTypePtr getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const; + DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const; +}; + + +/// Following classes are implement IExecutableFunctionImpl, IFunctionBaseImpl and IFunctionOverloadResolverImpl via IFunction. 
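+/// Each of them wraps a shared_ptr<IFunction> and forwards the corresponding
+/// subset of calls to it, so an old-style function can be plugged in wherever
+/// the three-part chain (overload resolver -> function base -> executable) is
+/// expected.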
+ +class DefaultExecutable final : public IExecutableFunctionImpl +{ +public: + explicit DefaultExecutable(std::shared_ptr function_) : function(std::move(function_)) {} + + String getName() const override { return function->getName(); } + +protected: + void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final + { + return function->executeImpl(block, arguments, result, input_rows_count); + } + void executeDryRun(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final + { + return function->executeImplDryRun(block, arguments, result, input_rows_count); + } + bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); } + bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); } + bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return function->getArgumentsThatAreAlwaysConstant(); } + bool canBeExecutedOnDefaultArguments() const override { return function->canBeExecutedOnDefaultArguments(); } + +private: + std::shared_ptr function; +}; + +class DefaultFunction final : public IFunctionBaseImpl +{ +public: + DefaultFunction(std::shared_ptr function_, DataTypes arguments_, DataTypePtr return_type_) + : function(std::move(function_)), arguments(std::move(arguments_)), return_type(std::move(return_type_)) {} + + String getName() const override { return function->getName(); } + + const DataTypes & getArgumentTypes() const override { return arguments; } + const DataTypePtr & getReturnType() const override { return return_type; } + +#if USE_EMBEDDED_COMPILER + + bool isCompilable() const override { return function->isCompilable(arguments); } + + llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); } + +#endif + + ExecutableFunctionImplPtr prepare(const Block & /*sample_block*/, const ColumnNumbers & /*arguments*/, size_t /*result*/) const override + { + return std::make_unique(function); + } + + bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); } + ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments_) const override + { + return function->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments_); + } + + bool isStateful() const override { return function->isStateful(); } + + bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); } + + bool isDeterministic() const override { return function->isDeterministic(); } + + bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + + bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } + + using Monotonicity = IFunctionBase::Monotonicity; + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return function->getMonotonicityForRange(type, left, right); + } +private: + std::shared_ptr function; + DataTypes arguments; + DataTypePtr return_type; +}; + +class DefaultOverloadResolver : public IFunctionOverloadResolverImpl +{ +public: + explicit 
DefaultOverloadResolver(std::shared_ptr function_) : function(std::move(function_)) {} + + void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override + { + return function->checkNumberOfArgumentsIfVariadic(number_of_arguments); + } + + bool isDeterministic() const override { return function->isDeterministic(); } + bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } + + String getName() const override { return function->getName(); } + bool isStateful() const override { return function->isStateful(); } + bool isVariadic() const override { return function->isVariadic(); } + size_t getNumberOfArguments() const override { return function->getNumberOfArguments(); } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return function->getArgumentsThatAreAlwaysConstant(); } + ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override + { + return function->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); + } + + DataTypePtr getReturnType(const DataTypes & arguments) const override { return function->getReturnTypeImpl(arguments); } + DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override { return function->getReturnTypeImpl(arguments); } + + bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); } + bool useDefaultImplementationForLowCardinalityColumns() const override { return function->useDefaultImplementationForLowCardinalityColumns(); } + bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); } + + FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + DataTypes data_types(arguments.size()); + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + return std::make_unique(function, data_types, return_type); + } + + void getLambdaArgumentTypes(DataTypes & arguments) const override { function->getLambdaArgumentTypes(arguments); } + +private: + std::shared_ptr function; +}; + + +} diff --git a/dbms/src/Functions/DynamicTarget/Selector.h b/dbms/src/Functions/DynamicTarget/Selector.h new file mode 100644 index 00000000000..257172a7223 --- /dev/null +++ b/dbms/src/Functions/DynamicTarget/Selector.h @@ -0,0 +1,39 @@ +#pragma once + +#include "Target.h" + +namespace DB::DynamicTarget +{ + +class PerformanceStatistic +{}; + +template +class SelectorExecutor +{ +public: + using Executor = std::function; + // Should register all executors before + void registerExecutor(std::optional arch, Executor executor) + { + if (!arch || IsArchSupported(*arch)) { + executors_.emplace_back(std::move(executor)); + } + } + + void execute(Params... params) + { + if (executors_.empty()) { + throw "There are no realizations for this arch Arch"; + } + int impl = 0; + // TODO: choose implementation. 
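+        // For now the first registered executor always wins; the intended
+        // selection (see statistic_ below) is to pick whichever arch-specific
+        // variant has shown the best runtime so far.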
+ executors_[impl](params...); + } + +private: + std::vector executors_; + PerformanceStatistic statistic_; +}; + +} // namespace DB::DynamicTarget \ No newline at end of file diff --git a/dbms/src/Functions/DynamicTarget/Target.cpp b/dbms/src/Functions/DynamicTarget/Target.cpp new file mode 100644 index 00000000000..e0bdb509164 --- /dev/null +++ b/dbms/src/Functions/DynamicTarget/Target.cpp @@ -0,0 +1,12 @@ +#include "Target.h" + +namespace DB::DynamicTarget +{ + +bool IsArchSupported(TargetArch arch) +{ + // TODO(dakovalkov): use cpuid + return arch != TargetArch::AVX512; +} + +} // namespace DB::DynamicTarget \ No newline at end of file diff --git a/dbms/src/Functions/DynamicTarget/Target.h b/dbms/src/Functions/DynamicTarget/Target.h index 5e3032ded3e..2873871d16c 100644 --- a/dbms/src/Functions/DynamicTarget/Target.h +++ b/dbms/src/Functions/DynamicTarget/Target.h @@ -43,40 +43,45 @@ enum class TargetArch : int { # error "Only CLANG and GCC compilers are supported" #endif -#define DECLARE_DEFAULT_CODE (...) \ +#define DECLARE_DEFAULT_CODE(...) \ namespace TargetSpecific::Default { \ + using namespace DB::DynamicTarget::TargetSpecific::Default; \ __VA_ARGS__ \ } -#define DECLARE_SSE4_SPECIFIC_CODE (...) \ +#define DECLARE_SSE4_SPECIFIC_CODE(...) \ BEGIN_SSE4_SPECIFIC_CODE \ namespace TargetSpecific::SSE4 { \ + using namespace DB::DynamicTarget::TargetSpecific::SSE4; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX_SPECIFIC_CODE (...) \ +#define DECLARE_AVX_SPECIFIC_CODE(...) \ BEGIN_AVX_SPECIFIC_CODE \ namespace TargetSpecific::AVX { \ + using namespace DB::DynamicTarget::TargetSpecific::AVX; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX2_SPECIFIC_CODE (...) \ +#define DECLARE_AVX2_SPECIFIC_CODE(...) \ BEGIN_AVX2_SPECIFIC_CODE \ namespace TargetSpecific::AVX2 { \ + using namespace DB::DynamicTarget::TargetSpecific::AVX2; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512_SPECIFIC_CODE (...) \ +#define DECLARE_AVX512_SPECIFIC_CODE(...) \ BEGIN_AVX512_SPECIFIC_CODE \ namespace TargetSpecific::AVX512 { \ + using namespace DB::DynamicTarget::TargetSpecific::AVX512; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE -#define DYNAMIC_CODE (...) \ +#define DECLARE_MULTITARGET_CODE(...) 
\ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ @@ -103,4 +108,6 @@ DECLARE_AVX512_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512; ) // DECLARE_AVX512_SPECIFIC_CODE +bool IsArchSupported(TargetArch arch); + } // namespace DB::DynamicTarget \ No newline at end of file diff --git a/src/Compression/CompressionFactory.cpp b/src/Compression/CompressionFactory.cpp index 5d5c5c14de6..2598fc07b08 100644 --- a/src/Compression/CompressionFactory.cpp +++ b/src/Compression/CompressionFactory.cpp @@ -120,7 +120,7 @@ void CompressionCodecFactory::registerCompressionCodecWithType( if (byte_code) if (!family_code_with_codec.emplace(*byte_code, creator).second) - throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR); + throw Exception("CompressionCodecFactory: the codec family code '" + std::to_string(*byte_code) + "' is not unique", ErrorCodes::LOGICAL_ERROR); } void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional byte_code, Creator creator) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 069a63aa9e1..e9a33283d5b 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -91,3 +91,6 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url) add_subdirectory(array) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array) + +add_subdirectory(DynamicTarget) +target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_dynamic_target) \ No newline at end of file diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 4f56a827f4c..0e63b616558 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -6,6 +6,8 @@ #include #include +#include +#include namespace DB { @@ -27,43 +29,14 @@ struct NameEndsWith static constexpr auto name = "endsWith"; }; +using DynamicTarget::TargetArch; + +DECLARE_MULTITARGET_CODE( + template -class FunctionStartsEndsWith : public IFunction -{ +class FunctionStartsEndsWithImpl { public: - static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) - { - return std::make_shared(); - } - - String getName() const override - { - return name; - } - - size_t getNumberOfArguments() const override - { - return 2; - } - - bool useDefaultImplementationForConstants() const override - { - return true; - } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!isStringOrFixedString(arguments[1])) - throw Exception("Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + static void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { const IColumn * haystack_column = block.getByPosition(arguments[0]).column.get(); const IColumn * needle_column = block.getByPosition(arguments[1]).column.get(); @@ -82,14 +55,14 @@ public: else if (const ColumnConst * haystack_const_fixed = 
checkAndGetColumnConst(haystack_column)) dispatch>(ConstSource(*haystack_const_fixed), needle_column, vec_res); else - throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal combination of columns as arguments of function " "getName()", ErrorCodes::ILLEGAL_COLUMN); block.getByPosition(result).column = std::move(col_res); } private: template - void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) const + static void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) { if (const ColumnString * needle = checkAndGetColumn(needle_column)) execute(haystack_source, StringSource(*needle), res_data); @@ -100,7 +73,7 @@ private: else if (const ColumnConst * needle_const_fixed = checkAndGetColumnConst(needle_column)) execute>(haystack_source, ConstSource(*needle_const_fixed), res_data); else - throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal combination of columns as arguments of function " "getName()", ErrorCodes::ILLEGAL_COLUMN); } template @@ -136,4 +109,58 @@ private: } }; +) // DECLARE_MULTITARGET_CODE + +template +class FunctionStartsEndsWith : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + FunctionStartsEndsWith() { + executor_.registerExecutor(std::nullopt, TargetSpecific::Default::FunctionStartsEndsWithImpl::executeImpl); + executor_.registerExecutor(TargetArch::SSE4, TargetSpecific::SSE4::FunctionStartsEndsWithImpl::executeImpl); + executor_.registerExecutor(TargetArch::AVX, TargetSpecific::AVX::FunctionStartsEndsWithImpl::executeImpl); + executor_.registerExecutor(TargetArch::AVX2, TargetSpecific::AVX2::FunctionStartsEndsWithImpl::executeImpl); + executor_.registerExecutor(TargetArch::AVX512, TargetSpecific::AVX512::FunctionStartsEndsWithImpl::executeImpl); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool useDefaultImplementationForConstants() const override + { + return true; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isStringOrFixedString(arguments[1])) + throw Exception("Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + executor_.execute(block, arguments, result, input_rows_count); + } +private: + DynamicTarget::SelectorExecutor executor_; +}; + } diff --git a/src/Functions/IFunctionImpl.h b/src/Functions/IFunctionImpl.h index 4da3e9ca056..27e7aec6bd4 100644 --- a/src/Functions/IFunctionImpl.h +++ b/src/Functions/IFunctionImpl.h @@ -194,7 +194,7 @@ using FunctionOverloadResolverImplPtr = std::unique_ptr +class IFunction { public: virtual ~IFunction() = default; From 43657809d8f36c0e0595e0928d76b942381bad95 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sun, 5 Apr 2020 14:01:33 +0200 Subject: [PATCH 0210/2229] awful adaptor --- 
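A note on the pattern this patch is converging on: each function gets several target-specific builds, and a selector picks one at run time based on observed timings. A minimal greedy sketch of that selection loop, with illustrative names only (the committed code uses the bandit-style PerformanceStatistics shown below instead):

// Illustrative sketch, not the committed interface: choose the candidate with
// the best observed mean runtime, execute it, and fold the timing back in.
#include <chrono>
#include <functional>
#include <vector>

struct Candidate
{
    std::function<void()> run;
    double total_seconds = 0;
    size_t calls = 0;

    // Unmeasured candidates report 0, so each one gets tried early on.
    double meanSeconds() const { return calls ? total_seconds / calls : 0.0; }
};

void executeBest(std::vector<Candidate> & candidates)
{
    if (candidates.empty())
        return;

    size_t best = 0;
    for (size_t i = 1; i < candidates.size(); ++i)
        if (candidates[i].meanSeconds() < candidates[best].meanSeconds())
            best = i;

    auto start = std::chrono::steady_clock::now();
    candidates[best].run();
    std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;

    candidates[best].total_seconds += elapsed.count();
    ++candidates[best].calls;
}

Greedy selection like this can starve a variant after one slow first call; the PerformanceStatistics struct below avoids that by sampling from a normal distribution around each variant's mean, so slower variants still get re-measured occasionally.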
dbms/src/Functions/DynamicTarget/Selector.h | 324 ++++++++++++++++++-- dbms/src/Functions/DynamicTarget/Target.cpp | 2 +- dbms/src/Functions/DynamicTarget/Target.h | 6 +- src/Functions/CMakeLists.txt | 2 +- src/Functions/FunctionStartsEndsWith.h | 98 +++--- 5 files changed, 358 insertions(+), 74 deletions(-) diff --git a/dbms/src/Functions/DynamicTarget/Selector.h b/dbms/src/Functions/DynamicTarget/Selector.h index 257172a7223..70c14dd2d11 100644 --- a/dbms/src/Functions/DynamicTarget/Selector.h +++ b/dbms/src/Functions/DynamicTarget/Selector.h @@ -2,38 +2,324 @@ #include "Target.h" +#include + +#include + namespace DB::DynamicTarget { -class PerformanceStatistic -{}; +// TODO(dakovalkov): This is copied and pasted struct from LZ4_decompress_faster.h -template -class SelectorExecutor +/** When decompressing uniform sequence of blocks (for example, blocks from one file), + * you can pass single PerformanceStatistics object to subsequent invocations of 'decompress' method. + * It will accumulate statistics and use it as a feedback to choose best specialization of algorithm at runtime. + * One PerformanceStatistics object cannot be used concurrently from different threads. + */ +struct PerformanceStatistics { -public: - using Executor = std::function; - // Should register all executors before - void registerExecutor(std::optional arch, Executor executor) + struct Element { - if (!arch || IsArchSupported(*arch)) { - executors_.emplace_back(std::move(executor)); + double count = 0; + double sum = 0; + + double adjustedCount() const + { + return count - NUM_INVOCATIONS_TO_THROW_OFF; } + + double mean() const + { + return sum / adjustedCount(); + } + + /// For better convergence, we don't use proper estimate of stddev. + /// We want to eventually separate between two algorithms even in case + /// when there is no statistical significant difference between them. + double sigma() const + { + return mean() / sqrt(adjustedCount()); + } + + void update(double seconds, double bytes) + { + ++count; + + if (count > NUM_INVOCATIONS_TO_THROW_OFF) + sum += seconds / bytes; + } + + double sample(pcg64 & stat_rng) const + { + /// If there is a variant with not enough statistics, always choose it. + /// And in that case prefer variant with less number of invocations. + + if (adjustedCount() < 2) + return adjustedCount() - 1; + else + return std::normal_distribution<>(mean(), sigma())(stat_rng); + } + }; + + /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account. + static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2; + + /// How to select method to run. + /// -1 - automatically, based on statistics (default); + /// -2 - choose methods in round robin fashion (for performance testing). + /// >= 0 - always choose specified method (for performance testing); + ssize_t choose_method = -1; + + std::vector data; + + /// It's Ok that generator is not seeded. + pcg64 rng; + + /// To select from different algorithms we use a kind of "bandits" algorithm. + /// Sample random values from estimated normal distributions and choose the minimal. + size_t select() + { + if (choose_method < 0) + { + std::vector samples(data.size()); + for (size_t i = 0; i < data.size(); ++i) + samples[i] = choose_method == -1 + ? data[i].sample(rng) + : data[i].adjustedCount(); + + return std::min_element(samples.begin(), samples.end()) - samples.begin(); + } + else + return choose_method; } - void execute(Params... 
params) + size_t size() { + return data.size(); + } + + void emplace_back() { + data.emplace_back(); + } + + PerformanceStatistics() {} + PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} +}; + +// template +// class PerformanceExecutor +// { +// public: +// using Executor = std::function; +// // Should register all executors before execute +// void registerExecutor(Executor executor) +// { +// executors.emplace_back(std::move(executor)); +// } + +// // The performance of the execution is time / weight. +// // Weight is usualy the +// void execute(int weight, Params... params) +// { +// if (executors_.empty()) { +// throw "There are no realizations for current Arch"; +// } +// int impl = 0; +// // TODO: choose implementation. +// executors_[impl](params...); +// } + +// private: +// std::vector executors; +// PerformanceStatistics statistics; +// }; + +class FunctionDynamicAdaptor : public IFunction +{ +public: + template + FunctionDynamicAdaptor(const Context & context_) : context(context_) { - if (executors_.empty()) { - throw "There are no realizations for this arch Arch"; + registerImplementation(); + } + + virtual String getName() const override { + return impls.front()->getName(); + } + + virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + int id = statistics.select(); + // TODO(dakovalkov): measure time and change statistics. + impls[id]->executeImpl(block, arguments, result, input_rows_count); + } + virtual void executeImplDryRun(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + impls.front()->executeImplDryRun(block, arguments, result, input_rows_count); + } + + virtual bool useDefaultImplementationForNulls() const override + { + return impls.front()->useDefaultImplementationForNulls(); + } + + virtual bool useDefaultImplementationForConstants() const override + { + return impls.front()->useDefaultImplementationForConstants(); + } + + virtual bool useDefaultImplementationForLowCardinalityColumns() const override + { + return impls.front()->useDefaultImplementationForLowCardinalityColumns(); + } + + virtual bool canBeExecutedOnLowCardinalityDictionary() const override + { + return impls.front()->canBeExecutedOnLowCardinalityDictionary(); + } + + virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const override + { + return impls.front()->getArgumentsThatAreAlwaysConstant(); + } + + virtual bool canBeExecutedOnDefaultArguments() const override + { + return impls.front()->canBeExecutedOnDefaultArguments(); + } + +#if USE_EMBEDDED_COMPILER + + virtual bool isCompilable() const override + { + return impls.front()->isCompilable(); + } + + virtual llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override + { + return impls.front()->compile(builder, std::move(values)); + } + +#endif + + /// Properties from IFunctionBase (see IFunction.h) + virtual bool isSuitableForConstantFolding() const override + { + return impls.front()->isSuitableForConstantFolding(); + } + virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments) const override + { + return impls.front()->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments); + } + virtual bool isInjective(const Block & sample_block) override + { + return impls.front()->isInjective(sample_block); + } + virtual bool isDeterministic() const override + { + return 
impls.front()->isDeterministic(); + } + virtual bool isDeterministicInScopeOfQuery() const override + { + return impls.front()->isDeterministicInScopeOfQuery(); + } + virtual bool isStateful() const override + { + return impls.front()->isStateful(); + } + virtual bool hasInformationAboutMonotonicity() const override + { + return impls.front()->hasInformationAboutMonotonicity(); + } + + using Monotonicity = IFunctionBase::Monotonicity; + virtual Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return impls.front()->getMonotonicityForRange(type, left, right); + } + + virtual size_t getNumberOfArguments() const override { + return impls.front()->getNumberOfArguments(); + } + + virtual DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + return impls.front()->getReturnTypeImpl(arguments); + } + + virtual DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + return impls.front()->getReturnTypeImpl(arguments); + } + + virtual bool isVariadic() const override + { + return impls.front()->isVariadic(); + } + + virtual void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override + { + impls.front()->checkNumberOfArgumentsIfVariadic(number_of_arguments); + } + + virtual void getLambdaArgumentTypes(DataTypes & arguments) const override + { + impls.front()->getLambdaArgumentTypes(arguments); + } + + virtual ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override + { + return impls.front()->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); + } + +protected: + +#if USE_EMBEDDED_COMPILER + + virtual bool isCompilableImpl(const DataTypes & /* types */) const override + { + return false; + // return impls.front()->isCompilableImpl(types); + } + + virtual llvm::Value * compileImpl(llvm::IRBuilderBase & /* builder */, const DataTypes & /* types */, ValuePlaceholders /* ph */) const override + { + throw "safasf Error"; + // return impls.front()->compileImpl(builder, types, ph); + } + +#endif + /* + * Register implementation of the function. + */ + template + void registerImplementation(TargetArch arch = TargetArch::Default) { + if (arch == TargetArch::Default || IsArchSupported(arch)) { + impls.emplace_back(Function::create(context)); + statistics.emplace_back(); } - int impl = 0; - // TODO: choose implementation. 
- executors_[impl](params...); } private: - std::vector executors_; - PerformanceStatistic statistic_; + const Context & context; + std::vector impls; + PerformanceStatistics statistics; }; -} // namespace DB::DynamicTarget \ No newline at end of file +#define DECLARE_STANDART_TARGET_ADAPTOR(Function) \ +class Function : public FunctionDynamicAdaptor \ +{ \ +public: \ + Function(const Context & context) : FunctionDynamicAdaptor(context) \ + { \ + registerImplementation(TargetArch::SSE4); \ + registerImplementation(TargetArch::AVX); \ + registerImplementation(TargetArch::AVX2); \ + registerImplementation(TargetArch::AVX512); \ + } \ + static FunctionPtr create(const Context & context) \ + { \ + return std::make_shared(context); \ + } \ +} + +} // namespace DB::DynamicTarget diff --git a/dbms/src/Functions/DynamicTarget/Target.cpp b/dbms/src/Functions/DynamicTarget/Target.cpp index e0bdb509164..54c41a1786c 100644 --- a/dbms/src/Functions/DynamicTarget/Target.cpp +++ b/dbms/src/Functions/DynamicTarget/Target.cpp @@ -9,4 +9,4 @@ bool IsArchSupported(TargetArch arch) return arch != TargetArch::AVX512; } -} // namespace DB::DynamicTarget \ No newline at end of file +} // namespace DB::DynamicTarget diff --git a/dbms/src/Functions/DynamicTarget/Target.h b/dbms/src/Functions/DynamicTarget/Target.h index 2873871d16c..e1772a11857 100644 --- a/dbms/src/Functions/DynamicTarget/Target.h +++ b/dbms/src/Functions/DynamicTarget/Target.h @@ -4,7 +4,7 @@ namespace DB::DynamicTarget { enum class TargetArch : int { - Scalar, + Default, // Without any additional compiler options. SSE4, AVX, AVX2, @@ -89,7 +89,7 @@ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( - constexpr auto BuildArch = TargetArch::Scalar; + constexpr auto BuildArch = TargetArch::Default; ) // DECLARE_DEFAULT_CODE DECLARE_SSE4_SPECIFIC_CODE( @@ -110,4 +110,4 @@ DECLARE_AVX512_SPECIFIC_CODE( bool IsArchSupported(TargetArch arch); -} // namespace DB::DynamicTarget \ No newline at end of file +} // namespace DB::DynamicTarget diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index e9a33283d5b..85b1b717d47 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -93,4 +93,4 @@ add_subdirectory(array) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array) add_subdirectory(DynamicTarget) -target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_dynamic_target) \ No newline at end of file +target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_dynamic_target) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 0e63b616558..497abc92508 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -29,14 +29,45 @@ struct NameEndsWith static constexpr auto name = "endsWith"; }; -using DynamicTarget::TargetArch; - DECLARE_MULTITARGET_CODE( template -class FunctionStartsEndsWithImpl { +class FunctionStartsEndsWith : public IFunction +{ public: - static void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) + static constexpr auto name = Name::name; + static FunctionPtr create(const Context &) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 2; + } + + bool useDefaultImplementationForConstants() const override + { + return true; + } + + DataTypePtr 
getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isStringOrFixedString(arguments[0])) + throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!isStringOrFixedString(arguments[1])) + throw Exception("Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { const IColumn * haystack_column = block.getByPosition(arguments[0]).column.get(); const IColumn * needle_column = block.getByPosition(arguments[1]).column.get(); @@ -55,14 +86,14 @@ public: else if (const ColumnConst * haystack_const_fixed = checkAndGetColumnConst(haystack_column)) dispatch>(ConstSource(*haystack_const_fixed), needle_column, vec_res); else - throw Exception("Illegal combination of columns as arguments of function " "getName()", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); block.getByPosition(result).column = std::move(col_res); } private: template - static void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) + void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray & res_data) const { if (const ColumnString * needle = checkAndGetColumn(needle_column)) execute(haystack_source, StringSource(*needle), res_data); @@ -73,7 +104,7 @@ private: else if (const ColumnConst * needle_const_fixed = checkAndGetColumnConst(needle_column)) execute>(haystack_source, ConstSource(*needle_const_fixed), res_data); else - throw Exception("Illegal combination of columns as arguments of function " "getName()", ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } template @@ -112,55 +143,22 @@ private: ) // DECLARE_MULTITARGET_CODE template -class FunctionStartsEndsWith : public IFunction +class FunctionStartsEndsWith : public DynamicTarget::FunctionDynamicAdaptor { public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) + FunctionStartsEndsWith(const Context & context_) + : FunctionDynamicAdaptor>(context_) { - return std::make_shared(); + registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512); } - - FunctionStartsEndsWith() { - executor_.registerExecutor(std::nullopt, TargetSpecific::Default::FunctionStartsEndsWithImpl::executeImpl); - executor_.registerExecutor(TargetArch::SSE4, TargetSpecific::SSE4::FunctionStartsEndsWithImpl::executeImpl); - executor_.registerExecutor(TargetArch::AVX, TargetSpecific::AVX::FunctionStartsEndsWithImpl::executeImpl); - executor_.registerExecutor(TargetArch::AVX2, TargetSpecific::AVX2::FunctionStartsEndsWithImpl::executeImpl); - executor_.registerExecutor(TargetArch::AVX512, TargetSpecific::AVX512::FunctionStartsEndsWithImpl::executeImpl); - } - - String getName() const override + static FunctionPtr create(const Context & context) { - return name; + return std::make_shared>(context); \ } - - size_t getNumberOfArguments() const override - { - return 2; - } - - bool useDefaultImplementationForConstants() const override - { - return true; - } - 
- DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isStringOrFixedString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!isStringOrFixedString(arguments[1])) - throw Exception("Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(); - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - executor_.execute(block, arguments, result, input_rows_count); - } -private: - DynamicTarget::SelectorExecutor executor_; }; -} +} \ No newline at end of file From e0a497d575c8c776510cce47e066156e848dac80 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sun, 5 Apr 2020 15:14:59 +0200 Subject: [PATCH 0211/2229] Better interface --- dbms/src/Functions/DynamicTarget/Selector.h | 162 ++------------------ src/Functions/FunctionStartsEndsWith.h | 19 ++- 2 files changed, 27 insertions(+), 154 deletions(-) diff --git a/dbms/src/Functions/DynamicTarget/Selector.h b/dbms/src/Functions/DynamicTarget/Selector.h index 70c14dd2d11..a59022a6c28 100644 --- a/dbms/src/Functions/DynamicTarget/Selector.h +++ b/dbms/src/Functions/DynamicTarget/Selector.h @@ -133,161 +133,30 @@ struct PerformanceStatistics // PerformanceStatistics statistics; // }; -class FunctionDynamicAdaptor : public IFunction +template +class FunctionDynamicAdaptor : public DefaultFunction { public: - template - FunctionDynamicAdaptor(const Context & context_) : context(context_) + template + FunctionDynamicAdaptor(const Context & context_, Params ...params) + : DefaultFunction(params...) + , context(context_) { - registerImplementation(); - } - - virtual String getName() const override { - return impls.front()->getName(); + statistics.emplace_back(); } virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { int id = statistics.select(); // TODO(dakovalkov): measure time and change statistics. 
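// A hedged sketch of what that measurement could look like (assuming ClickHouse's
// Stopwatch from Common/Stopwatch.h; using input_rows_count as the weight is an
// assumption, any monotone proxy for the amount of work would do):
//
//     Stopwatch watch;
//     /* run the implementation chosen by `id` */
//     watch.stop();
//     statistics.data[id].update(watch.elapsedSeconds(), input_rows_count);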
- impls[id]->executeImpl(block, arguments, result, input_rows_count); - } - virtual void executeImplDryRun(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - impls.front()->executeImplDryRun(block, arguments, result, input_rows_count); - } - - virtual bool useDefaultImplementationForNulls() const override - { - return impls.front()->useDefaultImplementationForNulls(); - } - - virtual bool useDefaultImplementationForConstants() const override - { - return impls.front()->useDefaultImplementationForConstants(); - } - - virtual bool useDefaultImplementationForLowCardinalityColumns() const override - { - return impls.front()->useDefaultImplementationForLowCardinalityColumns(); - } - - virtual bool canBeExecutedOnLowCardinalityDictionary() const override - { - return impls.front()->canBeExecutedOnLowCardinalityDictionary(); - } - - virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - return impls.front()->getArgumentsThatAreAlwaysConstant(); - } - - virtual bool canBeExecutedOnDefaultArguments() const override - { - return impls.front()->canBeExecutedOnDefaultArguments(); - } - -#if USE_EMBEDDED_COMPILER - - virtual bool isCompilable() const override - { - return impls.front()->isCompilable(); - } - - virtual llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override - { - return impls.front()->compile(builder, std::move(values)); - } - -#endif - - /// Properties from IFunctionBase (see IFunction.h) - virtual bool isSuitableForConstantFolding() const override - { - return impls.front()->isSuitableForConstantFolding(); - } - virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments) const override - { - return impls.front()->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments); - } - virtual bool isInjective(const Block & sample_block) override - { - return impls.front()->isInjective(sample_block); - } - virtual bool isDeterministic() const override - { - return impls.front()->isDeterministic(); - } - virtual bool isDeterministicInScopeOfQuery() const override - { - return impls.front()->isDeterministicInScopeOfQuery(); - } - virtual bool isStateful() const override - { - return impls.front()->isStateful(); - } - virtual bool hasInformationAboutMonotonicity() const override - { - return impls.front()->hasInformationAboutMonotonicity(); - } - - using Monotonicity = IFunctionBase::Monotonicity; - virtual Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return impls.front()->getMonotonicityForRange(type, left, right); - } - - virtual size_t getNumberOfArguments() const override { - return impls.front()->getNumberOfArguments(); - } - - virtual DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - return impls.front()->getReturnTypeImpl(arguments); - } - - virtual DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - return impls.front()->getReturnTypeImpl(arguments); - } - - virtual bool isVariadic() const override - { - return impls.front()->isVariadic(); - } - - virtual void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override - { - impls.front()->checkNumberOfArgumentsIfVariadic(number_of_arguments); - } - - virtual void getLambdaArgumentTypes(DataTypes & arguments) const override - { - impls.front()->getLambdaArgumentTypes(arguments); - } - - virtual 
ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override - { - return impls.front()->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); + if (id == 0) { + DefaultFunction::executeImpl(block, arguments, result, input_rows_count); + } else { + impls[id - 1]->executeImpl(block, arguments, result, input_rows_count); + } } protected: - -#if USE_EMBEDDED_COMPILER - - virtual bool isCompilableImpl(const DataTypes & /* types */) const override - { - return false; - // return impls.front()->isCompilableImpl(types); - } - - virtual llvm::Value * compileImpl(llvm::IRBuilderBase & /* builder */, const DataTypes & /* types */, ValuePlaceholders /* ph */) const override - { - throw "safasf Error"; - // return impls.front()->compileImpl(builder, types, ph); - } - -#endif /* * Register implementation of the function. */ @@ -301,15 +170,16 @@ protected: private: const Context & context; - std::vector impls; + std::vector impls; // Alternative implementations. PerformanceStatistics statistics; }; +// TODO(dakovalkov): May be it's better to delete this macros and write every function explicitly for better readability. #define DECLARE_STANDART_TARGET_ADAPTOR(Function) \ -class Function : public FunctionDynamicAdaptor \ +class Function : public FunctionDynamicAdaptor \ { \ public: \ - Function(const Context & context) : FunctionDynamicAdaptor(context) \ + Function(const Context & context) : FunctionDynamicAdaptor(context) \ { \ registerImplementation(TargetArch::SSE4); \ registerImplementation(TargetArch::AVX); \ diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 497abc92508..e883dc3267a 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -143,22 +143,25 @@ private: ) // DECLARE_MULTITARGET_CODE template -class FunctionStartsEndsWith : public DynamicTarget::FunctionDynamicAdaptor +class FunctionStartsEndsWith + : public DynamicTarget::FunctionDynamicAdaptor> { public: - static constexpr auto name = Name::name; FunctionStartsEndsWith(const Context & context_) - : FunctionDynamicAdaptor>(context_) + : DynamicTarget::FunctionDynamicAdaptor>(context_) { - registerImplementation>(TargetArch::SSE4); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512); + registerImplementation>(DynamicTarget::TargetArch::SSE4); + registerImplementation>(DynamicTarget::TargetArch::AVX); + registerImplementation>(DynamicTarget::TargetArch::AVX2); + registerImplementation>(DynamicTarget::TargetArch::AVX512); } static FunctionPtr create(const Context & context) { - return std::make_shared>(context); \ + return std::make_shared>(context); } }; +// template +// using FunctionStartsEndsWith = TargetSpecific::Default::FunctionStartsEndsWith; + } \ No newline at end of file From 09bb9041ec09a4ac3d4ebd12820cbe06f9b3d64e Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sun, 5 Apr 2020 21:39:12 +0200 Subject: [PATCH 0212/2229] Add descriptions, move to Functions/, rename, measure time, and so on... 
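The selection works like a multi-armed bandit: every run is timed, normalized by the amount of processed data, and fed back into PerformanceStatistics; select() then samples a per-variant cost estimate from a normal distribution and picks the minimum. A minimal standalone sketch of that feedback loop (an illustration only: it substitutes std::mt19937_64 for the pcg64 generator used in the real code, and uses plain seconds-per-row as the cost):

    #include <algorithm>
    #include <cmath>
    #include <random>
    #include <vector>

    struct Element
    {
        double count = 0;
        double sum = 0;

        // The first two (cold) invocations are counted but not accumulated.
        double adjustedCount() const { return count - 2; }
        double mean() const { return sum / adjustedCount(); }
        double sigma() const { return mean() / std::sqrt(adjustedCount()); }

        void update(double seconds, double rows)
        {
            ++count;
            if (count > 2)
                sum += seconds / rows;
        }

        double sample(std::mt19937_64 & rng) const
        {
            // Variants without enough statistics are always tried first.
            if (adjustedCount() < 2)
                return adjustedCount() - 1;
            return std::normal_distribution<>(mean(), sigma())(rng);
        }
    };

    // Choose the variant whose sampled cost estimate is minimal.
    size_t select(const std::vector<Element> & data, std::mt19937_64 & rng)
    {
        std::vector<double> samples(data.size());
        for (size_t i = 0; i < data.size(); ++i)
            samples[i] = data[i].sample(rng);
        return std::min_element(samples.begin(), samples.end()) - samples.begin();
    }

After enough runs the sampled means separate and the fastest variant wins almost every draw, while still being re-checked occasionally because the draws stay random.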
--- .../Functions/DynamicTarget/CMakeLists.txt | 11 - .../DynamicTarget/DynamicFunctionAdaptors.h | 263 ------------------ .../Selector.h => PerformanceAdaptors.h} | 133 +++++---- .../Target.cpp => TargetSpecific.cpp} | 6 +- .../Target.h => TargetSpecific.h} | 77 ++++- src/Functions/CMakeLists.txt | 3 - src/Functions/FunctionStartsEndsWith.h | 25 +- 7 files changed, 157 insertions(+), 361 deletions(-) delete mode 100644 dbms/src/Functions/DynamicTarget/CMakeLists.txt delete mode 100644 dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h rename dbms/src/Functions/{DynamicTarget/Selector.h => PerformanceAdaptors.h} (56%) rename dbms/src/Functions/{DynamicTarget/Target.cpp => TargetSpecific.cpp} (59%) rename dbms/src/Functions/{DynamicTarget/Target.h => TargetSpecific.h} (56%) diff --git a/dbms/src/Functions/DynamicTarget/CMakeLists.txt b/dbms/src/Functions/DynamicTarget/CMakeLists.txt deleted file mode 100644 index 154e47ab5f6..00000000000 --- a/dbms/src/Functions/DynamicTarget/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) -add_headers_and_sources(clickhouse_functions_dynamic_target .) -add_library(clickhouse_functions_dynamic_target - ${clickhouse_functions_dynamic_target_sources} - ${clickhouse_functions_dynamic_target_headers}) -target_link_libraries(clickhouse_functions_dynamic_target PRIVATE dbms) - -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") - # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. - target_compile_options(clickhouse_functions_dynamic_target PRIVATE "-g0") -endif () diff --git a/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h b/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h deleted file mode 100644 index 123faa859e9..00000000000 --- a/dbms/src/Functions/DynamicTarget/DynamicFunctionAdaptors.h +++ /dev/null @@ -1,263 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -/// Adaptors are implement user interfaces from IFunction.h via developer interfaces from IFunctionImpl.h -/// Typically, you don't need to change this classes. 
- -class ExecutableFunctionAdaptor final : public IExecutableFunction -{ -public: - explicit ExecutableFunctionAdaptor(ExecutableFunctionImplPtr impl_) : impl(std::move(impl_)) {} - - String getName() const final { return impl->getName(); } - - void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) final; - - void createLowCardinalityResultCache(size_t cache_size) override; - -private: - ExecutableFunctionImplPtr impl; - - /// Cache is created by function createLowCardinalityResultCache() - ExecutableFunctionLowCardinalityResultCachePtr low_cardinality_result_cache; - - bool defaultImplementationForConstantArguments( - Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); - - bool defaultImplementationForNulls( - Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); - - void executeWithoutLowCardinalityColumns( - Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run); -}; - -class FunctionBaseAdaptor final : public IFunctionBase -{ -public: - explicit FunctionBaseAdaptor(FunctionBaseImplPtr impl_) : impl(std::move(impl_)) {} - - String getName() const final { return impl->getName(); } - - const DataTypes & getArgumentTypes() const final { return impl->getArgumentTypes(); } - const DataTypePtr & getReturnType() const final { return impl->getReturnType(); } - - ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const final - { - return std::make_shared(impl->prepare(sample_block, arguments, result)); - } - -#if USE_EMBEDDED_COMPILER - - bool isCompilable() const final { return impl->isCompilable(); } - - llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override - { - return impl->compile(builder, std::move(values)); - } - -#endif - - bool isStateful() const final { return impl->isStateful(); } - bool isSuitableForConstantFolding() const final { return impl->isSuitableForConstantFolding(); } - - ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments) const final - { - return impl->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments); - } - - bool isInjective(const Block & sample_block) final { return impl->isInjective(sample_block); } - bool isDeterministic() const final { return impl->isDeterministic(); } - bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); } - bool hasInformationAboutMonotonicity() const final { return impl->hasInformationAboutMonotonicity(); } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const final - { - return impl->getMonotonicityForRange(type, left, right); - } - - const IFunctionBaseImpl * getImpl() const { return impl.get(); } - -private: - FunctionBaseImplPtr impl; -}; - - -class FunctionOverloadResolverAdaptor final : public IFunctionOverloadResolver -{ -public: - explicit FunctionOverloadResolverAdaptor(FunctionOverloadResolverImplPtr impl_) : impl(std::move(impl_)) {} - - String getName() const final { return impl->getName(); } - - bool isDeterministic() const final { return impl->isDeterministic(); } - - bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); } - - bool isStateful() const final { return impl->isStateful(); } - - bool isVariadic() const final { return 
impl->isVariadic(); } - - size_t getNumberOfArguments() const final { return impl->getNumberOfArguments(); } - - void checkNumberOfArguments(size_t number_of_arguments) const final; - - FunctionBaseImplPtr buildImpl(const ColumnsWithTypeAndName & arguments) const - { - return impl->build(arguments, getReturnType(arguments)); - } - - FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const final - { - return std::make_shared(buildImpl(arguments)); - } - - void getLambdaArgumentTypes(DataTypes & arguments) const final - { - checkNumberOfArguments(arguments.size()); - impl->getLambdaArgumentTypes(arguments); - } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return impl->getArgumentsThatAreAlwaysConstant(); } - - ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const final - { - return impl->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); - } - -private: - FunctionOverloadResolverImplPtr impl; - - DataTypePtr getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const; - DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const; -}; - - -/// Following classes are implement IExecutableFunctionImpl, IFunctionBaseImpl and IFunctionOverloadResolverImpl via IFunction. - -class DefaultExecutable final : public IExecutableFunctionImpl -{ -public: - explicit DefaultExecutable(std::shared_ptr function_) : function(std::move(function_)) {} - - String getName() const override { return function->getName(); } - -protected: - void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final - { - return function->executeImpl(block, arguments, result, input_rows_count); - } - void executeDryRun(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) final - { - return function->executeImplDryRun(block, arguments, result, input_rows_count); - } - bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); } - bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); } - bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return function->getArgumentsThatAreAlwaysConstant(); } - bool canBeExecutedOnDefaultArguments() const override { return function->canBeExecutedOnDefaultArguments(); } - -private: - std::shared_ptr function; -}; - -class DefaultFunction final : public IFunctionBaseImpl -{ -public: - DefaultFunction(std::shared_ptr function_, DataTypes arguments_, DataTypePtr return_type_) - : function(std::move(function_)), arguments(std::move(arguments_)), return_type(std::move(return_type_)) {} - - String getName() const override { return function->getName(); } - - const DataTypes & getArgumentTypes() const override { return arguments; } - const DataTypePtr & getReturnType() const override { return return_type; } - -#if USE_EMBEDDED_COMPILER - - bool isCompilable() const override { return function->isCompilable(arguments); } - - llvm::Value * compile(llvm::IRBuilderBase & builder, ValuePlaceholders values) const override { return function->compile(builder, arguments, std::move(values)); } - -#endif - - ExecutableFunctionImplPtr prepare(const Block & /*sample_block*/, const ColumnNumbers & /*arguments*/, size_t /*result*/) const override - { - return 
std::make_unique(function); - } - - bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); } - ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments_) const override - { - return function->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments_); - } - - bool isStateful() const override { return function->isStateful(); } - - bool isInjective(const Block & sample_block) override { return function->isInjective(sample_block); } - - bool isDeterministic() const override { return function->isDeterministic(); } - - bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } - - bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } - - using Monotonicity = IFunctionBase::Monotonicity; - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return function->getMonotonicityForRange(type, left, right); - } -private: - std::shared_ptr function; - DataTypes arguments; - DataTypePtr return_type; -}; - -class DefaultOverloadResolver : public IFunctionOverloadResolverImpl -{ -public: - explicit DefaultOverloadResolver(std::shared_ptr function_) : function(std::move(function_)) {} - - void checkNumberOfArgumentsIfVariadic(size_t number_of_arguments) const override - { - return function->checkNumberOfArgumentsIfVariadic(number_of_arguments); - } - - bool isDeterministic() const override { return function->isDeterministic(); } - bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); } - - String getName() const override { return function->getName(); } - bool isStateful() const override { return function->isStateful(); } - bool isVariadic() const override { return function->isVariadic(); } - size_t getNumberOfArguments() const override { return function->getNumberOfArguments(); } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return function->getArgumentsThatAreAlwaysConstant(); } - ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override - { - return function->getArgumentsThatDontImplyNullableReturnType(number_of_arguments); - } - - DataTypePtr getReturnType(const DataTypes & arguments) const override { return function->getReturnTypeImpl(arguments); } - DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments) const override { return function->getReturnTypeImpl(arguments); } - - bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); } - bool useDefaultImplementationForLowCardinalityColumns() const override { return function->useDefaultImplementationForLowCardinalityColumns(); } - bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); } - - FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override - { - DataTypes data_types(arguments.size()); - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - return std::make_unique(function, data_types, return_type); - } - - void getLambdaArgumentTypes(DataTypes & arguments) const override { function->getLambdaArgumentTypes(arguments); } - -private: - std::shared_ptr function; -}; - - -} diff --git a/dbms/src/Functions/DynamicTarget/Selector.h 
b/dbms/src/Functions/PerformanceAdaptors.h similarity index 56% rename from dbms/src/Functions/DynamicTarget/Selector.h rename to dbms/src/Functions/PerformanceAdaptors.h index a59022a6c28..492a4791170 100644 --- a/dbms/src/Functions/DynamicTarget/Selector.h +++ b/dbms/src/Functions/PerformanceAdaptors.h @@ -1,21 +1,20 @@ #pragma once -#include "Target.h" - +#include #include +#include + #include -namespace DB::DynamicTarget +/// This file contains adaptors which help to combine several implementations of a function. +/// Adaptors check that an implementation can be executed on the current platform and choose +/// the one that works fastest according to previous runs. + +namespace DB { -// TODO(dakovalkov): This is copied and pasted struct from LZ4_decompress_faster.h - -/** When decompressing uniform sequence of blocks (for example, blocks from one file), - * you can pass single PerformanceStatistics object to subsequent invocations of 'decompress' method. - * It will accumulate statistics and use it as a feedback to choose best specialization of algorithm at runtime. - * One PerformanceStatistics object cannot be used concurrently from different threads. - */ +// TODO(dakovalkov): This struct is copied from LZ4_decompress_faster.h with minor changes. struct PerformanceStatistics { struct Element @@ -105,71 +104,97 @@ struct PerformanceStatistics PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} }; -// template -// class PerformanceExecutor -// { -// public: -// using Executor = std::function; -// // Should register all executors before execute -// void registerExecutor(Executor executor) -// { -// executors.emplace_back(std::move(executor)); -// } - -// // The performance of the execution is time / weight. -// // Weight is usualy the -// void execute(int weight, Params... params) -// { -// if (executors_.empty()) { -// throw "There are no realizations for current Arch"; -// } -// int impl = 0; -// // TODO: choose implementation. -// executors_[impl](params...); -// } - -// private: -// std::vector executors; -// PerformanceStatistics statistics; -// }; - +/// Combines several IExecutableFunctionImpl into one. +/// All the implementations should be equivalent. +/// The implementation to execute is selected based on performance measured on previous runs. +/// DefaultFunction should be executable on every supported platform, while alternative implementations +/// may use extended instruction sets (AVX, NEON, etc.). +/// It's convenient to inherit your function from this adaptor and register all alternative implementations in the constructor. template -class FunctionDynamicAdaptor : public DefaultFunction +class ExecutableFunctionPerformanceAdaptor : public DefaultFunction { public: template - FunctionDynamicAdaptor(const Context & context_, Params ...params) - : DefaultFunction(params...) - , context(context_) + ExecutableFunctionPerformanceAdaptor(Params ...params) : DefaultFunction(params...) + { + statistics.emplace_back(); + } + + virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + auto id = statistics.select(); + Stopwatch watch; + if (id == 0) { + DefaultFunction::execute(block, arguments, result, input_rows_count); + } else { + impls[id - 1]->execute(block, arguments, result, input_rows_count); + } + watch.stop(); + // TODO(dakovalkov): Calculate something more informative. 
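+ // One hedged alternative for the weight (an assumption, not part of this
+ // change): sum IColumn::byteSize() over the arguments instead of row counts,
+ // so that wide columns don't skew the per-unit cost estimate:
+ //     rows_summary += block.getByPosition(i).column->byteSize();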
+ size_t rows_summary = 0; + for (auto i : arguments) { + rows_summary += block.getByPosition(i).column->size(); + } + if (rows_summary >= 1000) { + statistics.data[id].update(watch.elapsedSeconds(), rows_summary); + } + } + + // Register alternative implementation. + template + void registerImplementation(TargetArch arch, Params... params) { + if (arch == TargetArch::Default || IsArchSupported(arch)) { + impls.emplace_back(std::make_shared(params...)); + statistics.emplace_back(); + } + } + +private: + std::vector impls; // Alternative implementations. + PerformanceStatistics statistics; +}; + +// The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. +template +class FunctionPerformanceAdaptor : public DefaultFunction +{ +public: + template + FunctionPerformanceAdaptor(Params ...params) : DefaultFunction(params...) { statistics.emplace_back(); } virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - int id = statistics.select(); - // TODO(dakovalkov): measure time and change statistics. + auto id = statistics.select(); + Stopwatch watch; if (id == 0) { DefaultFunction::executeImpl(block, arguments, result, input_rows_count); } else { impls[id - 1]->executeImpl(block, arguments, result, input_rows_count); } + watch.stop(); + // TODO(dakovalkov): Calculate something more informative. + size_t rows_summary = 0; + for (auto i : arguments) { + rows_summary += block.getByPosition(i).column->size(); + } + if (rows_summary >= 1000) { + statistics.data[id].update(watch.elapsedSeconds(), rows_summary); + } } -protected: - /* - * Register implementation of the function. - */ - template - void registerImplementation(TargetArch arch = TargetArch::Default) { + // Register alternative implementation. + template + void registerImplementation(TargetArch arch, Params... params) { if (arch == TargetArch::Default || IsArchSupported(arch)) { - impls.emplace_back(Function::create(context)); + impls.emplace_back(std::make_shared(params...)); statistics.emplace_back(); } } private: - const Context & context; std::vector impls; // Alternative implementations. 
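+ // Note: statistics slot 0 tracks DefaultFunction (the base class) itself;
+ // impls[i] is tracked by slot i + 1, hence impls[id - 1] in executeImpl() above.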
PerformanceStatistics statistics; }; @@ -179,7 +204,7 @@ private: class Function : public FunctionDynamicAdaptor \ { \ public: \ - Function(const Context & context) : FunctionDynamicAdaptor(context) \ + Function(const Context &) : FunctionDynamicAdaptor() \ { \ registerImplementation(TargetArch::SSE4); \ registerImplementation(TargetArch::AVX); \ @@ -192,4 +217,4 @@ public: \ } \ } -} // namespace DB::DynamicTarget +} // namespace DB diff --git a/dbms/src/Functions/DynamicTarget/Target.cpp b/dbms/src/Functions/TargetSpecific.cpp similarity index 59% rename from dbms/src/Functions/DynamicTarget/Target.cpp rename to dbms/src/Functions/TargetSpecific.cpp index 54c41a1786c..837a6796bf2 100644 --- a/dbms/src/Functions/DynamicTarget/Target.cpp +++ b/dbms/src/Functions/TargetSpecific.cpp @@ -1,6 +1,6 @@ -#include "Target.h" +#include -namespace DB::DynamicTarget +namespace DB { bool IsArchSupported(TargetArch arch) @@ -9,4 +9,4 @@ bool IsArchSupported(TargetArch arch) return arch != TargetArch::AVX512; } -} // namespace DB::DynamicTarget +} // namespace DB diff --git a/dbms/src/Functions/DynamicTarget/Target.h b/dbms/src/Functions/TargetSpecific.h similarity index 56% rename from dbms/src/Functions/DynamicTarget/Target.h rename to dbms/src/Functions/TargetSpecific.h index e1772a11857..ddb9fbb74f8 100644 --- a/dbms/src/Functions/DynamicTarget/Target.h +++ b/dbms/src/Functions/TargetSpecific.h @@ -1,6 +1,60 @@ #pragma once -namespace DB::DynamicTarget +/// This file contains macros and helpers for writing platform-dependent code. +/// +/// The DECLARE_<Arch>_SPECIFIC_CODE macros wrap the code inside them into the namespace TargetSpecific::<Arch> and enable +/// arch-specific compile options. +/// Thus, functions defined inside these macros may only be called after checking the platform at runtime (see IsArchSupported() below). +/// For symmetry there is a macro DECLARE_DEFAULT_CODE, which wraps code into the namespace TargetSpecific::Default +/// but doesn't specify any additional compile options. +/// +/// Example: +/// +/// DECLARE_DEFAULT_CODE ( +/// int func() { +/// return 1; +/// } +/// ) // DECLARE_DEFAULT_CODE +/// +/// DECLARE_AVX2_SPECIFIC_CODE ( +/// int func() { +/// return 2; +/// } +/// ) // DECLARE_AVX2_SPECIFIC_CODE +/// +/// int func() { +/// if (IsArchSupported(TargetArch::AVX2)) +/// return TargetSpecific::AVX2::func(); +/// return TargetSpecific::Default::func(); +/// } +/// +/// Sometimes code may benefit from being compiled with different options. +/// For these purposes use the DECLARE_MULTITARGET_CODE macro. It creates several copies of the code and +/// compiles each with different options. These copies are available via the TargetSpecific namespaces described above. +/// +/// Inside every TargetSpecific namespace there is a constexpr variable BuildArch, which holds the architecture the enclosing copy of the code is compiled for. +/// +/// Example: +/// +/// DECLARE_MULTITARGET_CODE( +/// int func(int size, ...) { +/// int iteration_size = 1; +/// if constexpr (BuildArch == TargetArch::SSE4) +/// iteration_size = 2; +/// else if constexpr (BuildArch == TargetArch::AVX || BuildArch == TargetArch::AVX2) +/// iteration_size = 4; +/// else if constexpr (BuildArch == TargetArch::AVX512) +/// iteration_size = 8; +/// for (int i = 0; i < size; i += iteration_size) +/// ... +/// } +/// ) // DECLARE_MULTITARGET_CODE +/// +/// // All 5 versions of func are available here. Use runtime detection to choose one. +/// +/// If you want to write IFunction or IExecutableFunctionImpl with runtime dispatching, see PerformanceAdaptors.h. 
+ +namespace DB { enum class TargetArch : int { @@ -11,11 +65,14 @@ enum class TargetArch : int { AVX512, }; +// Runtime detection. +bool IsArchSupported(TargetArch arch); + #if defined(__GNUC__) // TODO: There are lots of different AVX512 :( # define BEGIN_AVX512_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native\")") @@ -40,19 +97,19 @@ enum class TargetArch : int { # define END_TARGET_SPECIFIC_CODE \ _Pragma("clang attribute pop") #else -# error "Only CLANG and GCC compilers are supported" +# error "Only CLANG and GCC compilers are supported for vectorized code generation" #endif #define DECLARE_DEFAULT_CODE(...) \ namespace TargetSpecific::Default { \ - using namespace DB::DynamicTarget::TargetSpecific::Default; \ + using namespace DB::TargetSpecific::Default; \ __VA_ARGS__ \ } #define DECLARE_SSE4_SPECIFIC_CODE(...) \ BEGIN_SSE4_SPECIFIC_CODE \ namespace TargetSpecific::SSE4 { \ - using namespace DB::DynamicTarget::TargetSpecific::SSE4; \ + using namespace DB::TargetSpecific::SSE4; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -60,7 +117,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX_SPECIFIC_CODE(...) \ BEGIN_AVX_SPECIFIC_CODE \ namespace TargetSpecific::AVX { \ - using namespace DB::DynamicTarget::TargetSpecific::AVX; \ + using namespace DB::TargetSpecific::AVX; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -68,7 +125,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX2_SPECIFIC_CODE(...) \ BEGIN_AVX2_SPECIFIC_CODE \ namespace TargetSpecific::AVX2 { \ - using namespace DB::DynamicTarget::TargetSpecific::AVX2; \ + using namespace DB::TargetSpecific::AVX2; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -76,7 +133,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512_SPECIFIC_CODE(...) 
\ BEGIN_AVX512_SPECIFIC_CODE \ namespace TargetSpecific::AVX512 { \ - using namespace DB::DynamicTarget::TargetSpecific::AVX512; \ + using namespace DB::TargetSpecific::AVX512; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -108,6 +165,4 @@ DECLARE_AVX512_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512; ) // DECLARE_AVX512_SPECIFIC_CODE -bool IsArchSupported(TargetArch arch); - -} // namespace DB::DynamicTarget +} // namespace DB diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 85b1b717d47..069a63aa9e1 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -91,6 +91,3 @@ target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_url) add_subdirectory(array) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_array) - -add_subdirectory(DynamicTarget) -target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_dynamic_target) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index e883dc3267a..1306f3572f7 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -6,8 +6,8 @@ #include #include -#include -#include +#include +#include namespace DB { @@ -36,10 +36,6 @@ class FunctionStartsEndsWith : public IFunction { public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) - { - return std::make_shared(); - } String getName() const override { @@ -144,16 +140,16 @@ private: template class FunctionStartsEndsWith - : public DynamicTarget::FunctionDynamicAdaptor> + : public FunctionPerformanceAdaptor> { public: - FunctionStartsEndsWith(const Context & context_) - : DynamicTarget::FunctionDynamicAdaptor>(context_) + FunctionStartsEndsWith(const Context &) + : FunctionPerformanceAdaptor>() { - registerImplementation>(DynamicTarget::TargetArch::SSE4); - registerImplementation>(DynamicTarget::TargetArch::AVX); - registerImplementation>(DynamicTarget::TargetArch::AVX2); - registerImplementation>(DynamicTarget::TargetArch::AVX512); + registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512); } static FunctionPtr create(const Context & context) { @@ -161,7 +157,4 @@ public: } }; -// template -// using FunctionStartsEndsWith = TargetSpecific::Default::FunctionStartsEndsWith; - } \ No newline at end of file From 9d875d8adb796fc6b42ae69ea05541dce7360d75 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 6 Apr 2020 09:31:26 +0200 Subject: [PATCH 0213/2229] Arch detection --- dbms/src/Functions/TargetSpecific.cpp | 29 +++++++++++++++++++++++++-- dbms/src/Functions/TargetSpecific.h | 10 ++++----- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/dbms/src/Functions/TargetSpecific.cpp b/dbms/src/Functions/TargetSpecific.cpp index 837a6796bf2..18040111cca 100644 --- a/dbms/src/Functions/TargetSpecific.cpp +++ b/dbms/src/Functions/TargetSpecific.cpp @@ -1,12 +1,37 @@ #include +#if defined(__GNUC__) || defined(__clang__) +# include +#else +# error "Only CLANG and GCC compilers are supported for dynamic dispatch" +#endif + namespace DB { +int GetSupportedArches() { + unsigned int eax, ebx, ecx, edx; + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { + return 0; + } + int res = 0; + if (ecx & bit_SSE4_2) + res |= static_cast(TargetArch::SSE4); + if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX)) { + // TODO(dakovalkov): check XGETBV. 
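+ // A hedged sketch of the XGETBV check this TODO refers to (hypothetical
+ // helper, not part of this patch): besides CPUID, the OS must have enabled
+ // XMM/YMM state saving, which is reported in register XCR0:
+ //     UInt32 xcr0_lo, xcr0_hi;
+ //     __asm__("xgetbv" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
+ //     bool os_avx_support = (xcr0_lo & 0x6) == 0x6; // bits 1 (XMM) and 2 (YMM)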
+ res |= static_cast(TargetArch::AVX); + if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { + res |= static_cast(TargetArch::AVX2); + } + // TODO(dakovalkov): check AVX512 support. + } + return res; +} + bool IsArchSupported(TargetArch arch) { - // TODO(dakovalkov): use cpuid - return arch != TargetArch::AVX512; + static int arches = GetSupportedArches(); + return arch == TargetArch::Default || (arches & static_cast(arch)); } } // namespace DB diff --git a/dbms/src/Functions/TargetSpecific.h b/dbms/src/Functions/TargetSpecific.h index ddb9fbb74f8..c5cd78fe03c 100644 --- a/dbms/src/Functions/TargetSpecific.h +++ b/dbms/src/Functions/TargetSpecific.h @@ -58,11 +58,11 @@ namespace DB { enum class TargetArch : int { - Default, // Without any additional compiler options. - SSE4, - AVX, - AVX2, - AVX512, + Default = 0, // Without any additional compiler options. + SSE4 = (1 << 0), + AVX = (1 << 1), + AVX2 = (1 << 2), + AVX512 = (1 << 3), }; // Runtime detection. From 9026187c2c17668c3a932271cbcd782e07769d83 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 6 Apr 2020 09:44:54 +0200 Subject: [PATCH 0214/2229] Cosmetics --- src/Functions/FunctionStartsEndsWith.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 1306f3572f7..730f0b9efbb 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -1,14 +1,13 @@ -#include #include #include #include +#include +#include +#include #include #include #include -#include -#include - namespace DB { @@ -157,4 +156,4 @@ public: } }; -} \ No newline at end of file +} From f07f9188ddd9406aa5a1d95596f95c3bb8318fe8 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 13 Apr 2020 11:25:53 +0200 Subject: [PATCH 0215/2229] Fix unsuccessful rebase --- {dbms/src => src}/Functions/PerformanceAdaptors.h | 2 +- {dbms/src => src}/Functions/TargetSpecific.cpp | 0 {dbms/src => src}/Functions/TargetSpecific.h | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename {dbms/src => src}/Functions/PerformanceAdaptors.h (99%) rename {dbms/src => src}/Functions/TargetSpecific.cpp (100%) rename {dbms/src => src}/Functions/TargetSpecific.h (100%) diff --git a/dbms/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h similarity index 99% rename from dbms/src/Functions/PerformanceAdaptors.h rename to src/Functions/PerformanceAdaptors.h index 492a4791170..12f4b84dab9 100644 --- a/dbms/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -215,6 +215,6 @@ public: \ { \ return std::make_shared(context); \ } \ -} +}; } // namespace DB diff --git a/dbms/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp similarity index 100% rename from dbms/src/Functions/TargetSpecific.cpp rename to src/Functions/TargetSpecific.cpp diff --git a/dbms/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h similarity index 100% rename from dbms/src/Functions/TargetSpecific.h rename to src/Functions/TargetSpecific.h From acbd3b3a7088d1665063024f16ab73f5c44ffa9b Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 13 Apr 2020 11:26:56 +0200 Subject: [PATCH 0216/2229] Compile rand function with AVX --- src/Functions/FunctionsRandom.cpp | 13 ++++--------- src/Functions/FunctionsRandom.h | 26 +++++++++++++++++++++++--- src/Functions/generateUUIDv4.cpp | 3 ++- src/Functions/randConstant.cpp | 3 ++- tests/performance/rand.xml | 23 +++++++++++++++++++++++ 5 
files changed, 54 insertions(+), 14 deletions(-) create mode 100644 tests/performance/rand.xml diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 19b2f08cdba..7506b118d5f 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -8,21 +8,15 @@ namespace DB { +DECLARE_MULTITARGET_CODE( + namespace { - /// NOTE Probably - /// http://www.pcg-random.org/ - /// or http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/ - /// or http://docs.yeppp.info/c/group__yep_random___w_e_l_l1024a.html - /// could go better. - struct LinearCongruentialGenerator { - /// Constants from `man lrand48_r`. static constexpr UInt64 a = 0x5DEECE66D; static constexpr UInt64 c = 0xB; - /// And this is from `head -c8 /dev/urandom | xxd -p` UInt64 current = 0x09826f4a081cee35ULL; void seed(UInt64 value) @@ -63,7 +57,8 @@ void RandImpl::execute(char * output, size_t size) unalignedStore(output + 12, generator3.next()); } - /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } +) //DECLARE_MULTITARGET_CODE + } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 1ac6d24a356..f2fab585a47 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -5,7 +5,10 @@ #include #include - +#include +#include +// #include "TargetSpecific.h" +// #include "PerformanceAdaptors.h" namespace DB { @@ -34,9 +37,10 @@ namespace ErrorCodes * This means that the timer must be of sufficient resolution to give different values to each block. */ +DECLARE_MULTITARGET_CODE( + struct RandImpl { - /// Fill memory with random data. The memory region must be 15-bytes padded. static void execute(char * output, size_t size); }; @@ -46,7 +50,6 @@ class FunctionRandom : public IFunction { public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { @@ -83,4 +86,21 @@ public: } }; +) // DECLARE_MULTITARGET_CODE + +template +class FunctionRandom : public FunctionPerformanceAdaptor> +{ +public: + FunctionRandom() { + registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512); + } + static FunctionPtr create(const Context &) { + return std::make_shared>(); + } +}; + } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 39013519d2f..d543226ba5c 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -32,7 +32,8 @@ public: size_t size = input_rows_count; vec_to.resize(size); - RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); + // TODO(dakovalkov): rewrite this workaround + TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); for (UInt128 & uuid: vec_to) { diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index bad4b199ee2..3eba5abf10d 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -99,7 +99,8 @@ public: argument_types.emplace_back(arguments.back().type); typename ColumnVector::Container vec_to(1); - RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); + // TODO(dakovalkov): Rewrite this workaround + TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); ToType value = vec_to[0]; return std::make_unique>(value, 
argument_types, return_type); diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml new file mode 100644 index 00000000000..6f73c2b1f73 --- /dev/null +++ b/tests/performance/rand.xml @@ -0,0 +1,23 @@ + + + + 20000 + + + 40000 + + + + + + + table + + numbers(100000000) + + + + + SELECT rand() FROM {table} + SELECT rand64() FROM {table} + From 9674482a6c8f433b120a43c0ce52c5f49dfd8768 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 14 Apr 2020 17:46:53 +0200 Subject: [PATCH 0217/2229] clang fixes --- .gitmodules | 3 +++ contrib/SIMDxorshift | 1 + src/Functions/TargetSpecific.cpp | 2 +- src/Functions/TargetSpecific.h | 26 +++++++++++++------------- 4 files changed, 18 insertions(+), 14 deletions(-) create mode 160000 contrib/SIMDxorshift diff --git a/.gitmodules b/.gitmodules index 7f5d1307a6e..c14fef40457 100644 --- a/.gitmodules +++ b/.gitmodules @@ -160,3 +160,6 @@ [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git +[submodule "contrib/SIMDxorshift"] + path = contrib/SIMDxorshift + url = https://github.com/lemire/SIMDxorshift diff --git a/contrib/SIMDxorshift b/contrib/SIMDxorshift new file mode 160000 index 00000000000..270eb8936c9 --- /dev/null +++ b/contrib/SIMDxorshift @@ -0,0 +1 @@ +Subproject commit 270eb8936c9b4bd038c39f1783a8eba6b8f15b09 diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index 18040111cca..f22a586c333 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -1,6 +1,6 @@ #include -#if defined(__GNUC__) || defined(__clang__) +#if defined(__GNUC__) # include #else # error "Only CLANG and GCC compilers are supported for dynamic dispatch" diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index c5cd78fe03c..e5818632843 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -68,7 +68,19 @@ enum class TargetArch : int { // Runtime detection. 
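// The probe runs once per process: IsArchSupported() caches the result of
// GetSupportedArches() in a function-local static, so per-call dispatch costs
// only a bit test.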
bool IsArchSupported(TargetArch arch); -#if defined(__GNUC__) +#if defined(__clang__) +// TODO: There are lots of different AVX512 :( +# define BEGIN_AVX512_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX2_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx\"))))") +# define BEGIN_SSE4_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx\"))))") +# define END_TARGET_SPECIFIC_CODE \ + _Pragma("clang attribute pop") +#elif defined(__GNUC__) // TODO: There are lots of different AVX512 :( # define BEGIN_AVX512_SPECIFIC_CODE \ _Pragma("GCC push_options") \ @@ -84,18 +96,6 @@ bool IsArchSupported(TargetArch arch); _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") # define END_TARGET_SPECIFIC_CODE \ _Pragma("GCC pop_options") -#elif defined(__clang__) -// TODO: There are lots of different AVX512 :( -# define BEGIN_AVX512_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") -# define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") -# define BEGIN_AVX_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx\"))))") -# define BEGIN_SSE4_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx\"))))") -# define END_TARGET_SPECIFIC_CODE \ - _Pragma("clang attribute pop") #else # error "Only CLANG and GCC compilers are supported for vectorized code generation" #endif From 808bb14c5cf1b22234352d5dc9992df16c49d30c Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 14 Apr 2020 17:48:33 +0200 Subject: [PATCH 0218/2229] Add xorshift-rand from lemire for comparing performance --- contrib/CMakeLists.txt | 1 + contrib/SIMDxorshift-cmake/CMakeLists.txt | 12 ++++ src/Functions/CMakeLists.txt | 4 ++ src/Functions/FunctionsRandom.cpp | 1 - src/Functions/FunctionsRandom.h | 1 - src/Functions/SIMDxorshift.cpp | 40 +++++++++++ src/Functions/SIMDxorshift.h | 84 +++++++++++++++++++++++ src/Functions/registerFunctionsRandom.cpp | 2 + tests/performance/rand.xml | 1 + 9 files changed, 144 insertions(+), 2 deletions(-) create mode 100644 contrib/SIMDxorshift-cmake/CMakeLists.txt create mode 100644 src/Functions/SIMDxorshift.cpp create mode 100644 src/Functions/SIMDxorshift.h diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index d122188ad0b..344a06f29b7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -27,6 +27,7 @@ add_subdirectory (murmurhash) add_subdirectory (replxx-cmake) add_subdirectory (ryu-cmake) add_subdirectory (unixodbc-cmake) +add_subdirectory (SIMDxorshift-cmake) add_subdirectory (poco-cmake) diff --git a/contrib/SIMDxorshift-cmake/CMakeLists.txt b/contrib/SIMDxorshift-cmake/CMakeLists.txt new file mode 100644 index 00000000000..573173ff1b4 --- /dev/null +++ b/contrib/SIMDxorshift-cmake/CMakeLists.txt @@ -0,0 +1,12 @@ +set(SIMDXORSHIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/SIMDxorshift/include") +set(SIMDXORSHIFT_SRC_DIR "${SIMDXORSHIFT_INCLUDE_DIR}/../src") +set(SIMDXORSHIFT_SRC + 
${SIMDXORSHIFT_SRC_DIR}/xorshift128plus.c + ${SIMDXORSHIFT_SRC_DIR}/simdxorshift128plus.c +) + +set(SIMDXORSHIFT_LIBRARY "simdxorshift") + +add_library(${SIMDXORSHIFT_LIBRARY} ${SIMDXORSHIFT_SRC}) +target_include_directories(${SIMDXORSHIFT_LIBRARY} PUBLIC "${SIMDXORSHIFT_INCLUDE_DIR}") +target_compile_options(${SIMDXORSHIFT_LIBRARY} PRIVATE -mavx2) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 069a63aa9e1..451dfe97a03 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -83,6 +83,10 @@ if(USE_RAPIDJSON) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR}) endif() + +target_link_libraries(clickhouse_functions PUBLIC "simdxorshift") +message(STATUS "Using SIMDXORSHIFT ${SIMDXORSHIFT_LIBRARY}") + add_subdirectory(GatherUtils) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 7506b118d5f..1f91c54c598 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -4,7 +4,6 @@ #include #include - namespace DB { diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index f2fab585a47..c6bcd9cb1ae 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -44,7 +44,6 @@ struct RandImpl static void execute(char * output, size_t size); }; - template class FunctionRandom : public IFunction { diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp new file mode 100644 index 00000000000..6cad047f6da --- /dev/null +++ b/src/Functions/SIMDxorshift.cpp @@ -0,0 +1,40 @@ +#include +#include +#include +#include +#include + +#include + +#include + +extern "C" { +#include +} + +namespace DB +{ + +BEGIN_AVX_SPECIFIC_CODE + +void RandImplXorshift::execute(char * output, size_t size) +{ + avx_xorshift128plus_key_t mykey; + avx_xorshift128plus_init(324, 4444, &mykey); + // TODO(set last 16 bytes) + for (auto * end = output + size - 16; output < end; output += 32) { + unalignedStore<__m256i>(output, avx_xorshift128plus(&mykey)); + } +} + +struct NameRandXorshift { static constexpr auto name = "randxorshift"; }; +using FunctionRandXorshift = FunctionRandomXorshift; + +void registerFunctionRandXorshift(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +END_TARGET_SPECIFIC_CODE + +} diff --git a/src/Functions/SIMDxorshift.h b/src/Functions/SIMDxorshift.h new file mode 100644 index 00000000000..e46943f695a --- /dev/null +++ b/src/Functions/SIMDxorshift.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +struct RandImplXorshift +{ + static void execute(char * output, size_t size); +}; + +template +class FunctionRandomXorshift : public IFunction +{ +public: + static constexpr auto name = Name::name; + + static FunctionPtr create(const Context &) { + return std::make_shared>(); + } + + String getName() const override + { + return name; + } + + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if 
(arguments.size() > 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 0 or 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + return std::make_shared>(); + } + + void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override + { + auto col_to = ColumnVector::create(); + typename ColumnVector::Container & vec_to = col_to->getData(); + + size_t size = input_rows_count; + vec_to.resize(size); + RandImplXorshift::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(ToType)); + + block.getByPosition(result).column = std::move(col_to); + } +}; + +// template +// class FunctionRandom : public FunctionPerformanceAdaptor> +// { +// public: +// FunctionRandom() { +// registerImplementation>(TargetArch::SSE4); +// registerImplementation>(TargetArch::AVX); +// registerImplementation>(TargetArch::AVX2); +// registerImplementation>(TargetArch::AVX512); +// } +// static FunctionPtr create(const Context &) { +// return std::make_shared>(); +// } +// }; + +} diff --git a/src/Functions/registerFunctionsRandom.cpp b/src/Functions/registerFunctionsRandom.cpp index 3638474c4fe..422ec91f025 100644 --- a/src/Functions/registerFunctionsRandom.cpp +++ b/src/Functions/registerFunctionsRandom.cpp @@ -10,6 +10,7 @@ void registerFunctionRandomPrintableASCII(FunctionFactory & factory); void registerFunctionRandomString(FunctionFactory & factory); void registerFunctionRandomFixedString(FunctionFactory & factory); void registerFunctionRandomStringUTF8(FunctionFactory & factory); +void registerFunctionRandXorshift(FunctionFactory & factory); void registerFunctionsRandom(FunctionFactory & factory) { @@ -21,6 +22,7 @@ void registerFunctionsRandom(FunctionFactory & factory) registerFunctionRandomString(factory); registerFunctionRandomFixedString(factory); registerFunctionRandomStringUTF8(factory); + registerFunctionRandXorshift(factory); } } diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml index 6f73c2b1f73..32ec38ddb4e 100644 --- a/tests/performance/rand.xml +++ b/tests/performance/rand.xml @@ -20,4 +20,5 @@ SELECT rand() FROM {table} SELECT rand64() FROM {table} + SELECT randxorshift() FROM {table} From 0afa67208218eff5c4004b2d9084eb3b4266bcca Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 15 May 2020 10:40:27 +0200 Subject: [PATCH 0219/2229] make randxorshift great again --- src/Functions/FunctionsRandom.h | 21 ++++----- src/Functions/SIMDxorshift.cpp | 76 ++++++++++++++++++++++++++++----- src/Functions/SIMDxorshift.h | 64 +++++---------------------- 3 files changed, 88 insertions(+), 73 deletions(-) diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index c6bcd9cb1ae..990c3a5f466 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -44,8 +44,10 @@ struct RandImpl static void execute(char * output, size_t size); }; -template -class FunctionRandom : public IFunction +) // DECLARE_MULTITARGET_CODE + +template +class FunctionRandomImpl : public IFunction { public: static constexpr auto name = Name::name; @@ -85,21 +87,20 @@ public: } }; -) // DECLARE_MULTITARGET_CODE - template -class FunctionRandom : public FunctionPerformanceAdaptor> +class FunctionRandom : public FunctionPerformanceAdaptor> { public: FunctionRandom() { - registerImplementation>(TargetArch::SSE4); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - 
registerImplementation>(TargetArch::AVX512); + registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512); } + static FunctionPtr create(const Context &) { return std::make_shared>(); } }; -} +} // namespace DB diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp index 6cad047f6da..739077b5480 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/SIMDxorshift.cpp @@ -8,33 +8,89 @@ #include -extern "C" { +extern "C" +{ +#include #include } namespace DB { -BEGIN_AVX_SPECIFIC_CODE +DECLARE_DEFAULT_CODE( -void RandImplXorshift::execute(char * output, size_t size) +void RandXorshiftImpl::execute(char * output, size_t size) { - avx_xorshift128plus_key_t mykey; - avx_xorshift128plus_init(324, 4444, &mykey); - // TODO(set last 16 bytes) - for (auto * end = output + size - 16; output < end; output += 32) { - unalignedStore<__m256i>(output, avx_xorshift128plus(&mykey)); + char * end = output + size; + + xorshift128plus_key_s mykey; + + xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), + 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), + &mykey); + + const int bytes_per_write = 8; + const intptr_t mask = bytes_per_write - 1; + + // Process head to make output aligned. + unalignedStore(output, xorshift128plus(&mykey)); + output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); + + while (end - output > 0) { + *reinterpret_cast(output) = xorshift128plus(&mykey); + output += bytes_per_write; } } +) // DECLARE_DEFAULT_CODE + +DECLARE_AVX2_SPECIFIC_CODE( + +void RandXorshiftImpl::execute(char * output, size_t size) +{ + char * end = output + size; + + avx_xorshift128plus_key_t mykey; + avx_xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), + 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), + &mykey); + + const int safe_overwrite = 16; // How many bytes we can write behind the end. + const int bytes_per_write = 32; + const intptr_t mask = bytes_per_write - 1; + + if (size + safe_overwrite <= bytes_per_write) { + _mm_storeu_si128(reinterpret_cast<__m128i*>(output), + _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); + return; + } + + // Process head to make output aligned. + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); + output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); + + while ((end - output) + safe_overwrite >= bytes_per_write) { + _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); + output += bytes_per_write; + } + + // Process tail. 
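+    // (Safety note, stated as an assumption: this 16-byte store may run past
+    // `end`. It is only valid because callers hand in padded buffers, e.g.
+    // PaddedPODArray guarantees spare writable bytes after the end.)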
+ if ((end - output) > 0) { + _mm_store_si128(reinterpret_cast<__m128i*>(output), + _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); + } +} + +) // DECLARE_AVX2_SPECIFIC_CODE + struct NameRandXorshift { static constexpr auto name = "randxorshift"; }; using FunctionRandXorshift = FunctionRandomXorshift; +struct NameRandXorshift64 { static constexpr auto name = "randxorshift64"; }; +using FunctionRandXorshift64 = FunctionRandomXorshift; void registerFunctionRandXorshift(FunctionFactory & factory) { factory.registerFunction(); } -END_TARGET_SPECIFIC_CODE - } diff --git a/src/Functions/SIMDxorshift.h b/src/Functions/SIMDxorshift.h index e46943f695a..46732c4d876 100644 --- a/src/Functions/SIMDxorshift.h +++ b/src/Functions/SIMDxorshift.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -16,69 +17,26 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct RandImplXorshift +DECLARE_MULTITARGET_CODE( + +struct RandXorshiftImpl { static void execute(char * output, size_t size); }; +) // DECLARE_MULTITARGET_CODE + template -class FunctionRandomXorshift : public IFunction +class FunctionRandomXorshift : public FunctionPerformanceAdaptor> { public: - static constexpr auto name = Name::name; + FunctionRandomXorshift() { + registerImplementation>(TargetArch::AVX2); + } static FunctionPtr create(const Context &) { return std::make_shared>(); } - - String getName() const override - { - return name; - } - - bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override { return false; } - bool useDefaultImplementationForNulls() const override { return false; } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (arguments.size() > 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 0 or 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - return std::make_shared>(); - } - - void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override - { - auto col_to = ColumnVector::create(); - typename ColumnVector::Container & vec_to = col_to->getData(); - - size_t size = input_rows_count; - vec_to.resize(size); - RandImplXorshift::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(ToType)); - - block.getByPosition(result).column = std::move(col_to); - } }; -// template -// class FunctionRandom : public FunctionPerformanceAdaptor> -// { -// public: -// FunctionRandom() { -// registerImplementation>(TargetArch::SSE4); -// registerImplementation>(TargetArch::AVX); -// registerImplementation>(TargetArch::AVX2); -// registerImplementation>(TargetArch::AVX512); -// } -// static FunctionPtr create(const Context &) { -// return std::make_shared>(); -// } -// }; - -} +} // namespace DB From 089b3ca0085b24716c422ff16ed8342ae340d935 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 15 May 2020 12:10:34 +0200 Subject: [PATCH 0220/2229] Check target properly --- src/Functions/FunctionStartsEndsWith.h | 9 +++++---- src/Functions/FunctionsRandom.h | 2 +- src/Functions/PerformanceAdaptors.h | 25 +++++++---------------- src/Functions/TargetSpecific.cpp | 27 ++++++++++++++++++++++--- src/Functions/TargetSpecific.h | 28 ++++++++++++++------------ 5 files changed, 52 insertions(+), 39 deletions(-) diff --git 
a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 730f0b9efbb..29c81796d0e 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -145,11 +145,12 @@ public: FunctionStartsEndsWith(const Context &) : FunctionPerformanceAdaptor>() { - registerImplementation>(TargetArch::SSE4); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512); + registerImplementation> (TargetArch::SSE4); + registerImplementation> (TargetArch::AVX); + registerImplementation> (TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512f); } + static FunctionPtr create(const Context & context) { return std::make_shared>(context); diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 990c3a5f466..98d04d61ad1 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -95,7 +95,7 @@ public: registerImplementation>(TargetArch::SSE4); registerImplementation>(TargetArch::AVX); registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512); + registerImplementation>(TargetArch::AVX512f); } static FunctionPtr create(const Context &) { diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 12f4b84dab9..a97fdbce0b0 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -104,6 +104,11 @@ struct PerformanceStatistics PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} }; +struct PerformanceAdaptorOptions +{ + +}; + /// Combine several IExecutableFunctionImpl into one. /// All the implementations should be equivalent. /// Implementation to execute will be selected based on performance on previous runs. @@ -152,6 +157,7 @@ public: private: std::vector impls; // Alternative implementations. PerformanceStatistics statistics; + PerformanceAdaptorOptions options; }; // The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. @@ -197,24 +203,7 @@ public: private: std::vector impls; // Alternative implementations. PerformanceStatistics statistics; -}; - -// TODO(dakovalkov): May be it's better to delete this macros and write every function explicitly for better readability. 
-#define DECLARE_STANDART_TARGET_ADAPTOR(Function) \ -class Function : public FunctionDynamicAdaptor \ -{ \ -public: \ - Function(const Context &) : FunctionDynamicAdaptor() \ - { \ - registerImplementation(TargetArch::SSE4); \ - registerImplementation(TargetArch::AVX); \ - registerImplementation(TargetArch::AVX2); \ - registerImplementation(TargetArch::AVX512); \ - } \ - static FunctionPtr create(const Context & context) \ - { \ - return std::make_shared(context); \ - } \ + PerformanceAdaptorOptions options; }; } // namespace DB diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index f22a586c333..aa017823e54 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -2,6 +2,7 @@ #if defined(__GNUC__) # include +# include #else # error "Only CLANG and GCC compilers are supported for dynamic dispatch" #endif @@ -9,6 +10,11 @@ namespace DB { +__attribute__ ((target("xsave"))) +uint64_t xgetbv(uint32_t ecx) { + return _xgetbv(ecx); +} + int GetSupportedArches() { unsigned int eax, ebx, ecx, edx; if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { @@ -17,13 +23,15 @@ int GetSupportedArches() { int res = 0; if (ecx & bit_SSE4_2) res |= static_cast(TargetArch::SSE4); - if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX)) { - // TODO(dakovalkov): check XGETBV. + // (xgetbv(0) & 0x6) == 0x6 checks that XMM state and YMM state are enabled. + if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX) && (xgetbv(0) & 0x6) == 0x6) { res |= static_cast(TargetArch::AVX); if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { res |= static_cast(TargetArch::AVX2); + if (ebx & bit_AVX512F) { + res |= static_cast(TargetArch::AVX512f); + } } - // TODO(dakovalkov): check AVX512 support. } return res; } @@ -34,4 +42,17 @@ bool IsArchSupported(TargetArch arch) return arch == TargetArch::Default || (arches & static_cast(arch)); } +String ToString(TargetArch arch) +{ + switch (arch) { + case TargetArch::Default: return "default"; + case TargetArch::SSE4: return "sse4"; + case TargetArch::AVX: return "avx"; + case TargetArch::AVX2: return "avx2"; + case TargetArch::AVX512f: return "avx512f"; + } + + __builtin_unreachable(); +} + } // namespace DB diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index e5818632843..accb1dd7fab 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -1,5 +1,7 @@ #pragma once +#include + /// This file contains macros and helpers for writing platform-dependent code. /// /// Macroses DECLARE__SPECIFIC_CODE will wrap code inside them into the namespace TargetSpecific:: and enable @@ -62,16 +64,17 @@ enum class TargetArch : int { SSE4 = (1 << 0), AVX = (1 << 1), AVX2 = (1 << 2), - AVX512 = (1 << 3), + AVX512f = (1 << 3), }; // Runtime detection. 
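// GetSupportedArches() (defined in TargetSpecific.cpp) ORs the flags above
// into a single bitmask, e.g. an AVX2-capable host yields SSE4 | AVX | AVX2;
// IsArchSupported() caches that mask in a static and tests one bit of it.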
bool IsArchSupported(TargetArch arch); +String ToString(TargetArch arch); + #if defined(__clang__) -// TODO: There are lots of different AVX512 :( -# define BEGIN_AVX512_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX512f_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f\"))))") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") # define BEGIN_AVX_SPECIFIC_CODE \ @@ -81,8 +84,7 @@ bool IsArchSupported(TargetArch arch); # define END_TARGET_SPECIFIC_CODE \ _Pragma("clang attribute pop") #elif defined(__GNUC__) -// TODO: There are lots of different AVX512 :( -# define BEGIN_AVX512_SPECIFIC_CODE \ +# define BEGIN_AVX512f_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ @@ -130,10 +132,10 @@ namespace TargetSpecific::AVX2 { \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512_SPECIFIC_CODE(...) \ -BEGIN_AVX512_SPECIFIC_CODE \ -namespace TargetSpecific::AVX512 { \ - using namespace DB::TargetSpecific::AVX512; \ +#define DECLARE_AVX512f_SPECIFIC_CODE(...) \ +BEGIN_AVX512f_SPECIFIC_CODE \ +namespace TargetSpecific::AVX512f { \ + using namespace DB::TargetSpecific::AVX512f; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -143,7 +145,7 @@ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512_SPECIFIC_CODE(__VA_ARGS__) +DECLARE_AVX512f_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; @@ -161,8 +163,8 @@ DECLARE_AVX2_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX2; ) // DECLARE_AVX2_SPECIFIC_CODE -DECLARE_AVX512_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512; +DECLARE_AVX512f_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX512f; ) // DECLARE_AVX512_SPECIFIC_CODE } // namespace DB From 43f5ca868a14f49d0382d4954a247f47c0f919e8 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 15 May 2020 14:00:20 +0200 Subject: [PATCH 0221/2229] Cosmetics --- src/Functions/FunctionStartsEndsWith.h | 2 +- src/Functions/FunctionsRandom.cpp | 10 +++- src/Functions/FunctionsRandom.h | 8 +-- src/Functions/PerformanceAdaptors.h | 2 +- src/Functions/SIMDxorshift.cpp | 13 ++--- src/Functions/TargetSpecific.cpp | 4 +- src/Functions/TargetSpecific.h | 69 ++++++++++++++------------ 7 files changed, 61 insertions(+), 47 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 29c81796d0e..77692b5c414 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -148,7 +148,7 @@ public: registerImplementation> (TargetArch::SSE4); registerImplementation> (TargetArch::AVX); registerImplementation> (TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512f); + registerImplementation>(TargetArch::AVX512F); } static FunctionPtr create(const Context & context) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 1f91c54c598..11861d2d12c 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -11,11 +11,19 @@ DECLARE_MULTITARGET_CODE( namespace { + /// 
NOTE Probably + /// http://www.pcg-random.org/ + /// or http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/ + /// or http://docs.yeppp.info/c/group__yep_random___w_e_l_l1024a.html + /// could go better. + struct LinearCongruentialGenerator { + /// Constants from `man lrand48_r`. static constexpr UInt64 a = 0x5DEECE66D; static constexpr UInt64 c = 0xB; + /// And this is from `head -c8 /dev/urandom | xxd -p` UInt64 current = 0x09826f4a081cee35ULL; void seed(UInt64 value) @@ -55,7 +63,7 @@ void RandImpl::execute(char * output, size_t size) unalignedStore(output + 8, generator2.next()); unalignedStore(output + 12, generator3.next()); } - + /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } ) //DECLARE_MULTITARGET_CODE diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 98d04d61ad1..ccc218574b0 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -92,10 +92,10 @@ class FunctionRandom : public FunctionPerformanceAdaptor>(TargetArch::SSE4); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512f); + registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512F); } static FunctionPtr create(const Context &) { diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index a97fdbce0b0..2ecd60e67fe 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -160,7 +160,7 @@ private: PerformanceAdaptorOptions options; }; -// The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. +/// The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. template class FunctionPerformanceAdaptor : public DefaultFunction { diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp index 739077b5480..9a45257d28d 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/SIMDxorshift.cpp @@ -29,8 +29,8 @@ void RandXorshiftImpl::execute(char * output, size_t size) 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), &mykey); - const int bytes_per_write = 8; - const intptr_t mask = bytes_per_write - 1; + constexpr int bytes_per_write = 8; + constexpr intptr_t mask = bytes_per_write - 1; // Process head to make output aligned. unalignedStore(output, xorshift128plus(&mykey)); @@ -55,9 +55,9 @@ void RandXorshiftImpl::execute(char * output, size_t size) 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), &mykey); - const int safe_overwrite = 16; // How many bytes we can write behind the end. - const int bytes_per_write = 32; - const intptr_t mask = bytes_per_write - 1; + constexpr int safe_overwrite = 16; // How many bytes we can write behind the end. 
+ constexpr int bytes_per_write = 32; + constexpr intptr_t mask = bytes_per_write - 1; if (size + safe_overwrite <= bytes_per_write) { _mm_storeu_si128(reinterpret_cast<__m128i*>(output), @@ -91,6 +91,7 @@ using FunctionRandXorshift64 = FunctionRandomXorshift(); + factory.registerFunction(); } -} +} // namespace DB diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index aa017823e54..19604a83ab7 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -29,7 +29,7 @@ int GetSupportedArches() { if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { res |= static_cast(TargetArch::AVX2); if (ebx & bit_AVX512F) { - res |= static_cast(TargetArch::AVX512f); + res |= static_cast(TargetArch::AVX512F); } } } @@ -49,7 +49,7 @@ String ToString(TargetArch arch) case TargetArch::SSE4: return "sse4"; case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; - case TargetArch::AVX512f: return "avx512f"; + case TargetArch::AVX512F: return "avx512f"; } __builtin_unreachable(); diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index accb1dd7fab..888d88d1d77 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -4,42 +4,47 @@ /// This file contains macros and helpers for writing platform-dependent code. /// -/// Macroses DECLARE__SPECIFIC_CODE will wrap code inside them into the namespace TargetSpecific:: and enable -/// Arch-specific compile options. -/// Thus, it's allowed to call functions inside only after checking platform in runtime (see IsArchSupported() below) -/// For similarities there is a macros DECLARE_DEFAULT_CODE, which wraps code into the namespace TargetSpecific::Default -/// but dosn't specify any additional copile options. +/// Macroses DECLARE__SPECIFIC_CODE will wrap code inside them into the +/// namespace TargetSpecific:: and enable Arch-specific compile options. +/// Thus, it's allowed to call functions inside these namespaces only after +/// checking platform in runtime (see IsArchSupported() below). +/// +/// For similarities there is a macros DECLARE_DEFAULT_CODE, which wraps code +/// into the namespace TargetSpecific::Default but dosn't specify any additional +/// copile options. /// /// Example: /// /// DECLARE_DEFAULT_CODE ( -/// int func() { +/// int funcImpl() { /// return 1; /// } /// ) // DECLARE_DEFAULT_CODE /// /// DECLARE_AVX2_SPECIFIC_CODE ( -/// int func() { +/// int funcImpl() { /// return 2; /// } /// ) // DECLARE_DEFAULT_CODE /// /// int func() { /// if (IsArchSupported(TargetArch::AVX2)) -/// return TargetSpecifc::AVX2::func(); -/// return TargetSpecifc::Default::func(); +/// return TargetSpecifc::AVX2::funcImpl(); +/// return TargetSpecifc::Default::funcImpl(); /// } /// /// Sometimes code may benefit from compiling with different options. -/// For these purposes use DECLARE_MULTITARGET_CODE macros. It will create several copies of the code and -/// compile it with different options. These copies are available via TargetSpecifc namespaces described above. +/// For these purposes use DECLARE_MULTITARGET_CODE macros. It will create several +/// copies of the code and compile it with different options. These copies are +/// available via TargetSpecifc namespaces described above. /// -/// Inside every TargetSpecific namespace there is a constexpr variable BuildArch, which TODO +/// Inside every TargetSpecific namespace there is a constexpr variable BuildArch, +/// which indicates the target platform for current code. 
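+/// Because BuildArch is constexpr, arch-specific branches can be written with
+/// `if constexpr`, so untaken branches are compiled out of each copy; the
+/// example below demonstrates exactly this pattern.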
/// /// Example: /// /// DECLARE_MULTITARGET_CODE( -/// int func(int size, ...) { +/// int funcImpl(int size, ...) { /// int iteration_size = 1; /// if constexpr (BuildArch == TargetArch::SSE4) /// iteration_size = 2 @@ -60,11 +65,11 @@ namespace DB { enum class TargetArch : int { - Default = 0, // Without any additional compiler options. - SSE4 = (1 << 0), - AVX = (1 << 1), - AVX2 = (1 << 2), - AVX512f = (1 << 3), + Default = 0, // Without any additional compiler options. + SSE4 = (1 << 0), + AVX = (1 << 1), + AVX2 = (1 << 2), + AVX512F = (1 << 3), }; // Runtime detection. @@ -73,7 +78,7 @@ bool IsArchSupported(TargetArch arch); String ToString(TargetArch arch); #if defined(__clang__) -# define BEGIN_AVX512f_SPECIFIC_CODE \ +# define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f\"))))") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") @@ -84,7 +89,7 @@ String ToString(TargetArch arch); # define END_TARGET_SPECIFIC_CODE \ _Pragma("clang attribute pop") #elif defined(__GNUC__) -# define BEGIN_AVX512f_SPECIFIC_CODE \ +# define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ @@ -132,20 +137,20 @@ namespace TargetSpecific::AVX2 { \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512f_SPECIFIC_CODE(...) \ -BEGIN_AVX512f_SPECIFIC_CODE \ -namespace TargetSpecific::AVX512f { \ - using namespace DB::TargetSpecific::AVX512f; \ +#define DECLARE_AVX512F_SPECIFIC_CODE(...) \ +BEGIN_AVX512F_SPECIFIC_CODE \ +namespace TargetSpecific::AVX512F { \ + using namespace DB::TargetSpecific::AVX512F; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE #define DECLARE_MULTITARGET_CODE(...) \ -DECLARE_DEFAULT_CODE (__VA_ARGS__) \ -DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512f_SPECIFIC_CODE(__VA_ARGS__) +DECLARE_DEFAULT_CODE (__VA_ARGS__) \ +DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; @@ -163,8 +168,8 @@ DECLARE_AVX2_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX2; ) // DECLARE_AVX2_SPECIFIC_CODE -DECLARE_AVX512f_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512f; -) // DECLARE_AVX512_SPECIFIC_CODE +DECLARE_AVX512F_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX512F; +) // DECLARE_AVX512F_SPECIFIC_CODE } // namespace DB From d6d67b0da42b84302bfd13b624b3dad422941cd7 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 15 May 2020 14:06:12 +0200 Subject: [PATCH 0222/2229] Fix bug --- src/Functions/SIMDxorshift.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp index 9a45257d28d..f7c5f953e09 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/SIMDxorshift.cpp @@ -55,11 +55,12 @@ void RandXorshiftImpl::execute(char * output, size_t size) 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), &mykey); - constexpr int safe_overwrite = 16; // How many bytes we can write behind the end. 
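+    // (Why 15 and not 16: PaddedPODArray guarantees only 15 writable bytes
+    // past the end of the buffer, so treating 16 trailing bytes as safe could
+    // clobber one byte the column does not own.)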
+ constexpr int safe_overwrite = 15; // How many bytes we can write behind the end. constexpr int bytes_per_write = 32; constexpr intptr_t mask = bytes_per_write - 1; - if (size + safe_overwrite <= bytes_per_write) { + if (size + safe_overwrite < bytes_per_write) { + // size <= 16. _mm_storeu_si128(reinterpret_cast<__m128i*>(output), _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); return; @@ -74,7 +75,7 @@ void RandXorshiftImpl::execute(char * output, size_t size) output += bytes_per_write; } - // Process tail. + // Process tail. (end - output) <= 16. if ((end - output) > 0) { _mm_store_si128(reinterpret_cast<__m128i*>(output), _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); From 80ab14e3f96a4013a687d352bc2457c31ed1d099 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 08:15:39 +0200 Subject: [PATCH 0223/2229] Get rid of copy-paste in PerformanceAdaptor --- src/Functions/FunctionStartsEndsWith.h | 9 +- src/Functions/FunctionsRandom.h | 14 ++- src/Functions/PerformanceAdaptors.h | 154 +++++++++++++++---------- src/Functions/SIMDxorshift.cpp | 2 - src/Functions/SIMDxorshift.h | 12 +- 5 files changed, 119 insertions(+), 72 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 77692b5c414..44850257f99 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -41,6 +41,11 @@ public: return name; } + static String getImplementationTag() + { + return ToString(BuildArch); + } + size_t getNumberOfArguments() const override { return 2; @@ -143,7 +148,9 @@ class FunctionStartsEndsWith { public: FunctionStartsEndsWith(const Context &) - : FunctionPerformanceAdaptor>() + : FunctionPerformanceAdaptor>( + PerformanceAdaptorOptions() + ) { registerImplementation> (TargetArch::SSE4); registerImplementation> (TargetArch::AVX); diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index ccc218574b0..ae54243164f 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -42,6 +42,7 @@ DECLARE_MULTITARGET_CODE( struct RandImpl { static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch); } }; ) // DECLARE_MULTITARGET_CODE @@ -57,6 +58,11 @@ public: return name; } + static String getImplementationTag() + { + return RandImpl::getImplementationTag(); + } + bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } @@ -91,14 +97,18 @@ template class FunctionRandom : public FunctionPerformanceAdaptor> { public: - FunctionRandom() { + FunctionRandom() + : FunctionPerformanceAdaptor>( + PerformanceAdaptorOptions()) + { registerImplementation>(TargetArch::SSE4); registerImplementation>(TargetArch::AVX); registerImplementation>(TargetArch::AVX2); registerImplementation>(TargetArch::AVX512F); } - static FunctionPtr create(const Context &) { + static FunctionPtr create(const Context &) + { return std::make_shared>(); } }; diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 2ecd60e67fe..f7bb8cfd6ee 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -92,10 +92,14 @@ struct PerformanceStatistics return choose_method; } - size_t size() { + size_t size() const { return data.size(); } + bool empty() const { + return size() == 0; + } + void emplace_back() { 
data.emplace_back(); } @@ -106,7 +110,47 @@ struct PerformanceStatistics struct PerformanceAdaptorOptions { + std::optional> implementations; +}; +// Redirects IExecutableFunctionImpl::execute() and IFunction:executeImpl() to executeFunctionImpl(); +template +class FunctionExecutor; + +template +class FunctionExecutor>> + : public DefaultFunction +{ +public: + using BaseFunctionPtr = ExecutableFunctionImplPtr; + + template + FunctionExecutor(Args ...args) : DefaultFunction(args...) {} + + virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; + + virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + executeFunctionImpl(block, arguments, result, input_rows_count); + } +}; + +template +class FunctionExecutor>> + : public DefaultFunction +{ +public: + using BaseFunctionPtr = FunctionPtr; + + template + FunctionExecutor(Args ...args) : DefaultFunction(args...) {} + + virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; + + virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + executeFunctionImpl(block, arguments, result, input_rows_count); + } }; /// Combine several IExecutableFunctionImpl into one. @@ -116,23 +160,60 @@ struct PerformanceAdaptorOptions /// could use extended set of instructions (AVX, NEON, etc). /// It's convenient to inherit your func from this and register all alternative implementations in the constructor. template -class ExecutableFunctionPerformanceAdaptor : public DefaultFunction +class FunctionPerformanceAdaptor : public FunctionExecutor { public: + using BaseFunctionPtr = FunctionExecutor::BaseFunctionPtr; + template - ExecutableFunctionPerformanceAdaptor(Params ...params) : DefaultFunction(params...) + FunctionPerformanceAdaptor(PerformanceAdaptorOptions options_, Params ...params) + : FunctionExecutor(params...) + , options(std::move(options_)) { - statistics.emplace_back(); + if (isImplementationEnabled(DefaultFunction::getImplementationTag())) { + statistics.emplace_back(); + } } - virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + // Register alternative implementation. + template + void registerImplementation(TargetArch arch, Params... 
params) { + if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag())) { + impls.emplace_back(std::make_shared(params...)); + statistics.emplace_back(); + } + } + + bool isImplementationEnabled(const String & impl_tag) { + if (!options.implementations) { + return true; + } + for (const auto & tag : *options.implementations) { + if (tag == impl_tag) { + return true; + } + } + return false; + } + +protected: + virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, + size_t result, size_t input_rows_count) override { + if (statistics.empty()) + throw "No implementations"; auto id = statistics.select(); Stopwatch watch; - if (id == 0) { - DefaultFunction::execute(block, arguments, result, input_rows_count); + if (id == impls.size()) { + if constexpr (std::is_base_of_v) + DefaultFunction::executeImpl(block, arguments, result, input_rows_count); + else + DefaultFunction::execute(block, arguments, result, input_rows_count); } else { - impls[id - 1]->execute(block, arguments, result, input_rows_count); + if constexpr (std::is_base_of_v) + impls[id]->executeImpl(block, arguments, result, input_rows_count); + else + impls[id]->execute(block, arguments, result, input_rows_count); } watch.stop(); // TODO(dakovalkov): Calculate something more informative. @@ -145,63 +226,8 @@ public: } } - // Register alternative implementation. - template - void registerImplementation(TargetArch arch, Params... params) { - if (arch == TargetArch::Default || IsArchSupported(arch)) { - impls.emplace_back(std::make_shared(params...)); - statistics.emplace_back(); - } - } - private: - std::vector impls; // Alternative implementations. - PerformanceStatistics statistics; - PerformanceAdaptorOptions options; -}; - -/// The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. -template -class FunctionPerformanceAdaptor : public DefaultFunction -{ -public: - template - FunctionPerformanceAdaptor(Params ...params) : DefaultFunction(params...) - { - statistics.emplace_back(); - } - - virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - auto id = statistics.select(); - Stopwatch watch; - if (id == 0) { - DefaultFunction::executeImpl(block, arguments, result, input_rows_count); - } else { - impls[id - 1]->executeImpl(block, arguments, result, input_rows_count); - } - watch.stop(); - // TODO(dakovalkov): Calculate something more informative. - size_t rows_summary = 0; - for (auto i : arguments) { - rows_summary += block.getByPosition(i).column->size(); - } - if (rows_summary >= 1000) { - statistics.data[id].update(watch.elapsedSeconds(), rows_summary); - } - } - - // Register alternative implementation. - template - void registerImplementation(TargetArch arch, Params... params) { - if (arch == TargetArch::Default || IsArchSupported(arch)) { - impls.emplace_back(std::make_shared(params...)); - statistics.emplace_back(); - } - } - -private: - std::vector impls; // Alternative implementations. + std::vector impls; // Alternative implementations. 
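    // Index convention kept by both members: select() returns an id in
    // [0, statistics.size()); ids below impls.size() dispatch to an
    // alternative implementation, id == impls.size() runs the default one.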
PerformanceStatistics statistics; PerformanceAdaptorOptions options; }; diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp index f7c5f953e09..b5c8b0995ac 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/SIMDxorshift.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include extern "C" diff --git a/src/Functions/SIMDxorshift.h b/src/Functions/SIMDxorshift.h index 46732c4d876..c8b741c06b1 100644 --- a/src/Functions/SIMDxorshift.h +++ b/src/Functions/SIMDxorshift.h @@ -22,19 +22,25 @@ DECLARE_MULTITARGET_CODE( struct RandXorshiftImpl { static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch); } }; ) // DECLARE_MULTITARGET_CODE template -class FunctionRandomXorshift : public FunctionPerformanceAdaptor> +class FunctionRandomXorshift + : public FunctionPerformanceAdaptor> { public: - FunctionRandomXorshift() { + FunctionRandomXorshift() + : FunctionPerformanceAdaptor>( + PerformanceAdaptorOptions()) + { registerImplementation>(TargetArch::AVX2); } - static FunctionPtr create(const Context &) { + static FunctionPtr create(const Context &) + { return std::make_shared>(); } }; From f59b13a58d54ed5c447028dc4275cfed6ac38b88 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 08:59:08 +0200 Subject: [PATCH 0224/2229] Fix style issues --- src/Common/ErrorCodes.cpp | 1 + src/Functions/FunctionsRandom.h | 2 +- src/Functions/PerformanceAdaptors.h | 56 +++++++++++++++++++---------- src/Functions/SIMDxorshift.cpp | 35 ++++++++++-------- src/Functions/SIMDxorshift.h | 2 +- src/Functions/TargetSpecific.cpp | 42 ++++++++++++---------- src/Functions/TargetSpecific.h | 5 +-- 7 files changed, 88 insertions(+), 55 deletions(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index cb4c591041c..2681bd0773c 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -498,6 +498,7 @@ namespace ErrorCodes extern const int ALTER_OF_COLUMN_IS_FORBIDDEN = 524; extern const int INCORRECT_DISK_INDEX = 525; extern const int UNKNOWN_VOLUME_TYPE = 526; + extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION = 527; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index ae54243164f..6130ee1c2a5 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -113,4 +113,4 @@ public: } }; -} // namespace DB +} diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index f7bb8cfd6ee..f7b9c12c7cb 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -14,6 +14,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION; +} + // TODO(dakovalkov): This is copied and pasted struct from LZ4_decompress_faster.h with little changes. 
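// In short: it keeps one entry in `data` per selectable implementation;
// update(elapsed_seconds, rows) records observed throughput after each run,
// and select() returns the index that currently looks fastest, so dispatch
// adapts to the workload the server actually sees.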
struct PerformanceStatistics { @@ -92,15 +97,18 @@ struct PerformanceStatistics return choose_method; } - size_t size() const { + size_t size() const + { return data.size(); } - bool empty() const { + bool empty() const + { return size() == 0; } - void emplace_back() { + void emplace_back() + { data.emplace_back(); } @@ -113,7 +121,7 @@ struct PerformanceAdaptorOptions std::optional> implementations; }; -// Redirects IExecutableFunctionImpl::execute() and IFunction:executeImpl() to executeFunctionImpl(); +/// Redirects IExecutableFunctionImpl::execute() and IFunction:executeImpl() to executeFunctionImpl(); template class FunctionExecutor; @@ -170,28 +178,28 @@ public: : FunctionExecutor(params...) , options(std::move(options_)) { - if (isImplementationEnabled(DefaultFunction::getImplementationTag())) { + if (isImplementationEnabled(DefaultFunction::getImplementationTag())) statistics.emplace_back(); - } } - // Register alternative implementation. + /// Register alternative implementation. template void registerImplementation(TargetArch arch, Params... params) { - if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag())) { + if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag())) + { impls.emplace_back(std::make_shared(params...)); statistics.emplace_back(); } } bool isImplementationEnabled(const String & impl_tag) { - if (!options.implementations) { + if (!options.implementations) return true; - } - for (const auto & tag : *options.implementations) { - if (tag == impl_tag) { + + for (const auto & tag : *options.implementations) + { + if (tag == impl_tag) return true; - } } return false; } @@ -201,27 +209,37 @@ protected: size_t result, size_t input_rows_count) override { if (statistics.empty()) - throw "No implementations"; + throw Exception("All available implementations are disabled by user config", + ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION); + auto id = statistics.select(); Stopwatch watch; - if (id == impls.size()) { + + if (id == impls.size()) + { if constexpr (std::is_base_of_v) DefaultFunction::executeImpl(block, arguments, result, input_rows_count); else DefaultFunction::execute(block, arguments, result, input_rows_count); - } else { + } + else + { if constexpr (std::is_base_of_v) impls[id]->executeImpl(block, arguments, result, input_rows_count); else impls[id]->execute(block, arguments, result, input_rows_count); } watch.stop(); + // TODO(dakovalkov): Calculate something more informative. size_t rows_summary = 0; - for (auto i : arguments) { + for (auto i : arguments) + { rows_summary += block.getByPosition(i).column->size(); } - if (rows_summary >= 1000) { + + if (rows_summary >= 1000) + { statistics.data[id].update(watch.elapsedSeconds(), rows_summary); } } @@ -232,4 +250,4 @@ private: PerformanceAdaptorOptions options; }; -} // namespace DB +} diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/SIMDxorshift.cpp index b5c8b0995ac..a8410ed957a 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/SIMDxorshift.cpp @@ -28,15 +28,16 @@ void RandXorshiftImpl::execute(char * output, size_t size) &mykey); constexpr int bytes_per_write = 8; - constexpr intptr_t mask = bytes_per_write - 1; - + constexpr intptr_t mask = bytes_per_write - 1; + // Process head to make output aligned. 
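    // (The first store is intentionally unaligned: it fills the first 8 bytes
    // wherever `output` happens to point, then `output` is rounded up to the
    // next 8-byte boundary so the loop can use plain aligned stores; bytes in
    // the overlap are simply written twice.)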
unalignedStore(output, xorshift128plus(&mykey)); output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); - while (end - output > 0) { + while (end - output > 0) + { *reinterpret_cast(output) = xorshift128plus(&mykey); - output += bytes_per_write; + output += bytes_per_write; } } @@ -46,6 +47,9 @@ DECLARE_AVX2_SPECIFIC_CODE( void RandXorshiftImpl::execute(char * output, size_t size) { + if (size == 0) + return; + char * end = output + size; avx_xorshift128plus_key_t mykey; @@ -53,31 +57,34 @@ void RandXorshiftImpl::execute(char * output, size_t size) 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), &mykey); - constexpr int safe_overwrite = 15; // How many bytes we can write behind the end. + constexpr int safe_overwrite = 15; /// How many bytes we can write behind the end. constexpr int bytes_per_write = 32; - constexpr intptr_t mask = bytes_per_write - 1; + constexpr intptr_t mask = bytes_per_write - 1; - if (size + safe_overwrite < bytes_per_write) { - // size <= 16. + if (size + safe_overwrite < bytes_per_write) + { + /// size <= 16. _mm_storeu_si128(reinterpret_cast<__m128i*>(output), _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); return; } - // Process head to make output aligned. + /// Process head to make output aligned. _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); - while ((end - output) + safe_overwrite >= bytes_per_write) { + while ((end - output) + safe_overwrite >= bytes_per_write) + { _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); output += bytes_per_write; } - // Process tail. (end - output) <= 16. - if ((end - output) > 0) { + /// Process tail. (end - output) <= 16. + if ((end - output) > 0) + { _mm_store_si128(reinterpret_cast<__m128i*>(output), _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); - } + } } ) // DECLARE_AVX2_SPECIFIC_CODE @@ -93,4 +100,4 @@ void registerFunctionRandXorshift(FunctionFactory & factory) factory.registerFunction(); } -} // namespace DB +} diff --git a/src/Functions/SIMDxorshift.h b/src/Functions/SIMDxorshift.h index c8b741c06b1..c9e46cf7192 100644 --- a/src/Functions/SIMDxorshift.h +++ b/src/Functions/SIMDxorshift.h @@ -45,4 +45,4 @@ public: } }; -} // namespace DB +} diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index 19604a83ab7..891d63d8258 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -11,25 +11,30 @@ namespace DB { __attribute__ ((target("xsave"))) -uint64_t xgetbv(uint32_t ecx) { +UInt64 xgetbv(UInt32 ecx) +{ return _xgetbv(ecx); } -int GetSupportedArches() { - unsigned int eax, ebx, ecx, edx; - if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { +UInt32 GetSupportedArches() +{ + UInt32 eax, ebx, ecx, edx; + if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) return 0; - } - int res = 0; + + UInt32 res = 0; if (ecx & bit_SSE4_2) - res |= static_cast(TargetArch::SSE4); - // (xgetbv(0) & 0x6) == 0x6 checks that XMM state and YMM state are enabled. - if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX) && (xgetbv(0) & 0x6) == 0x6) { - res |= static_cast(TargetArch::AVX); - if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { - res |= static_cast(TargetArch::AVX2); - if (ebx & bit_AVX512F) { - res |= static_cast(TargetArch::AVX512F); + res |= static_cast(TargetArch::SSE4); + /// (xgetbv(0) & 0x6) == 0x6 checks that XMM state and YMM state are enabled. 
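/// (bit_OSXSAVE alone only proves the CPU supports XSAVE; the xgetbv read
/// confirms the OS actually enabled XMM/YMM state saving, otherwise AVX
/// registers would not survive context switches.)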
+ if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX) && (xgetbv(0) & 0x6) == 0x6) + { + res |= static_cast(TargetArch::AVX); + if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) + { + res |= static_cast(TargetArch::AVX2); + if (ebx & bit_AVX512F) + { + res |= static_cast(TargetArch::AVX512F); } } } @@ -38,13 +43,14 @@ int GetSupportedArches() { bool IsArchSupported(TargetArch arch) { - static int arches = GetSupportedArches(); - return arch == TargetArch::Default || (arches & static_cast(arch)); + static UInt32 arches = GetSupportedArches(); + return arch == TargetArch::Default || (arches & static_cast(arch)); } String ToString(TargetArch arch) { - switch (arch) { + switch (arch) + { case TargetArch::Default: return "default"; case TargetArch::SSE4: return "sse4"; case TargetArch::AVX: return "avx"; @@ -55,4 +61,4 @@ String ToString(TargetArch arch) __builtin_unreachable(); } -} // namespace DB +} diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index 888d88d1d77..7af792ae3c7 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -64,7 +64,8 @@ namespace DB { -enum class TargetArch : int { +enum class TargetArch : UInt32 +{ Default = 0, // Without any additional compiler options. SSE4 = (1 << 0), AVX = (1 << 1), @@ -172,4 +173,4 @@ DECLARE_AVX512F_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512F; ) // DECLARE_AVX512F_SPECIFIC_CODE -} // namespace DB +} From 2609b1c370989238a0e619d7db246474e19d266f Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 09:01:46 +0200 Subject: [PATCH 0225/2229] Save test --- tests/performance/rand.xml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml index 32ec38ddb4e..a007eb50179 100644 --- a/tests/performance/rand.xml +++ b/tests/performance/rand.xml @@ -1,10 +1,10 @@ - 20000 + 10000 - 40000 + 20000 @@ -13,12 +13,13 @@ table - numbers(100000000) + numbers(10000000) - SELECT rand() FROM {table} - SELECT rand64() FROM {table} - SELECT randxorshift() FROM {table} + SELECT count() FROM (SELECT rand() FROM {table}) + SELECT count() FROM (SELECT randxorshift() FROM {table}) + SELECT count() FROM (SELECT rand64() FROM {table}) + SELECT count() FROM (SELECT randxorshift64() FROM {table}) From e1dc2330891a5fb1fa5946dae3841c7ad8714ec1 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 17:20:07 +0200 Subject: [PATCH 0226/2229] Fix clang build (probably) --- src/Functions/TargetSpecific.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index 7af792ae3c7..a9ed8bfe71e 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -79,16 +79,15 @@ bool IsArchSupported(TargetArch arch); String ToString(TargetArch arch); #if defined(__clang__) -# define BEGIN_AVX512F_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f\"))))") -# define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") -# define BEGIN_AVX_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx\"))))") -# define BEGIN_SSE4_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx\"))))") -# define 
END_TARGET_SPECIFIC_CODE \ - _Pragma("clang attribute pop") +# define BEGIN_AVX512F_SPECIFIC_CODE _Pragma(\ + "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2,avx512f\"))),apply_to=function)") +# define BEGIN_AVX2_SPECIFIC_CODE _Pragma(\ + "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2\"))),apply_to=function)") +# define BEGIN_AVX_SPECIFIC_CODE _Pragma(\ + "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx\"))),apply_to=function)") +# define BEGIN_SSE4_SPECIFIC_CODE _Pragma(\ + "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") +# define END_TARGET_SPECIFIC_CODE _Pragma("clang attribute pop") #elif defined(__GNUC__) # define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("GCC push_options") \ From a4ff8bb9331d289a98dac69a92f5f3f0fec73217 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 17:43:05 +0200 Subject: [PATCH 0227/2229] Get rid of handwritten cpu feature detection --- src/Functions/FunctionStartsEndsWith.h | 2 +- src/Functions/FunctionsRandom.h | 2 +- src/Functions/TargetSpecific.cpp | 46 +++++++------------------- src/Functions/TargetSpecific.h | 26 +++++++-------- 4 files changed, 27 insertions(+), 49 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 44850257f99..9435adbe858 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -152,7 +152,7 @@ public: PerformanceAdaptorOptions() ) { - registerImplementation> (TargetArch::SSE4); + registerImplementation> (TargetArch::SSE42); registerImplementation> (TargetArch::AVX); registerImplementation> (TargetArch::AVX2); registerImplementation>(TargetArch::AVX512F); diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 6130ee1c2a5..8fb1758a60c 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -101,7 +101,7 @@ public: : FunctionPerformanceAdaptor>( PerformanceAdaptorOptions()) { - registerImplementation>(TargetArch::SSE4); + registerImplementation>(TargetArch::SSE42); registerImplementation>(TargetArch::AVX); registerImplementation>(TargetArch::AVX2); registerImplementation>(TargetArch::AVX512F); diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index 891d63d8258..4168fb60a59 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -1,44 +1,22 @@ #include -#if defined(__GNUC__) -# include -# include -#else -# error "Only CLANG and GCC compilers are supported for dynamic dispatch" -#endif +#include namespace DB { -__attribute__ ((target("xsave"))) -UInt64 xgetbv(UInt32 ecx) -{ - return _xgetbv(ecx); -} - UInt32 GetSupportedArches() { - UInt32 eax, ebx, ecx, edx; - if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) - return 0; - - UInt32 res = 0; - if (ecx & bit_SSE4_2) - res |= static_cast(TargetArch::SSE4); - /// (xgetbv(0) & 0x6) == 0x6 checks that XMM state and YMM state are enabled. 
- if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX) && (xgetbv(0) & 0x6) == 0x6) - { - res |= static_cast(TargetArch::AVX); - if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) - { - res |= static_cast(TargetArch::AVX2); - if (ebx & bit_AVX512F) - { - res |= static_cast(TargetArch::AVX512F); - } - } - } - return res; + UInt32 result = 0; + if (Cpu::haveSSE42()) + result |= static_cast(TargetArch::SSE42); + if (Cpu::haveAVX()) + result |= static_cast(TargetArch::AVX); + if (Cpu::haveAVX2()) + result |= static_cast(TargetArch::AVX2); + if (Cpu::haveAVX512F()) + result |= static_cast(TargetArch::AVX512F); + return result; } bool IsArchSupported(TargetArch arch) @@ -52,7 +30,7 @@ String ToString(TargetArch arch) switch (arch) { case TargetArch::Default: return "default"; - case TargetArch::SSE4: return "sse4"; + case TargetArch::SSE42: return "sse42"; case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; case TargetArch::AVX512F: return "avx512f"; diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index a9ed8bfe71e..7a946effb53 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -46,7 +46,7 @@ /// DECLARE_MULTITARGET_CODE( /// int funcImpl(int size, ...) { /// int iteration_size = 1; -/// if constexpr (BuildArch == TargetArch::SSE4) +/// if constexpr (BuildArch == TargetArch::SSE42) /// iteration_size = 2 /// else if constexpr (BuildArch == TargetArch::AVX || BuildArch == TargetArch::AVX2) /// iteration_size = 4; @@ -66,8 +66,8 @@ namespace DB enum class TargetArch : UInt32 { - Default = 0, // Without any additional compiler options. - SSE4 = (1 << 0), + Default = 0, /// Without any additional compiler options. + SSE42 = (1 << 0), /// SSE4.2 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), @@ -85,7 +85,7 @@ String ToString(TargetArch arch); "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2\"))),apply_to=function)") # define BEGIN_AVX_SPECIFIC_CODE _Pragma(\ "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx\"))),apply_to=function)") -# define BEGIN_SSE4_SPECIFIC_CODE _Pragma(\ +# define BEGIN_SSE42_SPECIFIC_CODE _Pragma(\ "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") # define END_TARGET_SPECIFIC_CODE _Pragma("clang attribute pop") #elif defined(__GNUC__) @@ -98,7 +98,7 @@ String ToString(TargetArch arch); # define BEGIN_AVX_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native\")") -# define BEGIN_SSE4_SPECIFIC_CODE \ +# define BEGIN_SSE42_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") # define END_TARGET_SPECIFIC_CODE \ @@ -113,10 +113,10 @@ namespace TargetSpecific::Default { \ __VA_ARGS__ \ } -#define DECLARE_SSE4_SPECIFIC_CODE(...) \ -BEGIN_SSE4_SPECIFIC_CODE \ -namespace TargetSpecific::SSE4 { \ - using namespace DB::TargetSpecific::SSE4; \ +#define DECLARE_SSE42_SPECIFIC_CODE(...) \ +BEGIN_SSE42_SPECIFIC_CODE \ +namespace TargetSpecific::SSE42 { \ + using namespace DB::TargetSpecific::SSE42; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -147,7 +147,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_MULTITARGET_CODE(...) 
\ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ -DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ +DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) @@ -156,9 +156,9 @@ DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; ) // DECLARE_DEFAULT_CODE -DECLARE_SSE4_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::SSE4; -) // DECLARE_SSE4_SPECIFIC_CODE +DECLARE_SSE42_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::SSE42; +) // DECLARE_SSE42_SPECIFIC_CODE DECLARE_AVX_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX; From ad0ddc936a75ec1e9bd26f98fff0258827d2bedf Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 19:21:23 +0200 Subject: [PATCH 0228/2229] Cosmetics, fix style issues --- src/Functions/PerformanceAdaptors.h | 16 +++++++++------- .../{SIMDxorshift.cpp => RandXorshift.cpp} | 2 +- src/Functions/{SIMDxorshift.h => RandXorshift.h} | 0 src/Functions/TargetSpecific.h | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) rename src/Functions/{SIMDxorshift.cpp => RandXorshift.cpp} (98%) rename src/Functions/{SIMDxorshift.h => RandXorshift.h} (100%) diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index f7b9c12c7cb..ea3f2ae0b47 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -133,7 +133,7 @@ public: using BaseFunctionPtr = ExecutableFunctionImplPtr; template - FunctionExecutor(Args ...args) : DefaultFunction(args...) {} + FunctionExecutor(Args&&... args) : DefaultFunction(std::forward(args)...) {} virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; @@ -151,7 +151,7 @@ public: using BaseFunctionPtr = FunctionPtr; template - FunctionExecutor(Args ...args) : DefaultFunction(args...) {} + FunctionExecutor(Args&&... args) : DefaultFunction(std::forward(args)...) {} virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; @@ -174,8 +174,8 @@ public: using BaseFunctionPtr = FunctionExecutor::BaseFunctionPtr; template - FunctionPerformanceAdaptor(PerformanceAdaptorOptions options_, Params ...params) - : FunctionExecutor(params...) + FunctionPerformanceAdaptor(PerformanceAdaptorOptions options_, Params&&... params) + : FunctionExecutor(std::forward(params)...) , options(std::move(options_)) { if (isImplementationEnabled(DefaultFunction::getImplementationTag())) @@ -184,15 +184,17 @@ public: /// Register alternative implementation. template - void registerImplementation(TargetArch arch, Params... params) { + void registerImplementation(TargetArch arch, Params&&... 
params) + { if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag())) { - impls.emplace_back(std::make_shared(params...)); + impls.emplace_back(std::make_shared(std::forward(params)...)); statistics.emplace_back(); } } - bool isImplementationEnabled(const String & impl_tag) { + bool isImplementationEnabled(const String & impl_tag) + { if (!options.implementations) return true; diff --git a/src/Functions/SIMDxorshift.cpp b/src/Functions/RandXorshift.cpp similarity index 98% rename from src/Functions/SIMDxorshift.cpp rename to src/Functions/RandXorshift.cpp index a8410ed957a..652bb90f559 100644 --- a/src/Functions/SIMDxorshift.cpp +++ b/src/Functions/RandXorshift.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include extern "C" { diff --git a/src/Functions/SIMDxorshift.h b/src/Functions/RandXorshift.h similarity index 100% rename from src/Functions/SIMDxorshift.h rename to src/Functions/RandXorshift.h diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index 7a946effb53..f5bd0267c52 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -4,7 +4,7 @@ /// This file contains macros and helpers for writing platform-dependent code. /// -/// Macroses DECLARE__SPECIFIC_CODE will wrap code inside them into the +/// Macros DECLARE__SPECIFIC_CODE will wrap code inside them into the /// namespace TargetSpecific:: and enable Arch-specific compile options. /// Thus, it's allowed to call functions inside these namespaces only after /// checking platform in runtime (see IsArchSupported() below). From 234a828dd0315800d96ac87568dea7353464c9ba Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sat, 16 May 2020 21:30:00 +0200 Subject: [PATCH 0229/2229] Add option to disable multitarget build --- src/Functions/CMakeLists.txt | 6 +++++ src/Functions/FunctionStartsEndsWith.h | 11 +++++---- src/Functions/FunctionsRandom.h | 11 +++++---- src/Functions/RandXorshift.h | 5 ++++- src/Functions/TargetSpecific.h | 31 ++++++++++++++++++-------- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 451dfe97a03..8c9cf159e30 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -83,6 +83,12 @@ if(USE_RAPIDJSON) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${RAPIDJSON_INCLUDE_DIR}) endif() +option(ENABLE_MULTITARGET_CODE "" ON) +if (ENABLE_MULTITARGET_CODE) + add_definitions(-DUSE_MULTITARGET_CODE=1) +else() + add_definitions(-DUSE_MULTITARGET_CODE=0) +endif() target_link_libraries(clickhouse_functions PUBLIC "simdxorshift") message(STATUS "Using SIMDXORSHIFT ${SIMDXORSHIFT_LIBRARY}") diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 9435adbe858..f433f9c46c2 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -152,10 +152,13 @@ public: PerformanceAdaptorOptions() ) { - registerImplementation> (TargetArch::SSE42); - registerImplementation> (TargetArch::AVX); - registerImplementation> (TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512F); + if constexpr (UseMultitargetCode) + { + registerImplementation> (TargetArch::SSE42); + registerImplementation> (TargetArch::AVX); + registerImplementation> (TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512F); + } } static FunctionPtr create(const Context & context) diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 
8fb1758a60c..a957c0b5e97 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -101,10 +101,13 @@ public: : FunctionPerformanceAdaptor>( PerformanceAdaptorOptions()) { - registerImplementation>(TargetArch::SSE42); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512F); + if constexpr (UseMultitargetCode) + { + registerImplementation>(TargetArch::SSE42); + registerImplementation>(TargetArch::AVX); + registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512F); + } } static FunctionPtr create(const Context &) diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index c9e46cf7192..5f3f3c9c04f 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -36,7 +36,10 @@ public: : FunctionPerformanceAdaptor>( PerformanceAdaptorOptions()) { - registerImplementation>(TargetArch::AVX2); + if constexpr (UseMultitargetCode) + { + registerImplementation>(TargetArch::AVX2); + } } static FunctionPtr create(const Context &) diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index f5bd0267c52..0c9eb7357d1 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -78,6 +78,10 @@ bool IsArchSupported(TargetArch arch); String ToString(TargetArch arch); +#if USE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) + +constexpr bool UseMultitargetCode = true; + #if defined(__clang__) # define BEGIN_AVX512F_SPECIFIC_CODE _Pragma(\ "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2,avx512f\"))),apply_to=function)") @@ -88,7 +92,7 @@ String ToString(TargetArch arch); # define BEGIN_SSE42_SPECIFIC_CODE _Pragma(\ "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") # define END_TARGET_SPECIFIC_CODE _Pragma("clang attribute pop") -#elif defined(__GNUC__) +#else # define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") @@ -103,16 +107,8 @@ String ToString(TargetArch arch); _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") # define END_TARGET_SPECIFIC_CODE \ _Pragma("GCC pop_options") -#else -# error "Only CLANG and GCC compilers are supported for vectorized code generation" #endif -#define DECLARE_DEFAULT_CODE(...) \ -namespace TargetSpecific::Default { \ - using namespace DB::TargetSpecific::Default; \ - __VA_ARGS__ \ -} - #define DECLARE_SSE42_SPECIFIC_CODE(...) \ BEGIN_SSE42_SPECIFIC_CODE \ namespace TargetSpecific::SSE42 { \ @@ -145,6 +141,23 @@ namespace TargetSpecific::AVX512F { \ } \ END_TARGET_SPECIFIC_CODE +#else + +constexpr bool UseMultitargetCode = false; + +#define DECLARE_SSE42_SPECIFIC_CODE(...) +#define DECLARE_AVX_SPECIFIC_CODE(...) +#define DECLARE_AVX2_SPECIFIC_CODE(...) +#define DECLARE_AVX512F_SPECIFIC_CODE(...) + +#endif + +#define DECLARE_DEFAULT_CODE(...) \ +namespace TargetSpecific::Default { \ + using namespace DB::TargetSpecific::Default; \ + __VA_ARGS__ \ +} + #define DECLARE_MULTITARGET_CODE(...) 
\ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ From 9387981abeb28cdb89faeb55d9a8b1007e637810 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sun, 17 May 2020 17:13:01 +0200 Subject: [PATCH 0230/2229] Add awful option for choosing implementation --- src/Core/Settings.h | 1 + src/Core/SettingsCollection.h | 2 +- src/Functions/FunctionStartsEndsWith.h | 6 +-- src/Functions/FunctionsRandom.h | 9 ++-- src/Functions/PerformanceAdaptors.h | 25 ++++++----- src/Functions/RandXorshift.cpp | 61 ++++++++++++++++++++++++++ src/Functions/RandXorshift.h | 16 ++++--- src/Functions/TargetSpecific.cpp | 8 ++-- 8 files changed, 98 insertions(+), 30 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 142e0872d72..68bebd0b6b0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -424,6 +424,7 @@ struct Settings : public SettingsCollection M(SettingBool, allow_nondeterministic_mutations, false, "Allow non-deterministic functions in ALTER UPDATE/ALTER DELETE statements", 0) \ M(SettingSeconds, lock_acquire_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "How long locking request should wait before failing", 0) \ M(SettingBool, materialize_ttl_after_modify, true, "Apply TTL for old data, after ALTER MODIFY TTL query", 0) \ + M(SettingString, function_implementation, "", "Choose implementation. If empty, enable all of them.", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ diff --git a/src/Core/SettingsCollection.h b/src/Core/SettingsCollection.h index 1fe5762de4c..c34bd1a2990 100644 --- a/src/Core/SettingsCollection.h +++ b/src/Core/SettingsCollection.h @@ -327,7 +327,7 @@ using SettingLogQueriesType = SettingEnum; enum class SettingsBinaryFormat { - OLD, /// Part of the settings are serialized as strings, and other part as varints. This is the old behaviour. + OLD, /// Part of the settings are serialized as strings, and other part as variants. This is the old behaviour. STRINGS, /// All settings are serialized as strings. Before each value the flag `is_ignorable` is serialized.
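The new function_implementation setting acts as a simple allow-filter over implementation tags. A minimal sketch of the gating logic it enables (our reduction for illustration, not the patch's exact code; forced_tag is our name for the setting's value):

#include <string>

/// An empty setting keeps every implementation enabled; a non-empty one pins
/// execution to the single implementation whose tag matches, e.g. "avx2" or "avx2_v2".
static bool isImplementationEnabled(const std::string & forced_tag, const std::string & impl_tag)
{
    return forced_tag.empty() || forced_tag == impl_tag;
}

This is what makes it possible to benchmark one code path at a time instead of always letting the adaptor converge on the fastest variant.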
DEFAULT = STRINGS, }; diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index f433f9c46c2..71b02e3b264 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -147,10 +147,8 @@ class FunctionStartsEndsWith : public FunctionPerformanceAdaptor> { public: - FunctionStartsEndsWith(const Context &) - : FunctionPerformanceAdaptor>( - PerformanceAdaptorOptions() - ) + FunctionStartsEndsWith(const Context & context_) + : FunctionPerformanceAdaptor>(context_) { if constexpr (UseMultitargetCode) { diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index a957c0b5e97..a716826d4e1 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -97,9 +97,8 @@ template class FunctionRandom : public FunctionPerformanceAdaptor> { public: - FunctionRandom() - : FunctionPerformanceAdaptor>( - PerformanceAdaptorOptions()) + FunctionRandom(const Context & context_) + : FunctionPerformanceAdaptor>(context_) { if constexpr (UseMultitargetCode) { @@ -110,9 +109,9 @@ public: } } - static FunctionPtr create(const Context &) + static FunctionPtr create(const Context & context) { - return std::make_shared>(); + return std::make_shared>(context); } }; diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index ea3f2ae0b47..eaaa594a4bf 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -174,9 +175,9 @@ public: using BaseFunctionPtr = FunctionExecutor::BaseFunctionPtr; template - FunctionPerformanceAdaptor(PerformanceAdaptorOptions options_, Params&&... params) + FunctionPerformanceAdaptor(const Context & context_, Params&&... params) : FunctionExecutor(std::forward(params)...) - , options(std::move(options_)) + , context(context_) { if (isImplementationEnabled(DefaultFunction::getImplementationTag())) statistics.emplace_back(); @@ -195,15 +196,17 @@ public: bool isImplementationEnabled(const String & impl_tag) { - if (!options.implementations) - return true; + const String & tag = context.getSettingsRef().function_implementation.value; + return tag.empty() || tag == impl_tag; + // if (!options.implementations) + // return true; - for (const auto & tag : *options.implementations) - { - if (tag == impl_tag) - return true; - } - return false; + // for (const auto & tag : *options.implementations) + // { + // if (tag == impl_tag) + // return true; + // } + // return false; } protected: @@ -249,7 +252,7 @@ protected: private: std::vector impls; // Alternative implementations. 
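The statistics member next to these implementations drives the adaptive choice between them. As a toy model only (the real PerformanceStatistics in ClickHouse is more elaborate; the names and the exploration policy here are ours):

#include <cstddef>
#include <random>
#include <vector>

struct NaiveSelector
{
    struct Stat { double seconds = 0; double rows = 0; };
    std::vector<Stat> stats;
    std::minstd_rand rng{42};

    size_t select()
    {
        for (size_t i = 0; i < stats.size(); ++i)   /// try every implementation at least once
            if (stats[i].rows == 0)
                return i;
        if (rng() % 16 == 0)                        /// occasionally re-measure a random variant
            return rng() % stats.size();
        size_t best = 0;                            /// otherwise run the best observed throughput
        for (size_t i = 1; i < stats.size(); ++i)
            if (stats[i].rows / stats[i].seconds > stats[best].rows / stats[best].seconds)
                best = i;
        return best;
    }

    void account(size_t id, double seconds, double rows) { stats[id].seconds += seconds; stats[id].rows += rows; }
};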
PerformanceStatistics statistics; - PerformanceAdaptorOptions options; + const Context & context; }; } diff --git a/src/Functions/RandXorshift.cpp b/src/Functions/RandXorshift.cpp index 652bb90f559..9f1dded700c 100644 --- a/src/Functions/RandXorshift.cpp +++ b/src/Functions/RandXorshift.cpp @@ -19,6 +19,9 @@ DECLARE_DEFAULT_CODE( void RandXorshiftImpl::execute(char * output, size_t size) { + if (size == 0) + return; + char * end = output + size; xorshift128plus_key_s mykey; @@ -89,6 +92,64 @@ void RandXorshiftImpl::execute(char * output, size_t size) ) // DECLARE_AVX2_SPECIFIC_CODE +DECLARE_AVX2_SPECIFIC_CODE( + +void RandXorshiftImpl2::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + avx_xorshift128plus_key_t mykey; + avx_xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), + 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), + &mykey); + + avx_xorshift128plus_key_t mykey2; + avx_xorshift128plus_init(0xdfe532a6b5a5eb2cULL ^ reinterpret_cast(output), + 0x21cdf6cd1e22bf9cULL ^ reinterpret_cast(output), + &mykey2); + + constexpr int safe_overwrite = 15; /// How many bytes we can write behind the end. + constexpr int bytes_per_write = 32; + constexpr intptr_t mask = bytes_per_write - 1; + + if (size + safe_overwrite < bytes_per_write) + { + /// size <= 16. + _mm_storeu_si128(reinterpret_cast<__m128i*>(output), + _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); + return; + } + + /// Process head to make output aligned. + _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); + output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); + + while ((end - output) + safe_overwrite >= bytes_per_write * 2) + { + _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); + _mm256_store_si256(reinterpret_cast<__m256i*>(output + bytes_per_write), avx_xorshift128plus(&mykey2)); + output += bytes_per_write * 2; + } + + if ((end - output) + safe_overwrite >= bytes_per_write) + { + _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); + output += bytes_per_write; + } + + /// Process tail. (end - output) <= 16. 
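One detail worth calling out in the hunk above: after the unaligned head store, (output | mask) + 1 rounds the pointer up to the next 32-byte boundary so the bulk loop can use aligned stores, and the overlap with the head store is harmless. A tiny self-checking sketch of that idiom (illustrative, not from the patch):

#include <cassert>
#include <cstdint>

/// Smallest address strictly greater than p that is n-byte aligned (n a power of two).
static char * align_up_past(char * p, intptr_t n)
{
    return reinterpret_cast<char *>((reinterpret_cast<intptr_t>(p) | (n - 1)) + 1);
}

int main()
{
    alignas(64) char buf[64];
    char * q = align_up_past(buf + 5, 32);
    assert(reinterpret_cast<intptr_t>(q) % 32 == 0 && q > buf + 5 && q - (buf + 5) <= 32);
}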
+ if ((end - output) > 0) + { + _mm_store_si128(reinterpret_cast<__m128i*>(output), + _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); + } +} + +) // DECLARE_AVX2_SPECIFIC_CODE + struct NameRandXorshift { static constexpr auto name = "randxorshift"; }; using FunctionRandXorshift = FunctionRandomXorshift; struct NameRandXorshift64 { static constexpr auto name = "randxorshift64"; }; diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 5f3f3c9c04f..2dd7723ff0a 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -25,6 +25,12 @@ struct RandXorshiftImpl static String getImplementationTag() { return ToString(BuildArch); } }; +struct RandXorshiftImpl2 +{ + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } +}; + ) // DECLARE_MULTITARGET_CODE template @@ -32,19 +38,19 @@ class FunctionRandomXorshift : public FunctionPerformanceAdaptor> { public: - FunctionRandomXorshift() - : FunctionPerformanceAdaptor>( - PerformanceAdaptorOptions()) + FunctionRandomXorshift(const Context & context_) + : FunctionPerformanceAdaptor>(context_) { if constexpr (UseMultitargetCode) { registerImplementation>(TargetArch::AVX2); + registerImplementation>(TargetArch::AVX2); } } - static FunctionPtr create(const Context &) + static FunctionPtr create(const Context & context) { - return std::make_shared>(); + return std::make_shared>(context); } }; diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index 4168fb60a59..65f8641ee8e 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -8,13 +8,13 @@ namespace DB UInt32 GetSupportedArches() { UInt32 result = 0; - if (Cpu::haveSSE42()) + if (Cpu::CpuFlagsCache::have_SSE42) result |= static_cast(TargetArch::SSE42); - if (Cpu::haveAVX()) + if (Cpu::CpuFlagsCache::have_AVX) result |= static_cast(TargetArch::AVX); - if (Cpu::haveAVX2()) + if (Cpu::CpuFlagsCache::have_AVX2) result |= static_cast(TargetArch::AVX2); - if (Cpu::haveAVX512F()) + if (Cpu::CpuFlagsCache::have_AVX512F) result |= static_cast(TargetArch::AVX512F); return result; } From 35e4f43ac749a479dee960452e866cb575b588a0 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 18 May 2020 09:24:22 +0200 Subject: [PATCH 0231/2229] Add alternative implementation for rand --- src/Functions/FunctionsRandom.cpp | 46 +++++++++++++++++++++++++++++++ src/Functions/FunctionsRandom.h | 12 ++++++-- src/Functions/RandXorshift.h | 5 ++++- 3 files changed, 56 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 11861d2d12c..9c6d90e9e73 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -66,6 +66,52 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end.
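The RandImpl2 added below interleaves eight linear congruential generators so the CPU can overlap the long multiply dependency chains, and seeds each stream with a distinct constant plus the destination address so different blocks get different streams. The underlying generator is just this (a hedged sketch; the constants of the patch's LinearCongruentialGenerator may differ):

#include <cstdint>

struct Lcg
{
    /// Constants as in `man lrand48`; the output takes higher-order bits of the
    /// state, which are the most random ones.
    static constexpr uint64_t a = 0x5DEECE66DULL;
    static constexpr uint64_t c = 0xBULL;
    uint64_t state = 0;

    void seed(uint64_t value) { state = value; }
    uint32_t next() { state = state * a + c; return static_cast<uint32_t>(state >> 16); }
};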
} +void RandImpl2::execute(char * output, size_t size) +{ + if (size == 0) + return; + + LinearCongruentialGenerator generator0; + LinearCongruentialGenerator generator1; + LinearCongruentialGenerator generator2; + LinearCongruentialGenerator generator3; + LinearCongruentialGenerator generator4; + LinearCongruentialGenerator generator5; + LinearCongruentialGenerator generator6; + LinearCongruentialGenerator generator7; + + seed(generator0, 0xfb4121280b2ab902ULL + reinterpret_cast(output)); + seed(generator1, 0x0121cf76df39c673ULL + reinterpret_cast(output)); + seed(generator2, 0x17ae86e3a19a602fULL + reinterpret_cast(output)); + seed(generator3, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)); + seed(generator4, 0xfb4122280b2ab102ULL + reinterpret_cast(output)); + seed(generator5, 0x0121c276df39c173ULL + reinterpret_cast(output)); + seed(generator6, 0x17ae82e3a19a612fULL + reinterpret_cast(output)); + seed(generator7, 0x8b6e12da7e06d122ULL + reinterpret_cast(output)); + + const char * end = output + size; + + for (; (end - output + 15) <= 32; output += 32) + { + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); + unalignedStore(output + 16, generator4.next()); + unalignedStore(output + 20, generator5.next()); + unalignedStore(output + 24, generator6.next()); + unalignedStore(output + 28, generator7.next()); + } + + while (end - output > 0) { + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); + output += 16; + } +} + ) //DECLARE_MULTITARGET_CODE } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index a716826d4e1..443f44a4e44 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -45,6 +44,12 @@ struct RandImpl static String getImplementationTag() { return ToString(BuildArch); } }; +struct RandImpl2 +{ + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } +}; + ) // DECLARE_MULTITARGET_CODE template @@ -106,6 +111,9 @@ public: registerImplementation>(TargetArch::SSE42); registerImplementation>(TargetArch::AVX); registerImplementation>(TargetArch::AVX2); registerImplementation>(TargetArch::AVX512F); + + registerImplementation>(TargetArch::Default); + registerImplementation>(TargetArch::AVX2); } } diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 2dd7723ff0a..b74fdeecbef 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -12,11 +12,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - DECLARE_MULTITARGET_CODE( struct RandXorshiftImpl From b2b3ba59420dce136943e4f0275c6f125540676d Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 18 May 2020 10:48:35 +0200 Subject: [PATCH 0232/2229] Finally fix clang build --- src/Functions/FunctionStartsEndsWith.h | 8 ++++---- src/Functions/FunctionsRandom.h | 12 ++++++------ src/Functions/PerformanceAdaptors.h | 2 +- src/Functions/RandXorshift.h | 4 ++-- src/Functions/TargetSpecific.h | 4 ++++ 5 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h index
71b02e3b264..b0465ecefa6 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -152,10 +152,10 @@ public: { if constexpr (UseMultitargetCode) { - registerImplementation> (TargetArch::SSE42); - registerImplementation> (TargetArch::AVX); - registerImplementation> (TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512F); + this->template registerImplementation> (TargetArch::SSE42); + this->template registerImplementation> (TargetArch::AVX); + this->template registerImplementation> (TargetArch::AVX2); + this->template registerImplementation>(TargetArch::AVX512F); } } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 443f44a4e44..eeba5546fc9 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -107,13 +107,13 @@ public: { if constexpr (UseMultitargetCode) { - registerImplementation>(TargetArch::SSE42); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512F); + this->template registerImplementation>(TargetArch::SSE42); + this->template registerImplementation>(TargetArch::AVX); + this->template registerImplementation>(TargetArch::AVX2); + this->template registerImplementation>(TargetArch::AVX512F); - registerImplementation>(TargetArch::Default); - registerImplementation>(TargetArch::AVX2); + this->template registerImplementation>(TargetArch::Default); + this->template registerImplementation>(TargetArch::AVX2); } } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index eaaa594a4bf..0b5e3e10104 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -172,7 +172,7 @@ template class FunctionPerformanceAdaptor : public FunctionExecutor { public: - using BaseFunctionPtr = FunctionExecutor::BaseFunctionPtr; + using BaseFunctionPtr = typename FunctionExecutor::BaseFunctionPtr; template FunctionPerformanceAdaptor(const Context & context_, Params&&... params) diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index b74fdeecbef..8713d85fdbd 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -38,8 +38,8 @@ public: { if constexpr (UseMultitargetCode) { - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX2); + this->template registerImplementation>(TargetArch::AVX2); + this->template registerImplementation>(TargetArch::AVX2); } } diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index 0c9eb7357d1..e69bd22f271 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -112,6 +112,7 @@ constexpr bool UseMultitargetCode = true; #define DECLARE_SSE42_SPECIFIC_CODE(...) \ BEGIN_SSE42_SPECIFIC_CODE \ namespace TargetSpecific::SSE42 { \ + void __dummy_function_clang(); \ using namespace DB::TargetSpecific::SSE42; \ __VA_ARGS__ \ } \ @@ -120,6 +121,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX_SPECIFIC_CODE(...) \ BEGIN_AVX_SPECIFIC_CODE \ namespace TargetSpecific::AVX { \ + void __dummy_function_clang(); \ using namespace DB::TargetSpecific::AVX; \ __VA_ARGS__ \ } \ @@ -128,6 +130,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX2_SPECIFIC_CODE(...) \ BEGIN_AVX2_SPECIFIC_CODE \ namespace TargetSpecific::AVX2 { \ + void __dummy_function_clang(); \ using namespace DB::TargetSpecific::AVX2; \ __VA_ARGS__ \ } \ @@ -136,6 +139,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512F_SPECIFIC_CODE(...) 
\ BEGIN_AVX512F_SPECIFIC_CODE \ namespace TargetSpecific::AVX512F { \ + void __dummy_function_clang(); \ using namespace DB::TargetSpecific::AVX512F; \ __VA_ARGS__ \ } \ From bd847514baa5194e65c406a93179c2d6d05eae84 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 18 May 2020 13:44:56 +0200 Subject: [PATCH 0233/2229] better random --- src/Functions/FunctionsRandom.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 9c6d90e9e73..1de5fb50b8a 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -103,7 +103,7 @@ void RandImpl2::execute(char * output, size_t size) unalignedStore(output + 28, generator7.next()); } - while (end - output > 0) { + if (end - output > 0) { unalignedStore(output, generator0.next()); unalignedStore(output + 4, generator1.next()); unalignedStore(output + 8, generator2.next()); From ea1285328be4579b738c53d97ad8e34b6cf5f3e6 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 18 May 2020 19:07:36 +0200 Subject: [PATCH 0234/2229] Fix gcc build, fix PVS error --- src/Functions/FunctionsRandom.cpp | 3 +- src/Functions/TargetSpecific.h | 157 ++++++++++++++++-------------- 2 files changed, 86 insertions(+), 74 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 1de5fb50b8a..fd493d5605b 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -103,7 +103,8 @@ void RandImpl2::execute(char * output, size_t size) unalignedStore(output + 28, generator7.next()); } - if (end - output > 0) { + if (end - output > 0) + { unalignedStore(output, generator0.next()); unalignedStore(output + 4, generator1.next()); unalignedStore(output + 8, generator2.next()); diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index e69bd22f271..11dae939bbd 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -2,72 +2,73 @@ #include -/// This file contains macros and helpers for writing platform-dependent code. -/// -/// Macros DECLARE__SPECIFIC_CODE will wrap code inside them into the -/// namespace TargetSpecific:: and enable Arch-specific compile options. -/// Thus, it's allowed to call functions inside these namespaces only after -/// checking platform in runtime (see IsArchSupported() below). -/// -/// For similarities there is a macros DECLARE_DEFAULT_CODE, which wraps code -/// into the namespace TargetSpecific::Default but dosn't specify any additional -/// copile options. -/// -/// Example: -/// -/// DECLARE_DEFAULT_CODE ( -/// int funcImpl() { -/// return 1; -/// } -/// ) // DECLARE_DEFAULT_CODE -/// -/// DECLARE_AVX2_SPECIFIC_CODE ( -/// int funcImpl() { -/// return 2; -/// } -/// ) // DECLARE_DEFAULT_CODE -/// -/// int func() { -/// if (IsArchSupported(TargetArch::AVX2)) -/// return TargetSpecifc::AVX2::funcImpl(); -/// return TargetSpecifc::Default::funcImpl(); -/// } -/// -/// Sometimes code may benefit from compiling with different options. -/// For these purposes use DECLARE_MULTITARGET_CODE macros. It will create several -/// copies of the code and compile it with different options. These copies are -/// available via TargetSpecifc namespaces described above. -/// -/// Inside every TargetSpecific namespace there is a constexpr variable BuildArch, -/// which indicates the target platform for current code. -/// -/// Example: -/// -/// DECLARE_MULTITARGET_CODE( -/// int funcImpl(int size, ...) 
{ -/// int iteration_size = 1; -/// if constexpr (BuildArch == TargetArch::SSE42) -/// iteration_size = 2 -/// else if constexpr (BuildArch == TargetArch::AVX || BuildArch == TargetArch::AVX2) -/// iteration_size = 4; -/// else if constexpr (BuildArch == TargetArch::AVX512) -/// iteration_size = 8; -/// for (int i = 0; i < size; i += iteration_size) -/// ... -/// } -/// ) // DECLARE_MULTITARGET_CODE -/// -/// // All 5 versions of func are available here. Use runtime detection to choose one. -/// -/// If you want to write IFunction or IExecutableFuncionImpl with runtime dispatching, see PerformanceAdaptors.h. +/* This file contains macros and helpers for writing platform-dependent code. + * + * Macros DECLARE__SPECIFIC_CODE will wrap code inside them into the + * namespace TargetSpecific:: and enable Arch-specific compile options. + * Thus, it's allowed to call functions inside these namespaces only after + * checking platform in runtime (see IsArchSupported() below). + * + * For similarities there is a macros DECLARE_DEFAULT_CODE, which wraps code + * into the namespace TargetSpecific::Default but dosn't specify any additional + * copile options. + * + * Example: + * + * DECLARE_DEFAULT_CODE ( + * int funcImpl() { + * return 1; + * } + * ) // DECLARE_DEFAULT_CODE + * + * DECLARE_AVX2_SPECIFIC_CODE ( + * int funcImpl() { + * return 2; + * } + * ) // DECLARE_DEFAULT_CODE + * + * int func() { + * if (IsArchSupported(TargetArch::AVX2)) + * return TargetSpecifc::AVX2::funcImpl(); + * return TargetSpecifc::Default::funcImpl(); + * } + * + * Sometimes code may benefit from compiling with different options. + * For these purposes use DECLARE_MULTITARGET_CODE macros. It will create several + * copies of the code and compile it with different options. These copies are + * available via TargetSpecifc namespaces described above. + * + * Inside every TargetSpecific namespace there is a constexpr variable BuildArch, + * which indicates the target platform for current code. + * + * Example: + * + * DECLARE_MULTITARGET_CODE( + * int funcImpl(int size, ...) { + * int iteration_size = 1; + * if constexpr (BuildArch == TargetArch::SSE42) + * iteration_size = 2 + * else if constexpr (BuildArch == TargetArch::AVX || BuildArch == TargetArch::AVX2) + * iteration_size = 4; + * else if constexpr (BuildArch == TargetArch::AVX512) + * iteration_size = 8; + * for (int i = 0; i < size; i += iteration_size) + * ... + * } + * ) // DECLARE_MULTITARGET_CODE + * + * // All 5 versions of func are available here. Use runtime detection to choose one. + * + * If you want to write IFunction or IExecutableFuncionImpl with runtime dispatching, see PerformanceAdaptors.h. + */ namespace DB { enum class TargetArch : UInt32 { - Default = 0, /// Without any additional compiler options. - SSE42 = (1 << 0), /// SSE4.2 + Default = 0, /// Without any additional compiler options. 
+ SSE42 = (1 << 0), /// SSE4.2 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), @@ -83,15 +84,21 @@ String ToString(TargetArch arch); constexpr bool UseMultitargetCode = true; #if defined(__clang__) -# define BEGIN_AVX512F_SPECIFIC_CODE _Pragma(\ - "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2,avx512f\"))),apply_to=function)") -# define BEGIN_AVX2_SPECIFIC_CODE _Pragma(\ - "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2\"))),apply_to=function)") -# define BEGIN_AVX_SPECIFIC_CODE _Pragma(\ - "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx\"))),apply_to=function)") -# define BEGIN_SSE42_SPECIFIC_CODE _Pragma(\ - "clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") -# define END_TARGET_SPECIFIC_CODE _Pragma("clang attribute pop") +# define BEGIN_AVX512F_SPECIFIC_CODE \ + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2,avx512f\"))),apply_to=function)") +# define BEGIN_AVX2_SPECIFIC_CODE \ + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2\"))),apply_to=function)") +# define BEGIN_AVX_SPECIFIC_CODE \ + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx\"))),apply_to=function)") +# define BEGIN_SSE42_SPECIFIC_CODE \ + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") +# define END_TARGET_SPECIFIC_CODE \ + _Pragma("clang attribute pop") + +/* Clang shows warning when there aren't any objects to apply pragma. + * To prevent this warning we define this function inside every macros with pragmas. + */ +# define DUMMY_FUNCTION_DEFINITION void __dummy_function_definition(); #else # define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("GCC push_options") \ @@ -107,12 +114,16 @@ constexpr bool UseMultitargetCode = true; _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") # define END_TARGET_SPECIFIC_CODE \ _Pragma("GCC pop_options") + +/* GCC doesn't show such warning, we don't need to define anything. + */ +# define DUMMY_FUNCTION_DEFINITION #endif #define DECLARE_SSE42_SPECIFIC_CODE(...) \ BEGIN_SSE42_SPECIFIC_CODE \ namespace TargetSpecific::SSE42 { \ - void __dummy_function_clang(); \ + DUMMY_FUNCTION_DEFINITION \ using namespace DB::TargetSpecific::SSE42; \ __VA_ARGS__ \ } \ @@ -121,7 +132,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX_SPECIFIC_CODE(...) \ BEGIN_AVX_SPECIFIC_CODE \ namespace TargetSpecific::AVX { \ - void __dummy_function_clang(); \ + DUMMY_FUNCTION_DEFINITION \ using namespace DB::TargetSpecific::AVX; \ __VA_ARGS__ \ } \ @@ -130,7 +141,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX2_SPECIFIC_CODE(...) \ BEGIN_AVX2_SPECIFIC_CODE \ namespace TargetSpecific::AVX2 { \ - void __dummy_function_clang(); \ + DUMMY_FUNCTION_DEFINITION \ using namespace DB::TargetSpecific::AVX2; \ __VA_ARGS__ \ } \ @@ -139,7 +150,7 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512F_SPECIFIC_CODE(...) 
\ BEGIN_AVX512F_SPECIFIC_CODE \ namespace TargetSpecific::AVX512F { \ - void __dummy_function_clang(); \ + DUMMY_FUNCTION_DEFINITION \ using namespace DB::TargetSpecific::AVX512F; \ __VA_ARGS__ \ } \ From c524642d245487315d8a9b37d7d3783c8b66139f Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 18 May 2020 22:07:24 +0200 Subject: [PATCH 0235/2229] Delete awful template PerformanceAdaptor and add simple ImplementationSelector instead --- src/Functions/FunctionStartsEndsWith.h | 29 ++-- src/Functions/FunctionsRandom.h | 34 +++-- src/Functions/PerformanceAdaptors.h | 179 ++++++++++--------------- src/Functions/RandXorshift.h | 23 +++- 4 files changed, 134 insertions(+), 131 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index b0465ecefa6..76aa4530c99 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -143,26 +143,39 @@ private: ) // DECLARE_MULTITARGET_CODE template -class FunctionStartsEndsWith - : public FunctionPerformanceAdaptor> +class FunctionStartsEndsWith : public TargetSpecific::Default::FunctionStartsEndsWith { public: - FunctionStartsEndsWith(const Context & context_) - : FunctionPerformanceAdaptor>(context_) + FunctionStartsEndsWith(const Context & context) : selector(context) { + selector.registerImplementation>(); + if constexpr (UseMultitargetCode) { - this->template registerImplementation> (TargetArch::SSE42); - this->template registerImplementation> (TargetArch::AVX); - this->template registerImplementation> (TargetArch::AVX2); - this->template registerImplementation>(TargetArch::AVX512F); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); } } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + static FunctionPtr create(const Context & context) { return std::make_shared>(context); } + +private: + ImplementationSelector selector; }; } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index eeba5546fc9..995f8ffeb9c 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -99,28 +99,44 @@ public: }; template -class FunctionRandom : public FunctionPerformanceAdaptor> +class FunctionRandom : public FunctionRandomImpl { public: - FunctionRandom(const Context & context_) - : FunctionPerformanceAdaptor>(context_) + FunctionRandom(const Context & context) : selector(context) { + selector.registerImplementation>(); + selector.registerImplementation>(); + if constexpr (UseMultitargetCode) { - this->template registerImplementation>(TargetArch::SSE42); - this->template registerImplementation>(TargetArch::AVX); - this->template registerImplementation>(TargetArch::AVX2); - this->template registerImplementation>(TargetArch::AVX512F); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); - this->template registerImplementation>(TargetArch::Default); - this->template registerImplementation>(TargetArch::AVX2); + selector.registerImplementation>(); } } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + static FunctionPtr create(const
Context & context) { return std::make_shared>(context); } + +private: + ImplementationSelector selector; }; } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 0b5e3e10104..b86730952fb 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -117,123 +117,67 @@ struct PerformanceStatistics PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} }; -struct PerformanceAdaptorOptions -{ - std::optional> implementations; -}; - -/// Redirects IExecutableFunctionImpl::execute() and IFunction:executeImpl() to executeFunctionImpl(); -template -class FunctionExecutor; - -template -class FunctionExecutor>> - : public DefaultFunction +/* Class which is used to store implementations for the function and to select the best one to run + * based on processor architecture and statistics from previous runs. + * + * FunctionInterface is typically IFunction or IExecutableFunctionImpl, but practically it can be + * any interface that contains an "execute" method (IFunction is an exception and is supported as well). + * + * Example of usage: + * + * class MyDefaultImpl : public IFunction {...}; + * class MySecondImpl : public IFunction {...}; + * class MyAVX2Impl : public IFunction {...}; + * + * /// All methods but execute/executeImpl are usually not a bottleneck, so just use them from + * /// default implementation. + * class MyFunction : public MyDefaultImpl + * { + * MyFunction(const Context & context) : selector(context) { + * /// Register all implementations in constructor. + * /// There can be as many implementations for every target as you want. + * selector.registerImplementation(); + * selector.registerImplementation(); + * selector.registerImplementation(); + * } + * + * void executeImpl(...) override { + * selector.selectAndExecute(...); + * } + * + * static FunctionPtr create(const Context & context) { + * return std::make_shared(context); + * } + * private: + * ImplementationSelector selector; + * }; + */ +template +class ImplementationSelector { public: - using BaseFunctionPtr = ExecutableFunctionImplPtr; + using ImplementationPtr = std::shared_ptr; - template - FunctionExecutor(Args&&... args) : DefaultFunction(std::forward(args)...) {} + ImplementationSelector(const Context & context_) : context(context_) {} - virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; - - virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + /* Select the best implementation based on previous runs. + * If FunctionInterface is IFunction, then "executeImpl" method of the implementation will be called + * and "execute" otherwise. + */ + void selectAndExecute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) { - executeFunctionImpl(block, arguments, result, input_rows_count); - } -}; - -template -class FunctionExecutor>> - : public DefaultFunction -{ -public: - using BaseFunctionPtr = FunctionPtr; - - template - FunctionExecutor(Args&&... args) : DefaultFunction(std::forward(args)...)
{} - virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) = 0; - - virtual void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - executeFunctionImpl(block, arguments, result, input_rows_count); - } -}; - -/// Combine several IExecutableFunctionImpl into one. -/// All the implementations should be equivalent. -/// Implementation to execute will be selected based on performance on previous runs. -/// DefaultFunction should be executable on every supported platform, while alternative implementations -/// could use extended set of instructions (AVX, NEON, etc). -/// It's convenient to inherit your func from this and register all alternative implementations in the constructor. -template -class FunctionPerformanceAdaptor : public FunctionExecutor -{ -public: - using BaseFunctionPtr = typename FunctionExecutor::BaseFunctionPtr; - - template - FunctionPerformanceAdaptor(const Context & context_, Params&&... params) - : FunctionExecutor(std::forward(params)...) - , context(context_) - { - if (isImplementationEnabled(DefaultFunction::getImplementationTag())) - statistics.emplace_back(); - } - - /// Register alternative implementation. - template - void registerImplementation(TargetArch arch, Params&&... params) - { - if (IsArchSupported(arch) && isImplementationEnabled(Function::getImplementationTag())) - { - impls.emplace_back(std::make_shared(std::forward(params)...)); - statistics.emplace_back(); - } - } - - bool isImplementationEnabled(const String & impl_tag) - { - const String & tag = context.getSettingsRef().function_implementation.value; - return tag.empty() || tag == impl_tag; - // if (!options.implementations) - // return true; - - // for (const auto & tag : *options.implementations) - // { - // if (tag == impl_tag) - // return true; - // } - // return false; - } - -protected: - virtual void executeFunctionImpl(Block & block, const ColumnNumbers & arguments, - size_t result, size_t input_rows_count) override - { - if (statistics.empty()) - throw Exception("All available implementations are disabled by user config", + if (implementations.empty()) + throw Exception("There are no available implementations for function " "TODO(dakovalkov): add name", ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION); auto id = statistics.select(); Stopwatch watch; - if (id == impls.size()) - { - if constexpr (std::is_base_of_v) - DefaultFunction::executeImpl(block, arguments, result, input_rows_count); - else - DefaultFunction::execute(block, arguments, result, input_rows_count); - } + if constexpr (std::is_same_v) - hmm + implementations[id]->executeImpl(block, arguments, result, input_rows_count); else - { - if constexpr (std::is_base_of_v) - impls[id]->executeImpl(block, arguments, result, input_rows_count); - else - impls[id]->execute(block, arguments, result, input_rows_count); - } + implementations[id]->execute(block, arguments, result, input_rows_count); + watch.stop(); // TODO(dakovalkov): Calculate something more informative. @@ -249,10 +193,29 @@ protected: } } + /* Register a new implementation for the function. + * + * Arch - required instruction set for running the implementation. It's guaranteed that not a single method would + * be called (even the constructor and static methods) if the processor doesn't support this instruction set. + * + * FunctionImpl - implementation, should be inherited from template argument FunctionInterface.
+ * + * All function arguments will be forwarded to the implementation constructor. + */ + template + void registerImplementation(Args&&... args) + { + if (IsArchSupported(Arch)) + { + implementations.emplace_back(std::make_shared(std::forward(args)...)); + statistics.emplace_back(); + } + } + private: - std::vector impls; // Alternative implementations. - PerformanceStatistics statistics; const Context & context; + std::vector implementations; + PerformanceStatistics statistics; }; } diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 8713d85fdbd..49655d637f2 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -29,24 +29,35 @@ struct RandXorshiftImpl2 ) // DECLARE_MULTITARGET_CODE template -class FunctionRandomXorshift - : public FunctionPerformanceAdaptor> +class FunctionRandomXorshift : public FunctionRandomImpl { public: - FunctionRandomXorshift(const Context & context_) - : FunctionPerformanceAdaptor>(context_) + FunctionRandomXorshift(const Context & context) : selector(context) { + selector.registerImplementation>(); + if constexpr (UseMultitargetCode) { - this->template registerImplementation>(TargetArch::AVX2); - this->template registerImplementation>(TargetArch::AVX2); + selector.registerImplementation>(); + selector.registerImplementation>(); } } + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + static FunctionPtr create(const Context & context) { return std::make_shared>(context); } + +private: + ImplementationSelector selector; }; } From 66d530e90190b38cf2ce766403eb811342fb3f3a Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 19 May 2020 12:54:41 +0200 Subject: [PATCH 0236/2229] fast rand for performance tests --- src/Functions/FunctionsRandom.cpp | 165 +++++++++++++++++++++++++--- src/Functions/FunctionsRandom.h | 79 +++++++++---- src/Functions/PerformanceAdaptors.h | 9 +- src/Functions/generateUUIDv4.cpp | 2 +- src/Functions/randConstant.cpp | 2 +- 5 files changed, 220 insertions(+), 37 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index fd493d5605b..d0d25e56c60 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -7,8 +7,15 @@ namespace DB { +/* + +// TODO(dakovalkov): remove this workaround. 
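The pragma below, and the large commented-out RandImpl3..5 block after it, rely on GCC/clang generic vector extensions: fixed-width vector types on which ordinary operators work element-wise. A minimal sketch of the idea (ours, not from the patch; -Wvector-operation-performance is the GCC warning that fires when such code lowers to scalar instructions):

#include <cstdint>

typedef uint64_t UInt64x4 __attribute__((vector_size(32)));  /// four 64-bit lanes
typedef uint32_t UInt32x4 __attribute__((vector_size(16)));  /// four 32-bit lanes

static UInt32x4 lcg_step(UInt64x4 & states)
{
    states = states * 0x5DEECE66DULL + 0xBULL;               /// element-wise multiply-add
    return __builtin_convertvector(states >> 16, UInt32x4);  /// narrow every lane
}

Depending on the enabled target, the compiler maps these operations onto SSE/AVX instructions without any hand-written intrinsics, which is exactly what the multitarget macros exploit.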
+#pragma GCC diagnostic ignored "-Wvector-operation-performance" + DECLARE_MULTITARGET_CODE( +*/ + namespace { /// NOTE Probably @@ -80,22 +87,22 @@ void RandImpl2::execute(char * output, size_t size) LinearCongruentialGenerator generator6; LinearCongruentialGenerator generator7; - seed(generator0, 0xfb4121280b2ab902ULL + reinterpret_cast(output)); - seed(generator1, 0x0121cf76df39c673ULL + reinterpret_cast(output)); - seed(generator2, 0x17ae86e3a19a602fULL + reinterpret_cast(output)); - seed(generator3, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)); - seed(generator4, 0xfb4122280b2ab102ULL + reinterpret_cast(output)); - seed(generator5, 0x0121c276df39c173ULL + reinterpret_cast(output)); - seed(generator6, 0x17ae82e3a19a612fULL + reinterpret_cast(output)); - seed(generator7, 0x8b6e12da7e06d122ULL + reinterpret_cast(output)); + seed(generator0, 0xfaaae481acb5874aULL + reinterpret_cast(output)); + seed(generator1, 0x3181a34f32887db6ULL + reinterpret_cast(output)); + seed(generator2, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); + seed(generator3, 0xc16062649e83dc13ULL + reinterpret_cast(output)); + seed(generator4, 0xbb093972da5c8d92ULL + reinterpret_cast(output)); + seed(generator5, 0xc37dcc410dcfed31ULL + reinterpret_cast(output)); + seed(generator6, 0x45e1526b7a4367d5ULL + reinterpret_cast(output)); + seed(generator7, 0x99c2759203868a7fULL + reinterpret_cast(output)); const char * end = output + size; for (; (end - output + 15) <= 32; output += 32) { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); unalignedStore(output + 12, generator3.next()); unalignedStore(output + 16, generator4.next()); unalignedStore(output + 20, generator5.next()); @@ -105,14 +112,144 @@ void RandImpl2::execute(char * output, size_t size) if (end - output > 0) { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); unalignedStore(output + 12, generator3.next()); output += 16; } } +/* + +typedef UInt64 UInt64x16 __attribute__ ((vector_size (128))); +typedef UInt64 UInt64x8 __attribute__ ((vector_size (64))); +typedef UInt64 UInt64x4 __attribute__ ((vector_size (32))); + +typedef UInt32 UInt32x16 __attribute__ ((vector_size (64))); +typedef UInt32 UInt32x8 __attribute__ ((vector_size (32))); +typedef UInt32 UInt32x4 __attribute__ ((vector_size (16))); + +void RandImpl3::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64x4 generators = { + 0xfb4121280b2ab902ULL + reinterpret_cast(output), + 0x0121cf76df39c673ULL + reinterpret_cast(output), + 0x17ae86e3a19a602fULL + reinterpret_cast(output), + 0x8b6e16da7e06d622ULL + reinterpret_cast(output), + }; + + constexpr int bytes_per_write = sizeof(UInt32x4); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators, UInt32x4)); + output += bytes_per_write; + } +} + +void RandImpl4::execute(char * output, size_t size) +{ + if (size == 0) + return; + + 
char * end = output + size; + + UInt64x8 generators = { + 0x5f186ce5faee450bULL + reinterpret_cast(output), + 0x9adb2ca3c72ac2eeULL + reinterpret_cast(output), + 0x07acf8bfa2537705ULL + reinterpret_cast(output), + 0x692b1b533834db92ULL + reinterpret_cast(output), + 0x5148b84cdda30081ULL + reinterpret_cast(output), + 0xe17b8a75a301ad47ULL + reinterpret_cast(output), + 0x6d4a5d69ed2a5f56ULL + reinterpret_cast(output), + 0x114e23266201b333ULL + reinterpret_cast(output), + }; + + constexpr int bytes_per_write = sizeof(UInt32x8); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators, UInt32x8)); + output += bytes_per_write; + } + + if ((end - output) > 0) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + UInt32x8 values = __builtin_convertvector(generators, UInt32x8); + for (int i = 0; (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + +void RandImpl5::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64x16 generators = { + 0xfb4121280b2ab902ULL + reinterpret_cast(output), + 0x0121cf76df39c673ULL + reinterpret_cast(output), + 0x17ae86e3a19a602fULL + reinterpret_cast(output), + 0x8b6e16da7e06d622ULL + reinterpret_cast(output), + 0xfb4121f80b2ab902ULL + reinterpret_cast(output), + 0x0122cf767f39c633ULL + reinterpret_cast(output), + 0x14ae86e3a79a502fULL + reinterpret_cast(output), + 0x876316da7e06d622ULL + reinterpret_cast(output), + 0xfb4821280b2ab912ULL + reinterpret_cast(output), + 0x0126cf76df39c633ULL + reinterpret_cast(output), + 0x17a486e3a19a602fULL + reinterpret_cast(output), + 0x8b6216da7e08d622ULL + reinterpret_cast(output), + 0xfb4101f80b5ab902ULL + reinterpret_cast(output), + 0x01226f767f34c633ULL + reinterpret_cast(output), + 0x14ae86e3a75a502fULL + reinterpret_cast(output), + 0x876e36da7e36d622ULL + reinterpret_cast(output), + }; + + constexpr int bytes_per_write = sizeof(UInt32x16); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators, UInt32x16)); + output += bytes_per_write; + } + + if ((end - output) > 0) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + UInt32x16 values = __builtin_convertvector(generators, UInt32x16); + for (int i = 0; (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + ) //DECLARE_MULTITARGET_CODE +*/ + } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 995f8ffeb9c..9a06d8df7a3 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -36,22 +36,48 @@ namespace ErrorCodes * This means that the timer must be of sufficient resolution to give different values to each block. 
*/ +/* + DECLARE_MULTITARGET_CODE( +*/ + struct RandImpl { static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch); } + static String getImplementationTag() { return ToString(TargetArch::Default); } }; struct RandImpl2 { static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } + static String getImplementationTag() { return ToString(TargetArch::Default) + "_v2"; } +}; + +/* + +struct RandImpl3 +{ + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v3"; } +}; + +struct RandImpl4 +{ + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v4"; } +}; + +struct RandImpl5 +{ + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v5"; } }; ) // DECLARE_MULTITARGET_CODE +*/ + template class FunctionRandomImpl : public IFunction { @@ -99,30 +125,45 @@ public: }; template -class FunctionRandom : public FunctionRandomImpl +class FunctionRandom : public FunctionRandomImpl { public: FunctionRandom(const Context & context) : selector(context) { + // selector.registerImplementation>(); selector.registerImplementation>(); - selector.registerImplementation>(); + FunctionRandomImpl>(); - if constexpr (UseMultitargetCode) - { - selector.registerImplementation>(); - selector.registerImplementation>(); - selector.registerImplementation>(); - selector.registerImplementation>(); + // if constexpr (UseMultitargetCode) + // { + // selector.registerImplementation>(); + // selector.registerImplementation>(); + // selector.registerImplementation>(); + // selector.registerImplementation>(); - selector.registerImplementation>(); - } + // selector.registerImplementation>(); + + // selector.registerImplementation>(); + // selector.registerImplementation>(); + + // selector.registerImplementation>(); + // selector.registerImplementation>(); + + // selector.registerImplementation>(); + // selector.registerImplementation>(); + // } } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index b86730952fb..717ad196e61 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -207,8 +207,13 @@ public: { if (IsArchSupported(Arch)) { - implementations.emplace_back(std::make_shared(std::forward(args)...)); - statistics.emplace_back(); + // TODO(dakovalkov): make this option better. 
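+            // (In other words: a non-empty 'function_implementation' setting pins
+            // the selector to the single candidate whose implementation tag matches
+            // it, e.g. the hypothetical value 'avx2_v2' would keep only the
+            // implementation tagged 'avx2_v2'; an empty value keeps every candidate
+            // and lets the runtime statistics pick between them.)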
+ const auto & choose_impl = context.getSettingsRef().function_implementation.value; + if (choose_impl.empty() || choose_impl == FunctionImpl::getImplementationTag()) + { + implementations.emplace_back(std::make_shared(std::forward(args)...)); + statistics.emplace_back(); + } } } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index d543226ba5c..4db3bd4c73d 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -33,7 +33,7 @@ public: size_t size = input_rows_count; vec_to.resize(size); // TODO(dakovalkov): rewrite this workaround - TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); + RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); for (UInt128 & uuid: vec_to) { diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index 3eba5abf10d..163f943d206 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -100,7 +100,7 @@ public: typename ColumnVector::Container vec_to(1); // TODO(dakovalkov): Rewrite this workaround - TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); + RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); ToType value = vec_to[0]; return std::make_unique>(value, argument_types, return_type); From 90bc3e6136a751aed685d0fd2b7a6187652e6853 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 20 May 2020 14:42:21 +0200 Subject: [PATCH 0237/2229] More rand implementations --- src/Functions/FunctionsRandom.cpp | 317 +++++++++++++++++------------- src/Functions/FunctionsRandom.h | 125 ++++++++---- src/Functions/generateUUIDv4.cpp | 2 +- src/Functions/randConstant.cpp | 2 +- 4 files changed, 272 insertions(+), 174 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index d0d25e56c60..496e0edcc5a 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -3,19 +3,18 @@ #include #include #include +#include namespace DB { -/* - // TODO(dakovalkov): remove this workaround. 
-#pragma GCC diagnostic ignored "-Wvector-operation-performance" +#if !defined(__clang__) +# pragma GCC diagnostic ignored "-Wvector-operation-performance" +#endif DECLARE_MULTITARGET_CODE( -*/ - namespace { /// NOTE Probably @@ -45,10 +44,16 @@ namespace } }; - void seed(LinearCongruentialGenerator & generator, intptr_t additional_seed) + UInt64 calcSeed(UInt64 rand_seed, UInt64 additional_seed) { - generator.seed(intHash64(randomSeed() ^ intHash64(additional_seed))); + return intHash64(rand_seed ^ intHash64(additional_seed)); } + + void seed(LinearCongruentialGenerator & generator, UInt64 rand_seed, intptr_t additional_seed) + { + generator.seed(calcSeed(rand_seed, additional_seed)); + } + } void RandImpl::execute(char * output, size_t size) @@ -58,10 +63,12 @@ void RandImpl::execute(char * output, size_t size) LinearCongruentialGenerator generator2; LinearCongruentialGenerator generator3; - seed(generator0, 0xfb4121280b2ab902ULL + reinterpret_cast(output)); - seed(generator1, 0x0121cf76df39c673ULL + reinterpret_cast(output)); - seed(generator2, 0x17ae86e3a19a602fULL + reinterpret_cast(output)); - seed(generator3, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)); + UInt64 rand_seed = randomSeed(); + + seed(generator0, rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)); + seed(generator1, rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)); + seed(generator2, rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)); + seed(generator3, rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)); for (const char * end = output + size; output < end; output += 16) { @@ -73,55 +80,6 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } -void RandImpl2::execute(char * output, size_t size) -{ - if (size == 0) - return; - - LinearCongruentialGenerator generator0; - LinearCongruentialGenerator generator1; - LinearCongruentialGenerator generator2; - LinearCongruentialGenerator generator3; - LinearCongruentialGenerator generator4; - LinearCongruentialGenerator generator5; - LinearCongruentialGenerator generator6; - LinearCongruentialGenerator generator7; - - seed(generator0, 0xfaaae481acb5874aULL + reinterpret_cast(output)); - seed(generator1, 0x3181a34f32887db6ULL + reinterpret_cast(output)); - seed(generator2, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); - seed(generator3, 0xc16062649e83dc13ULL + reinterpret_cast(output)); - seed(generator4, 0xbb093972da5c8d92ULL + reinterpret_cast(output)); - seed(generator5, 0xc37dcc410dcfed31ULL + reinterpret_cast(output)); - seed(generator6, 0x45e1526b7a4367d5ULL + reinterpret_cast(output)); - seed(generator7, 0x99c2759203868a7fULL + reinterpret_cast(output)); - - const char * end = output + size; - - for (; (end - output + 15) <= 32; output += 32) - { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); - unalignedStore(output + 16, generator4.next()); - unalignedStore(output + 20, generator5.next()); - unalignedStore(output + 24, generator6.next()); - unalignedStore(output + 28, generator7.next()); - } - - if (end - output > 0) - { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); - output += 16; - } -} - -/* - typedef UInt64 UInt64x16 __attribute__ 
((vector_size (128))); typedef UInt64 UInt64x8 __attribute__ ((vector_size (64))); typedef UInt64 UInt64x4 __attribute__ ((vector_size (32))); @@ -130,58 +88,85 @@ typedef UInt32 UInt32x16 __attribute__ ((vector_size (64))); typedef UInt32 UInt32x8 __attribute__ ((vector_size (32))); typedef UInt32 UInt32x4 __attribute__ ((vector_size (16))); -void RandImpl3::execute(char * output, size_t size) +template +struct DummyStruct; + +template <> +struct DummyStruct<4> { + using UInt64Type = UInt64x4; + using UInt32Type = UInt32x4; +}; +template <> +struct DummyStruct<8> +{ + using UInt64Type = UInt64x8; + using UInt32Type = UInt32x8; +}; +template <> +struct DummyStruct<16> +{ + using UInt64Type = UInt64x16; + using UInt32Type = UInt32x16; +}; + +template +using VecUInt64 = typename DummyStruct::UInt64Type; +template +using VecUInt32 = typename DummyStruct::UInt32Type; + +namespace { + +constexpr std::array random_numbers = { + 0x0c8ff307dabc0c4cULL, + 0xf4bce78bf3821c1bULL, + 0x4eb628a1e189c21aULL, + 0x85ae000d253e0dbcULL, + + 0xc98073e6480f8a10ULL, + 0xb17e9b70a084d570ULL, + 0x1361c752b768da8cULL, + 0x3d915f60c06d144dULL, + + 0xd5bc9b7aced79587ULL, + 0x66c28000ba8a66cfULL, + 0x0fb58da7a48820f5ULL, + 0x540ee1b57aa861a1ULL, + + 0x212f11936ef2db04ULL, + 0xa3939cd900edcc58ULL, + 0xc676c84420170102ULL, + 0xcbdc824e8b4bf3edULL, +}; + +}; + +template +void RandVecImpl::execute(char * output, size_t size) +{ + static_assert(VectorSize >= 4); + static_assert(VectorSize <= random_numbers.size()); + if (size == 0) return; char * end = output + size; - UInt64x4 generators = { - 0xfb4121280b2ab902ULL + reinterpret_cast(output), - 0x0121cf76df39c673ULL + reinterpret_cast(output), - 0x17ae86e3a19a602fULL + reinterpret_cast(output), - 0x8b6e16da7e06d622ULL + reinterpret_cast(output), - }; - - constexpr int bytes_per_write = sizeof(UInt32x4); constexpr int safe_overwrite = 15; + constexpr int bytes_per_write = sizeof(VecUInt32); + + UInt64 rand_seed = randomSeed(); + + VecUInt64 generators{}; + for (int i = 0; i < VectorSize; ++i) + generators[i] = calcSeed(rand_seed, random_numbers[VectorSize] + reinterpret_cast(output)); while ((end - output) + safe_overwrite >= bytes_per_write) { generators *= LinearCongruentialGenerator::a; generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators, UInt32x4)); - output += bytes_per_write; - } -} - -void RandImpl4::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - UInt64x8 generators = { - 0x5f186ce5faee450bULL + reinterpret_cast(output), - 0x9adb2ca3c72ac2eeULL + reinterpret_cast(output), - 0x07acf8bfa2537705ULL + reinterpret_cast(output), - 0x692b1b533834db92ULL + reinterpret_cast(output), - 0x5148b84cdda30081ULL + reinterpret_cast(output), - 0xe17b8a75a301ad47ULL + reinterpret_cast(output), - 0x6d4a5d69ed2a5f56ULL + reinterpret_cast(output), - 0x114e23266201b333ULL + reinterpret_cast(output), - }; - - constexpr int bytes_per_write = sizeof(UInt32x8); - constexpr int safe_overwrite = 15; - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators, UInt32x8)); + VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); + unalignedStore>(output, values); output += bytes_per_write; } @@ -189,7 +174,7 @@ void RandImpl4::execute(char * output, size_t size) { generators *= LinearCongruentialGenerator::a; 
generators += LinearCongruentialGenerator::c; - UInt32x8 values = __builtin_convertvector(generators, UInt32x8); + VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); for (int i = 0; (end - output) > 0; ++i) { unalignedStore(output, values[i]); @@ -198,49 +183,50 @@ void RandImpl4::execute(char * output, size_t size) } } -void RandImpl5::execute(char * output, size_t size) +template struct RandVecImpl<4>; +template struct RandVecImpl<8>; +template struct RandVecImpl<16>; + +template +void RandVecImpl2::execute(char * output, size_t size) { + static_assert(VectorSize >= 4); + if (size == 0) return; char * end = output + size; - UInt64x16 generators = { - 0xfb4121280b2ab902ULL + reinterpret_cast(output), - 0x0121cf76df39c673ULL + reinterpret_cast(output), - 0x17ae86e3a19a602fULL + reinterpret_cast(output), - 0x8b6e16da7e06d622ULL + reinterpret_cast(output), - 0xfb4121f80b2ab902ULL + reinterpret_cast(output), - 0x0122cf767f39c633ULL + reinterpret_cast(output), - 0x14ae86e3a79a502fULL + reinterpret_cast(output), - 0x876316da7e06d622ULL + reinterpret_cast(output), - 0xfb4821280b2ab912ULL + reinterpret_cast(output), - 0x0126cf76df39c633ULL + reinterpret_cast(output), - 0x17a486e3a19a602fULL + reinterpret_cast(output), - 0x8b6216da7e08d622ULL + reinterpret_cast(output), - 0xfb4101f80b5ab902ULL + reinterpret_cast(output), - 0x01226f767f34c633ULL + reinterpret_cast(output), - 0x14ae86e3a75a502fULL + reinterpret_cast(output), - 0x876e36da7e36d622ULL + reinterpret_cast(output), - }; - - constexpr int bytes_per_write = sizeof(UInt32x16); constexpr int safe_overwrite = 15; + constexpr int bytes_per_write = 2 * sizeof(VecUInt32); + + UInt64 rand_seed = randomSeed(); + VecUInt64 gens1{}, gens2{}; + for (int i = 0; i < VectorSize; ++i) + { + gens1[i] = calcSeed(rand_seed, i * 1123465ull * reinterpret_cast(output)); + gens2[i] = calcSeed(rand_seed, i * 6432453ull * reinterpret_cast(output)); + } while ((end - output) + safe_overwrite >= bytes_per_write) { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators, UInt32x16)); + gens1 *= LinearCongruentialGenerator::a; + gens1 += LinearCongruentialGenerator::c; + VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); + unalignedStore>(output, values1); + gens2 *= LinearCongruentialGenerator::a; + gens2 += LinearCongruentialGenerator::c; + VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); + unalignedStore>(output, values2); output += bytes_per_write; } - - if ((end - output) > 0) + + while ((end - output) > 0) { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - UInt32x16 values = __builtin_convertvector(generators, UInt32x16); - for (int i = 0; (end - output) > 0; ++i) + gens1 *= LinearCongruentialGenerator::a; + gens1 += LinearCongruentialGenerator::c; + VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); + for (int i = 0; (end - output) > 0 && i < VectorSize; ++i) { unalignedStore(output, values[i]); output += sizeof(UInt32); @@ -248,8 +234,73 @@ void RandImpl5::execute(char * output, size_t size) } } +template struct RandVecImpl2<4>; +template struct RandVecImpl2<8>; +template struct RandVecImpl2<16>; + +// template +// void RandVecImpl4::execute(char * output, size_t size) +// { +// static_assert(VectorSize >= 4); + +// if (size == 0) +// return; + +// char * end = output + size; + +// constexpr int safe_overwrite = 15; +// constexpr int 
bytes_per_write = 4 * sizeof(VecUInt32); + +// VecUInt64 gens1{}, gens2{}, gens3{}, gens4{}; +// for (int i = 0; i < VectorSize; ++i) +// { +// gens1[i] = calcSeed(i * 1123465ull * reinterpret_cast(output)); +// gens2[i] = calcSeed(i * 6432453ull * reinterpret_cast(output)); +// gens3[i] = calcSeed(i * 1346434ull * reinterpret_cast(output)); +// gens4[i] = calcSeed(i * 5344753ull * reinterpret_cast(output)); +// } + +// while ((end - output) + safe_overwrite >= bytes_per_write) +// { +// gens1 *= LinearCongruentialGenerator::a; +// gens1 += LinearCongruentialGenerator::c; +// VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); +// unalignedStore>(output, values1); +// gens2 *= LinearCongruentialGenerator::a; +// gens2 += LinearCongruentialGenerator::c; +// VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); +// unalignedStore>(output, values2); +// gens3 *= LinearCongruentialGenerator::a; +// gens3 += LinearCongruentialGenerator::c; +// VecUInt32 values3 = __builtin_convertvector(gens3 >> 16, VecUInt32); +// unalignedStore>(output, values3); +// gens4 *= LinearCongruentialGenerator::a; +// gens4 += LinearCongruentialGenerator::c; +// VecUInt32 values4 = __builtin_convertvector(gens4 >> 16, VecUInt32); +// unalignedStore>(output, values4); +// output += bytes_per_write; +// } + +// while ((end - output) > 0) +// { +// gens1 *= LinearCongruentialGenerator::a; +// gens1 += LinearCongruentialGenerator::c; +// VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); +// for (int i = 0; (end - output) > 0 && i < VectorSize; i += 4) +// { +// unalignedStore(output, values[i]); +// unalignedStore(output + 4, values[i + 1]); +// unalignedStore(output + 8, values[i + 2]); +// unalignedStore(output + 12, values[i + 3]); +// output += 16; +// } +// } +// } + +// template struct RandVecImpl2<4>; +// template struct RandVecImpl2<8>; +// template struct RandVecImpl2<16>; + ) //DECLARE_MULTITARGET_CODE -*/ - } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 9a06d8df7a3..557e1fbe868 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -36,26 +36,20 @@ namespace ErrorCodes * This means that the timer must be of sufficient resolution to give different values to each block. 
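 * (The implementations in FunctionsRandom.cpp additionally mix the address of the
 * output buffer into every generator's seed, and the patches below mix in a fresh
 * randomSeed() per call as well, so blocks produced within a single timer tick
 * still diverge.)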
*/ -/* - DECLARE_MULTITARGET_CODE( -*/ - struct RandImpl { static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(TargetArch::Default); } + static String getImplementationTag() { return ToString(BuildArch); } }; struct RandImpl2 { static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(TargetArch::Default) + "_v2"; } + static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } }; -/* - struct RandImpl3 { static void execute(char * output, size_t size); @@ -74,9 +68,27 @@ struct RandImpl5 static String getImplementationTag() { return ToString(BuildArch) + "_v5"; } }; -) // DECLARE_MULTITARGET_CODE +template +struct RandVecImpl +{ + static void execute(char * outpu, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VectorSize); } +}; -*/ +template +struct RandVecImpl2 +{ + static void execute(char * outpu, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VectorSize); } +}; + +struct RandImpl6 +{ + static void execute(char * outpu, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_v6"; } +}; + +) // DECLARE_MULTITARGET_CODE template class FunctionRandomImpl : public IFunction @@ -125,45 +137,80 @@ public: }; template -class FunctionRandom : public FunctionRandomImpl +class FunctionRandom : public FunctionRandomImpl { public: FunctionRandom(const Context & context) : selector(context) { - // selector.registerImplementation>(); selector.registerImplementation>(); + FunctionRandomImpl>(); + selector.registerImplementation>(); - // if constexpr (UseMultitargetCode) - // { - // selector.registerImplementation>(); - // selector.registerImplementation>(); - // selector.registerImplementation>(); - // selector.registerImplementation>(); + if constexpr (UseMultitargetCode) + { + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); - // selector.registerImplementation>(); + selector.registerImplementation>(); - // selector.registerImplementation>(); - // selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); - // selector.registerImplementation>(); - // selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); - // selector.registerImplementation>(); - // selector.registerImplementation>(); - // } + selector.registerImplementation>(); + selector.registerImplementation>(); + + // vec impl + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + // vec impl 2 + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + selector.registerImplementation, ToType, Name>>(); + selector.registerImplementation, ToType, Name>>(); + + selector.registerImplementation>(); + } } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/generateUUIDv4.cpp 
b/src/Functions/generateUUIDv4.cpp index 4db3bd4c73d..d543226ba5c 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -33,7 +33,7 @@ public: size_t size = input_rows_count; vec_to.resize(size); // TODO(dakovalkov): rewrite this workaround - RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); + TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); for (UInt128 & uuid: vec_to) { diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index 163f943d206..3eba5abf10d 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -100,7 +100,7 @@ public: typename ColumnVector::Container vec_to(1); // TODO(dakovalkov): Rewrite this workaround - RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); + TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); ToType value = vec_to[0]; return std::make_unique>(value, argument_types, return_type); From e6031585710b3b3d3a1921b88e8d50b086b65dc9 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 20 May 2020 14:43:33 +0200 Subject: [PATCH 0238/2229] Rand implementations --- src/Functions/FunctionsRandom.cpp | 249 ++++++++++++++++++++++++++++++ 1 file changed, 249 insertions(+) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 496e0edcc5a..283013bdb9b 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -80,6 +80,62 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } +void RandImpl2::execute(char * output, size_t size) +{ + if (size == 0) + return; + + LinearCongruentialGenerator generator0; + LinearCongruentialGenerator generator1; + LinearCongruentialGenerator generator2; + LinearCongruentialGenerator generator3; + LinearCongruentialGenerator generator4; + LinearCongruentialGenerator generator5; + LinearCongruentialGenerator generator6; + LinearCongruentialGenerator generator7; + + UInt64 rand_seed = randomSeed(); + + seed(generator0, rand_seed, 0xfaaae481acb5874aULL + reinterpret_cast(output)); + seed(generator1, rand_seed, 0x3181a34f32887db6ULL + reinterpret_cast(output)); + seed(generator2, rand_seed, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); + seed(generator3, rand_seed, 0xc16062649e83dc13ULL + reinterpret_cast(output)); + seed(generator4, rand_seed, 0xbb093972da5c8d92ULL + reinterpret_cast(output)); + seed(generator5, rand_seed, 0xc37dcc410dcfed31ULL + reinterpret_cast(output)); + seed(generator6, rand_seed, 0x45e1526b7a4367d5ULL + reinterpret_cast(output)); + seed(generator7, rand_seed, 0x99c2759203868a7fULL + reinterpret_cast(output)); + + const char * end = output + size; + + constexpr int bytes_per_write = 32; + constexpr int safe_overwrite = 15; + + for (; (end - output) + safe_overwrite >= bytes_per_write; output += safe_overwrite) + { + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); + unalignedStore(output + 16, generator4.next()); + unalignedStore(output + 20, generator5.next()); + unalignedStore(output + 24, generator6.next()); + unalignedStore(output + 28, generator7.next()); + } + + seed(generator0, rand_seed, 0xfaaae481acb5874aULL + reinterpret_cast(output)); + seed(generator1, rand_seed, 
0x3181a34f32887db6ULL + reinterpret_cast(output)); + seed(generator2, rand_seed, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); + seed(generator3, rand_seed, 0xc16062649e83dc13ULL + reinterpret_cast(output)); + + if (end - output > 0) + { + unalignedStore(output, generator0.next()); + unalignedStore(output + 4, generator1.next()); + unalignedStore(output + 8, generator2.next()); + unalignedStore(output + 12, generator3.next()); + } +} + typedef UInt64 UInt64x16 __attribute__ ((vector_size (128))); typedef UInt64 UInt64x8 __attribute__ ((vector_size (64))); typedef UInt64 UInt64x4 __attribute__ ((vector_size (32))); @@ -115,6 +171,128 @@ using VecUInt64 = typename DummyStruct::UInt64Type; template using VecUInt32 = typename DummyStruct::UInt32Type; +void RandImpl3::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64x4 generators = { + 0xfb4121280b2ab902ULL + reinterpret_cast(output), + 0x0121cf76df39c673ULL + reinterpret_cast(output), + 0x17ae86e3a19a602fULL + reinterpret_cast(output), + 0x8b6e16da7e06d622ULL + reinterpret_cast(output), + }; + + constexpr int bytes_per_write = sizeof(UInt32x4); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x4)); + output += bytes_per_write; + } +} + +void RandImpl4::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64 rand_seed = randomSeed(); + + UInt64x8 generators = { + calcSeed(rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0xfb4121f80b2ab902ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x0122cf767f39c633ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x14ae86e3a79a502fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x876316da7e06d622ULL + reinterpret_cast(output)), + }; + + constexpr int bytes_per_write = sizeof(UInt32x8); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x8)); + output += bytes_per_write; + } + + if ((end - output) > 0) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + UInt32x8 values = __builtin_convertvector(generators >> 16, UInt32x8); + for (int i = 0; (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + +void RandImpl5::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64 rand_seed = randomSeed(); + + UInt64x16 generators = { + calcSeed(rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0xfb4121f80b2ab902ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x0122cf767f39c633ULL + 
reinterpret_cast(output)), + calcSeed(rand_seed, 0x14ae86e3a79a502fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x876316da7e06d622ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0xfb4821280b2ab912ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x0126cf76df39c633ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x17a486e3a19a602fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x8b6216da7e08d622ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0xfb4101f80b5ab902ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x01226f767f34c633ULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x14ae86e3a75a502fULL + reinterpret_cast(output)), + calcSeed(rand_seed, 0x876e36da7e36d622ULL + reinterpret_cast(output)), + }; + + constexpr int bytes_per_write = sizeof(UInt32x16); + constexpr int safe_overwrite = 15; + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x16)); + output += bytes_per_write; + } + + if ((end - output) > 0) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + UInt32x16 values = __builtin_convertvector(generators >> 16, UInt32x16); + for (int i = 0; (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + namespace { constexpr std::array random_numbers = { @@ -303,4 +481,75 @@ template struct RandVecImpl2<16>; ) //DECLARE_MULTITARGET_CODE +DECLARE_AVX2_SPECIFIC_CODE( + +void RandImpl6::execute(char * output, size_t size) +{ + if (size == 0) + return; + + char * end = output + size; + + UInt64x8 generators = { + 0x5f186ce5faee450bULL + reinterpret_cast(output), + 0x9adb2ca3c72ac2eeULL + reinterpret_cast(output), + 0x07acf8bfa2537705ULL + reinterpret_cast(output), + 0x692b1b533834db92ULL + reinterpret_cast(output), + 0x5148b84cdda30081ULL + reinterpret_cast(output), + 0xe17b8a75a301ad47ULL + reinterpret_cast(output), + 0x6d4a5d69ed2a5f56ULL + reinterpret_cast(output), + 0x114e23266201b333ULL + reinterpret_cast(output), + }; + + union { + UInt64x8 vec; + __m256i mm[2]; + } gens {generators}; + + constexpr int bytes_per_write = sizeof(UInt32x8); + constexpr int safe_overwrite = 15; + + const auto low_a = _mm256_set1_epi64x(0xDEECE66D); + // const auto high_a = _mm256_set1_epi64x(5); + const auto c = _mm256_set1_epi64x(11); + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + { + auto gens_high = _mm256_srli_epi64(gens.mm[0], 32); + auto low_low_res = _mm256_mul_epu32(gens.mm[0], low_a); + auto high_low_res = _mm256_slli_epi64(_mm256_mul_epu32(gens_high, low_a), 32); + auto low_high_res = _mm256_slli_epi64(gens.mm[0], 32) + _mm256_slli_epi64(gens.mm[0], 34); + gens.mm[0] = _mm256_add_epi64(_mm256_add_epi64(low_low_res, high_low_res), + _mm256_add_epi64(low_high_res, c)); + } + { + auto gens_high = _mm256_srli_epi64(gens.mm[1], 32); + auto low_low_res = _mm256_mul_epu32(gens.mm[1], low_a); + auto high_low_res = _mm256_slli_epi64(_mm256_mul_epu32(gens_high, low_a), 32); + auto low_high_res = _mm256_slli_epi64(gens.mm[1], 32) + _mm256_slli_epi64(gens.mm[1], 34); + gens.mm[1] = _mm256_add_epi64(_mm256_add_epi64(low_low_res, high_low_res), + _mm256_add_epi64(low_high_res, c)); + } + // generators *= LinearCongruentialGenerator::a; + // generators += LinearCongruentialGenerator::c; + unalignedStore(output, __builtin_convertvector(gens.vec >> 16, 
UInt32x8)); + output += bytes_per_write; + } + + if ((end - output) > 0) + { + generators *= LinearCongruentialGenerator::a; + generators += LinearCongruentialGenerator::c; + UInt32x8 values = __builtin_convertvector(generators >> 16, UInt32x8); + for (int i = 0; (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + +) // DECLARE_AVX2_SPECIFIC_CODE + } From feaed1e020934ddac683fab616fd2927e6d256a8 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 20 May 2020 17:43:01 +0200 Subject: [PATCH 0239/2229] rand isn't avx2-vectorizable, I give it up --- src/Functions/FunctionsRandom.cpp | 581 +++++++++--------------------- src/Functions/FunctionsRandom.h | 111 +----- src/Functions/VectorExtension.h | 101 ++++++ 3 files changed, 278 insertions(+), 515 deletions(-) create mode 100644 src/Functions/VectorExtension.h diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 283013bdb9b..5ab51e9e3b8 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -8,11 +9,6 @@ namespace DB { -// TODO(dakovalkov): remove this workaround. -#if !defined(__clang__) -# pragma GCC diagnostic ignored "-Wvector-operation-performance" -#endif - DECLARE_MULTITARGET_CODE( namespace @@ -80,250 +76,34 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } -void RandImpl2::execute(char * output, size_t size) -{ - if (size == 0) - return; - - LinearCongruentialGenerator generator0; - LinearCongruentialGenerator generator1; - LinearCongruentialGenerator generator2; - LinearCongruentialGenerator generator3; - LinearCongruentialGenerator generator4; - LinearCongruentialGenerator generator5; - LinearCongruentialGenerator generator6; - LinearCongruentialGenerator generator7; - - UInt64 rand_seed = randomSeed(); - - seed(generator0, rand_seed, 0xfaaae481acb5874aULL + reinterpret_cast(output)); - seed(generator1, rand_seed, 0x3181a34f32887db6ULL + reinterpret_cast(output)); - seed(generator2, rand_seed, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); - seed(generator3, rand_seed, 0xc16062649e83dc13ULL + reinterpret_cast(output)); - seed(generator4, rand_seed, 0xbb093972da5c8d92ULL + reinterpret_cast(output)); - seed(generator5, rand_seed, 0xc37dcc410dcfed31ULL + reinterpret_cast(output)); - seed(generator6, rand_seed, 0x45e1526b7a4367d5ULL + reinterpret_cast(output)); - seed(generator7, rand_seed, 0x99c2759203868a7fULL + reinterpret_cast(output)); - - const char * end = output + size; - - constexpr int bytes_per_write = 32; - constexpr int safe_overwrite = 15; - - for (; (end - output) + safe_overwrite >= bytes_per_write; output += safe_overwrite) - { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); - unalignedStore(output + 16, generator4.next()); - unalignedStore(output + 20, generator5.next()); - unalignedStore(output + 24, generator6.next()); - unalignedStore(output + 28, generator7.next()); - } - - seed(generator0, rand_seed, 0xfaaae481acb5874aULL + reinterpret_cast(output)); - seed(generator1, rand_seed, 0x3181a34f32887db6ULL + reinterpret_cast(output)); - seed(generator2, rand_seed, 0xb6970e4a91b66afdULL + reinterpret_cast(output)); - seed(generator3, rand_seed, 0xc16062649e83dc13ULL + 
reinterpret_cast(output)); - - if (end - output > 0) - { - unalignedStore(output, generator0.next()); - unalignedStore(output + 4, generator1.next()); - unalignedStore(output + 8, generator2.next()); - unalignedStore(output + 12, generator3.next()); - } -} - -typedef UInt64 UInt64x16 __attribute__ ((vector_size (128))); -typedef UInt64 UInt64x8 __attribute__ ((vector_size (64))); -typedef UInt64 UInt64x4 __attribute__ ((vector_size (32))); - -typedef UInt32 UInt32x16 __attribute__ ((vector_size (64))); -typedef UInt32 UInt32x8 __attribute__ ((vector_size (32))); -typedef UInt32 UInt32x4 __attribute__ ((vector_size (16))); - -template -struct DummyStruct; - -template <> -struct DummyStruct<4> -{ - using UInt64Type = UInt64x4; - using UInt32Type = UInt32x4; -}; -template <> -struct DummyStruct<8> -{ - using UInt64Type = UInt64x8; - using UInt32Type = UInt32x8; -}; -template <> -struct DummyStruct<16> -{ - using UInt64Type = UInt64x16; - using UInt32Type = UInt32x16; -}; - -template -using VecUInt64 = typename DummyStruct::UInt64Type; -template -using VecUInt32 = typename DummyStruct::UInt32Type; - -void RandImpl3::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - UInt64x4 generators = { - 0xfb4121280b2ab902ULL + reinterpret_cast(output), - 0x0121cf76df39c673ULL + reinterpret_cast(output), - 0x17ae86e3a19a602fULL + reinterpret_cast(output), - 0x8b6e16da7e06d622ULL + reinterpret_cast(output), - }; - - constexpr int bytes_per_write = sizeof(UInt32x4); - constexpr int safe_overwrite = 15; - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x4)); - output += bytes_per_write; - } -} - -void RandImpl4::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - UInt64 rand_seed = randomSeed(); - - UInt64x8 generators = { - calcSeed(rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0xfb4121f80b2ab902ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x0122cf767f39c633ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x14ae86e3a79a502fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x876316da7e06d622ULL + reinterpret_cast(output)), - }; - - constexpr int bytes_per_write = sizeof(UInt32x8); - constexpr int safe_overwrite = 15; - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x8)); - output += bytes_per_write; - } - - if ((end - output) > 0) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - UInt32x8 values = __builtin_convertvector(generators >> 16, UInt32x8); - for (int i = 0; (end - output) > 0; ++i) - { - unalignedStore(output, values[i]); - output += sizeof(UInt32); - } - } -} - -void RandImpl5::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - UInt64 rand_seed = randomSeed(); - - UInt64x16 generators = { - calcSeed(rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)), 
- calcSeed(rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0xfb4121f80b2ab902ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x0122cf767f39c633ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x14ae86e3a79a502fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x876316da7e06d622ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0xfb4821280b2ab912ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x0126cf76df39c633ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x17a486e3a19a602fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x8b6216da7e08d622ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0xfb4101f80b5ab902ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x01226f767f34c633ULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x14ae86e3a75a502fULL + reinterpret_cast(output)), - calcSeed(rand_seed, 0x876e36da7e36d622ULL + reinterpret_cast(output)), - }; - - constexpr int bytes_per_write = sizeof(UInt32x16); - constexpr int safe_overwrite = 15; - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(generators >> 16, UInt32x16)); - output += bytes_per_write; - } - - if ((end - output) > 0) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - UInt32x16 values = __builtin_convertvector(generators >> 16, UInt32x16); - for (int i = 0; (end - output) > 0; ++i) - { - unalignedStore(output, values[i]); - output += sizeof(UInt32); - } - } -} - namespace { -constexpr std::array random_numbers = { - 0x0c8ff307dabc0c4cULL, - 0xf4bce78bf3821c1bULL, - 0x4eb628a1e189c21aULL, - 0x85ae000d253e0dbcULL, +// The array of random numbers from 'head -c8 /dev/urandom | xxd -p'. +// Can be used for creating seeds for random generators. 
+constexpr std::array random_numbers = { + 0x0c8ff307dabc0c4cULL, 0xf4bce78bf3821c1bULL, 0x4eb628a1e189c21aULL, 0x85ae000d253e0dbcULL, + 0xc98073e6480f8a10ULL, 0xb17e9b70a084d570ULL, 0x1361c752b768da8cULL, 0x3d915f60c06d144dULL, + 0xd5bc9b7aced79587ULL, 0x66c28000ba8a66cfULL, 0x0fb58da7a48820f5ULL, 0x540ee1b57aa861a1ULL, + 0x212f11936ef2db04ULL, 0xa3939cd900edcc58ULL, 0xc676c84420170102ULL, 0xcbdc824e8b4bf3edULL, - 0xc98073e6480f8a10ULL, - 0xb17e9b70a084d570ULL, - 0x1361c752b768da8cULL, - 0x3d915f60c06d144dULL, - - 0xd5bc9b7aced79587ULL, - 0x66c28000ba8a66cfULL, - 0x0fb58da7a48820f5ULL, - 0x540ee1b57aa861a1ULL, - - 0x212f11936ef2db04ULL, - 0xa3939cd900edcc58ULL, - 0xc676c84420170102ULL, - 0xcbdc824e8b4bf3edULL, + 0x8296f9d93cc94e3bULL, 0x78a7e826d62085b2ULL, 0xaa30620211fc6c69ULL, 0xbd38de52f0a93677ULL, + 0x19983de8d79dcc4eULL, 0x8afe883ef2199e6fULL, 0xb7160f7ed022b60aULL, 0x2ce173d373ddafd4ULL, + 0x15762761bb55b9acULL, 0x3e448fc94fdd28e7ULL, 0xa5121232adfbe70aULL, 0xb1e0f6d286112804ULL, + 0x6062e96de9554806ULL, 0xcc679b329c28882aULL, 0x5c6d29f45cbc060eULL, 0x1af1325a86ffb162ULL, }; }; -template -void RandVecImpl::execute(char * output, size_t size) +using namespace VectorExtension; + +template +void RandVecImpl::execute(char * output, size_t size) { - static_assert(VectorSize >= 4); - static_assert(VectorSize <= random_numbers.size()); + static_assert(VecSize >= 4); + static_assert(VecSize <= random_numbers.size()); + + using VecUInt64 = UInt64x; + using VecUInt32 = UInt32x; if (size == 0) return; @@ -331,80 +111,38 @@ void RandVecImpl::execute(char * output, size_t size) char * end = output + size; constexpr int safe_overwrite = 15; - constexpr int bytes_per_write = sizeof(VecUInt32); + constexpr int bytes_per_write = sizeof(VecUInt32); UInt64 rand_seed = randomSeed(); - VecUInt64 generators{}; - for (int i = 0; i < VectorSize; ++i) - generators[i] = calcSeed(rand_seed, random_numbers[VectorSize] + reinterpret_cast(output)); + UInt64 a = LinearCongruentialGenerator::a; + // TODO(dakovalkov): try to remove this. + /// Note: GCC likes to expand multiplication by a constant into shifts + additions. + /// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down. + /// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0. 
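+    /// (For reference: these appear to be the classic drand48/lrand48 constants,
+    /// a = 0x5DEECE66D and c = 11 modulo 2^48, with the 32-bit output taken from
+    /// bits 47..16 of the state; that is why every store below shifts the
+    /// generators right by 16. A multiply by such a constant is precisely what
+    /// GCC likes to strength-reduce into the shift-and-add chains described above.)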
+ if (rand_seed == 0) + a = LinearCongruentialGenerator::a + 2; + + constexpr UInt64 c = LinearCongruentialGenerator::c; + + VecUInt64 generators{}; + for (int i = 0; i < VecSize; ++i) + generators[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); while ((end - output) + safe_overwrite >= bytes_per_write) { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); - unalignedStore>(output, values); + generators = generators * a + c;; + VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); + unalignedStore(output, values); output += bytes_per_write; } - if ((end - output) > 0) - { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); - for (int i = 0; (end - output) > 0; ++i) - { - unalignedStore(output, values[i]); - output += sizeof(UInt32); - } - } -} - -template struct RandVecImpl<4>; -template struct RandVecImpl<8>; -template struct RandVecImpl<16>; - -template -void RandVecImpl2::execute(char * output, size_t size) -{ - static_assert(VectorSize >= 4); - - if (size == 0) - return; - - char * end = output + size; - - constexpr int safe_overwrite = 15; - constexpr int bytes_per_write = 2 * sizeof(VecUInt32); - - UInt64 rand_seed = randomSeed(); - VecUInt64 gens1{}, gens2{}; - for (int i = 0; i < VectorSize; ++i) - { - gens1[i] = calcSeed(rand_seed, i * 1123465ull * reinterpret_cast(output)); - gens2[i] = calcSeed(rand_seed, i * 6432453ull * reinterpret_cast(output)); - } - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - gens1 *= LinearCongruentialGenerator::a; - gens1 += LinearCongruentialGenerator::c; - VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); - unalignedStore>(output, values1); - gens2 *= LinearCongruentialGenerator::a; - gens2 += LinearCongruentialGenerator::c; - VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); - unalignedStore>(output, values2); - output += bytes_per_write; - } - + // Process tail while ((end - output) > 0) { - gens1 *= LinearCongruentialGenerator::a; - gens1 += LinearCongruentialGenerator::c; - VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); - for (int i = 0; (end - output) > 0 && i < VectorSize; ++i) + generators = generators * a + c;; + VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); + for (int i = 0; i < VecSize && (end - output) > 0; ++i) { unalignedStore(output, values[i]); output += sizeof(UInt32); @@ -412,137 +150,60 @@ void RandVecImpl2::execute(char * output, size_t size) } } -template struct RandVecImpl2<4>; -template struct RandVecImpl2<8>; -template struct RandVecImpl2<16>; - -// template -// void RandVecImpl4::execute(char * output, size_t size) -// { -// static_assert(VectorSize >= 4); - -// if (size == 0) -// return; - -// char * end = output + size; - -// constexpr int safe_overwrite = 15; -// constexpr int bytes_per_write = 4 * sizeof(VecUInt32); - -// VecUInt64 gens1{}, gens2{}, gens3{}, gens4{}; -// for (int i = 0; i < VectorSize; ++i) -// { -// gens1[i] = calcSeed(i * 1123465ull * reinterpret_cast(output)); -// gens2[i] = calcSeed(i * 6432453ull * reinterpret_cast(output)); -// gens3[i] = calcSeed(i * 1346434ull * reinterpret_cast(output)); -// gens4[i] = calcSeed(i * 5344753ull * reinterpret_cast(output)); -// } - -// while ((end - output) + safe_overwrite >= 
bytes_per_write) -// { -// gens1 *= LinearCongruentialGenerator::a; -// gens1 += LinearCongruentialGenerator::c; -// VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); -// unalignedStore>(output, values1); -// gens2 *= LinearCongruentialGenerator::a; -// gens2 += LinearCongruentialGenerator::c; -// VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); -// unalignedStore>(output, values2); -// gens3 *= LinearCongruentialGenerator::a; -// gens3 += LinearCongruentialGenerator::c; -// VecUInt32 values3 = __builtin_convertvector(gens3 >> 16, VecUInt32); -// unalignedStore>(output, values3); -// gens4 *= LinearCongruentialGenerator::a; -// gens4 += LinearCongruentialGenerator::c; -// VecUInt32 values4 = __builtin_convertvector(gens4 >> 16, VecUInt32); -// unalignedStore>(output, values4); -// output += bytes_per_write; -// } - -// while ((end - output) > 0) -// { -// gens1 *= LinearCongruentialGenerator::a; -// gens1 += LinearCongruentialGenerator::c; -// VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); -// for (int i = 0; (end - output) > 0 && i < VectorSize; i += 4) -// { -// unalignedStore(output, values[i]); -// unalignedStore(output + 4, values[i + 1]); -// unalignedStore(output + 8, values[i + 2]); -// unalignedStore(output + 12, values[i + 3]); -// output += 16; -// } -// } -// } - -// template struct RandVecImpl2<4>; -// template struct RandVecImpl2<8>; -// template struct RandVecImpl2<16>; - -) //DECLARE_MULTITARGET_CODE - -DECLARE_AVX2_SPECIFIC_CODE( - -void RandImpl6::execute(char * output, size_t size) +template +void RandVecImpl2::execute(char * output, size_t size) { + static_assert(VecSize >= 4); + static_assert(2 * VecSize <= random_numbers.size()); + + using VecUInt64 = UInt64x; + using VecUInt32 = UInt32x; + if (size == 0) return; char * end = output + size; - UInt64x8 generators = { - 0x5f186ce5faee450bULL + reinterpret_cast(output), - 0x9adb2ca3c72ac2eeULL + reinterpret_cast(output), - 0x07acf8bfa2537705ULL + reinterpret_cast(output), - 0x692b1b533834db92ULL + reinterpret_cast(output), - 0x5148b84cdda30081ULL + reinterpret_cast(output), - 0xe17b8a75a301ad47ULL + reinterpret_cast(output), - 0x6d4a5d69ed2a5f56ULL + reinterpret_cast(output), - 0x114e23266201b333ULL + reinterpret_cast(output), - }; - - union { - UInt64x8 vec; - __m256i mm[2]; - } gens {generators}; - - constexpr int bytes_per_write = sizeof(UInt32x8); constexpr int safe_overwrite = 15; + constexpr int bytes_per_write = 2 * sizeof(VecUInt32); - const auto low_a = _mm256_set1_epi64x(0xDEECE66D); - // const auto high_a = _mm256_set1_epi64x(5); - const auto c = _mm256_set1_epi64x(11); + UInt64 rand_seed = randomSeed(); + + UInt64 a = LinearCongruentialGenerator::a; + // TODO(dakovalkov): try to remove this. + /// Note: GCC likes to expand multiplication by a constant into shifts + additions. + /// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down. + /// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0. 
+ if (rand_seed == 0) + a = LinearCongruentialGenerator::a + 2; + + constexpr UInt64 c = LinearCongruentialGenerator::c; + + VecUInt64 gens1{}; + VecUInt64 gens2{}; + for (int i = 0; i < VecSize; ++i) + { + gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); + gens2[i] = calcSeed(rand_seed, random_numbers[i + VecSize] + reinterpret_cast(output)); + } while ((end - output) + safe_overwrite >= bytes_per_write) { - { - auto gens_high = _mm256_srli_epi64(gens.mm[0], 32); - auto low_low_res = _mm256_mul_epu32(gens.mm[0], low_a); - auto high_low_res = _mm256_slli_epi64(_mm256_mul_epu32(gens_high, low_a), 32); - auto low_high_res = _mm256_slli_epi64(gens.mm[0], 32) + _mm256_slli_epi64(gens.mm[0], 34); - gens.mm[0] = _mm256_add_epi64(_mm256_add_epi64(low_low_res, high_low_res), - _mm256_add_epi64(low_high_res, c)); - } - { - auto gens_high = _mm256_srli_epi64(gens.mm[1], 32); - auto low_low_res = _mm256_mul_epu32(gens.mm[1], low_a); - auto high_low_res = _mm256_slli_epi64(_mm256_mul_epu32(gens_high, low_a), 32); - auto low_high_res = _mm256_slli_epi64(gens.mm[1], 32) + _mm256_slli_epi64(gens.mm[1], 34); - gens.mm[1] = _mm256_add_epi64(_mm256_add_epi64(low_low_res, high_low_res), - _mm256_add_epi64(low_high_res, c)); - } - // generators *= LinearCongruentialGenerator::a; - // generators += LinearCongruentialGenerator::c; - unalignedStore(output, __builtin_convertvector(gens.vec >> 16, UInt32x8)); + gens1 = gens1 * a + c;; + VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); + unalignedStore(output, values1); + gens2 = gens2 * a + c;; + VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); + unalignedStore(output + sizeof(VecUInt32), values2); output += bytes_per_write; } - if ((end - output) > 0) + // Process tail + while ((end - output) > 0) { - generators *= LinearCongruentialGenerator::a; - generators += LinearCongruentialGenerator::c; - UInt32x8 values = __builtin_convertvector(generators >> 16, UInt32x8); - for (int i = 0; (end - output) > 0; ++i) + gens1 = gens1 * a + c;; + VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); + for (int i = 0; i < VecSize && (end - output) > 0; ++i) { unalignedStore(output, values[i]); output += sizeof(UInt32); @@ -550,6 +211,86 @@ void RandImpl6::execute(char * output, size_t size) } } +template +void RandVecImpl4::execute(char * output, size_t size) +{ + static_assert(VecSize >= 4); + static_assert(4 * VecSize <= random_numbers.size()); + + using VecUInt64 = UInt64x; + using VecUInt32 = UInt32x; + + if (size == 0) + return; + + char * end = output + size; + + constexpr int safe_overwrite = 15; + constexpr int bytes_per_write = 4 * sizeof(VecUInt32); + + UInt64 rand_seed = randomSeed(); + + UInt64 a = LinearCongruentialGenerator::a; + // TODO(dakovalkov): try to remove this. + /// Note: GCC likes to expand multiplication by a constant into shifts + additions. + /// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down. + /// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0. 
+ if (rand_seed == 0) + a = LinearCongruentialGenerator::a + 2; + + constexpr UInt64 c = LinearCongruentialGenerator::c; + + VecUInt64 gens1{}; + VecUInt64 gens2{}; + VecUInt64 gens3{}; + VecUInt64 gens4{}; + for (int i = 0; i < VecSize; ++i) + { + gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); + gens2[i] = calcSeed(rand_seed, random_numbers[i + VecSize] + reinterpret_cast(output)); + gens3[i] = calcSeed(rand_seed, random_numbers[i + 2 * VecSize] + reinterpret_cast(output)); + gens4[i] = calcSeed(rand_seed, random_numbers[i + 3 * VecSize] + reinterpret_cast(output)); + } + + while ((end - output) + safe_overwrite >= bytes_per_write) + { + gens1 = gens1 * a + c; + VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); + unalignedStore(output, values1); + gens2 = gens2 * a + c; + VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); + unalignedStore(output + sizeof(VecUInt32), values2); + gens3 = gens3 * a + c; + VecUInt32 values3 = __builtin_convertvector(gens3 >> 16, VecUInt32); + unalignedStore(output + 2 * sizeof(VecUInt32), values3); + gens4 = gens4 * a + c; + VecUInt32 values4 = __builtin_convertvector(gens4 >> 16, VecUInt32); + unalignedStore(output + 3 * sizeof(VecUInt32), values4); + output += bytes_per_write; + } + + // Process tail + while ((end - output) > 0) + { + gens1 = gens1 * a + c;; + VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); + for (int i = 0; i < VecSize && (end - output) > 0; ++i) + { + unalignedStore(output, values[i]); + output += sizeof(UInt32); + } + } +} + +) // DECLARE_MULTITARGET_CODE + +DECLARE_AVX2_SPECIFIC_CODE( + template struct RandVecImpl4<4>; ) // DECLARE_AVX2_SPECIFIC_CODE +DECLARE_AVX512F_SPECIFIC_CODE( + template struct RandVecImpl4<8>; +) // DECLARE_AVX512F_SPECIFIC_CODE + + } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 557e1fbe868..a82f199356e 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -43,49 +43,26 @@ struct RandImpl static void execute(char * output, size_t size); static String getImplementationTag() { return ToString(BuildArch); } }; - -struct RandImpl2 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } -}; - -struct RandImpl3 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v3"; } -}; - -struct RandImpl4 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v4"; } -}; - -struct RandImpl5 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v5"; } -}; - -template +// Isn't used now. +template struct RandVecImpl { - static void execute(char * outpu, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VectorSize); } + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VecSize); } }; - -template +// Isn't used now. 
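+// (Kept for reference and benchmarking rather than deleted: as the commit message
+// above says, the LCG step did not vectorize profitably. AVX2 offers no packed
+// 64-bit multiply (that only arrives with AVX-512DQ), so 'generators * a' has to
+// be emulated from 32-bit multiplies and shifts, much as the abandoned RandImpl6
+// did with _mm256_mul_epu32.)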
+template struct RandVecImpl2 { - static void execute(char * outpu, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VectorSize); } + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VecSize); } }; -struct RandImpl6 +template +struct RandVecImpl4 { - static void execute(char * outpu, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v6"; } + static void execute(char * output, size_t size); + static String getImplementationTag() { return ToString(BuildArch) + "_vec4_" + toString(VecSize); } }; ) // DECLARE_MULTITARGET_CODE @@ -144,72 +121,16 @@ public: { selector.registerImplementation>(); - selector.registerImplementation>(); if constexpr (UseMultitargetCode) { - selector.registerImplementation>(); - selector.registerImplementation>(); + // vec impl 4 selector.registerImplementation>(); + FunctionRandomImpl, ToType, Name>>(); + selector.registerImplementation>(); + FunctionRandomImpl, ToType, Name>>(); - selector.registerImplementation>(); - - selector.registerImplementation>(); - selector.registerImplementation>(); - - selector.registerImplementation>(); - selector.registerImplementation>(); - - selector.registerImplementation>(); - selector.registerImplementation>(); - - // vec impl - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - // vec impl 2 - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation, ToType, Name>>(); - selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation>(); } } diff --git a/src/Functions/VectorExtension.h b/src/Functions/VectorExtension.h new file mode 100644 index 00000000000..49a029bb0d9 --- /dev/null +++ b/src/Functions/VectorExtension.h @@ -0,0 +1,101 @@ +#pragma once + +#include +// Contains types declarations and wrappers for GCC vector extension. + +// TODO(dakovalkov): remove this workaround. 
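+// (Background on the pragma below: GCC emits -Wvector-operation-performance when
+// an operation on a vector-extension type cannot be lowered to native SIMD for
+// the current target and is scalarized instead. Because this header is compiled
+// for several target arches, the widest types inevitably trigger it somewhere;
+// Clang does not implement the diagnostic, hence the compiler guard.)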
+#if !defined(__clang__) +# pragma GCC diagnostic ignored "-Wvector-operation-performance" +#endif + +namespace DB::VectorExtension +{ + +typedef UInt64 UInt64x2 __attribute__ ((vector_size (sizeof(UInt64) * 2))); +typedef UInt64 UInt64x4 __attribute__ ((vector_size (sizeof(UInt64) * 4))); +typedef UInt64 UInt64x8 __attribute__ ((vector_size (sizeof(UInt64) * 8))); +typedef UInt64 UInt64x16 __attribute__ ((vector_size (sizeof(UInt64) * 16))); +typedef UInt64 UInt64x32 __attribute__ ((vector_size (sizeof(UInt64) * 32))); + +typedef UInt32 UInt32x2 __attribute__ ((vector_size (sizeof(UInt32) * 2))); +typedef UInt32 UInt32x4 __attribute__ ((vector_size (sizeof(UInt32) * 4))); +typedef UInt32 UInt32x8 __attribute__ ((vector_size (sizeof(UInt32) * 8))); +typedef UInt32 UInt32x16 __attribute__ ((vector_size (sizeof(UInt32) * 16))); +typedef UInt32 UInt32x32 __attribute__ ((vector_size (sizeof(UInt32) * 32))); +typedef UInt32 UInt32x64 __attribute__ ((vector_size (sizeof(UInt32) * 64))); + +typedef UInt16 UInt16x2 __attribute__ ((vector_size (sizeof(UInt16) * 2))); +typedef UInt16 UInt16x4 __attribute__ ((vector_size (sizeof(UInt16) * 4))); +typedef UInt16 UInt16x8 __attribute__ ((vector_size (sizeof(UInt16) * 8))); +typedef UInt16 UInt16x16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); +typedef UInt16 UInt16x32 __attribute__ ((vector_size (sizeof(UInt16) * 32))); +typedef UInt16 UInt16x64 __attribute__ ((vector_size (sizeof(UInt16) * 64))); + +typedef UInt8 UInt8x2 __attribute__ ((vector_size (sizeof(UInt8) * 2))); +typedef UInt8 UInt8x4 __attribute__ ((vector_size (sizeof(UInt8) * 4))); +typedef UInt8 UInt8x8 __attribute__ ((vector_size (sizeof(UInt8) * 8))); +typedef UInt8 UInt8x16 __attribute__ ((vector_size (sizeof(UInt8) * 16))); +typedef UInt8 UInt8x32 __attribute__ ((vector_size (sizeof(UInt8) * 32))); +typedef UInt8 UInt8x64 __attribute__ ((vector_size (sizeof(UInt8) * 64))); + +namespace detail +{ + template + struct DummyStruct; + + template <> + struct DummyStruct<4> + { + using UInt8Type = UInt8x4; + using UInt16Type = UInt16x4; + using UInt32Type = UInt32x4; + using UInt64Type = UInt64x4; + }; + template <> + struct DummyStruct<8> + { + using UInt8Type = UInt8x8; + using UInt16Type = UInt16x8; + using UInt32Type = UInt32x8; + using UInt64Type = UInt64x8; + }; + template <> + struct DummyStruct<16> + { + using UInt8Type = UInt8x16; + using UInt16Type = UInt16x16; + using UInt32Type = UInt32x16; + using UInt64Type = UInt64x16; + }; + template <> + struct DummyStruct<32> + { + using UInt8Type = UInt8x32; + using UInt16Type = UInt16x32; + using UInt32Type = UInt32x32; + using UInt64Type = UInt64x32; + }; + +} + +// Same as above via template, e.g. UInt64x<8> +template +using UInt8x = typename detail::DummyStruct::UInt8Type; +template +using UInt16x = typename detail::DummyStruct::UInt16Type; +template +using UInt32x = typename detail::DummyStruct::UInt32Type; +template +using UInt64x = typename detail::DummyStruct::UInt64Type; + +/* Casts vectors of the same size. 
+ * UInt32x4 x{}; + * UInt64x4 y = ConvertVector(x); + */ +// template +// inline To ConvertVector(From a) +// { +// return __builtin_convertvector(a, To); +// } + +} From cfc87767c5803f253f80f0ddb4725e4ae4ff1b00 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 20 May 2020 18:16:11 +0200 Subject: [PATCH 0240/2229] Fix bug in collecting statistics --- src/Functions/PerformanceAdaptors.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 717ad196e61..efe4243be79 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -186,6 +186,7 @@ public: { rows_summary += block.getByPosition(i).column->size(); } + rows_summary += block.getByPosition(result).column->size(); if (rows_summary >= 1000) { From d4a8d91789c949e9bfddc606152426e80bdc0c6c Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Wed, 20 May 2020 18:44:01 +0200 Subject: [PATCH 0241/2229] Fix style --- src/Functions/FunctionsRandom.cpp | 15 ++++++++------- src/Functions/FunctionsRandom.h | 4 +--- src/Functions/PerformanceAdaptors.h | 2 +- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 5ab51e9e3b8..0f7359f835f 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -76,7 +76,8 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } -namespace { +namespace +{ // The array of random numbers from 'head -c8 /dev/urandom | xxd -p'. // Can be used for creating seeds for random generators. @@ -103,11 +104,11 @@ void RandVecImpl::execute(char * output, size_t size) static_assert(VecSize <= random_numbers.size()); using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; + using VecUInt32 = UInt32x; if (size == 0) return; - + char * end = output + size; constexpr int safe_overwrite = 15; @@ -157,11 +158,11 @@ void RandVecImpl2::execute(char * output, size_t size) static_assert(2 * VecSize <= random_numbers.size()); using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; + using VecUInt32 = UInt32x; if (size == 0) return; - + char * end = output + size; constexpr int safe_overwrite = 15; @@ -218,11 +219,11 @@ void RandVecImpl4::execute(char * output, size_t size) static_assert(4 * VecSize <= random_numbers.size()); using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; + using VecUInt32 = UInt32x; if (size == 0) return; - + char * end = output + size; constexpr int safe_overwrite = 15; diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index a82f199356e..5f1e549d961 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -124,13 +124,11 @@ public: if constexpr (UseMultitargetCode) { - // vec impl 4 selector.registerImplementation, ToType, Name>>(); - + selector.registerImplementation, ToType, Name>>(); - } } diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index efe4243be79..daa65300570 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -177,7 +177,7 @@ public: implementations[id]->executeImpl(block, arguments, result, input_rows_count); else implementations[id]->execute(block, arguments, result, input_rows_count); - + watch.stop(); // TODO(dakovalkov): Calculate something more informative. 
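Before the next commit reworks it, it is worth pinning down the kernel that every RandVecImpl variant above shares: a batch of linear congruential generators advanced in lock step through GCC vector extensions, with each 64-bit state yielding one 32-bit value per step. Below is a minimal standalone sketch of that step; the lrand48-style constants are an assumption, since the patches only reference LinearCongruentialGenerator::a and LinearCongruentialGenerator::c by name.

#include <cstdint>

using UInt32 = uint32_t;
using UInt64 = uint64_t;
typedef UInt64 UInt64x4 __attribute__ ((vector_size (sizeof(UInt64) * 4)));
typedef UInt32 UInt32x4 __attribute__ ((vector_size (sizeof(UInt32) * 4)));

/// Advance four LCG states at once; the element-wise * and + lower to one
/// SIMD multiply and add per vector on targets that support it.
inline UInt32x4 stepLcgBatch(UInt64x4 & states)
{
    constexpr UInt64 a = 0x5DEECE66DULL; /// assumed lrand48-style multiplier
    constexpr UInt64 c = 11;             /// assumed lrand48-style increment
    states = states * a + c;
    /// Keep bits 16..47 of every state as the 32-bit random output.
    return __builtin_convertvector(states >> 16, UInt32x4);
}

With VecSize = 4 this is exactly the body of the main loops above; RandVecImpl2 and RandVecImpl4 merely keep two or four independent state vectors in flight so that the multiply latency of one batch hides behind the others.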
From 310ca8562c19fd063de47906f59f66bd3ccb6834 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 21 May 2020 09:32:42 +0200 Subject: [PATCH 0242/2229] Add auto-vectorization support for binary operations --- src/Functions/FunctionBinaryArithmetic.h | 71 ++++++++++++++++++++++-- src/Functions/FunctionsRandom.cpp | 1 - 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 30b6da8b696..aba64a52519 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -28,6 +28,8 @@ #include #include +#include + #if !defined(ARCADIA_BUILD) # include #endif @@ -52,12 +54,7 @@ namespace ErrorCodes extern const int CANNOT_ADD_DIFFERENT_AGGREGATE_STATES; } - -/** Arithmetic operations: +, -, *, /, %, - * intDiv (integer division) - * Bitwise operations: |, &, ^, ~. - * Etc. - */ +DECLARE_MULTITARGET_CODE( template struct BinaryOperationImplBase @@ -89,6 +86,68 @@ struct BinaryOperationImplBase } }; +) // DECLARE_MULTITARGET_CODE + + +/** Arithmetic operations: +, -, *, /, %, + * intDiv (integer division) + * Bitwise operations: |, &, ^, ~. + * Etc. + */ +template +struct BinaryOperationImplBase +{ + using ResultType = ResultType_; + static const constexpr bool allow_fixed_string = false; + + static void vectorVector(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); + } + + static void vectorConstant(const A * __restrict a, B b, ResultType * __restrict c, size_t size) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); + } + + static void constantVector(A a, const B * __restrict b, ResultType * __restrict c, size_t size) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); + } + + static ResultType constantConstant(A a, B b) + { + return Op::template apply(a, b); + } +}; + 
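The wrapper above compiles the element-wise loop once per target inside DECLARE_MULTITARGET_CODE and then spends a single run-time branch per call choosing the best copy, so the hot loop itself stays free of feature checks. A hedged sketch of what a call site reduces to (PlusImpl stands in for any Op the template accepts; addColumns and its arguments are illustrative, not from the patch):

/// Illustrative only: one addition over two columns routed through the
/// dispatching wrapper. The CPU-feature check runs once per call,
/// not once per element.
using Op = PlusImpl<UInt32, UInt32>;
using Impl = BinaryOperationImplBase<UInt32, UInt32, Op>;

void addColumns(const UInt32 * a, const UInt32 * b, Impl::ResultType * c, size_t n)
{
    Impl::vectorVector(a, b, c, n); /// may land in the AVX2 or AVX512F copy
}

Note the ordering of the checks in the wrapper: it asks for the widest ISA first, so a machine that supports AVX512F never falls through to the narrower variants.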
template struct FixedStringOperationImpl { diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 0f7359f835f..f673b6c5f81 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -293,5 +293,4 @@ DECLARE_AVX512F_SPECIFIC_CODE( template struct RandVecImpl4<8>; ) // DECLARE_AVX512F_SPECIFIC_CODE - } From 8483dfa272fee7063f5cfc24bb7066bf48cf78cc Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 21 May 2020 09:40:27 +0200 Subject: [PATCH 0243/2229] Delete needless rand implementations --- src/Functions/FunctionsRandom.cpp | 119 +----------------------------- src/Functions/FunctionsRandom.h | 22 ++---- tests/performance/rand.xml | 4 +- 3 files changed, 8 insertions(+), 137 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index f673b6c5f81..cfb3b92c818 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -99,121 +99,6 @@ using namespace VectorExtension; template void RandVecImpl::execute(char * output, size_t size) -{ - static_assert(VecSize >= 4); - static_assert(VecSize <= random_numbers.size()); - - using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; - - if (size == 0) - return; - - char * end = output + size; - - constexpr int safe_overwrite = 15; - constexpr int bytes_per_write = sizeof(VecUInt32); - - UInt64 rand_seed = randomSeed(); - - UInt64 a = LinearCongruentialGenerator::a; - // TODO(dakovalkov): try to remove this. - /// Note: GCC likes to expand multiplication by a constant into shifts + additions. - /// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down. - /// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0. - if (rand_seed == 0) - a = LinearCongruentialGenerator::a + 2; - - constexpr UInt64 c = LinearCongruentialGenerator::c; - - VecUInt64 generators{}; - for (int i = 0; i < VecSize; ++i) - generators[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - generators = generators * a + c;; - VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); - unalignedStore(output, values); - output += bytes_per_write; - } - - // Process tail - while ((end - output) > 0) - { - generators = generators * a + c;; - VecUInt32 values = __builtin_convertvector(generators >> 16, VecUInt32); - for (int i = 0; i < VecSize && (end - output) > 0; ++i) - { - unalignedStore(output, values[i]); - output += sizeof(UInt32); - } - } -} - -template -void RandVecImpl2::execute(char * output, size_t size) -{ - static_assert(VecSize >= 4); - static_assert(2 * VecSize <= random_numbers.size()); - - using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; - - if (size == 0) - return; - - char * end = output + size; - - constexpr int safe_overwrite = 15; - constexpr int bytes_per_write = 2 * sizeof(VecUInt32); - - UInt64 rand_seed = randomSeed(); - - UInt64 a = LinearCongruentialGenerator::a; - // TODO(dakovalkov): try to remove this. - /// Note: GCC likes to expand multiplication by a constant into shifts + additions. - /// In this case a few multiplications become tens of shifts and additions. That leads to a huge slow down. - /// To avoid it we pretend that 'a' is not a constant. Actually we hope that rand_seed is never 0. 
- if (rand_seed == 0) - a = LinearCongruentialGenerator::a + 2; - - constexpr UInt64 c = LinearCongruentialGenerator::c; - - VecUInt64 gens1{}; - VecUInt64 gens2{}; - for (int i = 0; i < VecSize; ++i) - { - gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); - gens2[i] = calcSeed(rand_seed, random_numbers[i + VecSize] + reinterpret_cast(output)); - } - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - gens1 = gens1 * a + c;; - VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); - unalignedStore(output, values1); - gens2 = gens2 * a + c;; - VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); - unalignedStore(output + sizeof(VecUInt32), values2); - output += bytes_per_write; - } - - // Process tail - while ((end - output) > 0) - { - gens1 = gens1 * a + c;; - VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); - for (int i = 0; i < VecSize && (end - output) > 0; ++i) - { - unalignedStore(output, values[i]); - output += sizeof(UInt32); - } - } -} - -template -void RandVecImpl4::execute(char * output, size_t size) { static_assert(VecSize >= 4); static_assert(4 * VecSize <= random_numbers.size()); @@ -286,11 +171,11 @@ void RandVecImpl4::execute(char * output, size_t size) ) // DECLARE_MULTITARGET_CODE DECLARE_AVX2_SPECIFIC_CODE( - template struct RandVecImpl4<4>; + template struct RandVecImpl<4>; ) // DECLARE_AVX2_SPECIFIC_CODE DECLARE_AVX512F_SPECIFIC_CODE( - template struct RandVecImpl4<8>; + template struct RandVecImpl<8>; ) // DECLARE_AVX512F_SPECIFIC_CODE } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 5f1e549d961..5251f8fd622 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -43,27 +43,15 @@ struct RandImpl static void execute(char * output, size_t size); static String getImplementationTag() { return ToString(BuildArch); } }; -// Isn't used now. + +/// Implementation is in .cpp file. +/// Every specialization should be explicitly written in .cpp file. template struct RandVecImpl { static void execute(char * output, size_t size); static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VecSize); } }; -// Isn't used now. 
-template -struct RandVecImpl2 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_vec2_" + toString(VecSize); } -}; - -template -struct RandVecImpl4 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_vec4_" + toString(VecSize); } -}; ) // DECLARE_MULTITARGET_CODE @@ -125,10 +113,10 @@ public: if constexpr (UseMultitargetCode) { selector.registerImplementation, ToType, Name>>(); + FunctionRandomImpl, ToType, Name>>(); selector.registerImplementation, ToType, Name>>(); + FunctionRandomImpl, ToType, Name>>(); } } diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml index a007eb50179..bd34a7a83d8 100644 --- a/tests/performance/rand.xml +++ b/tests/performance/rand.xml @@ -13,13 +13,11 @@ table - numbers(10000000) + zeros(100000000) SELECT count() FROM (SELECT rand() FROM {table}) - SELECT count() FROM (SELECT randxorshift() FROM {table}) SELECT count() FROM (SELECT rand64() FROM {table}) - SELECT count() FROM (SELECT randxorshift64() FROM {table}) From e317dfb6e4fc5e6869ab7730ddc22b58ad9da0ca Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 21 May 2020 10:11:48 +0200 Subject: [PATCH 0244/2229] Fix FBA --- src/Functions/FunctionBinaryArithmetic.h | 70 ++++++++++++++++-------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index aba64a52519..40a387d09b2 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -102,44 +102,66 @@ struct BinaryOperationImplBase static void vectorVector(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size) { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, b, c, size); + if constexpr (UseMultitargetCode) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); + } else + { TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); + } + } static void vectorConstant(const A * __restrict a, B b, ResultType * __restrict c, size_t size) { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if 
(IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); + if constexpr (UseMultitargetCode) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); + } else + { TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); + } } static void constantVector(A a, const B * __restrict b, ResultType * __restrict c, size_t size) { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); + if constexpr (UseMultitargetCode) + { + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); + else + TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); + } else + { TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); + } } static ResultType constantConstant(A a, B b) From 0d1577c5da6f54e76d447c457803800ac01b4baf Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Sun, 24 May 2020 14:25:07 +0200 Subject: [PATCH 0245/2229] Better avx2 implementation for rand(). 
Expected to be ~10% faster --- src/Functions/FunctionBinaryArithmetic.h | 1 - src/Functions/FunctionsRandom.cpp | 123 ++++++++++++----------- src/Functions/FunctionsRandom.h | 14 +-- 3 files changed, 64 insertions(+), 74 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 40a387d09b2..c311b8d5d0a 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -119,7 +119,6 @@ struct BinaryOperationImplBase { TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); } - } static void vectorConstant(const A * __restrict a, B b, ResultType * __restrict c, size_t size) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index cfb3b92c818..9eaa44b0eb5 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -9,8 +9,6 @@ namespace DB { -DECLARE_MULTITARGET_CODE( - namespace { /// NOTE Probably @@ -50,8 +48,23 @@ namespace generator.seed(calcSeed(rand_seed, additional_seed)); } + /// The array of random numbers from 'head -c8 /dev/urandom | xxd -p'. + /// Can be used for creating seeds for random generators. + constexpr std::array random_numbers = { + 0x0c8ff307dabc0c4cULL, 0xf4bce78bf3821c1bULL, 0x4eb628a1e189c21aULL, 0x85ae000d253e0dbcULL, + 0xc98073e6480f8a10ULL, 0xb17e9b70a084d570ULL, 0x1361c752b768da8cULL, 0x3d915f60c06d144dULL, + 0xd5bc9b7aced79587ULL, 0x66c28000ba8a66cfULL, 0x0fb58da7a48820f5ULL, 0x540ee1b57aa861a1ULL, + 0x212f11936ef2db04ULL, 0xa3939cd900edcc58ULL, 0xc676c84420170102ULL, 0xcbdc824e8b4bf3edULL, + + 0x8296f9d93cc94e3bULL, 0x78a7e826d62085b2ULL, 0xaa30620211fc6c69ULL, 0xbd38de52f0a93677ULL, + 0x19983de8d79dcc4eULL, 0x8afe883ef2199e6fULL, 0xb7160f7ed022b60aULL, 0x2ce173d373ddafd4ULL, + 0x15762761bb55b9acULL, 0x3e448fc94fdd28e7ULL, 0xa5121232adfbe70aULL, 0xb1e0f6d286112804ULL, + 0x6062e96de9554806ULL, 0xcc679b329c28882aULL, 0x5c6d29f45cbc060eULL, 0x1af1325a86ffb162ULL, + }; } +DECLARE_DEFAULT_CODE( + void RandImpl::execute(char * output, size_t size) { LinearCongruentialGenerator generator0; @@ -61,10 +74,10 @@ void RandImpl::execute(char * output, size_t size) UInt64 rand_seed = randomSeed(); - seed(generator0, rand_seed, 0xfb4121280b2ab902ULL + reinterpret_cast(output)); - seed(generator1, rand_seed, 0x0121cf76df39c673ULL + reinterpret_cast(output)); - seed(generator2, rand_seed, 0x17ae86e3a19a602fULL + reinterpret_cast(output)); - seed(generator3, rand_seed, 0x8b6e16da7e06d622ULL + reinterpret_cast(output)); + seed(generator0, rand_seed, random_numbers[0] + reinterpret_cast(output)); + seed(generator1, rand_seed, random_numbers[1] + reinterpret_cast(output)); + seed(generator2, rand_seed, random_numbers[2] + reinterpret_cast(output)); + seed(generator3, rand_seed, random_numbers[3] + reinterpret_cast(output)); for (const char * end = output + size; output < end; output += 16) { @@ -76,43 +89,40 @@ void RandImpl::execute(char * output, size_t size) /// It is guaranteed (by PaddedPODArray) that we can overwrite up to 15 bytes after end. } -namespace -{ +) // DECLARE_DEFAULT_CODE -// The array of random numbers from 'head -c8 /dev/urandom | xxd -p'. -// Can be used for creating seeds for random generators. 
-constexpr std::array random_numbers = { - 0x0c8ff307dabc0c4cULL, 0xf4bce78bf3821c1bULL, 0x4eb628a1e189c21aULL, 0x85ae000d253e0dbcULL, - 0xc98073e6480f8a10ULL, 0xb17e9b70a084d570ULL, 0x1361c752b768da8cULL, 0x3d915f60c06d144dULL, - 0xd5bc9b7aced79587ULL, 0x66c28000ba8a66cfULL, 0x0fb58da7a48820f5ULL, 0x540ee1b57aa861a1ULL, - 0x212f11936ef2db04ULL, 0xa3939cd900edcc58ULL, 0xc676c84420170102ULL, 0xcbdc824e8b4bf3edULL, - - 0x8296f9d93cc94e3bULL, 0x78a7e826d62085b2ULL, 0xaa30620211fc6c69ULL, 0xbd38de52f0a93677ULL, - 0x19983de8d79dcc4eULL, 0x8afe883ef2199e6fULL, 0xb7160f7ed022b60aULL, 0x2ce173d373ddafd4ULL, - 0x15762761bb55b9acULL, 0x3e448fc94fdd28e7ULL, 0xa5121232adfbe70aULL, 0xb1e0f6d286112804ULL, - 0x6062e96de9554806ULL, 0xcc679b329c28882aULL, 0x5c6d29f45cbc060eULL, 0x1af1325a86ffb162ULL, -}; - -}; +DECLARE_AVX2_SPECIFIC_CODE( using namespace VectorExtension; -template -void RandVecImpl::execute(char * output, size_t size) +/* Takes 2 vectors with LinearCongruentialGenerator states and combines them into vector with random values. + * From every rand-state we use only bits 15...47 to generate random vector. + */ +inline UInt64x4 CombineValues(UInt64x4 a, UInt64x4 b) { - static_assert(VecSize >= 4); - static_assert(4 * VecSize <= random_numbers.size()); - - using VecUInt64 = UInt64x; - using VecUInt32 = UInt32x; + auto xa = reinterpret_cast<__m256i>(a); + auto xb = reinterpret_cast<__m256i>(b); + /// Every state is 8-byte value and we need to use only 4 from the middle. + /// Swap the low half and the high half of every state to move these bytes from the middle to sides. + /// xa = xa[1, 0, 3, 2, 5, 4, 7, 6] + xa = _mm256_shuffle_epi32(xa, 0xb1); + /// Now every 8-byte value in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use. + /// Just blend them to get the result vector. 
+ /// res = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7,8],xb[9,10],xa[11,12],xb[13,14],xa[15] + auto res = _mm256_blend_epi16(xa, xb, 0x66); + return reinterpret_cast(res); +} +void RandImpl::execute(char * output, size_t size) +{ if (size == 0) return; char * end = output + size; + constexpr int vec_size = 4; constexpr int safe_overwrite = 15; - constexpr int bytes_per_write = 4 * sizeof(VecUInt32); + constexpr int bytes_per_write = 4 * sizeof(UInt64x4); UInt64 rand_seed = randomSeed(); @@ -126,56 +136,49 @@ void RandVecImpl::execute(char * output, size_t size) constexpr UInt64 c = LinearCongruentialGenerator::c; - VecUInt64 gens1{}; - VecUInt64 gens2{}; - VecUInt64 gens3{}; - VecUInt64 gens4{}; - for (int i = 0; i < VecSize; ++i) + UInt64x4 gens1{}; + UInt64x4 gens2{}; + UInt64x4 gens3{}; + UInt64x4 gens4{}; + for (int i = 0; i < vec_size; ++i) { gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); - gens2[i] = calcSeed(rand_seed, random_numbers[i + VecSize] + reinterpret_cast(output)); - gens3[i] = calcSeed(rand_seed, random_numbers[i + 2 * VecSize] + reinterpret_cast(output)); - gens4[i] = calcSeed(rand_seed, random_numbers[i + 3 * VecSize] + reinterpret_cast(output)); + gens2[i] = calcSeed(rand_seed, random_numbers[i + vec_size] + reinterpret_cast(output)); + gens3[i] = calcSeed(rand_seed, random_numbers[i + 2 * vec_size] + reinterpret_cast(output)); + gens4[i] = calcSeed(rand_seed, random_numbers[i + 3 * vec_size] + reinterpret_cast(output)); } while ((end - output) + safe_overwrite >= bytes_per_write) { gens1 = gens1 * a + c; - VecUInt32 values1 = __builtin_convertvector(gens1 >> 16, VecUInt32); - unalignedStore(output, values1); gens2 = gens2 * a + c; - VecUInt32 values2 = __builtin_convertvector(gens2 >> 16, VecUInt32); - unalignedStore(output + sizeof(VecUInt32), values2); + unalignedStore(output, CombineValues(gens1, gens2)); gens3 = gens3 * a + c; - VecUInt32 values3 = __builtin_convertvector(gens3 >> 16, VecUInt32); - unalignedStore(output + 2 * sizeof(VecUInt32), values3); gens4 = gens4 * a + c; - VecUInt32 values4 = __builtin_convertvector(gens4 >> 16, VecUInt32); - unalignedStore(output + 3 * sizeof(VecUInt32), values4); + unalignedStore(output + sizeof(UInt64x4), CombineValues(gens3, gens4)); + gens1 = gens1 * a + c; + gens2 = gens2 * a + c; + unalignedStore(output + 2 * sizeof(UInt64x4), CombineValues(gens1, gens2)); + gens3 = gens3 * a + c; + gens4 = gens4 * a + c; + unalignedStore(output + 3 * sizeof(UInt64x4), CombineValues(gens3, gens4)); output += bytes_per_write; } // Process tail while ((end - output) > 0) { - gens1 = gens1 * a + c;; - VecUInt32 values = __builtin_convertvector(gens1 >> 16, VecUInt32); - for (int i = 0; i < VecSize && (end - output) > 0; ++i) + gens1 = gens1 * a + c; + gens2 = gens2 * a + c; + UInt64x4 values = CombineValues(gens1, gens2); + for (int i = 0; i < vec_size && (end - output) > 0; ++i) { - unalignedStore(output, values[i]); - output += sizeof(UInt32); + unalignedStore(output, values[i]); + output += sizeof(UInt64); } } } -) // DECLARE_MULTITARGET_CODE - -DECLARE_AVX2_SPECIFIC_CODE( - template struct RandVecImpl<4>; ) // DECLARE_AVX2_SPECIFIC_CODE -DECLARE_AVX512F_SPECIFIC_CODE( - template struct RandVecImpl<8>; -) // DECLARE_AVX512F_SPECIFIC_CODE - } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 5251f8fd622..e10b249df8e 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -44,15 +44,6 @@ struct RandImpl static String getImplementationTag() { return 
ToString(BuildArch); } }; -/// Implementation is in .cpp file. -/// Every specialization should be explicitly written in .cpp file. -template -struct RandVecImpl -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_vec_" + toString(VecSize); } -}; - ) // DECLARE_MULTITARGET_CODE template @@ -113,10 +104,7 @@ public: if constexpr (UseMultitargetCode) { selector.registerImplementation, ToType, Name>>(); - - selector.registerImplementation, ToType, Name>>(); + FunctionRandomImpl>(); } } From 4a9891c601e459c6d9b9e638cd591ce7f3edbf13 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 25 May 2020 17:16:19 +0200 Subject: [PATCH 0246/2229] use vectorized rand in generateUUIDv4, add multitarget build in intHash32/64 --- src/Functions/FunctionsHashing.h | 37 +++++++++++++++++++++++++- src/Functions/FunctionsRandom.cpp | 7 ++--- src/Functions/VectorExtension.h | 23 ----------------- src/Functions/generateUUIDv4.cpp | 43 ++++++++++++++++++++++++++++--- tests/performance/rand.xml | 1 + 5 files changed, 81 insertions(+), 30 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index f647390e1c8..6f00981a22a 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -40,6 +40,8 @@ #include #include #include +#include +#include #include #include @@ -573,12 +575,13 @@ public: }; +DECLARE_MULTITARGET_CODE( + template class FunctionIntHash : public IFunction { public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } private: using ToType = typename Impl::ReturnType; @@ -612,6 +615,8 @@ public: return name; } + static String getImplementationTag() { return ToString(BuildArch); } + size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -646,6 +651,36 @@ public: } }; +) // DECLARE_MULTITARGET_CODE + +template +class FunctionIntHash : public TargetSpecific::Default::FunctionIntHash +{ +public: + FunctionIntHash(const Context & context) : selector(context) + { + selector.registerImplementation>(); + + selector.registerImplementation>(); + selector.registerImplementation>(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; template class FunctionAnyHash : public IFunction diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 9eaa44b0eb5..6b2e79e90ca 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -108,9 +108,9 @@ inline UInt64x4 CombineValues(UInt64x4 a, UInt64x4 b) xa = _mm256_shuffle_epi32(xa, 0xb1); /// Now every 8-byte value in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use. /// Just blend them to get the result vector. 
- /// res = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7,8],xb[9,10],xa[11,12],xb[13,14],xa[15] - auto res = _mm256_blend_epi16(xa, xb, 0x66); - return reinterpret_cast(res); + /// result = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7,8],xb[9,10],xa[11,12],xb[13,14],xa[15] + __m256i result = _mm256_blend_epi16(xa, xb, 0x66); + return reinterpret_cast(result); } void RandImpl::execute(char * output, size_t size) @@ -140,6 +140,7 @@ void RandImpl::execute(char * output, size_t size) UInt64x4 gens2{}; UInt64x4 gens3{}; UInt64x4 gens4{}; + for (int i = 0; i < vec_size; ++i) { gens1[i] = calcSeed(rand_seed, random_numbers[i] + reinterpret_cast(output)); diff --git a/src/Functions/VectorExtension.h b/src/Functions/VectorExtension.h index 49a029bb0d9..24c2ae9a18f 100644 --- a/src/Functions/VectorExtension.h +++ b/src/Functions/VectorExtension.h @@ -3,33 +3,23 @@ #include // Contains types declarations and wrappers for GCC vector extension. -// TODO(dakovalkov): remove this workaround. -#if !defined(__clang__) -# pragma GCC diagnostic ignored "-Wvector-operation-performance" -#endif - namespace DB::VectorExtension { typedef UInt64 UInt64x2 __attribute__ ((vector_size (sizeof(UInt64) * 2))); typedef UInt64 UInt64x4 __attribute__ ((vector_size (sizeof(UInt64) * 4))); typedef UInt64 UInt64x8 __attribute__ ((vector_size (sizeof(UInt64) * 8))); -typedef UInt64 UInt64x16 __attribute__ ((vector_size (sizeof(UInt64) * 16))); -typedef UInt64 UInt64x32 __attribute__ ((vector_size (sizeof(UInt64) * 32))); typedef UInt32 UInt32x2 __attribute__ ((vector_size (sizeof(UInt32) * 2))); typedef UInt32 UInt32x4 __attribute__ ((vector_size (sizeof(UInt32) * 4))); typedef UInt32 UInt32x8 __attribute__ ((vector_size (sizeof(UInt32) * 8))); typedef UInt32 UInt32x16 __attribute__ ((vector_size (sizeof(UInt32) * 16))); -typedef UInt32 UInt32x32 __attribute__ ((vector_size (sizeof(UInt32) * 32))); -typedef UInt32 UInt32x64 __attribute__ ((vector_size (sizeof(UInt32) * 64))); typedef UInt16 UInt16x2 __attribute__ ((vector_size (sizeof(UInt16) * 2))); typedef UInt16 UInt16x4 __attribute__ ((vector_size (sizeof(UInt16) * 4))); typedef UInt16 UInt16x8 __attribute__ ((vector_size (sizeof(UInt16) * 8))); typedef UInt16 UInt16x16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); typedef UInt16 UInt16x32 __attribute__ ((vector_size (sizeof(UInt16) * 32))); -typedef UInt16 UInt16x64 __attribute__ ((vector_size (sizeof(UInt16) * 64))); typedef UInt8 UInt8x2 __attribute__ ((vector_size (sizeof(UInt8) * 2))); typedef UInt8 UInt8x4 __attribute__ ((vector_size (sizeof(UInt8) * 4))); @@ -65,15 +55,12 @@ namespace detail using UInt8Type = UInt8x16; using UInt16Type = UInt16x16; using UInt32Type = UInt32x16; - using UInt64Type = UInt64x16; }; template <> struct DummyStruct<32> { using UInt8Type = UInt8x32; using UInt16Type = UInt16x32; - using UInt32Type = UInt32x32; - using UInt64Type = UInt64x32; }; } @@ -88,14 +75,4 @@ using UInt32x = typename detail::DummyStruct::UInt32Type; template using UInt64x = typename detail::DummyStruct::UInt64Type; -/* Casts vectors of the same size. - * UInt32x4 x{}; - * UInt64x4 y = ConvertVector(x); - */ -// template -// inline To ConvertVector(From a) -// { -// return __builtin_convertvector(a, To); -// } - } diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index d543226ba5c..04dd5877560 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -5,17 +5,24 @@ namespace DB { +#define DECLARE_SEVERAL_IMPLEMENTATIONS(...) 
\ +DECLARE_DEFAULT_CODE (__VA_ARGS__) \ +DECLARE_AVX2_SPECIFIC_CODE(__VA_ARGS__) + +DECLARE_SEVERAL_IMPLEMENTATIONS( + class FunctionGenerateUUIDv4 : public IFunction { public: static constexpr auto name = "generateUUIDv4"; - static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } + static String getImplementationTag() { return ToString(BuildArch); } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes &) const override @@ -32,8 +39,9 @@ public: size_t size = input_rows_count; vec_to.resize(size); - // TODO(dakovalkov): rewrite this workaround - TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); + + /// RandImpl is target-dependent and is not the same in different TargetSpecific namespaces. + RandImpl::execute(reinterpret_cast(vec_to.data()), vec_to.size() * sizeof(UInt128)); for (UInt128 & uuid: vec_to) { @@ -47,6 +55,35 @@ public: } }; +) // DECLARE_SEVERAL_IMPLEMENTATIONS +#undef DECLARE_SEVERAL_IMPLEMENTATIONS + +class FunctionGenerateUUIDv4 : public TargetSpecific::Default::FunctionGenerateUUIDv4 +{ +public: + FunctionGenerateUUIDv4(const Context & context) : selector(context) + { + selector.registerImplementation(); + + selector.registerImplementation(); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + void registerFunctionGenerateUUIDv4(FunctionFactory & factory) { factory.registerFunction(); diff --git a/tests/performance/rand.xml b/tests/performance/rand.xml index bd34a7a83d8..ed629e5a2a7 100644 --- a/tests/performance/rand.xml +++ b/tests/performance/rand.xml @@ -20,4 +20,5 @@ SELECT count() FROM (SELECT rand() FROM {table}) SELECT count() FROM (SELECT rand64() FROM {table}) + SELECT count() FROM (SELECT generateUUIDv4() FROM {table}) From 991cbf397aabc03782da6713d35d2e44ee6ae835 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 26 May 2020 13:15:44 +0200 Subject: [PATCH 0247/2229] Thread safe performance statistics --- src/Functions/FunctionsRandom.cpp | 2 +- src/Functions/FunctionsRandom.h | 1 + src/Functions/PerformanceAdaptors.h | 203 ++++++++++++++++------------ 3 files changed, 119 insertions(+), 87 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 6b2e79e90ca..2c7b2e5f1f5 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -109,7 +109,7 @@ inline UInt64x4 CombineValues(UInt64x4 a, UInt64x4 b) /// Now every 8-byte value in xa is xx....xx and every value in xb is ..xxxx.. where x is random byte we want to use. /// Just blend them to get the result vector. /// result = xa[0],xb[1,2],xa[3,4],xb[5,6],xa[7,8],xb[9,10],xa[11,12],xb[13,14],xa[15] - __m256i result = _mm256_blend_epi16(xa, xb, 0x66); + __m256i result = _mm256_blend_epi16(xa, xb, 0x66); return reinterpret_cast(result); } diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index e10b249df8e..8cbe4286285 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -40,6 +40,7 @@ DECLARE_MULTITARGET_CODE( struct RandImpl { + /// Fill memory with random data. 
The memory region must be 15-bytes padded. static void execute(char * output, size_t size); static String getImplementationTag() { return ToString(BuildArch); } }; diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index daa65300570..9dbc2a68f86 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -6,6 +6,7 @@ #include #include +#include #include /// This file contains Adaptors which help to combine several implementations of the function. @@ -20,104 +21,138 @@ namespace ErrorCodes extern const int NO_SUITABLE_FUNCTION_IMPLEMENTATION; } -// TODO(dakovalkov): This is copied and pasted struct from LZ4_decompress_faster.h with little changes. -struct PerformanceStatistics +namespace detail { - struct Element + class PerformanceStatistics { - double count = 0; - double sum = 0; - - double adjustedCount() const + public: + size_t select(bool considarable) { - return count - NUM_INVOCATIONS_TO_THROW_OFF; + /// We don't need to choose/measure anything if there's only one variant. + if (size() == 1) + return 0; + + std::lock_guard guard(lock); + + size_t best = 0; + double best_sample = data[0].sample(rng); + + for (size_t i = 1; i < data.size(); ++i) + { + double sample = data[i].sample(rng); + if (sample < best_sample) + { + best_sample = sample; + best = i; + } + } + + if (considarable) + data[best].run(); + + return best; } - double mean() const + void complete(size_t id, double seconds, double bytes) { - return sum / adjustedCount(); + if (size() == 1) + return; + + std::lock_guard guard(lock); + data[id].complete(seconds, bytes); } - /// For better convergence, we don't use proper estimate of stddev. - /// We want to eventually separate between two algorithms even in case - /// when there is no statistical significant difference between them. - double sigma() const + size_t size() const { - return mean() / sqrt(adjustedCount()); + return data.size(); } - void update(double seconds, double bytes) + bool empty() const { - ++count; - - if (count > NUM_INVOCATIONS_TO_THROW_OFF) - sum += seconds / bytes; + return size() == 0; } - double sample(pcg64 & stat_rng) const + void emplace_back() { - /// If there is a variant with not enough statistics, always choose it. - /// And in that case prefer variant with less number of invocations. + data.emplace_back(); + } + + private: + struct Element + { + int completed_count = 0; + int running_count = 0; + double sum = 0; + + int adjustedCount() const + { + return completed_count - NUM_INVOCATIONS_TO_THROW_OFF; + } + + double mean() const + { + return sum / adjustedCount(); + } + + /// For better convergence, we don't use proper estimate of stddev. + /// We want to eventually separate between two algorithms even in case + /// when there is no statistical significant difference between them. + double sigma() const + { + return mean() / sqrt(adjustedCount()); + } + + void run() + { + ++running_count; + } + + void complete(double seconds, double bytes) + { + --running_count; + ++completed_count; + + if (adjustedCount() > 0) + sum += seconds / bytes; + } + + double sample(pcg64 & stat_rng) const + { + /// If there is a variant with not enough statistics, always choose it. + /// And in that case prefer variant with less number of invocations. 
+ if (adjustedCount() < 2) + return adjustedCount() - 1 + running_count * 2; - if (adjustedCount() < 2) - return adjustedCount() - 1; - else return std::normal_distribution<>(mean(), sigma())(stat_rng); - } + } + }; + + std::vector data; + std::mutex lock; + /// It's Ok that generator is not seeded. + pcg64 rng; + /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account. + static constexpr int NUM_INVOCATIONS_TO_THROW_OFF = 2; }; - /// Cold invocations may be affected by additional memory latencies. Don't take first invocations into account. - static constexpr double NUM_INVOCATIONS_TO_THROW_OFF = 2; + template + std::true_type hasImplementationTagTest(const T&); + std::false_type hasImplementationTagTest(...); - /// How to select method to run. - /// -1 - automatically, based on statistics (default); - /// -2 - choose methods in round robin fashion (for performance testing). - /// >= 0 - always choose specified method (for performance testing); - ssize_t choose_method = -1; + template + constexpr bool has_implementation_tag = decltype(hasImplementationTagTest(std::declval()))::value; - std::vector data; - - /// It's Ok that generator is not seeded. - pcg64 rng; - - /// To select from different algorithms we use a kind of "bandits" algorithm. - /// Sample random values from estimated normal distributions and choose the minimal. - size_t select() + template + String getImplementationTag(TargetArch arch) { - if (choose_method < 0) - { - std::vector samples(data.size()); - for (size_t i = 0; i < data.size(); ++i) - samples[i] = choose_method == -1 - ? data[i].sample(rng) - : data[i].adjustedCount(); - - return std::min_element(samples.begin(), samples.end()) - samples.begin(); - } + if constexpr (has_implementation_tag) + return ToString(arch) + "_" + T::getImplementationTag(); else - return choose_method; + return ToString(arch); } +} - size_t size() const - { - return data.size(); - } - - bool empty() const - { - return size() == 0; - } - - void emplace_back() - { - data.emplace_back(); - } - - PerformanceStatistics() {} - PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} -}; - -/* Class which is used to store implementations for the function and selecting the best one to run +/* Class which is used to store implementations for the function and to select the best one to run * based on processor architecture and statistics from previous runs. * * FunctionInterface is typically IFunction or IExecutableFunctionImpl, but practically it can be @@ -170,7 +205,10 @@ public: throw Exception("There are no available implementations for function " "TODO(dakovalkov): add name", ErrorCodes::NO_SUITABLE_FUNCTION_IMPLEMENTATION); - auto id = statistics.select(); + /// Statistics shouldn't rely on small blocks. + bool considerable = (input_rows_count > 1000); + + size_t id = statistics.select(considerable); Stopwatch watch; if constexpr (std::is_same_v) @@ -180,17 +218,10 @@ public: watch.stop(); - // TODO(dakovalkov): Calculate something more informative. - size_t rows_summary = 0; - for (auto i : arguments) + if (considerable) { - rows_summary += block.getByPosition(i).column->size(); - } - rows_summary += block.getByPosition(result).column->size(); - - if (rows_summary >= 1000) - { - statistics.data[id].update(watch.elapsedSeconds(), rows_summary); + // TODO(dakovalkov): Calculate something more informative than rows count. 
+ statistics.complete(id, watch.elapsedSeconds(), input_rows_count); } } @@ -210,7 +241,7 @@ public: { // TODO(dakovalkov): make this option better. const auto & choose_impl = context.getSettingsRef().function_implementation.value; - if (choose_impl.empty() || choose_impl == FunctionImpl::getImplementationTag()) + if (choose_impl.empty() || choose_impl == detail::getImplementationTag(Arch)) { implementations.emplace_back(std::make_shared(std::forward(args)...)); statistics.emplace_back(); @@ -221,7 +252,7 @@ public: private: const Context & context; std::vector implementations; - PerformanceStatistics statistics; + detail::PerformanceStatistics statistics; }; } From ef030349ff29c7550b9c3b2931cf4db57d9a6e11 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 26 May 2020 17:56:46 +0200 Subject: [PATCH 0248/2229] Add hashes to multitarget code, a lot of cosmetics --- src/Functions/CMakeLists.txt | 4 +- src/Functions/FunctionBinaryArithmetic.h | 81 +++++++++---------- src/Functions/FunctionStartsEndsWith.h | 21 +++-- src/Functions/FunctionsHashing.h | 42 +++++++++- src/Functions/FunctionsRandom.h | 15 +--- src/Functions/PerformanceAdaptors.h | 28 +++++-- src/Functions/RandXorshift.h | 15 ++-- src/Functions/TargetSpecific.h | 54 ++++++++----- src/Functions/generateUUIDv4.cpp | 2 + src/Functions/randConstant.cpp | 2 +- tests/performance/arithmetic.xml | 3 +- .../synthetic_hardware_benchmark.xml | 3 +- 12 files changed, 156 insertions(+), 114 deletions(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 8c9cf159e30..2cc3208f6c4 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -85,9 +85,9 @@ endif() option(ENABLE_MULTITARGET_CODE "" ON) if (ENABLE_MULTITARGET_CODE) - add_definitions(-DUSE_MULTITARGET_CODE=1) + add_definitions(-DENABLE_MULTITARGET_CODE=1) else() - add_definitions(-DUSE_MULTITARGET_CODE=0) + add_definitions(-DENABLE_MULTITARGET_CODE=0) endif() target_link_libraries(clickhouse_functions PUBLIC "simdxorshift") diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index c311b8d5d0a..9a5d610d2af 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -102,65 +102,56 @@ struct BinaryOperationImplBase static void vectorVector(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size) { - if constexpr (UseMultitargetCode) - { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); - } + #if USE_MULTITARGET_CODE + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, 
b, c, size); else - { TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); - } + #else + TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); + #endif } static void vectorConstant(const A * __restrict a, B b, ResultType * __restrict c, size_t size) { - if constexpr (UseMultitargetCode) - { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); - } + #if USE_MULTITARGET_CODE + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); else - { TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); - } + #else + TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); + #endif } static void constantVector(A a, const B * __restrict b, ResultType * __restrict c, size_t size) { - if constexpr (UseMultitargetCode) - { - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); - } + #if USE_MULTITARGET_CODE + if (IsArchSupported(TargetArch::AVX512F)) + TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX2)) + TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::AVX)) + TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); + else if (IsArchSupported(TargetArch::SSE42)) + TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); else - { TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); - } + #else + TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); + #endif } static ResultType constantConstant(A a, B b) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 76aa4530c99..b148653e1b3 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -151,17 +151,16 @@ public: selector.registerImplementation>(); - if constexpr (UseMultitargetCode) - { - selector.registerImplementation>(); - 
selector.registerImplementation>(); - selector.registerImplementation>(); - selector.registerImplementation>(); - } + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + #endif } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 6f00981a22a..4562b9001a9 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -615,8 +615,6 @@ public: return name; } - static String getImplementationTag() { return ToString(BuildArch); } - size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -661,11 +659,13 @@ public: { selector.registerImplementation>(); - + + #if USE_MULTITARGET_CODE selector.registerImplementation>(); selector.registerImplementation>(); + #endif } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override @@ -682,12 +682,13 @@ private: ImplementationSelector selector; }; +DECLARE_MULTITARGET_CODE( + template class FunctionAnyHash : public IFunction { public: static constexpr auto name = Impl::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } private: using ToType = typename Impl::ReturnType; @@ -974,6 +975,39 @@ public: } }; +) // DECLARE_MULTITARGET_CODE + +template +class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash +{ +public: + FunctionAnyHash(const Context & context) : selector(context) + { + selector.registerImplementation>(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + selector.registerImplementation>(); + #endif + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + struct URLHashImpl { diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 8cbe4286285..346c94e1d9f 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -42,7 +42,6 @@ struct RandImpl { /// Fill memory with random data. The memory region must be 15-bytes padded. 
static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch); } }; ) // DECLARE_MULTITARGET_CODE @@ -58,11 +57,6 @@ public: return name; } - static String getImplementationTag() - { - return RandImpl::getImplementationTag(); - } - bool isDeterministic() const override { return false; } bool isDeterministicInScopeOfQuery() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } @@ -102,11 +96,10 @@ public: selector.registerImplementation>(); - if constexpr (UseMultitargetCode) - { - selector.registerImplementation>(); - } + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + #endif } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 9dbc2a68f86..1d4b6be6102 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -9,9 +9,9 @@ #include #include -/// This file contains Adaptors which help to combine several implementations of the function. -/// Adaptors check that implementation can be executed on the current platform and choose -/// that one which works faster according to previous runs. +/* This file contains the helper class ImplementationSelector. It makes it easier to combine + * several implementations of IFunction/IExecutableFunctionImpl. + */ namespace DB { @@ -120,9 +120,16 @@ namespace detail { /// If there is a variant with not enough statistics, always choose it. /// And in that case prefer variant with less number of invocations. - if (adjustedCount() < 2) - return adjustedCount() - 1 + running_count * 2; + if (adjustedCount() < 2) + { + // TODO(dakovalkov): rewrite it. + int all_count = adjustedCount() + running_count; + if (all_count < 3) + return all_count - 2; + else + return adjustedCount() + running_count * 100; + } return std::normal_distribution<>(mean(), sigma())(stat_rng); } }; @@ -142,6 +149,9 @@ namespace detail template constexpr bool has_implementation_tag = decltype(hasImplementationTagTest(std::declval()))::value; + /* The implementation tag is used to run a specific implementation (for debug/testing purposes). * It can be specified via the static method ::getImplementationTag() in Function (optional). */ template String getImplementationTag(TargetArch arch) { @@ -161,8 +171,9 @@ namespace detail * Example of usage: * * class MyDefaultImpl : public IFunction {...}; - * class MySecondImpl : public IFunction {...}; + * DECLARE_AVX2_SPECIFIC_CODE( * class MyAVX2Impl : public IFunction {...}; + * ) * * /// All methods but execute/executeImpl are usually not a bottleneck, so just use them from * /// default implementation. * @@ -172,8 +183,9 @@ * /// Register all implementations in the constructor. * /// There can be as many implementations for every target as you want. * selector.registerImplementation(); - * selector.registerImplementation(); - * selector.registreImplementation(); + #if USE_MULTITARGET_CODE + * selector.registerImplementation(); + #endif * } * * void executeImpl(...)
override { diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 49655d637f2..1d109adc087 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -23,7 +23,7 @@ struct RandXorshiftImpl struct RandXorshiftImpl2 { static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch) + "_v2"; } + static String getImplementationTag() { return "v2"; } }; ) // DECLARE_MULTITARGET_CODE @@ -37,13 +37,12 @@ public: selector.registerImplementation>(); - if constexpr (UseMultitargetCode) - { - selector.registerImplementation>(); - selector.registerImplementation>(); - } + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + selector.registerImplementation>(); + #endif } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index 11dae939bbd..ed9c0d3c244 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -4,16 +4,22 @@ /* This file contains macros and helpers for writing platform-dependent code. * - * Macros DECLARE__SPECIFIC_CODE will wrap code inside them into the + * Macros DECLARE__SPECIFIC_CODE will wrap code inside it into the * namespace TargetSpecific:: and enable Arch-specific compile options. * Thus, it's allowed to call functions inside these namespaces only after * checking platform in runtime (see IsArchSupported() below). * + * If the compiler is not gcc/clang, or the target isn't x86_64, or ENABLE_MULTITARGET_CODE + * was set to OFF in cmake, all code inside these macros will be removed and + * USE_MULTITARGET_CODE will be set to 0. Use #if USE_MULTITARGET_CODE whenever you + * use anything from these namespaces. + * * For similarity there is a macro DECLARE_DEFAULT_CODE, which wraps code * into the namespace TargetSpecific::Default but doesn't specify any additional - * copile options. + * compile options. Functions and classes inside this macro are available regardless + * of USE_MULTITARGET_CODE. * - * Example: + * Example of usage: * * DECLARE_DEFAULT_CODE ( * int funcImpl() { @@ -28,15 +34,17 @@ * ) // DECLARE_DEFAULT_CODE * * int func() { + * #if USE_MULTITARGET_CODE * if (IsArchSupported(TargetArch::AVX2)) * return TargetSpecific::AVX2::funcImpl(); + * #endif * return TargetSpecific::Default::funcImpl(); * } * * Sometimes code may benefit from compiling with different options. - * For these purposes use DECLARE_MULTITARGET_CODE macros. It will create several - * copies of the code and compile it with different options. These copies are - * available via TargetSpecifc namespaces described above. + * For these purposes use the DECLARE_MULTITARGET_CODE macro. It will create a copy + * of the code for every supported target and compile it with different options. + * These copies are available via the TargetSpecific namespaces described above. * * Inside every TargetSpecific namespace there is a constexpr variable BuildArch, * which indicates the target platform for current code. @@ -50,16 +58,16 @@ * iteration_size = 2 * else if constexpr (BuildArch == TargetArch::AVX || BuildArch == TargetArch::AVX2) * iteration_size = 4; - * else if constexpr (BuildArch == TargetArch::AVX512) - * iteration_size = 8; * for (int i = 0; i < size; i += iteration_size) * ... * } * ) // DECLARE_MULTITARGET_CODE * - * // All 5 versions of func are available here. Use runtime detection to choose one.
+ * // All target-specific and default implementations are available here via + * TargetSpecific::<Arch>::funcImpl. Use runtime detection to choose one. + - * If you want to write IFunction or IExecutableFuncionImpl with runtime dispatching, see PerformanceAdaptors.h. + * If you want to write IFunction or IExecutableFunctionImpl with several implementations, + * see PerformanceAdaptors.h. */ namespace DB @@ -74,24 +82,24 @@ enum class TargetArch : UInt32 AVX512F = (1 << 3), }; -// Runtime detection. +/// Runtime detection. bool IsArchSupported(TargetArch arch); String ToString(TargetArch arch); -#if USE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) +#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) -constexpr bool UseMultitargetCode = true; +#define USE_MULTITARGET_CODE 1 #if defined(__clang__) # define BEGIN_AVX512F_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2,avx512f\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f\"))),apply_to=function)") # define BEGIN_AVX2_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx,avx2\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2\"))),apply_to=function)") # define BEGIN_AVX_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx,avx\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx\"))),apply_to=function)") # define BEGIN_SSE42_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,mmx\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt\"))),apply_to=function)") # define END_TARGET_SPECIFIC_CODE \ _Pragma("clang attribute pop") @@ -102,16 +110,16 @@ constexpr bool UseMultitargetCode = true; #else # define BEGIN_AVX512F_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native\")") # define BEGIN_AVX_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native\")") # define BEGIN_SSE42_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,tune=native\")") # define END_TARGET_SPECIFIC_CODE \ _Pragma("GCC pop_options") @@ -158,8 +166,10 @@ END_TARGET_SPECIFIC_CODE #else -constexpr bool UseMultitargetCode = false; +#define USE_MULTITARGET_CODE 0 +/* Multitarget code is disabled; just delete target-specific code. + */ #define DECLARE_SSE42_SPECIFIC_CODE(...) #define DECLARE_AVX_SPECIFIC_CODE(...) #define DECLARE_AVX2_SPECIFIC_CODE(...)
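Taken together, PerformanceAdaptors.h and TargetSpecific.h support the wrapper pattern that this patch applies to every dispatched function. A condensed sketch of that pattern follows; the class and implementation names are illustrative, and the template arguments (which the collapsed diff above elides) are reconstructed, so treat the exact signatures as approximate:

    DECLARE_MULTITARGET_CODE(
    class MyFunctionImpl : public IFunction
    {
        /// getName(), getReturnTypeImpl(), executeImpl(), ... - the real work,
        /// compiled once per target with that target's compile options.
    };
    ) // DECLARE_MULTITARGET_CODE

    class MyFunction : public TargetSpecific::Default::MyFunctionImpl
    {
    public:
        explicit MyFunction(const Context & context) : selector(context)
        {
            /// The Default implementation is always registered.
            selector.registerImplementation<TargetArch::Default,
                TargetSpecific::Default::MyFunctionImpl>();
        #if USE_MULTITARGET_CODE
            /// Target-specific implementations exist only in multitarget builds.
            selector.registerImplementation<TargetArch::AVX2,
                TargetSpecific::AVX2::MyFunctionImpl>();
        #endif
        }

        static FunctionPtr create(const Context & context)
        {
            return std::make_shared<MyFunction>(context);
        }

        void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
        {
            /// Runs whichever registered implementation has been fastest so far.
            selector.selectAndExecute(block, arguments, result, input_rows_count);
        }

    private:
        ImplementationSelector<IFunction> selector;
    };

This is exactly the shape of the FunctionStartsEndsWith, FunctionAnyHash, and FunctionGenerateUUIDv4 hunks elsewhere in this series.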
diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 04dd5877560..a205f853d2a 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -66,8 +66,10 @@ public: selector.registerImplementation(); + #if USE_MULTITARGET_CODE selector.registerImplementation(); + #endif } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index 3eba5abf10d..ebf2f752b66 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -99,7 +99,7 @@ public: argument_types.emplace_back(arguments.back().type); typename ColumnVector::Container vec_to(1); - // TODO(dakovalkov): Rewrite this workaround + TargetSpecific::Default::RandImpl::execute(reinterpret_cast(vec_to.data()), sizeof(ToType)); ToType value = vec_to[0]; diff --git a/tests/performance/arithmetic.xml b/tests/performance/arithmetic.xml index 45f0d62f227..e56d35d43b9 100644 --- a/tests/performance/arithmetic.xml +++ b/tests/performance/arithmetic.xml @@ -1,4 +1,5 @@ - + 30000000000 diff --git a/tests/performance/synthetic_hardware_benchmark.xml b/tests/performance/synthetic_hardware_benchmark.xml index 2233bfeca8d..2688c5a1aec 100644 --- a/tests/performance/synthetic_hardware_benchmark.xml +++ b/tests/performance/synthetic_hardware_benchmark.xml @@ -1,4 +1,5 @@ - + 30000000000 From 71fabcedc4939b6ef917072d3e224de4bc65f6b8 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Tue, 26 May 2020 18:29:37 +0200 Subject: [PATCH 0249/2229] Fix test --- tests/performance/arithmetic.xml | 1 + tests/performance/synthetic_hardware_benchmark.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/performance/arithmetic.xml b/tests/performance/arithmetic.xml index e56d35d43b9..69b9bf30e45 100644 --- a/tests/performance/arithmetic.xml +++ b/tests/performance/arithmetic.xml @@ -1,5 +1,6 @@ + 30000000000 diff --git a/tests/performance/synthetic_hardware_benchmark.xml b/tests/performance/synthetic_hardware_benchmark.xml index 2688c5a1aec..deae39ab80f 100644 --- a/tests/performance/synthetic_hardware_benchmark.xml +++ b/tests/performance/synthetic_hardware_benchmark.xml @@ -1,5 +1,6 @@ + 30000000000 From cdb353856dee0ef0cb39022cc4b6623469a92d15 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 13:39:48 +0200 Subject: [PATCH 0250/2229] remove vectorization from binary arithmetic --- src/Functions/FunctionBinaryArithmetic.h | 80 ++---------------------- tests/performance/arithmetic.xml | 4 +- 2 files changed, 6 insertions(+), 78 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 9a5d610d2af..292f7da0475 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -54,8 +54,11 @@ namespace ErrorCodes extern const int CANNOT_ADD_DIFFERENT_AGGREGATE_STATES; } -DECLARE_MULTITARGET_CODE( - +/** Arithmetic operations: +, -, *, /, %, + * intDiv (integer division) + * Bitwise operations: |, &, ^, ~. + * Etc. + */ template struct BinaryOperationImplBase { @@ -86,79 +89,6 @@ struct BinaryOperationImplBase } }; -) // DECLARE_MULTITARGET_CODE - - -/** Arithmetic operations: +, -, *, /, %, - * intDiv (integer division) - * Bitwise operations: |, &, ^, ~. - * Etc. 
- */ -template -struct BinaryOperationImplBase -{ - using ResultType = ResultType_; - static const constexpr bool allow_fixed_string = false; - - static void vectorVector(const A * __restrict a, const B * __restrict b, ResultType * __restrict c, size_t size) - { - #if USE_MULTITARGET_CODE - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorVector(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); - #else - TargetSpecific::Default::BinaryOperationImplBase::vectorVector(a, b, c, size); - #endif - } - - static void vectorConstant(const A * __restrict a, B b, ResultType * __restrict c, size_t size) - { - #if USE_MULTITARGET_CODE - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::vectorConstant(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); - #else - TargetSpecific::Default::BinaryOperationImplBase::vectorConstant(a, b, c, size); - #endif - } - - static void constantVector(A a, const B * __restrict b, ResultType * __restrict c, size_t size) - { - #if USE_MULTITARGET_CODE - if (IsArchSupported(TargetArch::AVX512F)) - TargetSpecific::AVX512F::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX2)) - TargetSpecific::AVX2::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::AVX)) - TargetSpecific::AVX::BinaryOperationImplBase::constantVector(a, b, c, size); - else if (IsArchSupported(TargetArch::SSE42)) - TargetSpecific::SSE42::BinaryOperationImplBase::constantVector(a, b, c, size); - else - TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); - #else - TargetSpecific::Default::BinaryOperationImplBase::constantVector(a, b, c, size); - #endif - } - - static ResultType constantConstant(A a, B b) - { - return Op::template apply(a, b); - } -}; template struct FixedStringOperationImpl diff --git a/tests/performance/arithmetic.xml b/tests/performance/arithmetic.xml index 69b9bf30e45..45f0d62f227 100644 --- a/tests/performance/arithmetic.xml +++ b/tests/performance/arithmetic.xml @@ -1,6 +1,4 @@ - - + 30000000000 From 13e1102f03e35dce5f29d9434ff84ee9dff3c605 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 13:46:47 +0200 Subject: [PATCH 0251/2229] Disable xorshift --- contrib/CMakeLists.txt | 1 - src/Functions/CMakeLists.txt | 3 --- src/Functions/RandXorshift.cpp | 3 +++ src/Functions/RandXorshift.h | 5 +++++ src/Functions/registerFunctionsRandom.cpp | 2 -- 5 files changed, 8 insertions(+), 6 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 344a06f29b7..d122188ad0b 100644 --- a/contrib/CMakeLists.txt 
+++ b/contrib/CMakeLists.txt @@ -27,7 +27,6 @@ add_subdirectory (murmurhash) add_subdirectory (replxx-cmake) add_subdirectory (ryu-cmake) add_subdirectory (unixodbc-cmake) -add_subdirectory (SIMDxorshift-cmake) add_subdirectory (poco-cmake) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 2cc3208f6c4..e999955086e 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -90,9 +90,6 @@ else() add_definitions(-DENABLE_MULTITARGET_CODE=0) endif() -target_link_libraries(clickhouse_functions PUBLIC "simdxorshift") -message(STATUS "Using SIMDXORSHIFT ${SIMDXORSHIFT_LIBRARY}") - add_subdirectory(GatherUtils) target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils) diff --git a/src/Functions/RandXorshift.cpp b/src/Functions/RandXorshift.cpp index 9f1dded700c..4fa2280861f 100644 --- a/src/Functions/RandXorshift.cpp +++ b/src/Functions/RandXorshift.cpp @@ -1,3 +1,5 @@ +/// Disable xorshift +#if 0 #include #include #include @@ -162,3 +164,4 @@ void registerFunctionRandXorshift(FunctionFactory & factory) } } +#endif diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 1d109adc087..8e068cf5dff 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -1,5 +1,8 @@ #pragma once +/// disable xorshift +#if 0 + #include #include #include @@ -60,3 +63,5 @@ private: }; } + +#endif diff --git a/src/Functions/registerFunctionsRandom.cpp b/src/Functions/registerFunctionsRandom.cpp index 422ec91f025..3638474c4fe 100644 --- a/src/Functions/registerFunctionsRandom.cpp +++ b/src/Functions/registerFunctionsRandom.cpp @@ -10,7 +10,6 @@ void registerFunctionRandomPrintableASCII(FunctionFactory & factory); void registerFunctionRandomString(FunctionFactory & factory); void registerFunctionRandomFixedString(FunctionFactory & factory); void registerFunctionRandomStringUTF8(FunctionFactory & factory); -void registerFunctionRandXorshift(FunctionFactory & factory); void registerFunctionsRandom(FunctionFactory & factory) { @@ -22,7 +21,6 @@ void registerFunctionsRandom(FunctionFactory & factory) registerFunctionRandomString(factory); registerFunctionRandomFixedString(factory); registerFunctionRandomStringUTF8(factory); - registerFunctionRandXorshift(factory); } } From 07bdd9eaf6189703715b2dfe861ecf5913953752 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 13:48:56 +0200 Subject: [PATCH 0252/2229] Fix style issues --- src/Functions/FunctionsHashing.h | 6 +++--- src/Functions/PerformanceAdaptors.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 4562b9001a9..7edb8937275 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -659,7 +659,7 @@ public: { selector.registerImplementation>(); - + #if USE_MULTITARGET_CODE selector.registerImplementation>(); @@ -670,9 +670,9 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - selector.selectAndExecute(block, arguments, result, input_rows_count); + selector.selectAndExecute(block, arguments, result, input_rows_count); } - + static FunctionPtr create(const Context & context) { return std::make_shared(context); diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 1d4b6be6102..de321ee5605 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -32,7 +32,7 @@ 
namespace detail if (size() == 1) return 0; - std::lock_guard guard(lock); + std::lock_guard guard(lock); size_t best = 0; double best_sample = data[0].sample(rng); @@ -57,8 +57,8 @@ namespace detail { if (size() == 1) return; - - std::lock_guard guard(lock); + + std::lock_guard guard(lock); data[id].complete(seconds, bytes); } From 43b836adc13e7a5556fce396ee7e6a47f10413d3 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 13:50:59 +0200 Subject: [PATCH 0253/2229] cosmetics --- src/Functions/FunctionStartsEndsWith.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index b148653e1b3..30d6a150620 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -41,11 +41,6 @@ public: return name; } - static String getImplementationTag() - { - return ToString(BuildArch); - } - size_t getNumberOfArguments() const override { return 2; From 4c16f7a70ffe244f1e82f3ff06482288f212290a Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 15:01:00 +0200 Subject: [PATCH 0254/2229] cosmetics --- src/Functions/FunctionsRandom.cpp | 2 +- src/Functions/PerformanceAdaptors.h | 9 +-------- tests/performance/general_purpose_hashes.xml | 4 +++- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index 2c7b2e5f1f5..ced87d08cfa 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -1,5 +1,5 @@ -#include #include +#include #include #include #include diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index de321ee5605..bbe50d2e994 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -122,14 +122,7 @@ namespace detail /// And in that case prefer variant with less number of invocations. if (adjustedCount() < 2) - { - // TODO(dakovalkov): rewrite it. 
- int all_count = adjustedCount() + running_count; - if (all_count < 3) - return all_count - 2; - else - return adjustedCount() + running_count * 100; - } + return adjustedCount() - 1 + running_count; return std::normal_distribution<>(mean(), sigma())(stat_rng); } }; diff --git a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml index 31a1bd65835..ada1df439fe 100644 --- a/tests/performance/general_purpose_hashes.xml +++ b/tests/performance/general_purpose_hashes.xml @@ -1,4 +1,6 @@ - + + gp_hash_func From 0f730b2ace141c5b17a1a76568bfd2aefd72e976 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 17:35:05 +0200 Subject: [PATCH 0255/2229] multitarget great circle --- src/Functions/greatCircleDistance.cpp | 45 +++++++++++++++++++++++-- tests/performance/great_circle_dist.xml | 4 ++- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 238499f8def..89337f83ddf 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -153,6 +155,12 @@ enum class Method WGS84_METERS, }; +} + +DECLARE_MULTITARGET_CODE( + +namespace +{ template float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) @@ -220,7 +228,6 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) } - template class FunctionGeoDistance : public IFunction { @@ -230,8 +237,6 @@ public: : ((method == Method::SPHERE_METERS) ? "greatCircleDistance" : "geoDistance"); - static FunctionPtr create(const Context &) { return std::make_shared>(); } - private: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 4; } @@ -272,6 +277,40 @@ private: } }; +) // DECLARE_MULTITARGET_CODE + +template +class FunctionGeoDistance : public TargetSpecific::Default::FunctionGeoDistance +{ +public: + explicit FunctionGeoDistance(const Context & context) : selector(context) + { + selector.registerImplementation>(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); + #endif + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared>(context); + } + +private: + ImplementationSelector selector; +}; void registerFunctionGeoDistance(FunctionFactory & factory) { diff --git a/tests/performance/great_circle_dist.xml b/tests/performance/great_circle_dist.xml index 13f9e6fde56..a57097bcbe7 100644 --- a/tests/performance/great_circle_dist.xml +++ b/tests/performance/great_circle_dist.xml @@ -1,4 +1,6 @@ - + + SELECT count() FROM numbers(1000000) WHERE NOT ignore(greatCircleDistance((rand(1) % 360) * 1. - 180, (number % 150) * 1.2 - 90, (number % 360) + toFloat64(rand(2)) / 4294967296 - 180, (rand(3) % 180) * 1. 
- 90)) From b0537bf31e1de4e5f52a399ff877a2313c11d31b Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 17:38:07 +0200 Subject: [PATCH 0256/2229] Fix clang builds --- src/Functions/FunctionStartsEndsWith.h | 2 +- src/Functions/FunctionsHashing.h | 4 ++-- src/Functions/FunctionsRandom.cpp | 4 +++- src/Functions/FunctionsRandom.h | 2 +- src/Functions/RandXorshift.h | 2 +- src/Functions/generateUUIDv4.cpp | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 30d6a150620..69627eb2ead 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -141,7 +141,7 @@ template class FunctionStartsEndsWith : public TargetSpecific::Default::FunctionStartsEndsWith { public: - FunctionStartsEndsWith(const Context & context) : selector(context) + explicit FunctionStartsEndsWith(const Context & context) : selector(context) { selector.registerImplementation>(); diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 7edb8937275..b4c87dd761a 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -655,7 +655,7 @@ template class FunctionIntHash : public TargetSpecific::Default::FunctionIntHash { public: - FunctionIntHash(const Context & context) : selector(context) + explicit FunctionIntHash(const Context & context) : selector(context) { selector.registerImplementation>(); @@ -981,7 +981,7 @@ template class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash { public: - FunctionAnyHash(const Context & context) : selector(context) + explicit FunctionAnyHash(const Context & context) : selector(context) { selector.registerImplementation>(); diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index ced87d08cfa..fba44d458bb 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -4,7 +4,9 @@ #include #include #include -#include +#if USE_MULTITARGET_CODE +# include +#endif namespace DB { diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 346c94e1d9f..bc11f671c1b 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -91,7 +91,7 @@ template class FunctionRandom : public FunctionRandomImpl { public: - FunctionRandom(const Context & context) : selector(context) + explicit FunctionRandom(const Context & context) : selector(context) { selector.registerImplementation>(); diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h index 8e068cf5dff..c005d7377dd 100644 --- a/src/Functions/RandXorshift.h +++ b/src/Functions/RandXorshift.h @@ -35,7 +35,7 @@ template class FunctionRandomXorshift : public FunctionRandomImpl { public: - FunctionRandomXorshift(const Context & context) : selector(context) + explicit FunctionRandomXorshift(const Context & context) : selector(context) { selector.registerImplementation>(); diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index a205f853d2a..53113a77273 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -61,7 +61,7 @@ public: class FunctionGenerateUUIDv4 : public TargetSpecific::Default::FunctionGenerateUUIDv4 { public: - FunctionGenerateUUIDv4(const Context & context) : selector(context) + explicit FunctionGenerateUUIDv4(const Context & context) : selector(context) { selector.registerImplementation(); From 
278592106cd29732df3474a1da4a01a4d3d16dd0 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Thu, 28 May 2020 18:21:23 +0200 Subject: [PATCH 0257/2229] cosmetics --- src/Functions/FunctionBinaryArithmetic.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 292f7da0475..30b6da8b696 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -28,8 +28,6 @@ #include #include -#include - #if !defined(ARCADIA_BUILD) # include #endif @@ -54,11 +52,13 @@ namespace ErrorCodes extern const int CANNOT_ADD_DIFFERENT_AGGREGATE_STATES; } + /** Arithmetic operations: +, -, *, /, %, * intDiv (integer division) * Bitwise operations: |, &, ^, ~. * Etc. */ + template struct BinaryOperationImplBase { @@ -89,7 +89,6 @@ struct BinaryOperationImplBase } }; - template struct FixedStringOperationImpl { From 478ee2c185870bf9bd719abfb6951879b9a4e730 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 29 May 2020 06:54:18 +0200 Subject: [PATCH 0258/2229] delete SIMDxorshift --- .gitmodules | 3 - contrib/SIMDxorshift | 1 - contrib/SIMDxorshift-cmake/CMakeLists.txt | 12 -- src/Functions/RandXorshift.cpp | 167 ---------------------- src/Functions/RandXorshift.h | 67 --------- src/Functions/generateUUIDv4.cpp | 2 - 6 files changed, 252 deletions(-) delete mode 160000 contrib/SIMDxorshift delete mode 100644 contrib/SIMDxorshift-cmake/CMakeLists.txt delete mode 100644 src/Functions/RandXorshift.cpp delete mode 100644 src/Functions/RandXorshift.h diff --git a/.gitmodules b/.gitmodules index c14fef40457..7f5d1307a6e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -160,6 +160,3 @@ [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git -[submodule "contrib/SIMDxorshift"] - path = contrib/SIMDxorshift - url = https://github.com/lemire/SIMDxorshift diff --git a/contrib/SIMDxorshift b/contrib/SIMDxorshift deleted file mode 160000 index 270eb8936c9..00000000000 --- a/contrib/SIMDxorshift +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 270eb8936c9b4bd038c39f1783a8eba6b8f15b09 diff --git a/contrib/SIMDxorshift-cmake/CMakeLists.txt b/contrib/SIMDxorshift-cmake/CMakeLists.txt deleted file mode 100644 index 573173ff1b4..00000000000 --- a/contrib/SIMDxorshift-cmake/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -set(SIMDXORSHIFT_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/SIMDxorshift/include") -set(SIMDXORSHIFT_SRC_DIR "${SIMDXORSHIFT_INCLUDE_DIR}/../src") -set(SIMDXORSHIFT_SRC - ${SIMDXORSHIFT_SRC_DIR}/xorshift128plus.c - ${SIMDXORSHIFT_SRC_DIR}/simdxorshift128plus.c -) - -set(SIMDXORSHIFT_LIBRARY "simdxorshift") - -add_library(${SIMDXORSHIFT_LIBRARY} ${SIMDXORSHIFT_SRC}) -target_include_directories(${SIMDXORSHIFT_LIBRARY} PUBLIC "${SIMDXORSHIFT_INCLUDE_DIR}") -target_compile_options(${SIMDXORSHIFT_LIBRARY} PRIVATE -mavx2) diff --git a/src/Functions/RandXorshift.cpp b/src/Functions/RandXorshift.cpp deleted file mode 100644 index 4fa2280861f..00000000000 --- a/src/Functions/RandXorshift.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/// Disable xorshift -#if 0 -#include -#include -#include -#include -#include - -#include - -extern "C" -{ -#include -#include -} - -namespace DB -{ - -DECLARE_DEFAULT_CODE( - -void RandXorshiftImpl::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - xorshift128plus_key_s mykey; - - xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), - 
0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), - &mykey); - - constexpr int bytes_per_write = 8; - constexpr intptr_t mask = bytes_per_write - 1; - - // Process head to make output aligned. - unalignedStore(output, xorshift128plus(&mykey)); - output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); - - while (end - output > 0) - { - *reinterpret_cast(output) = xorshift128plus(&mykey); - output += bytes_per_write; - } -} - -) // DECLARE_DEFAULT_CODE - -DECLARE_AVX2_SPECIFIC_CODE( - -void RandXorshiftImpl::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - avx_xorshift128plus_key_t mykey; - avx_xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), - 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), - &mykey); - - constexpr int safe_overwrite = 15; /// How many bytes we can write behind the end. - constexpr int bytes_per_write = 32; - constexpr intptr_t mask = bytes_per_write - 1; - - if (size + safe_overwrite < bytes_per_write) - { - /// size <= 16. - _mm_storeu_si128(reinterpret_cast<__m128i*>(output), - _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); - return; - } - - /// Process head to make output aligned. - _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); - output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); - - while ((end - output) + safe_overwrite >= bytes_per_write) - { - _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); - output += bytes_per_write; - } - - /// Process tail. (end - output) <= 16. - if ((end - output) > 0) - { - _mm_store_si128(reinterpret_cast<__m128i*>(output), - _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); - } -} - -) // DECLARE_AVX2_SPECIFIC_CODE - -DECLARE_AVX2_SPECIFIC_CODE( - -void RandXorshiftImpl2::execute(char * output, size_t size) -{ - if (size == 0) - return; - - char * end = output + size; - - avx_xorshift128plus_key_t mykey; - avx_xorshift128plus_init(0xe9ef384566799595ULL ^ reinterpret_cast(output), - 0xa321e1523f4f88c7ULL ^ reinterpret_cast(output), - &mykey); - - avx_xorshift128plus_key_t mykey2; - avx_xorshift128plus_init(0xdfe532a6b5a5eb2cULL ^ reinterpret_cast(output), - 0x21cdf6cd1e22bf9cULL ^ reinterpret_cast(output), - &mykey2); - - constexpr int safe_overwrite = 15; /// How many bytes we can write behind the end. - constexpr int bytes_per_write = 32; - constexpr intptr_t mask = bytes_per_write - 1; - - if (size + safe_overwrite < bytes_per_write) - { - /// size <= 16. - _mm_storeu_si128(reinterpret_cast<__m128i*>(output), - _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); - return; - } - - /// Process head to make output aligned. - _mm256_storeu_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); - output = reinterpret_cast((reinterpret_cast(output) | mask) + 1); - - while ((end - output) + safe_overwrite >= bytes_per_write * 2) - { - _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); - _mm256_store_si256(reinterpret_cast<__m256i*>(output + bytes_per_write), avx_xorshift128plus(&mykey2)); - output += bytes_per_write * 2; - } - - if ((end - output) + safe_overwrite >= bytes_per_write) - { - _mm256_store_si256(reinterpret_cast<__m256i*>(output), avx_xorshift128plus(&mykey)); - output += bytes_per_write; - } - - /// Process tail. (end - output) <= 16. 
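For reference while reading the deleted SIMD code: the scalar xorshift128+ step that avx_xorshift128plus vectorizes (four 64-bit lanes per 256-bit register, i.e. 32 random bytes per call) is essentially the published algorithm:

    uint64_t xorshift128plus(uint64_t s[2])
    {
        uint64_t s1 = s[0];
        const uint64_t s0 = s[1];
        s[0] = s0;                                /// rotate the two state words
        s1 ^= s1 << 23;                           /// shift a
        s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5);  /// shifts b, c
        return s[1] + s0;
    }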
- if ((end - output) > 0) - { - _mm_store_si128(reinterpret_cast<__m128i*>(output), - _mm256_extracti128_si256(avx_xorshift128plus(&mykey), 0)); - } -} - -) // DECLARE_AVX2_SPECIFIC_CODE - -struct NameRandXorshift { static constexpr auto name = "randxorshift"; }; -using FunctionRandXorshift = FunctionRandomXorshift; -struct NameRandXorshift64 { static constexpr auto name = "randxorshift64"; }; -using FunctionRandXorshift64 = FunctionRandomXorshift; - -void registerFunctionRandXorshift(FunctionFactory & factory) -{ - factory.registerFunction(); - factory.registerFunction(); -} - -} -#endif diff --git a/src/Functions/RandXorshift.h b/src/Functions/RandXorshift.h deleted file mode 100644 index c005d7377dd..00000000000 --- a/src/Functions/RandXorshift.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once - -/// disable xorshift -#if 0 - -#include -#include -#include -#include - -#include -#include -#include - -namespace DB -{ - -DECLARE_MULTITARGET_CODE( - -struct RandXorshiftImpl -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return ToString(BuildArch); } -}; - -struct RandXorshiftImpl2 -{ - static void execute(char * output, size_t size); - static String getImplementationTag() { return "v2"; } -}; - -) // DECLARE_MULTITARGET_CODE - -template -class FunctionRandomXorshift : public FunctionRandomImpl -{ -public: - explicit FunctionRandomXorshift(const Context & context) : selector(context) - { - selector.registerImplementation>(); - - #if USE_MULTITARGET_CODE - selector.registerImplementation>(); - selector.registerImplementation>(); - #endif - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - selector.selectAndExecute(block, arguments, result, input_rows_count); - } - - static FunctionPtr create(const Context & context) - { - return std::make_shared>(context); - } - -private: - ImplementationSelector selector; -}; - -} - -#endif diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index 53113a77273..7dbb73c0cf3 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -21,8 +21,6 @@ public: return name; } - static String getImplementationTag() { return ToString(BuildArch); } - size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes &) const override From f1cfc7b472204696f0aaae391e915d62888ccdd2 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 11:47:01 +0400 Subject: [PATCH 0259/2229] Rename LDAP_PASSWORD to LDAP_SERVER and use "ldap_server" as a string key Some refactoring --- src/Access/AccessControlManager.cpp | 112 ----------------------- src/Access/Authentication.cpp | 9 +- src/Access/Authentication.h | 37 ++++---- src/Access/ExternalAuthenticators.cpp | 113 ++++++++++++++++++++++++ src/Access/ExternalAuthenticators.h | 12 +++ src/Access/LDAPClient.cpp | 17 ++-- src/Access/LDAPClient.h | 8 +- src/Access/UsersConfigAccessStorage.cpp | 4 +- src/Parsers/ASTCreateUserQuery.cpp | 7 +- src/Parsers/ASTCreateUserQuery.h | 4 +- src/Parsers/ParserCreateUserQuery.cpp | 6 +- src/Parsers/ParserCreateUserQuery.h | 4 +- src/Server/MySQLHandler.cpp | 2 +- 13 files changed, 172 insertions(+), 163 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index fc659d8bacb..07002971734 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -12,19 +12,11 @@ #include #include #include 
-#include -#include #include namespace DB { - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - namespace { std::vector> createStorages() @@ -41,110 +33,6 @@ namespace constexpr size_t DISK_ACCESS_STORAGE_INDEX = 0; constexpr size_t USERS_CONFIG_ACCESS_STORAGE_INDEX = 1; - - auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const String & ldap_server_name) - { - LDAPServerParams params; - - const String ldap_server_config = "ldap_servers." + ldap_server_name; - - const bool has_host = config.has(ldap_server_config + ".host"); - const bool has_port = config.has(ldap_server_config + ".port"); - const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); - const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); - const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); - const bool has_tls_cert_verify = config.has(ldap_server_config + ".tls_cert_verify"); - const bool has_ca_cert_dir = config.has(ldap_server_config + ".ca_cert_dir"); - const bool has_ca_cert_file = config.has(ldap_server_config + ".ca_cert_file"); - - if (!has_host) - throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); - - params.host = config.getString(ldap_server_config + ".host"); - - if (params.host.empty()) - throw Exception("Empty 'host' entry", ErrorCodes::BAD_ARGUMENTS); - - if (has_auth_dn_prefix) - params.auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); - - if (has_auth_dn_suffix) - params.auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); - - if (has_enable_tls) - { - String enable_tls_lc_str = config.getString(ldap_server_config + ".enable_tls"); - boost::to_lower(enable_tls_lc_str); - - if (enable_tls_lc_str == "starttls") - params.enable_tls = LDAPServerParams::TLSEnable::YES_STARTTLS; - else if (config.getBool(ldap_server_config + ".enable_tls")) - params.enable_tls = LDAPServerParams::TLSEnable::YES; - else - params.enable_tls = LDAPServerParams::TLSEnable::NO; - } - - if (has_tls_cert_verify) - { - String tls_cert_verify_lc_str = config.getString(ldap_server_config + ".tls_cert_verify"); - boost::to_lower(tls_cert_verify_lc_str); - - if (tls_cert_verify_lc_str == "never") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::NEVER; - else if (tls_cert_verify_lc_str == "allow") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::ALLOW; - else if (tls_cert_verify_lc_str == "try") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::TRY; - else if (tls_cert_verify_lc_str == "demand") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::DEMAND; - else - throw Exception("Bad value for 'tls_cert_verify' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); - } - - if (has_ca_cert_dir) - params.ca_cert_dir = config.getString(ldap_server_config + ".ca_cert_dir"); - - if (has_ca_cert_file) - params.ca_cert_file = config.getString(ldap_server_config + ".ca_cert_file"); - - if (has_port) - { - const auto port = config.getInt64(ldap_server_config + ".port"); - if (port < 0 || port > 65535) - throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS); - - params.port = port; - } - else - params.port = (params.enable_tls == LDAPServerParams::TLSEnable::YES ? 
636 : 389); - - return params; - } - - void parseAndAddLDAPServers(ExternalAuthenticators & external_authenticators, const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) - { - Poco::Util::AbstractConfiguration::Keys ldap_server_names; - config.keys("ldap_servers", ldap_server_names); - - for (const auto & ldap_server_name : ldap_server_names) - { - try - { - external_authenticators.setLDAPServerParams(ldap_server_name, parseLDAPServer(config, ldap_server_name)); - } - catch (...) - { - tryLogCurrentException(log, "Could not parse LDAP server " + backQuote(ldap_server_name)); - } - } - } - - auto parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) - { - auto external_authenticators = std::make_unique(); - parseAndAddLDAPServers(*external_authenticators, config, log); - return external_authenticators; - } } diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 4d49d205040..970cb89ac10 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } + Authentication::Digest Authentication::getPasswordDoubleSHA1() const { switch (type) @@ -38,8 +39,8 @@ Authentication::Digest Authentication::getPasswordDoubleSHA1() const case DOUBLE_SHA1_PASSWORD: return password_hash; - case LDAP_PASSWORD: - throw Exception("Cannot get password double SHA1 for user with 'LDAP_PASSWORD' authentication.", ErrorCodes::BAD_ARGUMENTS); + case LDAP_SERVER: + throw Exception("Cannot get password double SHA1 for user with 'LDAP_SERVER' authentication.", ErrorCodes::BAD_ARGUMENTS); case MAX_TYPE: break; @@ -79,9 +80,9 @@ bool Authentication::isCorrectPassword(const String & password_, const String & return encodeSHA1(first_sha1) == password_hash; } - case LDAP_PASSWORD: + case LDAP_SERVER: { - auto ldap_server_params = external_authenticators.getLDAPServerParams(ldap_server_name); + auto ldap_server_params = external_authenticators.getLDAPServerParams(server_name); ldap_server_params.user = user_; ldap_server_params.password = password_; LDAPSimpleAuthClient ldap_client(ldap_server_params); diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 65f45db499b..35ff0fa1d32 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -40,7 +40,7 @@ public: DOUBLE_SHA1_PASSWORD, /// Password is checked by a [remote] LDAP server. Connection will be made at each authentication attempt. - LDAP_PASSWORD, + LDAP_SERVER, MAX_TYPE, }; @@ -82,12 +82,13 @@ public: /// Allowed to use for Type::NO_PASSWORD, Type::PLAINTEXT_PASSWORD, Type::DOUBLE_SHA1_PASSWORD. Digest getPasswordDoubleSHA1() const; - /// Sets an external LDAP server name. LDAP server name is used when authentication type is LDAP_PASSWORD. - void setLDAPServerName(const String & server_name); - const String & getLDAPServerName() const; + /// Sets an external authentication server name. + /// When authentication type is LDAP_SERVER, server name is expected to be the name of a preconfigured LDAP server. + const String & getServerName() const; + void setServerName(const String & server_name_); /// Checks if the provided password is correct. Returns false if not. - /// User name and external authenticators' info is used only by some specific authentication mechanisms (e.g., LDAP). + /// User name and external authenticators' info are used only by some specific authentication type (e.g., LDAP_SERVER). 
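    /// For context, the server name set here is expected to match a section under ldap_servers
    /// in the server configuration; a sketch of such a section, using only keys that
    /// parseLDAPServer() reads (host names and DNs are illustrative):
    ///
    ///     <ldap_servers>
    ///         <my_ldap_server>
    ///             <host>ldap.example.com</host>
    ///             <port>636</port>
    ///             <auth_dn_prefix>uid=</auth_dn_prefix>
    ///             <auth_dn_suffix>,ou=users,dc=example,dc=com</auth_dn_suffix>
    ///             <enable_tls>yes</enable_tls>
    ///             <tls_cert_verify>demand</tls_cert_verify>
    ///         </my_ldap_server>
    ///     </ldap_servers>
    ///
    /// A users.xml entry then refers to it via <ldap><server>my_ldap_server</server></ldap>,
    /// as read by UsersConfigAccessStorage below.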
bool isCorrectPassword(const String & password_, const String & user_, const ExternalAuthenticators & external_authenticators) const; friend bool operator ==(const Authentication & lhs, const Authentication & rhs) { return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash); } @@ -102,7 +103,7 @@ private: Type type = Type::NO_PASSWORD; Digest password_hash; - String ldap_server_name; + String server_name; }; @@ -137,9 +138,9 @@ inline const Authentication::TypeInfo & Authentication::TypeInfo::get(Type type_ static const auto info = make_info("DOUBLE_SHA1_PASSWORD"); return info; } - case LDAP_PASSWORD: + case LDAP_SERVER: { - static const auto info = make_info("LDAP"); + static const auto info = make_info("LDAP_SERVER"); return info; } case MAX_TYPE: break; @@ -191,8 +192,8 @@ inline void Authentication::setPassword(const String & password_) case DOUBLE_SHA1_PASSWORD: return setPasswordHashBinary(encodeDoubleSHA1(password_)); - case LDAP_PASSWORD: - throw Exception("Cannot specify password for the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); + case LDAP_SERVER: + throw Exception("Cannot specify password for the 'LDAP_SERVER' authentication type", ErrorCodes::LOGICAL_ERROR); case MAX_TYPE: break; } @@ -218,8 +219,8 @@ inline void Authentication::setPasswordHashHex(const String & hash) inline String Authentication::getPasswordHashHex() const { - if (type == LDAP_PASSWORD) - throw Exception("Cannot get password of a user with the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); + if (type == LDAP_SERVER) + throw Exception("Cannot get password of a user with the 'LDAP_SERVER' authentication type", ErrorCodes::LOGICAL_ERROR); String hex; hex.resize(password_hash.size() * 2); boost::algorithm::hex(password_hash.begin(), password_hash.end(), hex.data()); @@ -262,22 +263,22 @@ inline void Authentication::setPasswordHashBinary(const Digest & hash) return; } - case LDAP_PASSWORD: - throw Exception("Cannot specify password for the 'LDAP_PASSWORD' authentication type", ErrorCodes::LOGICAL_ERROR); + case LDAP_SERVER: + throw Exception("Cannot specify password for the 'LDAP_SERVER' authentication type", ErrorCodes::LOGICAL_ERROR); case MAX_TYPE: break; } throw Exception("setPasswordHashBinary(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); } -inline const String & Authentication::getLDAPServerName() const +inline const String & Authentication::getServerName() const { - return ldap_server_name; + return server_name; } -inline void Authentication::setLDAPServerName(const String & server_name) +inline void Authentication::setServerName(const String & server_name_) { - ldap_server_name = server_name; + server_name = server_name_; } } diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 273048a020e..97f2f24a72e 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -1,5 +1,9 @@ #include #include +#include +#include +#include + namespace DB { @@ -9,6 +13,108 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + +auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const String & ldap_server_name) +{ + LDAPServerParams params; + + const String ldap_server_config = "ldap_servers." 
+ ldap_server_name; + + const bool has_host = config.has(ldap_server_config + ".host"); + const bool has_port = config.has(ldap_server_config + ".port"); + const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); + const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); + const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); + const bool has_tls_cert_verify = config.has(ldap_server_config + ".tls_cert_verify"); + const bool has_ca_cert_dir = config.has(ldap_server_config + ".ca_cert_dir"); + const bool has_ca_cert_file = config.has(ldap_server_config + ".ca_cert_file"); + + if (!has_host) + throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); + + params.host = config.getString(ldap_server_config + ".host"); + + if (params.host.empty()) + throw Exception("Empty 'host' entry", ErrorCodes::BAD_ARGUMENTS); + + if (has_auth_dn_prefix) + params.auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + + if (has_auth_dn_suffix) + params.auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + + if (has_enable_tls) + { + String enable_tls_lc_str = config.getString(ldap_server_config + ".enable_tls"); + boost::to_lower(enable_tls_lc_str); + + if (enable_tls_lc_str == "starttls") + params.enable_tls = LDAPServerParams::TLSEnable::YES_STARTTLS; + else if (config.getBool(ldap_server_config + ".enable_tls")) + params.enable_tls = LDAPServerParams::TLSEnable::YES; + else + params.enable_tls = LDAPServerParams::TLSEnable::NO; + } + + if (has_tls_cert_verify) + { + String tls_cert_verify_lc_str = config.getString(ldap_server_config + ".tls_cert_verify"); + boost::to_lower(tls_cert_verify_lc_str); + + if (tls_cert_verify_lc_str == "never") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::NEVER; + else if (tls_cert_verify_lc_str == "allow") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::ALLOW; + else if (tls_cert_verify_lc_str == "try") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::TRY; + else if (tls_cert_verify_lc_str == "demand") + params.tls_cert_verify = LDAPServerParams::TLSCertVerify::DEMAND; + else + throw Exception("Bad value for 'tls_cert_verify' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); + } + + if (has_ca_cert_dir) + params.ca_cert_dir = config.getString(ldap_server_config + ".ca_cert_dir"); + + if (has_ca_cert_file) + params.ca_cert_file = config.getString(ldap_server_config + ".ca_cert_file"); + + if (has_port) + { + const auto port = config.getInt64(ldap_server_config + ".port"); + if (port < 0 || port > 65535) + throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS); + + params.port = port; + } + else + params.port = (params.enable_tls == LDAPServerParams::TLSEnable::YES ? 636 : 389); + + return params; +} + +void parseAndAddLDAPServers(ExternalAuthenticators & external_authenticators, const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) +{ + Poco::Util::AbstractConfiguration::Keys ldap_server_names; + config.keys("ldap_servers", ldap_server_names); + + for (const auto & ldap_server_name : ldap_server_names) + { + try + { + external_authenticators.setLDAPServerParams(ldap_server_name, parseLDAPServer(config, ldap_server_name)); + } + catch (...) 
+ { + tryLogCurrentException(log, "Could not parse LDAP server " + backQuote(ldap_server_name)); + } + } +} + +} + void ExternalAuthenticators::setLDAPServerParams(const String & server, const LDAPServerParams & params) { std::scoped_lock lock(mutex); @@ -25,4 +131,11 @@ LDAPServerParams ExternalAuthenticators::getLDAPServerParams(const String & serv return it->second; } +std::unique_ptr parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) +{ + auto external_authenticators = std::make_unique(); + parseAndAddLDAPServers(*external_authenticators, config, log); + return external_authenticators; +} + } diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index f7707e0719f..1910243781f 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -4,9 +4,19 @@ #include #include +#include #include +namespace Poco { + class Logger; + namespace Util + { + class AbstractConfiguration; + } +} + + namespace DB { @@ -21,4 +31,6 @@ private: std::map ldap_server_params; }; +std::unique_ptr parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log); + } diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 136a7af70c4..d7f58b0ec50 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -23,6 +23,12 @@ LDAPClient::~LDAPClient() closeConnection(); } +void LDAPClient::openConnection() +{ + const bool graceful_bind_failure = false; + diag(openConnection(graceful_bind_failure)); +} + #if USE_LDAP namespace @@ -196,12 +202,6 @@ int LDAPClient::openConnection(const bool graceful_bind_failure) return rc; } -void LDAPClient::openConnection() -{ - const bool graceful_bind_failure = false; - diag(openConnection(graceful_bind_failure)); -} - void LDAPClient::closeConnection() noexcept { if (!handle) @@ -260,11 +260,6 @@ int LDAPClient::openConnection(const bool) throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } -void LDAPClient::openConnection() -{ - throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); -} - void LDAPClient::closeConnection() noexcept { } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index 1187f2c2a21..a07dc93f4b1 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -30,13 +30,13 @@ public: LDAPClient & operator= (LDAPClient &&) = delete; protected: - int openConnection(const bool graceful_bind_failure = false); - MAYBE_NORETURN void openConnection(); - void closeConnection() noexcept; MAYBE_NORETURN void diag(const int rc); + MAYBE_NORETURN void openConnection(); + int openConnection(const bool graceful_bind_failure = false); + void closeConnection() noexcept; protected: - LDAPServerParams params; + const LDAPServerParams params; #if USE_LDAP LDAP * handle = nullptr; #endif diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 883468f4963..38925142460 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -89,8 +89,8 @@ namespace const auto ldap_server_name = config.getString(user_config + ".ldap.server"); - user->authentication = Authentication{Authentication::LDAP_PASSWORD}; - user->authentication.setLDAPServerName(ldap_server_name); + user->authentication = Authentication{Authentication::LDAP_SERVER}; + user->authentication.setServerName(ldap_server_name); } const 
auto profile_name_config = user_config + ".profile"; diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index 9ad89fc02bd..087c8160bad 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -34,7 +34,7 @@ namespace String authentication_type_name = Authentication::TypeInfo::get(authentication_type).name; std::optional password; - if (show_password || authentication_type == Authentication::LDAP_PASSWORD) + if (show_password || authentication_type == Authentication::LDAP_SERVER) { switch (authentication_type) { @@ -55,10 +55,9 @@ namespace password = authentication.getPasswordHashHex(); break; } - case Authentication::LDAP_PASSWORD: + case Authentication::LDAP_SERVER: { - authentication_type_name = "ldap"; - password = authentication.getLDAPServerName(); + password = authentication.getServerName(); break; } diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 42ec3fb877b..c9657b9abdf 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -12,14 +12,14 @@ class ASTExtendedRoleSet; class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap}] BY {'password'|'hash'|'server_name'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap_server}] BY {'password'|'hash'|'server_name'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap}] BY {'password'|'hash'|'server_name'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap_server}] BY {'password'|'hash'|'server_name'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] 
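 * A concrete instance of the grammar above for the renamed authentication type
 * (the user and server names are illustrative):
 *
 *     CREATE USER jdoe IDENTIFIED WITH ldap_server BY 'my_ldap_server';
 *     ALTER USER jdoe IDENTIFIED WITH ldap_server BY 'other_ldap_server';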
diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index f1b741a1056..9a8acd61b3a 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -60,8 +60,8 @@ namespace if (ParserKeyword{Authentication::TypeInfo::get(check_type).raw_name}.ignore(pos, expected)) { type = check_type; - expect_password = (check_type != Authentication::NO_PASSWORD && check_type != Authentication::LDAP_PASSWORD); - expect_server = (check_type == Authentication::LDAP_PASSWORD); + expect_password = (check_type != Authentication::NO_PASSWORD && check_type != Authentication::LDAP_SERVER); + expect_server = (check_type == Authentication::LDAP_SERVER); break; } } @@ -105,7 +105,7 @@ namespace else if (expect_hash) authentication->setPasswordHashHex(value); else if (expect_server) - authentication->setLDAPServerName(value); + authentication->setServerName(value); return true; }); diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index b4e1918aaed..2df8ee5bcff 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -7,13 +7,13 @@ namespace DB { /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap}] BY {'password'|'hash'|'server_name'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap_server}] BY {'password'|'hash'|'server_name'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap}] BY {'password'|'hash'|'server_name'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash|ldap_server}] BY {'password'|'hash'|'server_name'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] */ diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index a053ec77967..2e88d17ee46 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -222,7 +222,7 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. 
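// Note: LDAP_SERVER is grouped with the plaintext/double-SHA1/no-password cases below, so authPluginSSL() is only forced for the remaining (SHA-256-based) authentication types.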
auto user = connection_context.getAccessControlManager().read(user_name); const DB::Authentication::Type user_auth_type = user->authentication.getType(); - if (user_auth_type != DB::Authentication::LDAP_PASSWORD && user_auth_type != DB::Authentication::DOUBLE_SHA1_PASSWORD && user_auth_type != DB::Authentication::PLAINTEXT_PASSWORD && user_auth_type != DB::Authentication::NO_PASSWORD) + if (user_auth_type != DB::Authentication::LDAP_SERVER && user_auth_type != DB::Authentication::DOUBLE_SHA1_PASSWORD && user_auth_type != DB::Authentication::PLAINTEXT_PASSWORD && user_auth_type != DB::Authentication::NO_PASSWORD) { authPluginSSL(); } From 1bf8940c18a19ff3fbc0b95cc56669bc9f4300da Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 29 May 2020 11:12:08 +0200 Subject: [PATCH 0260/2229] better randomFixedString and randomString --- src/Functions/randomFixedString.cpp | 45 +++++++++++++++++++-------- src/Functions/randomString.cpp | 47 +++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/src/Functions/randomFixedString.cpp b/src/Functions/randomFixedString.cpp index 9fb7550346b..16b6726b5d1 100644 --- a/src/Functions/randomFixedString.cpp +++ b/src/Functions/randomFixedString.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -21,13 +23,12 @@ namespace ErrorCodes /* Generate random fixed string with fully random bytes (including zero). */ -class FunctionRandomFixedString : public IFunction +template +class FunctionRandomFixedStringImpl : public IFunction { public: static constexpr auto name = "randomFixedString"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - String getName() const override { return name; } bool isVariadic() const override { return false; } @@ -68,20 +69,40 @@ public: /// Fill random bytes. data_to.resize(total_size); - pcg64_fast rng(randomSeed()); /// TODO It is inefficient. We should use SIMD PRNG instead. - - auto * pos = data_to.data(); - auto * end = pos + data_to.size(); - while (pos < end) - { - unalignedStore(pos, rng()); - pos += sizeof(UInt64); // We have padding in column buffers that we can overwrite. - } + RandImpl::execute(reinterpret_cast(data_to.data()), total_size); block.getByPosition(result).column = std::move(col_to); } }; +class FunctionRandomFixedString : public FunctionRandomFixedStringImpl +{ +public: + explicit FunctionRandomFixedString(const Context & context) : selector(context) + { + selector.registerImplementation>(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + #endif + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + void registerFunctionRandomFixedString(FunctionFactory & factory) { factory.registerFunction(); diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 4ea470f0a65..5ed8e459549 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,13 +21,12 @@ namespace ErrorCodes /* Generate random string of specified length with fully random bytes (including zero). 
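 * For example, SELECT randomString(16) yields a 16-byte value in which any byte, including zero, may occur.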
*/ -class FunctionRandomString : public IFunction +template +class FunctionRandomStringImpl : public IFunction { public: static constexpr auto name = "randomString"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -83,18 +84,10 @@ public: /// Fill random bytes. data_to.resize(offsets_to.back()); - pcg64_fast rng(randomSeed()); /// TODO It is inefficient. We should use SIMD PRNG instead. - - auto * pos = data_to.data(); - auto * end = pos + data_to.size(); - while (pos < end) - { - unalignedStore(pos, rng()); - pos += sizeof(UInt64); // We have padding in column buffers that we can overwrite. - } + RandImpl::execute(reinterpret_cast(data_to.data()), data_to.size()); /// Put zero bytes in between. - pos = data_to.data(); + auto * pos = data_to.data(); for (size_t row_num = 0; row_num < input_rows_count; ++row_num) pos[offsets_to[row_num] - 1] = 0; @@ -102,6 +95,34 @@ public: } }; +class FunctionRandomString : public FunctionRandomStringImpl +{ +public: + explicit FunctionRandomString(const Context & context) : selector(context) + { + selector.registerImplementation>(); + + #if USE_MULTITARGET_CODE + selector.registerImplementation>(); + #endif + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + selector.selectAndExecute(block, arguments, result, input_rows_count); + } + + static FunctionPtr create(const Context & context) + { + return std::make_shared(context); + } + +private: + ImplementationSelector selector; +}; + void registerFunctionRandomString(FunctionFactory & factory) { factory.registerFunction(); From 8390fcaa23ab7d78922c6b9817387fd0eea28178 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 29 May 2020 11:18:39 +0200 Subject: [PATCH 0261/2229] Cosmetics --- src/Functions/FunctionsRandom.cpp | 1 + src/Functions/FunctionsRandom.h | 4 ++-- src/Functions/randomFixedString.cpp | 2 +- src/Functions/randomString.cpp | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsRandom.cpp b/src/Functions/FunctionsRandom.cpp index fba44d458bb..e77bab9c0a5 100644 --- a/src/Functions/FunctionsRandom.cpp +++ b/src/Functions/FunctionsRandom.cpp @@ -8,6 +8,7 @@ # include #endif + namespace DB { diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index bc11f671c1b..b80ddb6f59e 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -3,10 +3,10 @@ #include #include #include -#include - #include #include +#include + namespace DB { diff --git a/src/Functions/randomFixedString.cpp b/src/Functions/randomFixedString.cpp index 16b6726b5d1..669dc084999 100644 --- a/src/Functions/randomFixedString.cpp +++ b/src/Functions/randomFixedString.cpp @@ -82,7 +82,7 @@ public: { selector.registerImplementation>(); - + #if USE_MULTITARGET_CODE selector.registerImplementation>(); diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 5ed8e459549..df3278c3800 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -102,7 +102,7 @@ public: { selector.registerImplementation>(); - + #if USE_MULTITARGET_CODE selector.registerImplementation>(); From e185f3d157295d33698d58975061eec382df47bf Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Fri, 29 May 2020 11:23:22 +0200 Subject: [PATCH 0262/2229] Fix style issue --- src/Functions/greatCircleDistance.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 89337f83ddf..bff92d7738d 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -287,7 +287,7 @@ public: { selector.registerImplementation>(); - + #if USE_MULTITARGET_CODE selector.registerImplementation>(); From bb68768ce1f95d9eb38b96a75ecee6d7819d01ab Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 13:30:07 +0400 Subject: [PATCH 0263/2229] Change auth_params type to string Write LDAP_SERVER params (server name) to auth_params in JSON --- src/Storages/System/StorageSystemUsers.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index e0755fe59ab..a1e7eba7232 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -12,6 +12,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -35,7 +39,7 @@ NamesAndTypesList StorageSystemUsers::getNamesAndTypes() {"id", std::make_shared()}, {"storage", std::make_shared()}, {"auth_type", std::make_shared(getAuthenticationTypeEnumValues())}, - {"auth_params", std::make_shared(std::make_shared())}, + {"auth_params", std::make_shared()}, {"host_ip", std::make_shared(std::make_shared())}, {"host_names", std::make_shared(std::make_shared())}, {"host_names_regexp", std::make_shared(std::make_shared())}, @@ -59,8 +63,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & auto & column_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_storage = assert_cast(*res_columns[column_index++]); auto & column_auth_type = assert_cast(*res_columns[column_index++]).getData(); - auto & column_auth_params = assert_cast(assert_cast(*res_columns[column_index]).getData()); - auto & column_auth_params_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); + auto & column_auth_params = assert_cast(*res_columns[column_index++]); auto & column_host_ip = assert_cast(assert_cast(*res_columns[column_index]).getData()); auto & column_host_ip_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_host_names = assert_cast(assert_cast(*res_columns[column_index]).getData()); @@ -86,7 +89,18 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & column_id.push_back(id); column_storage.insertData(storage_name.data(), storage_name.length()); column_auth_type.push_back(static_cast(authentication.getType())); - column_auth_params_offsets.push_back(column_auth_params.size()); + + if (authentication.getType() == Authentication::Type::LDAP_SERVER) + { + Poco::JSON::Object auth_params_json; + auth_params_json.set("server", authentication.getServerName()); + + std::ostringstream oss; + Poco::JSON::Stringifier::stringify(auth_params_json, oss); + const auto str = oss.str(); + + column_auth_params.insertData(str.data(), str.size()); + } if (allowed_hosts.containsAnyHost()) { From d9ca9cd9b2e6d44d4ec8160020b98d8f2340ba90 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 14:00:12 +0400 Subject: [PATCH 0264/2229] Compilation fix Typo fix --- cmake/Modules/FindOpenLDAP.cmake | 10 +++++----- cmake/find/ldap.cmake | 7 +++++-- cmake/tools.cmake | 2 +- contrib/CMakeLists.txt | 2 +- src/Access/LDAPClient.cpp | 2 ++ src/CMakeLists.txt | 2 +- 6 files changed, 15 
insertions(+), 10 deletions(-) diff --git a/cmake/Modules/FindOpenLDAP.cmake b/cmake/Modules/FindOpenLDAP.cmake index c33eafdcb2e..ec559e9a7b5 100644 --- a/cmake/Modules/FindOpenLDAP.cmake +++ b/cmake/Modules/FindOpenLDAP.cmake @@ -7,7 +7,7 @@ # # Sets values of: # OPENLDAP_FOUND - TRUE if found -# OPENLDAP_INCLUDE_DIR - path to the include directory +# OPENLDAP_INCLUDE_DIRS - list of paths to the include directories # OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries # OPENLDAP_LDAP_LIBRARY - paths to the libldap library # OPENLDAP_LBER_LIBRARY - paths to the liblber library @@ -28,11 +28,11 @@ if(OPENLDAP_USE_REENTRANT_LIBS) endif() if(OPENLDAP_ROOT_DIR) - find_path(OPENLDAP_INCLUDE_DIR NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH) + find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH) find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) find_library(OPENLDAP_LBER_LIBRARY NAMES "lber" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) else() - find_path(OPENLDAP_INCLUDE_DIR NAMES "ldap.h" "lber.h") + find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h") find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}") find_library(OPENLDAP_LBER_LIBRARY NAMES "lber") endif() @@ -44,10 +44,10 @@ set(OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY}) include(FindPackageHandleStandardArgs) find_package_handle_standard_args( OpenLDAP DEFAULT_MSG - OPENLDAP_INCLUDE_DIR OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY + OPENLDAP_INCLUDE_DIRS OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY ) -mark_as_advanced(OPENLDAP_INCLUDE_DIR OPENLDAP_LIBRARIES OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY) +mark_as_advanced(OPENLDAP_INCLUDE_DIRS OPENLDAP_LIBRARIES OPENLDAP_LDAP_LIBRARY OPENLDAP_LBER_LIBRARY) if(OPENLDAP_USE_STATIC_LIBS) set(CMAKE_FIND_LIBRARY_SUFFIXES ${_orig_CMAKE_FIND_LIBRARY_SUFFIXES}) diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake index 230727819e4..0a753e80f08 100644 --- a/cmake/find/ldap.cmake +++ b/cmake/find/ldap.cmake @@ -54,7 +54,10 @@ if (ENABLE_LDAP) else () set (USE_INTERNAL_LDAP_LIBRARY 1) set (OPENLDAP_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap") - set (OPENLDAP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap/include") + set (OPENLDAP_INCLUDE_DIRS + "${ClickHouse_SOURCE_DIR}/contrib/openldap-cmake/${_system_name}_${_system_processor}" + "${ClickHouse_SOURCE_DIR}/contrib/openldap/include" + ) # Below, 'ldap'/'ldap_r' and 'lber' will be resolved to # the targets defined in contrib/openldap-cmake/CMakeLists.txt if (OPENLDAP_USE_REENTRANT_LIBS) @@ -73,4 +76,4 @@ if (ENABLE_LDAP) endif () endif () -message (STATUS "Using ldap=${USE_LDAP}: ${OPENLDAP_INCLUDE_DIR} : ${OPENLDAP_LIBRARIES}") +message (STATUS "Using ldap=${USE_LDAP}: ${OPENLDAP_INCLUDE_DIRS} : ${OPENLDAP_LIBRARIES}") diff --git a/cmake/tools.cmake b/cmake/tools.cmake index d261b62eca3..95e00ad9951 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -22,7 +22,7 @@ elseif (COMPILER_CLANG) if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${APPLE_CLANG_MINIMUM_VERSION}) message (FATAL_ERROR "AppleClang compiler version must be at least ${APPLE_CLANG_MINIMUM_VERSION} (Xcode ${XCODE_MINIMUM_VERSION}).") elseif (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11.0.0) - # char8_t is available staring (upstream vanilla) Clang 7, but prior to Clang 8, + # char8_t is available 
starting (upstream vanilla) Clang 7, but prior to Clang 8, # it is not enabled by -std=c++20 and can be enabled with an explicit -fchar8_t. set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fchar8_t") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fchar8_t") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index d122188ad0b..402b7c9d8cf 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -109,7 +109,7 @@ if (USE_INTERNAL_SSL_LIBRARY) add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY}) endif () -if (ENABLE_LDAP AND USE_INTERNAL_LDAP_LIBRARY) +if (USE_INTERNAL_LDAP_LIBRARY) add_subdirectory (openldap-cmake) endif () diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index d7f58b0ec50..8d27fca3276 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -2,6 +2,8 @@ #include #include +#include + namespace DB { diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index baa0fbcb883..28dee92de02 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -325,7 +325,7 @@ if (OPENSSL_CRYPTO_LIBRARY) endif () if (USE_LDAP) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${OPENLDAP_INCLUDE_DIR}) + dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${OPENLDAP_INCLUDE_DIRS}) dbms_target_link_libraries (PRIVATE ${OPENLDAP_LIBRARIES}) endif () From 9fb0a95c750d163875c8fef473b33790678749b4 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 16:14:42 +0400 Subject: [PATCH 0265/2229] Compilation fix: add missing "/include" Style fix --- cmake/find/ldap.cmake | 2 +- src/Access/ExternalAuthenticators.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake index 0a753e80f08..11594817e4f 100644 --- a/cmake/find/ldap.cmake +++ b/cmake/find/ldap.cmake @@ -55,7 +55,7 @@ if (ENABLE_LDAP) set (USE_INTERNAL_LDAP_LIBRARY 1) set (OPENLDAP_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/openldap") set (OPENLDAP_INCLUDE_DIRS - "${ClickHouse_SOURCE_DIR}/contrib/openldap-cmake/${_system_name}_${_system_processor}" + "${ClickHouse_SOURCE_DIR}/contrib/openldap-cmake/${_system_name}_${_system_processor}/include" "${ClickHouse_SOURCE_DIR}/contrib/openldap/include" ) # Below, 'ldap'/'ldap_r' and 'lber' will be resolved to diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 1910243781f..dfd4e7061bd 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -8,8 +8,10 @@ #include -namespace Poco { +namespace Poco +{ class Logger; + namespace Util { class AbstractConfiguration; From 246900c1ace1d9b154b186f30f0d8732969db7c0 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 18:33:50 +0400 Subject: [PATCH 0266/2229] Compilation fix --- src/Access/LDAPClient.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 8d27fca3276..60bf49ff24e 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -4,6 +4,8 @@ #include +#include + namespace DB { @@ -133,7 +135,10 @@ int LDAPClient::openConnection(const bool graceful_bind_failure) } diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); + +#ifdef LDAP_OPT_KEEPCONN diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); +#endif { ::timeval operation_timeout; From c919840722add2d34f006a1c3242e5767642b775 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 May 2020 18:02:12 +0300 Subject: [PATCH 0267/2229] in-memory parts: partition commands --- 
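In user-visible terms, this commit makes the usual partition commands work when the affected parts are still in memory; a minimal sketch (table name and settings are illustrative, mirroring the new test added below):

    CREATE TABLE t(id UInt32, a UInt64) ENGINE = MergeTree ORDER BY a PARTITION BY id
        SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000;
    INSERT INTO t VALUES (1, 2), (2, 3);  -- small inserts produce InMemory parts
    ALTER TABLE t DETACH PARTITION 1;     -- the in-memory part is flushed to detached/ on disk
    ALTER TABLE t ATTACH PARTITION 1;     -- it comes back as an on-disk part
    ALTER TABLE t DROP PARTITION 2;       -- the drop is also recorded in the write-ahead log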
src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 - src/Storages/MergeTree/IMergeTreeDataPart.h | 9 ++- src/Storages/MergeTree/MergeTreeData.cpp | 16 ++-- .../MergeTree/MergeTreeDataPartInMemory.cpp | 34 +++++++- .../MergeTree/MergeTreeDataPartInMemory.h | 4 +- .../MergeTree/MergeTreeWriteAheadLog.cpp | 77 +++++++++++++------ .../MergeTree/MergeTreeWriteAheadLog.h | 10 ++- .../0_stateless/01130_in_memory_parts.sql | 2 +- .../01130_in_memory_parts_partitons.reference | 22 ++++++ .../01130_in_memory_parts_partitons.sql | 25 ++++++ 10 files changed, 162 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_partitons.reference create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_partitons.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index bb51a4e8e67..a0370dfad16 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -794,15 +794,11 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) void IMergeTreeDataPart::renameToDetached(const String & prefix) const { - assertOnDisk(); renameTo(getRelativePathForDetachedPart(prefix)); } void IMergeTreeDataPart::makeCloneInDetached(const String & prefix) const { - assertOnDisk(); - LOG_INFO(storage.log, "Detaching " << relative_path); - String destination_path = storage.relative_data_path + getRelativePathForDetachedPart(prefix); /// Backup is not recursive (max_level is 0), so do not copy inner directories diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 8943a9fcb1f..5ce8c854156 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -287,9 +287,10 @@ public: size_t getFileSizeOrZero(const String & file_name) const; String getFullRelativePath() const; String getFullPath() const; - virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; + void renameToDetached(const String & prefix) const; - void makeCloneInDetached(const String & prefix) const; + virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; + virtual void makeCloneInDetached(const String & prefix) const; /// Makes full clone of part in detached/ on another disk void makeCloneOnDiskDetached(const ReservationPtr & reservation) const; @@ -324,6 +325,8 @@ protected: /// disk using columns and checksums. 
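/// Each concrete part format (Wide, Compact, InMemory) provides its own implementation.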
virtual void calculateEachColumnSizesOnDisk(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const = 0; + String getRelativePathForDetachedPart(const String & prefix) const; + private: /// In compact parts order of columns is necessary NameToPosition column_name_to_position; @@ -348,8 +351,6 @@ private: void loadTTLInfos(); void loadPartitionAndMinMaxIndex(); - - String getRelativePathForDetachedPart(const String & prefix) const; }; using MergeTreeDataPartState = IMergeTreeDataPart::State; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d9b6b5e8780..e5c4744296d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -896,11 +896,17 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) part_names_with_disks.emplace_back(it->name(), disk_ptr); - if (startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + /// Create and correctly initialize global WAL object, if it's needed + if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE && settings->in_memory_parts_enable_wal) + { + write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); + for (auto && part : write_ahead_log->restore()) + parts_from_wal.push_back(std::move(part)); + } + else if (startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) { MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); - auto current_parts = wal.restore(); - for (auto & part : current_parts) + for (auto && part : wal.restore()) parts_from_wal.push_back(std::move(part)); } } @@ -1120,7 +1126,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - if (settings->in_memory_parts_enable_wal) + if (settings->in_memory_parts_enable_wal && !write_ahead_log) { auto disk = makeEmptyReservationOnLargestDisk()->getDisk(); write_ahead_log = std::make_shared(*this, std::move(disk)); @@ -1976,7 +1982,7 @@ void MergeTreeData::renameTempPartAndReplace( if (part_in_memory && getSettings()->in_memory_parts_enable_wal) { auto wal = getWriteAheadLog(); - wal->write(part_in_memory->block, part_in_memory->name); + wal->addPart(part_in_memory->block, part_in_memory->name); } if (out_covered_parts) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 41b35757ed8..5d376b88b74 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -1,9 +1,11 @@ #include "MergeTreeDataPartInMemory.h" #include +#include #include #include #include - +#include +#include namespace DB { @@ -58,6 +60,36 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( return std::make_unique(ptr, columns_list, writer_settings); } +void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const +{ + String detached_path = getRelativePathForDetachedPart(prefix); + String destination_path = storage.getRelativeDataPath() + getRelativePathForDetachedPart(prefix); + + auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count); + auto new_data_part = storage.createPart(name, new_type, info, disk, detached_path); + + new_data_part->setColumns(columns); + new_data_part->partition.value.assign(partition.value); + new_data_part->minmax_idx = minmax_idx; + + if (disk->exists(destination_path)) + { + LOG_WARNING(&Logger::get(storage.getLogName()), "Removing old temporary directory " + disk->getPath() + destination_path); + disk->removeRecursive(destination_path); + } + + 
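+ /// Write the held in-memory block out through the regular on-disk write path below.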
disk->createDirectories(destination_path); + + auto compression_codec = storage.global_context.chooseCompressionCodec(0, 0); + MergedBlockOutputStream out(new_data_part, columns, storage.skip_indices, compression_codec); + out.writePrefix(); + out.write(block); + out.writeSuffixAndFinalizePart(new_data_part); + + if (storage.getSettings()->in_memory_parts_enable_wal) + storage.getWriteAheadLog()->dropPart(name); +} + bool MergeTreeDataPartInMemory::waitUntilMerged(size_t timeout) const { auto lock = storage.lockParts(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index b264ff73436..437bbb8e308 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -38,12 +38,10 @@ public: const MergeTreeIndexGranularity & computed_index_granularity) const override; bool isStoredOnDisk() const override { return false; } - bool hasColumnFiles(const String & column_name, const IDataType & /* type */) const override { return !!getColumnPosition(column_name); } - String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } - void renameTo(const String & /*new_relative_path*/, bool /*remove_new_dir_if_exists*/) const override {} + void makeCloneInDetached(const String & prefix) const override; bool waitUntilMerged(size_t timeout) const override; void notifyMerged() const override; diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index e2dcea290de..d3bec08073c 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -13,6 +13,7 @@ namespace ErrorCodes extern const int UNKNOWN_FORMAT_VERSION; extern const int CANNOT_READ_ALL_DATA; extern const int BAD_DATA_PART_NAME; + extern const int CORRUPTED_DATA; } @@ -36,7 +37,7 @@ void MergeTreeWriteAheadLog::init() max_block_number = -1; } -void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name) +void MergeTreeWriteAheadLog::addPart(const Block & block, const String & part_name) { std::lock_guard lock(write_mutex); @@ -45,6 +46,7 @@ void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name max_block_number = std::max(max_block_number, part_info.max_block); writeIntBinary(static_cast(0), *out); /// version + writeIntBinary(static_cast(ActionType::ADD_PART), *out); writeStringBinary(part_name, *out); block_out->write(block); block_out->flush(); @@ -54,6 +56,15 @@ void MergeTreeWriteAheadLog::write(const Block & block, const String & part_name rotate(); } +void MergeTreeWriteAheadLog::dropPart(const String & part_name) +{ + std::lock_guard lock(write_mutex); + + writeIntBinary(static_cast(0), *out); + writeIntBinary(static_cast(ActionType::DROP_PART), *out); + writeStringBinary(part_name, *out); +} + void MergeTreeWriteAheadLog::rotate() { String new_name = String(WAL_FILE_NAME) + "_" @@ -68,9 +79,10 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() { std::lock_guard lock(write_mutex); - MergeTreeData::MutableDataPartsVector result; + MergeTreeData::MutableDataPartsVector parts; auto in = disk->readFile(path, DBMS_DEFAULT_BUFFER_SIZE); NativeBlockInputStream block_in(*in, 0); + NameSet dropped_parts; while (!in->eof()) { @@ -78,6 +90,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() UInt8 version; String part_name; Block block; + ActionType action_type; try { @@ -85,22 
+98,35 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() if (version != 0) throw Exception("Unknown WAL format version: " + toString(version), ErrorCodes::UNKNOWN_FORMAT_VERSION); + readIntBinary(action_type, *in); readStringBinary(part_name, *in); - part = storage.createPart( - part_name, - MergeTreeDataPartType::IN_MEMORY, - MergeTreePartInfo::fromPartName(part_name, storage.format_version), - storage.reserveSpace(0)->getDisk(), - part_name); + if (action_type == ActionType::DROP_PART) + { + dropped_parts.insert(part_name); + } + else if (action_type == ActionType::ADD_PART) + { + part = storage.createPart( + part_name, + MergeTreeDataPartType::IN_MEMORY, + MergeTreePartInfo::fromPartName(part_name, storage.format_version), + storage.reserveSpace(0)->getDisk(), + part_name); - block = block_in.read(); + block = block_in.read(); + } + else + { + throw Exception("Unknown action type: " + toString(static_cast(action_type)), ErrorCodes::CORRUPTED_DATA); + } } catch (const Exception & e) { if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == ErrorCodes::UNKNOWN_FORMAT_VERSION - || e.code() == ErrorCodes::BAD_DATA_PART_NAME) + || e.code() == ErrorCodes::BAD_DATA_PART_NAME + || e.code() == ErrorCodes::CORRUPTED_DATA) { LOG_WARNING(&Logger::get(storage.getLogName() + " (WriteAheadLog)"), "WAL file '" << path << "' is broken. " << e.displayText()); @@ -117,23 +143,30 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore() throw; } - MergedBlockOutputStream part_out(part, block.getNamesAndTypesList(), {}, nullptr); + if (action_type == ActionType::ADD_PART) + { + MergedBlockOutputStream part_out(part, block.getNamesAndTypesList(), {}, nullptr); - part->minmax_idx.update(block, storage.minmax_idx_columns); - if (storage.partition_key_expr) - part->partition.create(storage, block, 0); - if (storage.hasSortingKey()) - storage.sorting_key_expr->execute(block); + part->minmax_idx.update(block, storage.minmax_idx_columns); + if (storage.partition_key_expr) + part->partition.create(storage, block, 0); + if (storage.hasSortingKey()) + storage.sorting_key_expr->execute(block); - part_out.writePrefix(); - part_out.write(block); - part_out.writeSuffixAndFinalizePart(part); + part_out.writePrefix(); + part_out.write(block); + part_out.writeSuffixAndFinalizePart(part); - min_block_number = std::min(min_block_number, part->info.min_block); - max_block_number = std::max(max_block_number, part->info.max_block); - result.push_back(std::move(part)); + min_block_number = std::min(min_block_number, part->info.min_block); + max_block_number = std::max(max_block_number, part->info.max_block); + parts.push_back(std::move(part)); + } } + MergeTreeData::MutableDataPartsVector result; + std::copy_if(parts.begin(), parts.end(), std::back_inserter(result), + [&dropped_parts](const auto & part) { return dropped_parts.count(part->name) == 0; }); + return result; } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 3081d51ecac..e33cc8d534b 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -13,6 +13,13 @@ class MergeTreeData; class MergeTreeWriteAheadLog { public: + /// Append-only enum. 
It is serialized to WAL + enum class ActionType : UInt8 + { + ADD_PART = 0, + DROP_PART = 1, + }; + constexpr static auto WAL_FILE_NAME = "wal"; constexpr static auto WAL_FILE_EXTENSION = ".bin"; constexpr static auto DEFAULT_WAL_FILE = "wal.bin"; @@ -20,7 +27,8 @@ public: MergeTreeWriteAheadLog(const MergeTreeData & storage_, const DiskPtr & disk_, const String & name = DEFAULT_WAL_FILE); - void write(const Block & block, const String & part_name); + void addPart(const Block & block, const String & part_name); + void dropPart(const String & part_name); std::vector restore(); using MinMaxBlockNumber = std::pair; diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql index b704fbdf081..d6471cfb35f 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS in_memory; CREATE TABLE in_memory (a UInt32, b UInt32) ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_compact_part = 1000; + SETTINGS min_rows_for_compact_part = 1000, min_rows_for_compact_part = 1000; INSERT INTO in_memory SELECT number, number % 3 FROM numbers(100); SELECT DISTINCT part_type, marks FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference new file mode 100644 index 00000000000..4f9c1b40e27 --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference @@ -0,0 +1,22 @@ +1 2 foo +1 3 bar +2 4 aa +2 5 bb +3 6 qq +3 7 ww +================== +2 4 aa +2 5 bb +3 6 qq +3 7 ww +================== +3 6 qq +3 7 ww +================== +2 4 aa +2 5 bb +3 6 qq +3 7 ww +2_4_4_0 Compact +3_3_3_0 InMemory +================== diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql new file mode 100644 index 00000000000..88b0e3322ed --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS t2; + +CREATE TABLE t2(id UInt32, a UInt64, s String) + ENGINE = MergeTree ORDER BY a PARTITION BY id + SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; + +INSERT INTO t2 VALUES (1, 2, 'foo'), (1, 3, 'bar'); +INSERT INTO t2 VALUES (2, 4, 'aa'), (2, 5, 'bb'); +INSERT INTO t2 VALUES (3, 6, 'qq'), (3, 7, 'ww'); + +SELECT * FROM t2 ORDER BY a; +SELECT '=================='; + +ALTER TABLE t2 DROP PARTITION 1; +SELECT * FROM t2 ORDER BY a; +SELECT '=================='; + +ALTER TABLE t2 DETACH PARTITION 2; +SELECT * FROM t2 ORDER BY a; +SELECT '=================='; + +ALTER TABLE t2 ATTACH PARTITION 2; +SELECT * FROM t2 ORDER BY a; +SELECT name, part_type FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT '=================='; From 0362bb2d2f54ec90c2a71a9f446d2aec41bf920a Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 29 May 2020 16:04:44 +0000 Subject: [PATCH 0268/2229] Make connection between concurrent consumers shared - not private --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 15 +++- src/Storages/RabbitMQ/RabbitMQHandler.h | 4 +- .../ReadBufferFromRabbitMQConsumer.cpp | 72 +++++++------------ .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 14 ++-- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 24 ++++++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 ++ 6 files 
changed, 76 insertions(+), 57 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cde43862ede..1f6e9ce1bb1 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -19,13 +19,26 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) } -void RabbitMQHandler::start() +void RabbitMQHandler::start(std::atomic & check_param) { + /* The object of this class is shared between concurrent consumers, who call this method repeatedly at the same time. + * But the loop should not be attempted to start if it is already running. Also note that the loop is blocking to + * the thread that has started it. + */ + std::lock_guard lock(mutex); + + /* The callback, which changes this variable, could have already been activated by another thread while we waited for the + * mutex to unlock (as it runs all active events on the connection). This means that there is no need to start event loop again. + */ + if (check_param) + return; + event_base_loop(evbase, EVLOOP_NONBLOCK); } void RabbitMQHandler::stop() { + std::lock_guard lock(mutex); event_base_loopbreak(evbase); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 5b8a08be548..a70b08aba55 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -17,12 +17,14 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void start(); + void start(std::atomic & check_param); void stop(); private: event_base * evbase; Poco::Logger * log; + + std::mutex mutex; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 945de989b57..d6da5850472 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -7,17 +7,12 @@ #include -enum -{ - Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000 -}; - namespace DB { ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( - std::pair & parsed_address, + ChannelPtr consumer_channel_, + RabbitMQHandler & eventHandler_, const String & exchange_name_, const String & routing_key_, const size_t channel_id_, @@ -28,10 +23,8 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const size_t num_queues_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) - , evbase(event_base_new()) - , eventHandler(evbase, log) - , connection(&eventHandler, - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , consumer_channel(std::move(consumer_channel_)) + , eventHandler(eventHandler_) , exchange_name(exchange_name_) , routing_key(routing_key_) , channel_id(channel_id_) @@ -41,29 +34,9 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , hash_exchange(hash_exchange_) , num_queues(num_queues_) , stopped(stopped_) + , exchange_declared(false) + , false_param(false) { - /* It turned out to be very important to make a different connection each time the object of this class is created, - * because in case when num_consumers > 1 - inputStreams run asynchronously and if they share the same connection, - * then they also will share the same event loop. 
But it will mean that if one stream's consumer starts event loop, - * then it will run all callbacks on the connection - including other stream's consumer's callbacks - - as a result local variables can be updated both by the current thread and in callbacks by another thread during - * event loop, which is blocking only to the thread that has started the loop. Therefore sharing the connection - * (== sharing event loop) results in occasional seg faults in case of asynchronous run of objects that share the connection. - */ - size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) - { - event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); - std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); - } - - if (!connection.ready()) - { - LOG_ERROR(log, "Cannot set up connection for consumer"); - } - - consumer_channel = std::make_shared(&connection); - messages.clear(); current = messages.begin(); @@ -79,7 +52,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() { - connection.close(); + consumer_channel->close(); messages.clear(); current = messages.begin(); @@ -139,7 +112,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) exchange_declared = true; } - bool bindings_created = false, bindings_error = false; + std::atomic bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -189,7 +162,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) while (!bindings_created && !bindings_error) { /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events - startEventLoop(); + startEventLoop(bindings_created); } } @@ -212,7 +185,7 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - bool consumer_created = false, consumer_error = false; + std::atomic consumer_created = false, consumer_error = false; consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) @@ -224,7 +197,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { size_t message_size = message.bodySize(); - if (message_size && message.body() != nullptr) { String message_received = std::string(message.body(), message.body() + message_size); @@ -232,8 +204,10 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (row_delimiter != '\0') message_received += row_delimiter; - //LOG_TRACE(log, "Consumer " + consumerTag + " received the message " + message_received); - + /* Needed because this vector can be used at the same time by another thread in nextImpl() (below). + * So we lock mutex here and there so that they do not use it asynchronously.
+ */ + std::lock_guard lock(mutex); received.push_back(message_received); } }) @@ -245,14 +219,15 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) while (!consumer_created && !consumer_error) { - startEventLoop(); + /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events + startEventLoop(consumer_created); } } -void ReadBufferFromRabbitMQConsumer::startEventLoop() +void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param) { - eventHandler.start(); + eventHandler.start(check_param); } @@ -265,9 +240,8 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { if (received.empty()) { - /* Run the onReceived callbacks to save the messages that have been received by now - */ - startEventLoop(); + /// Run the onReceived callbacks to save the messages that have been received by now + startEventLoop(false_param); } if (received.empty()) @@ -277,6 +251,12 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() } messages.clear(); + + /* Needed because this vector can be used at the same time by another thread in onReceived callback (above). + * So we lock mutex here and there so that they do not use it asynchronously. + */ + std::lock_guard lock(mutex); + messages.swap(received); current = messages.begin(); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 5e4318246a6..31babc5033f 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -22,7 +22,8 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( - std::pair & parsed_address, + ChannelPtr consumer_channel_, + RabbitMQHandler & eventHandler_, const String & exchange_name_, const String & routing_key_, const size_t channel_id_, @@ -42,10 +43,8 @@ private: using Messages = std::vector; using Queues = std::vector; - event_base * evbase; - RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; ChannelPtr consumer_channel; + RabbitMQHandler & eventHandler; const String & exchange_name; const String & routing_key; @@ -59,7 +58,8 @@ private: bool allowed = true; const std::atomic & stopped; - bool exchange_declared = false; + std::atomic exchange_declared; + std::atomic false_param; const size_t num_queues; Queues queues; bool subscribed = false; @@ -69,12 +69,14 @@ private: Messages messages; Messages::iterator current; + std::mutex mutex; + bool nextImpl() override; void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(); + void startEventLoop(std::atomic & check_param); }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fb20569200d..ed486e8e709 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -35,7 +35,9 @@ enum { - RESCHEDULE_WAIT = 500 + RESCHEDULE_WAIT = 500, + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 }; namespace DB @@ -75,10 +77,26 @@ StorageRabbitMQ::StorageRabbitMQ( , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) + , evbase(event_base_new()) + , eventHandler(evbase, log) + , connection(&eventHandler, + AMQP::Address(parsed_address.first, parsed_address.second,
AMQP::Login("root", "clickhouse"), "/")) { - rabbitmq_context.makeQueryContext(); + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + { + event_base_loop(evbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for consumer"); + } + + rabbitmq_context.makeQueryContext(); setColumns(columns_); + task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); @@ -184,7 +202,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() update_channel_id = true; return std::make_shared( - parsed_address, exchange_name, routing_key, next_channel_id, + std::make_shared(&connection), eventHandler, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index b334b48a301..fc098b168f1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -79,6 +79,10 @@ private: Poco::Logger * log; std::pair parsed_address; + event_base * evbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + Poco::Semaphore semaphore; std::mutex mutex; std::vector buffers; /// available buffers for RabbitMQ consumers From fa05641b063408804d92cd9eed54572a20a0ee21 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 29 May 2020 19:58:08 +0300 Subject: [PATCH 0269/2229] in-memory parts: partition commands --- src/Storages/MergeTree/MergeTreeData.cpp | 17 +++++++++++- .../MergeTree/MergeTreeDataPartInMemory.cpp | 19 +++++++------ .../MergeTree/MergeTreeDataPartInMemory.h | 2 ++ .../01130_in_memory_parts_partitons.reference | 14 ++++++++++ .../01130_in_memory_parts_partitons.sql | 27 +++++++++++++++++++ 5 files changed, 70 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e5c4744296d..6b6f3a2f45d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2021,6 +2021,9 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect if (part->state != IMergeTreeDataPart::State::Outdated) modifyPartState(part,IMergeTreeDataPart::State::Outdated); + + if (isInMemoryPart(part) && write_ahead_log) + write_ahead_log->dropPart(part->name); } } @@ -3317,6 +3320,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( if (disk->exists(dst_part_path)) throw Exception("Part in " + fullPath(disk, dst_part_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + /// If source part is in memory, flush it to disk and clone it already in on-disk format + if (auto * src_part_in_memory = dynamic_cast(src_part.get())) + { + auto flushed_part_path = tmp_part_prefix + src_part_in_memory->name; + src_part_in_memory->flushToDisk(relative_data_path, flushed_part_path); + src_part_path = src_part_in_memory->storage.relative_data_path + flushed_part_path + "/"; + } + LOG_DEBUG(log, "Cloning part " << fullPath(disk, src_part_path) << " to " << fullPath(disk, dst_part_path)); localBackup(disk, src_part_path, dst_part_path); disk->removeIfExists(dst_part_path + "/" + DELETE_ON_DESTROY_MARKER_PATH); @@ -3404,7 +3415,11 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & 
LOG_DEBUG(log, "Freezing part " << part->name << " snapshot will be placed at " + backup_path); String backup_part_path = backup_path + relative_data_path + part->relative_path; - localBackup(part->disk, part->getFullRelativePath(), backup_part_path); + if (auto part_in_memory = dynamic_cast(part.get())) + part_in_memory->flushToDisk(backup_path + relative_data_path, part->relative_path); + else + localBackup(part->disk, part->getFullRelativePath(), backup_part_path); + part->disk->removeIfExists(backup_part_path + "/" + DELETE_ON_DESTROY_MARKER_PATH); part->is_frozen.store(true, std::memory_order_relaxed); diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 5d376b88b74..bdcc00758d1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -13,6 +13,7 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int DIRECTORY_ALREADY_EXISTS; } @@ -60,13 +61,12 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter( return std::make_unique(ptr, columns_list, writer_settings); } -void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const +void MergeTreeDataPartInMemory::flushToDisk(const String & base_path, const String & new_relative_path) const { - String detached_path = getRelativePathForDetachedPart(prefix); - String destination_path = storage.getRelativeDataPath() + getRelativePathForDetachedPart(prefix); + String destination_path = base_path + new_relative_path; auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count); - auto new_data_part = storage.createPart(name, new_type, info, disk, detached_path); + auto new_data_part = storage.createPart(name, new_type, info, disk, new_relative_path); new_data_part->setColumns(columns); new_data_part->partition.value.assign(partition.value); @@ -74,8 +74,8 @@ void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const if (disk->exists(destination_path)) { - LOG_WARNING(&Logger::get(storage.getLogName()), "Removing old temporary directory " + disk->getPath() + destination_path); - disk->removeRecursive(destination_path); + throw Exception("Could not flush part " + quoteString(getFullPath()) + + ". 
Part in " + fullPath(disk, destination_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); } disk->createDirectories(destination_path); @@ -85,9 +85,12 @@ void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const out.writePrefix(); out.write(block); out.writeSuffixAndFinalizePart(new_data_part); +} - if (storage.getSettings()->in_memory_parts_enable_wal) - storage.getWriteAheadLog()->dropPart(name); +void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const +{ + String detached_path = getRelativePathForDetachedPart(prefix); + flushToDisk(storage.getRelativeDataPath(), detached_path); } bool MergeTreeDataPartInMemory::waitUntilMerged(size_t timeout) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 437bbb8e308..08e63ac182e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -43,6 +43,8 @@ public: void renameTo(const String & /*new_relative_path*/, bool /*remove_new_dir_if_exists*/) const override {} void makeCloneInDetached(const String & prefix) const override; + void flushToDisk(const String & base_path, const String & new_relative_path) const; + bool waitUntilMerged(size_t timeout) const override; void notifyMerged() const override; diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference index 4f9c1b40e27..b9daa88b4ca 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.reference @@ -20,3 +20,17 @@ 2_4_4_0 Compact 3_3_3_0 InMemory ================== +2 4 aa +2 5 bb +3 6 qq +3 7 ww +================== +2 4 aa +2 5 bb +3 6 cc +3 7 dd +t2 2_4_4_0 Compact +t2 3_6_6_0 Compact +t3 3_1_1_0 InMemory +================== +3_1_1_0 InMemory 1 diff --git a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql index 88b0e3322ed..c07d65f114c 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts_partitons.sql @@ -4,6 +4,8 @@ CREATE TABLE t2(id UInt32, a UInt64, s String) ENGINE = MergeTree ORDER BY a PARTITION BY id SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; +SYSTEM STOP MERGES t2; + INSERT INTO t2 VALUES (1, 2, 'foo'), (1, 3, 'bar'); INSERT INTO t2 VALUES (2, 4, 'aa'), (2, 5, 'bb'); INSERT INTO t2 VALUES (3, 6, 'qq'), (3, 7, 'ww'); @@ -23,3 +25,28 @@ ALTER TABLE t2 ATTACH PARTITION 2; SELECT * FROM t2 ORDER BY a; SELECT name, part_type FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; SELECT '=================='; + +DETACH TABLE t2; +ATTACH TABLE t2; + +SELECT * FROM t2 ORDER BY a; +SELECT '=================='; + +DROP TABLE IF EXISTS t3; + +CREATE TABLE t3(id UInt32, a UInt64, s String) + ENGINE = MergeTree ORDER BY a PARTITION BY id + SETTINGS min_rows_for_compact_part = 1000, min_rows_for_wide_part = 2000; + +INSERT INTO t3 VALUES (3, 6, 'cc'), (3, 7, 'dd'); +ALTER TABLE t2 REPLACE PARTITION 3 FROM t3; +SELECT * FROM t2 ORDER BY a; +SELECT table, name, part_type FROM system.parts WHERE table = 't2' AND active AND database = currentDatabase() ORDER BY name; +SELECT table, name, part_type FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; +SELECT 
'=================='; + +ALTER TABLE t3 FREEZE PARTITION 3; +SELECT name, part_type, is_frozen FROM system.parts WHERE table = 't3' AND active AND database = currentDatabase() ORDER BY name; + +DROP TABLE t2; +DROP TABLE t3; From ec9cb953b46a1d85813a218a158e108f72f33d87 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 29 May 2020 23:26:41 +0400 Subject: [PATCH 0270/2229] Try to find and link with Cyrus SASL and GSSAPI too, if static OpenLDAP libraries are requested --- cmake/Modules/FindOpenLDAP.cmake | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/FindOpenLDAP.cmake b/cmake/Modules/FindOpenLDAP.cmake index ec559e9a7b5..6420678ad3c 100644 --- a/cmake/Modules/FindOpenLDAP.cmake +++ b/cmake/Modules/FindOpenLDAP.cmake @@ -7,8 +7,9 @@ # # Sets values of: # OPENLDAP_FOUND - TRUE if found -# OPENLDAP_INCLUDE_DIRS - list of paths to the include directories -# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries +# OPENLDAP_INCLUDE_DIRS - paths to the include directories +# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries; libsasl2 (Cyrus SASL) and libgssapi (GSSAPI) libraries +# will be listed here too, if found, if static OpenLDAP libraries are requested # OPENLDAP_LDAP_LIBRARY - paths to the libldap library # OPENLDAP_LBER_LIBRARY - paths to the liblber library # @@ -31,16 +32,34 @@ if(OPENLDAP_ROOT_DIR) find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH) find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) find_library(OPENLDAP_LBER_LIBRARY NAMES "lber" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) + if(OPENLDAP_USE_STATIC_LIBS) + find_library(_cyrus_sasl_lib NAMES "sasl2" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) + find_library(_gssapi_lib NAMES "gssapi" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) + endif() else() find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h") find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}") find_library(OPENLDAP_LBER_LIBRARY NAMES "lber") + if(OPENLDAP_USE_STATIC_LIBS) + find_library(_cyrus_sasl_lib NAMES "sasl2") + find_library(_gssapi_lib NAMES "gssapi") + endif() endif() unset(_r_suffix) set(OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY}) +if(_cyrus_sasl_lib) + list(APPEND OPENLDAP_LIBRARIES ${_cyrus_sasl_lib}) + unset(_cyrus_sasl_lib) +endif() + +if(_gssapi_lib) + list(APPEND OPENLDAP_LIBRARIES ${_gssapi_lib}) + unset(_gssapi_lib) +endif() + include(FindPackageHandleStandardArgs) find_package_handle_standard_args( OpenLDAP DEFAULT_MSG From d9bb3ef91ba801f4752dde77032b43e892395e14 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 22:48:32 +0300 Subject: [PATCH 0271/2229] Add logging and adjust initialization --- base/daemon/BaseDaemon.cpp | 2 +- base/daemon/SentryWriter.cpp | 38 ++++++++++++++++++++++++++++++++++-- base/daemon/ya.make | 1 + 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index a8a79827552..4fd5bfa1379 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -532,7 +532,6 @@ void BaseDaemon::initialize(Application & self) } reloadConfiguration(); - SentryWriter::initialize(config()); /// This must be done before creation of any files (including logs). 
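/// (umask 0027 masks group-write and all world permissions, so new files default to mode 0640 and new directories to 0750.)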
mode_t umask_num = 0027; @@ -658,6 +657,7 @@ void BaseDaemon::initialize(Application & self) void BaseDaemon::initializeTerminationAndSignalProcessing() { + SentryWriter::initialize(config()); std::set_terminate(terminate_handler); /// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead. diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 7e2a95c8369..d5c2766cf21 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -1,7 +1,11 @@ #include +#include +#include + #include #include +#include #if !defined(ARCADIA_BUILD) # include "Common/config_version.h" #endif @@ -44,20 +48,45 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { "send_crash_reports.endpoint", "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277" ); + const std::string & temp_folder_path = config.getString( + "send_crash_reports.tmp_path", + config.getString("tmp_path", Poco::Path::temp()) + "sentry/" + ); + Poco::File(temp_folder_path).createDirectories(); + sentry_options_t * options = sentry_options_new(); sentry_options_set_release(options, VERSION_STRING); if (debug) { sentry_options_set_debug(options, 1); } - sentry_init(options); sentry_options_set_dsn(options, endpoint.c_str()); + sentry_options_set_database_path(options, temp_folder_path.c_str()); if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { sentry_options_set_environment(options, "prod"); } else { sentry_options_set_environment(options, "test"); } - initialized = true; + int init_status = sentry_init(options); + if (!init_status) + { + initialized = true; + LOG_INFO( + &Logger::get("SentryWriter"), + "Sending crash reports is initialized with {} endpoint and {} temp folder", + endpoint, + temp_folder_path + ); + } + else + { + LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialized with {} status", init_status); + } + + } + else + { + LOG_INFO(&Logger::get("SentryWriter"), "Sending crash reports is disabled"); } #endif } @@ -140,8 +169,13 @@ void SentryWriter::onFault( sentry_value_set_by_key(event, "threads", threads); + LOG_INFO(&Logger::get("SentryWriter"), "Sending crash report"); sentry_capture_event(event); shutdown(); } + else + { + LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); + } #endif } diff --git a/base/daemon/ya.make b/base/daemon/ya.make index 1c72af3ed53..125417adca5 100644 --- a/base/daemon/ya.make +++ b/base/daemon/ya.make @@ -9,6 +9,7 @@ PEERDIR( SRCS( BaseDaemon.cpp GraphiteWriter.cpp + SentryWriter.cpp ) END() From 4ef322274d117ecb6d04f79c4f73d0447b961c64 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 22:53:16 +0300 Subject: [PATCH 0272/2229] Add integration test --- .gitignore | 1 + programs/server/config.xml | 6 +++ .../configs/config_send_crash_reports.xml | 8 ++++ .../test_send_crash_reports/http_server.py | 43 ++++++++++++++++++ .../test_send_crash_reports/test.py | 44 +++++++++++++++++++ 5 files changed, 102 insertions(+) create mode 100644 tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml create mode 100644 tests/integration/test_send_crash_reports/http_server.py create mode 100644 tests/integration/test_send_crash_reports/test.py diff --git a/.gitignore b/.gitignore index 6bd57911ac8..afb4e67a1b8 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ /build /build_* /build-* +/tests/venv /docs/build /docs/publish diff --git 
a/programs/server/config.xml b/programs/server/config.xml index e16af9d75d7..d07f20aa0e0 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -42,6 +42,12 @@ --> + + + + false + + 8123 9000 diff --git a/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml b/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml new file mode 100644 index 00000000000..10f559b0054 --- /dev/null +++ b/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml @@ -0,0 +1,8 @@ + + + + true + true + http://6f33034cfe684dd7a3ab9875e57b1c8d@localhost:9500/5226277 + + diff --git a/tests/integration/test_send_crash_reports/http_server.py b/tests/integration/test_send_crash_reports/http_server.py new file mode 100644 index 00000000000..e3fa2e1cb57 --- /dev/null +++ b/tests/integration/test_send_crash_reports/http_server.py @@ -0,0 +1,43 @@ +import BaseHTTPServer + +RESULT_PATH = '/result.txt' + +class SentryHandler(BaseHTTPServer.BaseHTTPRequestHandler): + def do_POST(self): + post_data = self.__read_and_decode_post_data() + with open(RESULT_PATH, 'w') as f: + if self.headers.get("content-type") != "application/x-sentry-envelope": + f.write("INCORRECT_CONTENT_TYPE") + elif self.headers.get("content-length") < 3000: + f.write("INCORRECT_CONTENT_LENGTH") + elif '"http://6f33034cfe684dd7a3ab9875e57b1c8d@localhost:9500/5226277"' not in post_data: + f.write('INCORRECT_POST_DATA') + else: + f.write("OK") + self.send_response(200) + + def __read_and_decode_post_data(self): + transfer_encoding = self.headers.get("transfer-Encoding") + decoded = "" + if transfer_encoding == "chunked": + while True: + s = self.rfile.readline() + chunk_length = int(s, 16) + if not chunk_length: + break + decoded += self.rfile.read(chunk_length) + self.rfile.readline() + else: + content_length = int(self.headers.get("content-length", 0)) + decoded = self.rfile.read(content_length) + return decoded + + +if __name__ == "__main__": + with open(RESULT_PATH, 'w') as f: + f.write("INITIAL_STATE") + httpd = BaseHTTPServer.HTTPServer(("localhost", 9500,), SentryHandler) + try: + httpd.serve_forever() + finally: + httpd.server_close() \ No newline at end of file diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py new file mode 100644 index 00000000000..f9e95f953d0 --- /dev/null +++ b/tests/integration/test_send_crash_reports/test.py @@ -0,0 +1,44 @@ +import os +import time + +import pytest + +import helpers.cluster +import helpers.test_tools +import http_server + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def started_node(): + cluster = helpers.cluster.ClickHouseCluster(__file__) + try: + node = cluster.add_instance("node", main_configs=[ + os.path.join(SCRIPT_DIR, "configs", "config_send_crash_reports.xml") + ]) + cluster.start() + yield node + finally: + cluster.shutdown() + + +def test_send_segfault(started_node,): + started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "http_server.py"), "/http_server.py") + started_node.exec_in_container(["bash", "-c", "python2 /http_server.py"], detach=True, user="root") + time.sleep(0.5) + started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root") + + result = None + for attempt in range(1, 6): + time.sleep(0.25 * attempt) + result = started_node.exec_in_container(['cat', http_server.RESULT_PATH], user='root') + if result == 'OK': + break + elif result == 'INITIAL_STATE': 
+ continue + elif result: + assert False, 'Unexpected state: ' + result + + assert result == 'OK', 'Crash report not sent' From 0386e526b2c7cbf13e017f5445ea79eb4f24f67a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:03:59 +0300 Subject: [PATCH 0273/2229] grammar --- programs/server/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index d07f20aa0e0..6086fcd7b1d 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -43,7 +43,7 @@ - + false From a84123195b7fe4677c417de9fe5483c5c283ec13 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:08:05 +0300 Subject: [PATCH 0274/2229] adjust comments --- base/daemon/CMakeLists.txt | 1 - base/daemon/SentryWriter.h | 1 + src/Common/config.h.in | 3 --- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 46fa4a0fe34..0b6a7188c83 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -9,5 +9,4 @@ target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickh if (USE_SENTRY) target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) -# target_include_directories (daemon SYSTEM BEFORE PRIVATE ${SENTRY_INCLUDE_DIR}) endif () \ No newline at end of file diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h index ee45ae4f203..0b3f1ddd2b7 100644 --- a/base/daemon/SentryWriter.h +++ b/base/daemon/SentryWriter.h @@ -7,6 +7,7 @@ #include +/// Sends crash reports to ClickHouse core developer team via https://sentry.io class SentryWriter { public: diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 08fa03d659f..ed818b53167 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,9 +9,6 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 USE_OPENCL -<<<<<<< HEAD #cmakedefine01 USE_SENTRY -======= #cmakedefine01 USE_GRPC ->>>>>>> a4e40fb5f209539cfee6af5da7f27c1c96e02eac #cmakedefine01 CLICKHOUSE_SPLIT_BINARY From d0339413993371c37577ce513ecf0555683878b8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:20:28 +0300 Subject: [PATCH 0275/2229] try to fix merge issues --- contrib/grpc | 2 +- contrib/jemalloc | 2 +- programs/server/Server.cpp | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/contrib/grpc b/contrib/grpc index c1d176528fd..8aea4e168e7 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit c1d176528fd8da9dd4066d16554bcd216d29033f +Subproject commit 8aea4e168e78f3eb9828080740fc8cb73d53bf79 diff --git a/contrib/jemalloc b/contrib/jemalloc index cd2931ad9bb..ea6b3e973b4 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff +Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756 diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 77dc5305fa8..ce1d35e65d4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -59,13 +59,7 @@ #include #include #include -<<<<<<< HEAD -#include "MySQLHandlerFactory.h" -#include - -======= #include ->>>>>>> a4e40fb5f209539cfee6af5da7f27c1c96e02eac #if !defined(ARCADIA_BUILD) # include "config_core.h" From b6e4a2ec61c928a433037fefa0657df7ebf8b8ac Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:21:53 +0300 Subject: [PATCH 0276/2229] one more merge issue --- contrib/cppkafka | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cppkafka b/contrib/cppkafka index 9b184d881c1..f555ee36aaa 160000 --- a/contrib/cppkafka +++ b/contrib/cppkafka @@ -1 +1 @@ -Subproject commit 9b184d881c15cc50784b28688c7c99d3d764db24 +Subproject commit f555ee36aaa74d17ca0dab3ce472070a610b2966 From 69dedcbe21bd323f3d87508ba78f36b587a7dff5 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:28:55 +0300 Subject: [PATCH 0277/2229] Move sending crash reports below logging --- base/daemon/BaseDaemon.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 4fd5bfa1379..72da1984287 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -249,7 +249,6 @@ private: UInt32 thread_num, const std::string & query_id) const { - SentryWriter::onFault(sig, info, context, stack_trace); LOG_FATAL(log, "########################################"); { @@ -282,6 +281,9 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); + + /// Send crash report if configured + SentryWriter::onFault(sig, info, context, stack_trace); } }; From f88b85625a44cbcb1628dc76283567c6fceeedd7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:36:47 +0300 Subject: [PATCH 0278/2229] style --- base/daemon/SentryWriter.cpp | 71 +++++++++++++++++------------------- src/Common/StackTrace.cpp | 52 +++++++++++++------------- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index d5c2766cf21..7bbf3c62e97 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -3,35 +3,38 @@ #include #include -#include #include #include #if !defined(ARCADIA_BUILD) -# include "Common/config_version.h" +# include "Common/config_version.h" +# include #endif #if USE_SENTRY -#include +# include #endif -namespace { - static bool initialized = false; +namespace +{ +static bool initialized = false; - void setExtras() { +void setExtras() +{ #if USE_SENTRY - sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); - sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); - sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); - sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION)); - sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); - sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); - sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); + sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); + sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); + sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); + sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION)); + sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); + sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); + sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); #endif - } +} } -void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { +void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) +{ #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); @@ -44,14 
+47,10 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { } if (enabled) { - const std::string & endpoint = config.getString( - "send_crash_reports.endpoint", - "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277" - ); - const std::string & temp_folder_path = config.getString( - "send_crash_reports.tmp_path", - config.getString("tmp_path", Poco::Path::temp()) + "sentry/" - ); + const std::string & endpoint + = config.getString("send_crash_reports.endpoint", "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); + const std::string & temp_folder_path + = config.getString("send_crash_reports.tmp_path", config.getString("tmp_path", Poco::Path::temp()) + "sentry/"); Poco::File(temp_folder_path).createDirectories(); sentry_options_t * options = sentry_options_new(); @@ -62,9 +61,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { } sentry_options_set_dsn(options, endpoint.c_str()); sentry_options_set_database_path(options, temp_folder_path.c_str()); - if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { + if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) + { sentry_options_set_environment(options, "prod"); - } else { + } + else + { sentry_options_set_environment(options, "test"); } int init_status = sentry_init(options); @@ -75,14 +77,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { &Logger::get("SentryWriter"), "Sending crash reports is initialized with {} endpoint and {} temp folder", endpoint, - temp_folder_path - ); + temp_folder_path); } else { LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialized with {} status", init_status); } - } else { @@ -91,20 +91,17 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { #endif } -void SentryWriter::shutdown() { +void SentryWriter::shutdown() +{ #if USE_SENTRY - if (initialized) { + if (initialized) + { sentry_shutdown(); } #endif } -void SentryWriter::onFault( - int sig, - const siginfo_t & info, - const ucontext_t & context, - const StackTrace & stack_trace - ) +void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY if (initialized) @@ -178,4 +175,4 @@ void SentryWriter::onFault( LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); } #endif -} +} \ No newline at end of file diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 5cc8c43a27a..2fd554fd008 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -1,12 +1,12 @@ #include +#include #include #include -#include #include +#include #include #include -#include #include #include @@ -26,8 +26,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext std::stringstream error; switch (sig) { - case SIGSEGV: - { + case SIGSEGV: { /// Print info about address and reason. 
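/// For SIGSEGV the kernel fills siginfo_t::si_addr with the faulting address; a null si_addr
/// almost always means a null-pointer dereference, hence the special case below.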
if (nullptr == info.si_addr) error << "Address: NULL pointer."; @@ -59,8 +58,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGBUS: - { + case SIGBUS: { switch (info.si_code) { case BUS_ADRALN: @@ -92,8 +90,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGILL: - { + case SIGILL: { switch (info.si_code) { case ILL_ILLOPC: @@ -127,8 +124,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGFPE: - { + case SIGFPE: { switch (info.si_code) { case FPE_INTDIV: @@ -162,8 +158,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGTSTP: - { + case SIGTSTP: { error << "This is a signal used for debugging purposes by the user."; break; } @@ -176,13 +171,13 @@ static void * getCallerAddress(const ucontext_t & context) { #if defined(__x86_64__) /// Get the address at the time the signal was raised from the RIP (x86-64) -#if defined(__FreeBSD__) +# if defined(__FreeBSD__) return reinterpret_cast(context.uc_mcontext.mc_rip); -#elif defined(__APPLE__) +# elif defined(__APPLE__) return reinterpret_cast(context.uc_mcontext->__ss.__rip); -#else +# else return reinterpret_cast(context.uc_mcontext.gregs[REG_RIP]); -#endif +# endif #elif defined(__aarch64__) return reinterpret_cast(context.uc_mcontext.pc); #else @@ -197,7 +192,8 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; - for (size_t i = 0; i < offset; ++i) { + for (size_t i = 0; i < offset; ++i) + { frames.value()[i].virtual_addr = frame_pointers[i]; } @@ -217,7 +213,8 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) { + if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) + { current_frame.file = location.file.toString(); current_frame.line = location.line; } @@ -239,8 +236,9 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t current_frame.symbol = "?"; } } -# else - for (size_t i = 0; i < size; ++i) { +#else + for (size_t i = 0; i < size; ++i) + { frames.value()[i].virtual_addr = frame_pointers[i]; } UNUSED(offset); @@ -308,14 +306,16 @@ const StackTrace::FramePointers & StackTrace::getFramePointers() const const StackTrace::Frames & StackTrace::getFrames() const { - if (!frames.has_value()) { + if (!frames.has_value()) + { frames = {{}}; symbolize(frame_pointers.data(), offset, size, frames); } return frames; } -static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) +static void +toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) { if (size == 0) return callback(""); @@ -324,7 +324,7 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs for (size_t i = offset; i < size; ++i) { - const StackTrace::Frame& current_frame = frames.value()[i]; + const StackTrace::Frame & current_frame = frames.value()[i]; out << i << ". 
"; if (current_frame.file.has_value() && current_frame.line.has_value()) @@ -338,7 +338,8 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs } out << " @ " << current_frame.physical_addr; - if (current_frame.object.has_value()) { + if (current_frame.object.has_value()) + { out << " in " << current_frame.object.value(); } @@ -362,7 +363,8 @@ void StackTrace::toStringEveryLine(std::function call toStringEveryLineImpl(getFrames(), offset, size, std::move(callback)); } -void StackTrace::resetFrames() { +void StackTrace::resetFrames() +{ frames.reset(); } From 444026494f9383d465ab9b9611c8bfc935661d85 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:52:49 +0300 Subject: [PATCH 0279/2229] brief docs --- .../settings.md | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 5961c701283..a103473a4ea 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -307,11 +307,11 @@ Logging settings. Keys: -- level – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. -- log – The log file. Contains all the entries according to `level`. -- errorlog – Error log file. -- size – Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. -- count – The number of archived log files that ClickHouse stores. +- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. +- `log` – The log file. Contains all the entries according to `level`. +- `errorlog` – Error log file. +- `size` – Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. +- `count` – The number of archived log files that ClickHouse stores. **Example** @@ -348,6 +348,27 @@ Keys: Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.` - format – Message format. Possible values: `bsd` and `syslog.` +## send_crash_reports {#server_configuration_parameters-logger} + +Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). +Enabling it, especially in pre-production environments, is strongly appreciated. + +Keys: + +- `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports. +- `endpoint` – Overrides the Sentry endpoint. +- `debug` - Sets the Sentry client into debug mode. +- `tmp_path` - Filesystem path for temporary crash report state. + + +**Recommended way to use** + +``` xml + + true + +``` + ## macros {#macros} Parameter substitutions for replicated tables. 
From 95ca1c648da4e95ec1cb252afd6e79216f4f5aec Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 10:59:43 +0300 Subject: [PATCH 0280/2229] fix __msan_unpoison --- src/Common/StackTrace.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 2fd554fd008..e38bfa25dff 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -378,13 +378,13 @@ std::string StackTrace::toString() const return func_cached(frame_pointers.data(), offset, size); } -std::string StackTrace::toString(void ** frame_pointers, size_t offset, size_t size) +std::string StackTrace::toString(void ** frame_pointers_, size_t offset, size_t size) { - __msan_unpoison(frames_, size * sizeof(*frames_)); + __msan_unpoison(frame_pointers_, size * sizeof(*frame_pointers_)); StackTrace::FramePointers frame_pointers_copy{}; for (size_t i = 0; i < size; ++i) - frame_pointers_copy[i] = frame_pointers[i]; + frame_pointers_copy[i] = frame_pointers_[i]; static SimpleCache func_cached; return func_cached(frame_pointers_copy.data(), offset, size); From 6dfe44f437c393ded72f8859ca16d9625b8fbb53 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:01:15 +0300 Subject: [PATCH 0281/2229] style --- src/Common/StackTrace.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index e38bfa25dff..aa78ab62f9b 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -26,7 +26,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext std::stringstream error; switch (sig) { - case SIGSEGV: { + case SIGSEGV: + { /// Print info about address and reason. if (nullptr == info.si_addr) error << "Address: NULL pointer."; @@ -58,7 +59,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGBUS: { + case SIGBUS: + { switch (info.si_code) { case BUS_ADRALN: @@ -90,7 +92,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGILL: { + case SIGILL: + { switch (info.si_code) { case ILL_ILLOPC: @@ -124,7 +127,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGFPE: { + case SIGFPE: + { switch (info.si_code) { case FPE_INTDIV: @@ -158,7 +162,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGTSTP: { + case SIGTSTP: + { error << "This is a signal used for debugging purposes by the user."; break; } From 77d8c9bacae6b833a28a85ab45442355c0f4b2df Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:02:13 +0300 Subject: [PATCH 0282/2229] Add anonymize option and version tag --- base/daemon/SentryWriter.cpp | 17 +++++++++++++---- cmake/version.cmake | 1 + .../server-configuration-parameters/settings.md | 1 + src/Common/config_version.h.in | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 7bbf3c62e97..878ce6548aa 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -18,10 +18,16 @@ namespace { static bool initialized = false; +static bool anonymize = false; void setExtras() { #if USE_SENTRY + if (!anonymize) + { + sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str())); + } + sentry_set_tag("version", VERSION_STRING_SHORT); sentry_set_extra("version_githash", 
sentry_value_new_string(VERSION_GITHASH)); sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); @@ -69,15 +75,19 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { sentry_options_set_environment(options, "test"); } + int init_status = sentry_init(options); if (!init_status) { initialized = true; + anonymize = config.getBool("send_crash_reports.anonymize", false); + const std::string& anonymize_status = anonymize ? " (anonymized)" : ""; LOG_INFO( &Logger::get("SentryWriter"), - "Sending crash reports is initialized with {} endpoint and {} temp folder", + "Sending crash reports is initialized with {} endpoint and {} temp folder{}", endpoint, - temp_folder_path); + temp_folder_path, + anonymize_status); } else { @@ -109,7 +119,6 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c const std::string & error_message = signalToErrorMessage(sig, info, context); sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); sentry_set_tag("signal", strsignal(sig)); - sentry_set_tag("server_name", getFQDNOrHostName().c_str()); sentry_set_extra("signal_number", sentry_value_new_int32(sig)); setExtras(); @@ -175,4 +184,4 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); } #endif -} \ No newline at end of file +} diff --git a/cmake/version.cmake b/cmake/version.cmake index eea17f68c47..963f291c0f3 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -14,6 +14,7 @@ endif () set (VERSION_NAME "${PROJECT_NAME}") set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") set (VERSION_SO "${VERSION_STRING}") +set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a103473a4ea..ba8f3df9ad0 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -357,6 +357,7 @@ Keys: - `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports. - `endpoint` – Overrides the Sentry endpoint. +- `anonymize` - Avoid attaching the server hostname to crash report. - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. 
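A minimal sketch of a server config enabling the option added in this patch; since the code above reads it via `config.getBool("send_crash_reports.anonymize", false)`, the key defaults to off and only needs to be set when turning the behaviour on:

``` xml
<send_crash_reports>
    <enabled>true</enabled>
    <!-- do not attach the server hostname to the report -->
    <anonymize>true</anonymize>
</send_crash_reports>
```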
diff --git a/src/Common/config_version.h.in b/src/Common/config_version.h.in index bc90e63e39c..c3c0c6df87b 100644 --- a/src/Common/config_version.h.in +++ b/src/Common/config_version.h.in @@ -20,6 +20,7 @@ #cmakedefine VERSION_MINOR @VERSION_MINOR@ #cmakedefine VERSION_PATCH @VERSION_PATCH@ #cmakedefine VERSION_STRING "@VERSION_STRING@" +#cmakedefine VERSION_STRING_SHORT "@VERSION_STRING_SHORT@" #cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@" #cmakedefine VERSION_FULL "@VERSION_FULL@" #cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@" From d154415a5bedf24a0217306c1d7798b718a2995a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:13:04 +0300 Subject: [PATCH 0283/2229] adjust comments --- .../en/operations/server-configuration-parameters/settings.md | 4 +++- programs/server/config.xml | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index ba8f3df9ad0..3dc68e7fa6a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -351,7 +351,9 @@ Keys: ## send_crash_reports {#server_configuration_parameters-logger} Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). -Enabling it, especially in pre-production environments, is strongly appreciated. +Enabling it, especially in pre-production environments, is greatly appreciated. + +The server will need access to the public Internet for this feature to function properly. Keys: diff --git a/programs/server/config.xml b/programs/server/config.xml index 6086fcd7b1d..d8d75222bc0 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -46,6 +46,8 @@ false + + false From 52f7b9545b17304aa8e23373b77ab620fb338d50 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:24:21 +0300 Subject: [PATCH 0284/2229] Add http_proxy option --- base/daemon/SentryWriter.cpp | 6 ++++++ .../operations/server-configuration-parameters/settings.md | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 878ce6548aa..b2b1c69af8c 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -76,6 +76,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) sentry_options_set_environment(options, "test"); } + const std::string & http_proxy = config.getString("send_crash_reports.http_proxy", ""); + if (!http_proxy.empty()) + { + sentry_options_set_http_proxy(options, http_proxy.c_str()); + } + int init_status = sentry_init(options); if (!init_status) { diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 3dc68e7fa6a..194293d5a19 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -359,7 +359,8 @@ Keys: - `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports. - `endpoint` – Overrides the Sentry endpoint. -- `anonymize` - Avoid attaching the server hostname to crash report. +- `anonymize` - Avoid attaching the server hostname to the crash report. +- `http_proxy` - Configure HTTP proxy for sending crash reports. - `debug` - Sets the Sentry client into debug mode. 
- `tmp_path` - Filesystem path for temporary crash report state. From f9009809f2dbc935093e039592bdbdddd6e5412e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 30 May 2020 12:22:05 +0300 Subject: [PATCH 0285/2229] Delete order_by_with_limit.xml Delete new test to run old ones. --- tests/performance/order_by_with_limit.xml | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 tests/performance/order_by_with_limit.xml diff --git a/tests/performance/order_by_with_limit.xml b/tests/performance/order_by_with_limit.xml deleted file mode 100644 index b45f42071de..00000000000 --- a/tests/performance/order_by_with_limit.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - sorting - comparison - - - SELECT rand64() AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 500 - - From be94d8454dcefa117d43a8d96a01e4164be4ea51 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 13:54:57 +0300 Subject: [PATCH 0286/2229] fix Arcadia build --- base/daemon/SentryWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index b2b1c69af8c..2fd846b720a 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -11,7 +11,7 @@ #endif #if USE_SENTRY -# include +# include // Y_IGNORE #endif From b4b5c90343be4391404aaae381fa96590a0bbf32 Mon Sep 17 00:00:00 2001 From: MovElb Date: Sat, 30 May 2020 20:04:02 +0300 Subject: [PATCH 0287/2229] squash --- programs/server/PostgreSQLHandler.cpp | 306 ++++++ programs/server/PostgreSQLHandler.h | 75 ++ programs/server/PostgreSQLHandlerFactory.cpp | 28 + programs/server/PostgreSQLHandlerFactory.h | 29 + programs/server/Server.cpp | 1 + src/Common/CurrentMetrics.cpp | 2 +- src/Core/PostgreSQLProtocol.cpp | 50 + src/Core/PostgreSQLProtocol.h | 914 ++++++++++++++++++ src/Core/ya.make | 1 + src/Formats/FormatFactory.cpp | 2 + src/Formats/FormatFactory.h | 1 + src/IO/WriteHelpers.h | 20 + .../Formats/Impl/PostgreSQLOutputFormat.cpp | 79 ++ .../Formats/Impl/PostgreSQLOutputFormat.h | 33 + src/Processors/ya.make | 1 + .../test_postgresql_protocol/__init__.py | 0 .../clients/java/0.reference | 15 + .../clients/java/Dockerfile | 18 + .../clients/java/Test.java | 83 ++ .../clients/java/docker_compose.yml | 8 + .../clients/psql/docker_compose.yml | 14 + .../configs/config.xml | 35 + .../configs/dhparam.pem | 25 + .../configs/server.crt | 18 + .../configs/server.key | 28 + .../configs/users.xml | 13 + .../test_postgresql_protocol/test.py | 148 +++ 27 files changed, 1946 insertions(+), 1 deletion(-) create mode 100644 programs/server/PostgreSQLHandler.cpp create mode 100644 programs/server/PostgreSQLHandler.h create mode 100644 programs/server/PostgreSQLHandlerFactory.cpp create mode 100644 programs/server/PostgreSQLHandlerFactory.h create mode 100644 src/Core/PostgreSQLProtocol.cpp create mode 100644 src/Core/PostgreSQLProtocol.h create mode 100644 src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp create mode 100644 src/Processors/Formats/Impl/PostgreSQLOutputFormat.h create mode 100644 tests/integration/test_postgresql_protocol/__init__.py create mode 100644 tests/integration/test_postgresql_protocol/clients/java/0.reference create mode 100644 tests/integration/test_postgresql_protocol/clients/java/Dockerfile create mode 100644 tests/integration/test_postgresql_protocol/clients/java/Test.java create mode 100644 tests/integration/test_postgresql_protocol/clients/java/docker_compose.yml create mode 100644 
tests/integration/test_postgresql_protocol/clients/psql/docker_compose.yml create mode 100644 tests/integration/test_postgresql_protocol/configs/config.xml create mode 100644 tests/integration/test_postgresql_protocol/configs/dhparam.pem create mode 100644 tests/integration/test_postgresql_protocol/configs/server.crt create mode 100644 tests/integration/test_postgresql_protocol/configs/server.key create mode 100644 tests/integration/test_postgresql_protocol/configs/users.xml create mode 100644 tests/integration/test_postgresql_protocol/test.py diff --git a/programs/server/PostgreSQLHandler.cpp b/programs/server/PostgreSQLHandler.cpp new file mode 100644 index 00000000000..843135ed3a1 --- /dev/null +++ b/programs/server/PostgreSQLHandler.cpp @@ -0,0 +1,306 @@ +#include +#include +#include +#include +#include +#include "PostgreSQLHandler.h" +#include +#include + +#if !defined(ARCADIA_BUILD) +# include +#endif + +#if USE_SSL +# include +# include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +PostgreSQLHandler::PostgreSQLHandler( + const Poco::Net::StreamSocket & socket_, + IServer & server_, + bool ssl_enabled_, + Int32 connection_id_, + std::vector> & auth_methods_) + : Poco::Net::TCPServerConnection(socket_) + , server(server_) + , connection_context(server.context()) + , ssl_enabled(ssl_enabled_) + , connection_id(connection_id_) + , authentication_manager(auth_methods_) +{ + changeIO(socket()); +} + +void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket) +{ + in = std::make_shared(socket); + out = std::make_shared(socket); + message_transport = std::make_shared(in.get(), out.get()); +} + +void PostgreSQLHandler::run() +{ + connection_context.makeSessionContext(); + connection_context.setDefaultFormat("PostgreSQLWire"); + + try + { + if (!startUp()) + return; + + while (true) + { + message_transport->send(PostgreSQLProtocol::Messaging::ReadyForQuery(), true); + PostgreSQLProtocol::Messaging::FrontMessageType message_type = message_transport->receiveMessageType(); + + switch (message_type) + { + case PostgreSQLProtocol::Messaging::FrontMessageType::QUERY: + processQuery(); + break; + case PostgreSQLProtocol::Messaging::FrontMessageType::TERMINATE: + LOG_INFO(log, "Client closed the connection"); + return; + case PostgreSQLProtocol::Messaging::FrontMessageType::PARSE: + case PostgreSQLProtocol::Messaging::FrontMessageType::BIND: + case PostgreSQLProtocol::Messaging::FrontMessageType::DESCRIBE: + case PostgreSQLProtocol::Messaging::FrontMessageType::SYNC: + case PostgreSQLProtocol::Messaging::FrontMessageType::FLUSH: + case PostgreSQLProtocol::Messaging::FrontMessageType::CLOSE: + message_transport->send( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse::ERROR, + "0A000", + "ClickHouse doesn't support the extended query mechanism"), + true); + LOG_ERROR(log, "Client tried to access via extended query protocol"); + message_transport->dropMessage(); + break; + default: + message_transport->send( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse::ERROR, + "0A000", + "Command is not supported"), + true); + LOG_ERROR(log, Poco::format("Command is not supported. 
Command code %d", static_cast(message_type))); + message_transport->dropMessage(); + } + } + } + catch (const Poco::Exception &exc) + { + log->log(exc); + } + +} + +bool PostgreSQLHandler::startUp() +{ + Int32 payload_size; + Int32 info; + establishSecureConnection(payload_size, info); + + if (static_cast(info) == PostgreSQLProtocol::Messaging::FrontMessageType::CANCEL_REQUEST) + { + LOG_INFO(log, "Client issued request canceling"); + cancelRequest(); + return false; + } + + std::unique_ptr start_up_msg = receiveStartUpMessage(payload_size); + authentication_manager.authenticate(start_up_msg->user, connection_context, *message_transport, socket().peerAddress()); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(0, INT32_MAX); + secret_key = dis(gen); + + try + { + if (!start_up_msg->database.empty()) + connection_context.setCurrentDatabase(start_up_msg->database); + connection_context.setCurrentQueryId(Poco::format("postgres:%d:%d", connection_id, secret_key)); + } + catch (const Exception & exc) + { + message_transport->send( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse::ERROR, "XX000", exc.message()), + true); + throw; + } + + sendParameterStatusData(*start_up_msg); + + message_transport->send( + PostgreSQLProtocol::Messaging::BackendKeyData(connection_id, secret_key), true); + + LOG_INFO(log, "Successfully finished StartUp stage"); + return true; +} + +void PostgreSQLHandler::establishSecureConnection(Int32 & payload_size, Int32 & info) +{ + bool was_encryption_req = true; + readBinaryBigEndian(payload_size, *in); + readBinaryBigEndian(info, *in); + + switch (static_cast(info)) + { + case PostgreSQLProtocol::Messaging::FrontMessageType::SSL_REQUEST: + LOG_INFO(log, "Client requested SSL"); + if (ssl_enabled) + makeSecureConnectionSSL(); + else + message_transport->send('N', true); + break; + case PostgreSQLProtocol::Messaging::FrontMessageType::GSSENC_REQUEST: + LOG_INFO(log, "Client requested GSSENC"); + message_transport->send('N', true); + break; + default: + was_encryption_req = false; + } + if (was_encryption_req) + { + readBinaryBigEndian(payload_size, *in); + readBinaryBigEndian(info, *in); + } +} + +#if USE_SSL +void PostgreSQLHandler::makeSecureConnectionSSL() +{ + message_transport->send('S'); + ss = std::make_shared( + Poco::Net::SecureStreamSocket::attach(socket(), Poco::Net::SSLManager::instance().defaultServerContext())); + changeIO(*ss); +} +#else +void PostgreSQLHandler::makeSecureConnectionSSL() {} +#endif + +void PostgreSQLHandler::sendParameterStatusData(PostgreSQLProtocol::Messaging::StartUpMessage & start_up_message) +{ + std::unordered_map & parameters = start_up_message.parameters; + + if (parameters.find("application_name") != parameters.end()) + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("application_name", parameters["application_name"])); + if (parameters.find("client_encoding") != parameters.end()) + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("client_encoding", parameters["client_encoding"])); + else + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("client_encoding", "UTF8")); + + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("server_version", VERSION_STRING)); + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("server_encoding", "UTF8")); + message_transport->send(PostgreSQLProtocol::Messaging::ParameterStatus("DateStyle", 
"ISO")); + message_transport->flush(); +} + +void PostgreSQLHandler::cancelRequest() +{ + connection_context.setCurrentQueryId(""); + connection_context.setDefaultFormat("Null"); + + std::unique_ptr msg = + message_transport->receiveWithPayloadSize(8); + + String query = Poco::format("KILL QUERY WHERE query_id = 'postgres:%d:%d'", msg->process_id, msg->secret_key); + ReadBufferFromString replacement(query); + + executeQuery( + replacement, *out, true, connection_context, + [](const String &, const String &, const String &, const String &) {} + ); +} + +inline std::unique_ptr PostgreSQLHandler::receiveStartUpMessage(int payload_size) +{ + std::unique_ptr message; + try + { + message = message_transport->receiveWithPayloadSize(payload_size - 8); + } + catch (const Exception &) + { + message_transport->send( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse::ERROR, "08P01", "Can't correctly handle StartUp message"), + true); + throw; + } + + LOG_INFO(log, "Successfully received StartUp message"); + return message; +} + +void PostgreSQLHandler::processQuery() +{ + try + { + std::unique_ptr query = + message_transport->receive(); + + if (isEmptyQuery(query->query)) + { + message_transport->send(PostgreSQLProtocol::Messaging::EmptyQueryResponse()); + return; + } + + bool psycopg2_cond = query->query == "BEGIN" || query->query == "COMMIT"; // psycopg2 starts and ends queries with BEGIN/COMMIT commands + bool jdbc_cond = query->query.find("SET extra_float_digits") != String::npos || query->query.find("SET application_name") != String::npos; // jdbc starts with setting this parameter + if (psycopg2_cond || jdbc_cond) + { + message_transport->send( + PostgreSQLProtocol::Messaging::CommandComplete( + PostgreSQLProtocol::Messaging::CommandComplete::classifyQuery(query->query), 0)); + return; + } + + const auto & settings = connection_context.getSettingsRef(); + std::vector queries; + auto parse_res = splitMultipartQuery(query->query, queries, settings.max_query_size, settings.max_parser_depth); + if (!parse_res.second) + throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + + for (const auto & spl_query : queries) + { + ReadBufferFromString read_buf(spl_query); + executeQuery(read_buf, *out, true, connection_context, {}); + + PostgreSQLProtocol::Messaging::CommandComplete::Command command = + PostgreSQLProtocol::Messaging::CommandComplete::classifyQuery(spl_query); + message_transport->send(PostgreSQLProtocol::Messaging::CommandComplete(command, 0), true); + } + + } + catch (const Exception & e) + { + message_transport->send( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse( + PostgreSQLProtocol::Messaging::ErrorOrNoticeResponse::ERROR, "2F000", "Query execution failed.\n" + e.displayText()), + true); + throw; + } +} + +bool PostgreSQLHandler::isEmptyQuery(const String & query) +{ + if (query.empty()) + return true; + + Poco::RegularExpression regex(R"(\A\s*\z)"); + return regex.match(query); +} + +} diff --git a/programs/server/PostgreSQLHandler.h b/programs/server/PostgreSQLHandler.h new file mode 100644 index 00000000000..1062fed5cbb --- /dev/null +++ b/programs/server/PostgreSQLHandler.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include +#include +#include "IServer.h" + +#if USE_SSL +# include +#endif + +namespace CurrentMetrics +{ + extern const Metric PostgreSQLConnection; +} + +namespace DB +{ + +/** PostgreSQL wire protocol implementation. 
+ * For more info see https://www.postgresql.org/docs/current/protocol.html + */ +class PostgreSQLHandler : public Poco::Net::TCPServerConnection +{ +public: + PostgreSQLHandler( + const Poco::Net::StreamSocket & socket_, + IServer & server_, + bool ssl_enabled_, + Int32 connection_id_, + std::vector> & auth_methods_); + + void run() final; + +private: + Poco::Logger * log = &Poco::Logger::get("PostgreSQLHandler"); + + IServer & server; + Context connection_context; + bool ssl_enabled; + Int32 connection_id; + Int32 secret_key; + + std::shared_ptr in; + std::shared_ptr out; + std::shared_ptr message_transport; + +#if USE_SSL + std::shared_ptr ss; +#endif + + PostgreSQLProtocol::PGAuthentication::AuthenticationManager authentication_manager; + + CurrentMetrics::Increment metric_increment{CurrentMetrics::PostgreSQLConnection}; + + void changeIO(Poco::Net::StreamSocket & socket); + + bool startUp(); + + void establishSecureConnection(Int32 & payload_size, Int32 & info); + + void makeSecureConnectionSSL(); + + void sendParameterStatusData(PostgreSQLProtocol::Messaging::StartUpMessage & start_up_message); + + void cancelRequest(); + + std::unique_ptr receiveStartUpMessage(int payload_size); + + void processQuery(); + + bool isEmptyQuery(const String & query); +}; + +} diff --git a/programs/server/PostgreSQLHandlerFactory.cpp b/programs/server/PostgreSQLHandlerFactory.cpp new file mode 100644 index 00000000000..210d8558bfd --- /dev/null +++ b/programs/server/PostgreSQLHandlerFactory.cpp @@ -0,0 +1,28 @@ +#include "PostgreSQLHandlerFactory.h" +#include "IServer.h" +#include +#include +#include "PostgreSQLHandler.h" + +namespace DB +{ + +PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_) + : server(server_) + , log(&Logger::get("PostgreSQLHandlerFactory")) +{ + auth_methods = + { + std::make_shared(), + std::make_shared(), + }; +} + +Poco::Net::TCPServerConnection * PostgreSQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket) +{ + Int32 connection_id = last_connection_id++; + LOG_TRACE(log, "PostgreSQL connection. Id: " << connection_id << ". 
Address: " << socket.peerAddress().toString()); + return new PostgreSQLHandler(socket, server, ssl_enabled, connection_id, auth_methods); +} + +} diff --git a/programs/server/PostgreSQLHandlerFactory.h b/programs/server/PostgreSQLHandlerFactory.h new file mode 100644 index 00000000000..a95a22c162c --- /dev/null +++ b/programs/server/PostgreSQLHandlerFactory.h @@ -0,0 +1,29 @@ +#pragma once + +#include "IServer.h" +#include +#include + +namespace DB +{ + +class PostgreSQLHandlerFactory : public Poco::Net::TCPServerConnectionFactory +{ +private: +#if USE_SSL + IServer & server; + Poco::Logger * log; + bool ssl_enabled = true; +#else + bool ssl_enabled = false; +#endif + + std::atomic last_connection_id = 0; + std::vector> auth_methods; + +public: + explicit PostgreSQLHandlerFactory(IServer & server_); + + Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket) override; +}; +} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 29096327a71..04324d34574 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -61,6 +61,7 @@ #include #include + #if !defined(ARCADIA_BUILD) # include "config_core.h" # include "Common/config_version.h" diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 1eb401905f6..489598a5b26 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -23,6 +23,7 @@ M(MySQLConnection, "Number of client connections using MySQL protocol") \ M(HTTPConnection, "Number of connections to HTTP server") \ M(InterserverConnection, "Number of connections from other replicas to fetch parts") \ + M(PostgreSQLConnection, "Number of client connections using PostgreSQL protocol") \ M(OpenFileForRead, "Number of files open for reading") \ M(OpenFileForWrite, "Number of files open for writing") \ M(Read, "Number of read (read, pread, io_getevents, etc.) syscalls in fly") \ @@ -61,7 +62,6 @@ M(LocalThreadActive, "Number of threads in local thread pools running a task.") \ M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. 
Number of files for every shard is summed.") \ - namespace CurrentMetrics { #define M(NAME, DOCUMENTATION) extern const Metric NAME = __COUNTER__; diff --git a/src/Core/PostgreSQLProtocol.cpp b/src/Core/PostgreSQLProtocol.cpp new file mode 100644 index 00000000000..553d195605a --- /dev/null +++ b/src/Core/PostgreSQLProtocol.cpp @@ -0,0 +1,50 @@ +#include "PostgreSQLProtocol.h" + +namespace DB::PostgreSQLProtocol::Messaging +{ + +ColumnTypeSpec convertTypeIndexToPostgresColumnTypeSpec(TypeIndex type_index) +{ + switch (type_index) + { + case TypeIndex::Int8: + return {ColumnType::CHAR, 1}; + + case TypeIndex::UInt8: + case TypeIndex::Int16: + return {ColumnType::INT2, 2}; + + case TypeIndex::UInt16: + case TypeIndex::Int32: + return {ColumnType::INT4, 4}; + + case TypeIndex::UInt32: + case TypeIndex::Int64: + return {ColumnType::INT8, 8}; + + case TypeIndex::Float32: + return {ColumnType::FLOAT4, 4}; + case TypeIndex::Float64: + return {ColumnType::FLOAT8, 8}; + + case TypeIndex::FixedString: + case TypeIndex::String: + return {ColumnType::VARCHAR, -1}; + + case TypeIndex::Date: + return {ColumnType::DATE, 4}; + + case TypeIndex::Decimal32: + case TypeIndex::Decimal64: + case TypeIndex::Decimal128: + return {ColumnType::NUMERIC, -1}; + + case TypeIndex::UUID: + return {ColumnType::UUID, 16}; + + default: + return {ColumnType::VARCHAR, -1}; + } +} + +} diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h new file mode 100644 index 00000000000..b5fa67d68ea --- /dev/null +++ b/src/Core/PostgreSQLProtocol.h @@ -0,0 +1,914 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Types.h" +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_PACKET_FROM_CLIENT; + extern const int UNEXPECTED_PACKET_FROM_CLIENT; + extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_TYPE; +} + + +namespace PostgreSQLProtocol +{ + +namespace Messaging +{ + +enum class FrontMessageType : Int32 +{ +// first message types + CANCEL_REQUEST = 80877102, + SSL_REQUEST = 80877103, + GSSENC_REQUEST = 80877104, + +// other front message types + PASSWORD_MESSAGE = 'p', + QUERY = 'Q', + TERMINATE = 'X', + PARSE = 'P', + BIND = 'B', + DESCRIBE = 'D', + SYNC = 'S', + FLUSH = 'H', + CLOSE = 'C', +}; + +enum class MessageType : Int32 +{ +// common + ERROR_RESPONSE = 0, + CANCEL_REQUEST = 1, + COMMAND_COMPLETE = 2, + NOTICE_RESPONSE = 3, + NOTIFICATION_RESPONSE = 4, + PARAMETER_STATUS = 5, + READY_FOR_QUERY = 6, + SYNC = 7, + TERMINATE = 8, + +// start up and authentication + AUTHENTICATION_OK = 30, + AUTHENTICATION_KERBEROS_V5 = 31, + AUTHENTICATION_CLEARTEXT_PASSWORD = 32, + AUTHENTICATION_MD5_PASSWORD = 33, + AUTHENTICATION_SCM_CREDENTIAL = 34, + AUTHENTICATION_GSS = 35, + AUTHENTICATION_SSPI = 36, + AUTHENTICATION_GSS_CONTINUE = 37, + AUTHENTICATION_SASL = 38, + AUTHENTICATION_SASL_CONTINUE = 39, + AUTHENTICATION_SASL_FINAL = 40, + BACKEND_KEY_DATA = 41, + GSSENC_REQUEST = 42, + GSS_RESPONSE = 43, + NEGOTIATE_PROTOCOL_VERSION = 44, + PASSWORD_MESSAGE = 45, + SASL_INITIAL_RESPONSE = 46, + SASL_RESPONSE = 47, + SSL_REQUEST = 48, + STARTUP_MESSAGE = 49, + +// simple query + DATA_ROW = 100, + EMPTY_QUERY_RESPONSE = 101, + ROW_DESCRIPTION = 102, + QUERY = 103, + +// extended query + BIND = 120, + BIND_COMPLETE = 121, + CLOSE = 122, + CLOSE_COMPLETE = 123, + DESCRIBE = 124, + EXECUTE = 125, + FLUSH = 126, + NODATA = 127, + PARAMETER_DESCRIPTION = 128, + PARSE = 129, + PARSE_COMPLETE = 
130, + PORTAL_SUSPENDED = 131, + +// copy query + COPY_DATA = 171, + COPY_DONE = 172, + COPY_FAIL = 173, + COPY_IN_RESPONSE = 174, + COPY_OUT_RESPONSE = 175, + COPY_BOTH_RESPONSE = 176, + +// function query (deprecated by the protocol) + FUNCTION_CALL = 190, + FUNCTION_CALL_RESPONSE = 191, +}; + +//// Column 'typelem' from 'pg_type' table. NB: not all types are compatible with PostgreSQL's ones +enum class ColumnType : Int32 +{ + CHAR = 18, + INT8 = 20, + INT2 = 21, + INT4 = 23, + FLOAT4 = 700, + FLOAT8 = 701, + VARCHAR = 1043, + DATE = 1082, + NUMERIC = 1700, + UUID = 2950, +}; + +class ColumnTypeSpec +{ +public: + ColumnType type; + Int16 len; + + ColumnTypeSpec(ColumnType type_, Int16 len_) : type(type_), len(len_) {} +}; + +ColumnTypeSpec convertTypeIndexToPostgresColumnTypeSpec(TypeIndex type_index); + +class MessageTransport +{ +private: + ReadBuffer * in; + WriteBuffer * out; + +public: + MessageTransport(WriteBuffer * out_) : in(nullptr), out(out_) {} + + MessageTransport(ReadBuffer * in_, WriteBuffer * out_): in(in_), out(out_) {} + + template + std::unique_ptr receiveWithPayloadSize(Int32 payload_size) + { + std::unique_ptr message = std::make_unique(payload_size); + message->deserialize(*in); + return message; + } + + template + std::unique_ptr receive() + { + std::unique_ptr message = std::make_unique(); + message->deserialize(*in); + return message; + } + + FrontMessageType receiveMessageType() + { + char type = 0; + in->read(type); + return static_cast(type); + } + + template + void send(TMessage & message, bool flush=false) + { + message.serialize(*out); + if (flush) + out->next(); + } + + template + void send(TMessage && message, bool flush=false) + { + send(message, flush); + } + + void send(char message, bool flush=false) + { + out->write(message); + if (flush) + out->next(); + } + + void dropMessage() + { + Int32 size; + readBinaryBigEndian(size, *in); + in->ignore(size - 4); + } + + void flush() + { + out->next(); + } +}; + +/** Basic class for messages sent by client or server. */ +class IMessage +{ +public: + virtual MessageType getMessageType() const = 0; + + virtual ~IMessage() = default; +}; + +class ISerializable +{ +public: + /** Should be overridden for sending the message */ + virtual void serialize(WriteBuffer & out) const = 0; + + /** Size of the message in bytes including message length part (4 bytes) */ + virtual Int32 size() const = 0; + + virtual ~ISerializable() = default; +}; + +class FrontMessage : public IMessage +{ +public: + /** Should be overridden for receiving the message + * NB: This method should not read the first byte, which means the type of the message + * (if type is provided for the message by the protocol). 
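+ * (The type byte is consumed by MessageTransport::receiveMessageType() before receive<T>()
+ * calls deserialize(), so reading it again here would corrupt the stream.)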
+ */ + virtual void deserialize(ReadBuffer & in) = 0; +}; + +class BackendMessage : public IMessage, public ISerializable +{}; + +class FirstMessage : public FrontMessage +{ +public: + Int32 payload_size; + FirstMessage() = delete; + FirstMessage(int payload_size_) : payload_size(payload_size_) {} +}; + +class CancelRequest : public FirstMessage +{ +public: + Int32 process_id; + Int32 secret_key; + CancelRequest(int payload_size_) : FirstMessage(payload_size_) {} + + void deserialize(ReadBuffer & in) override + { + readBinaryBigEndian(process_id, in); + readBinaryBigEndian(secret_key, in); + } + + MessageType getMessageType() const override + { + return MessageType::CANCEL_REQUEST; + } +}; + +class ErrorOrNoticeResponse : BackendMessage +{ +public: + enum Severity {ERROR = 0, FATAL = 1, PANIC = 2, WARNING = 3, NOTICE = 4, DEBUG = 5, INFO = 6, LOG = 7}; + +private: + Severity severity; + String sql_state; + String message; + + String enum_to_string[8] = {"ERROR", "FATAL", "PANIC", "WARNING", "NOTICE", "DEBUG", "INFO", "LOG"}; + + char isErrorOrNotice() const + { + switch (severity) + { + case ERROR: + case FATAL: + case PANIC: + return 'E'; + case WARNING: + case NOTICE: + case DEBUG: + case INFO: + case LOG: + return 'N'; + } + throw Exception("Unknown severity type " + std::to_string(severity), ErrorCodes::UNKNOWN_TYPE); + } + +public: + ErrorOrNoticeResponse(const Severity & severity_, const String & sql_state_, const String & message_) + : severity(severity_) + , sql_state(sql_state_) + , message(message_) + {} + + void serialize(WriteBuffer & out) const override + { + out.write(isErrorOrNotice()); + Int32 sz = size(); + writeBinaryBigEndian(sz, out); + + out.write('S'); + writeNullTerminatedString(enum_to_string[severity], out); + out.write('C'); + writeNullTerminatedString(sql_state, out); + out.write('M'); + writeNullTerminatedString(message, out); + + out.write(0); + } + + Int32 size() const override + { + // message length part + (1 + sizes of other fields + 1) + null byte in the end of the message + return 4 + (1 + enum_to_string[severity].size() + 1) + (1 + sql_state.size() + 1) + (1 + message.size() + 1) + 1; + } + + MessageType getMessageType() const override + { + if (isErrorOrNotice() == 'E') + return MessageType::ERROR_RESPONSE; + return MessageType::NOTICE_RESPONSE; + } +}; + +class ReadyForQuery : BackendMessage +{ +public: + void serialize(WriteBuffer &out) const override + { + out.write('Z'); + writeBinaryBigEndian(size(), out); + // 'I' means that we are not in a transaction block. We use it here, because ClickHouse doesn't support transactions. 
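+        // Resulting wire bytes, sketched: 'Z', then Int32 length = 5 (big-endian, counting itself
+        // plus the payload), then 'I'; this matches size() below, which returns 4 + 1.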
+ out.write('I'); + } + + Int32 size() const override + { + return 4 + 1; + } + + MessageType getMessageType() const override + { + return MessageType::READY_FOR_QUERY; + } +}; + +class Terminate : FrontMessage +{ +public: + void deserialize(ReadBuffer & in) override + { + in.ignore(4); + } + + MessageType getMessageType() const override + { + return MessageType::TERMINATE; + } +}; + +class StartUpMessage : FirstMessage +{ +public: + String user; + String database; + // includes username, may also include database and other runtime parameters + std::unordered_map parameters; + + StartUpMessage(Int32 payload_size_) : FirstMessage(payload_size_) {} + + void deserialize(ReadBuffer & in) override + { + Int32 ps = payload_size - 1; + while (ps > 0) + { + String parameter_name; + String parameter_value; + readNullTerminated(parameter_name, in); + readNullTerminated(parameter_value, in); + ps -= parameter_name.size() + 1; + ps -= parameter_value.size() + 1; + + if (parameter_name == "user") + { + user = parameter_value; + } + else if (parameter_name == "database") + { + database = parameter_value; + } + + parameters.insert({std::move(parameter_name), std::move(parameter_value)}); + + if (payload_size < 0) + { + throw Exception( + Poco::format( + "Size of payload is larger than one declared in the message of type %d.", + getMessageType()), + ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + } + } + in.ignore(); + } + + MessageType getMessageType() const override + { + return MessageType::STARTUP_MESSAGE; + } +}; + +class AuthenticationCleartextPassword : public Messaging::BackendMessage +{ +public: + void serialize(WriteBuffer & out) const override + { + out.write('R'); + writeBinaryBigEndian(size(), out); + writeBinaryBigEndian(static_cast(3), out); // specifies that a clear-text password is required (by protocol) + } + + Int32 size() const override + { + // length of message + special int32 + return 4 + 4; + } + + MessageType getMessageType() const override + { + return MessageType::AUTHENTICATION_CLEARTEXT_PASSWORD; + } +}; + +class AuthenticationOk : BackendMessage +{ +public: + void serialize(WriteBuffer & out) const override + { + out.write('R'); + writeBinaryBigEndian(size(), out); + writeBinaryBigEndian(0, out); // specifies that the authentication was successful (by protocol) + } + + Int32 size() const override + { + // length of message + special int32 + return 4 + 4; + } + + MessageType getMessageType() const override + { + return MessageType::AUTHENTICATION_OK; + } +}; + +class PasswordMessage : FrontMessage +{ +public: + String password; + + void deserialize(ReadBuffer & in) override + { + Int32 sz; + readBinaryBigEndian(sz, in); + readNullTerminated(password, in); + } + + MessageType getMessageType() const override + { + return MessageType::PASSWORD_MESSAGE; + } +}; + +class ParameterStatus : BackendMessage +{ +private: + String name; + String value; + +public: + ParameterStatus(String name_, String value_) + : name(name_) + , value(value_) + {} + + void serialize(WriteBuffer & out) const override + { + out.write('S'); + writeBinaryBigEndian(size(), out); + writeNullTerminatedString(name, out); + writeNullTerminatedString(value, out); + } + + Int32 size() const override + { + return 4 + name.size() + 1 + value.size() + 1; + } + + MessageType getMessageType() const override + { + return MessageType::PARAMETER_STATUS; + } +}; + +class BackendKeyData : BackendMessage +{ +private: + Int32 process_id; + Int32 secret_key; + +public: + BackendKeyData(Int32 process_id_, Int32 secret_key_) + : 
process_id(process_id_) + , secret_key(secret_key_) + {} + + void serialize(WriteBuffer & out) const override + { + out.write('K'); + writeBinaryBigEndian(size(), out); + writeBinaryBigEndian(process_id, out); + writeBinaryBigEndian(secret_key, out); + } + + Int32 size() const override + { + return 4 + 4 + 4; + } + + MessageType getMessageType() const override + { + return MessageType::BACKEND_KEY_DATA; + } +}; + +class Query : FrontMessage +{ +public: + String query; + + void deserialize(ReadBuffer & in) override + { + Int32 sz; + readBinaryBigEndian(sz, in); + readNullTerminated(query, in); + } + + MessageType getMessageType() const override + { + return MessageType::QUERY; + } +}; + +class EmptyQueryResponse : public BackendMessage +{ +public: + void serialize(WriteBuffer & out) const override + { + out.write('I'); + writeBinaryBigEndian(size(), out); + } + + Int32 size() const override + { + return 4; + } + + MessageType getMessageType() const override + { + return MessageType::EMPTY_QUERY_RESPONSE; + } +}; + +enum class FormatCode : Int16 +{ + TEXT = 0, + BINARY = 1, +}; + +class FieldDescription : ISerializable +{ +private: + const String & name; + ColumnTypeSpec type_spec; + FormatCode format_code; + +public: + FieldDescription(const String & name_, TypeIndex type_index, FormatCode format_code_ = FormatCode::TEXT) + : name(name_) + , type_spec(convertTypeIndexToPostgresColumnTypeSpec(type_index)) + , format_code(format_code_) + {} + + void serialize(WriteBuffer & out) const override + { + writeNullTerminatedString(name, out); + writeBinaryBigEndian(static_cast(0), out); + writeBinaryBigEndian(static_cast(0), out); + writeBinaryBigEndian(static_cast(type_spec.type), out); + writeBinaryBigEndian(type_spec.len, out); + writeBinaryBigEndian(static_cast(-1), out); + writeBinaryBigEndian(static_cast(format_code), out); + } + + Int32 size() const override + { + // size of name (C string) + // + object ID of the table (Int32 and always zero) + attribute number of the column (Int16 and always zero) + // + type object id (Int32) + data type size (Int16) + // + type modifier (Int32 and always -1) + format code (Int16) + return (name.size() + 1) + 4 + 2 + 4 + 2 + 4 + 2; + } +}; + +class RowDescription : BackendMessage +{ +private: + const std::vector & fields_descr; + +public: + RowDescription(const std::vector & fields_descr_) : fields_descr(fields_descr_) {} + + void serialize(WriteBuffer & out) const override + { + out.write('T'); + writeBinaryBigEndian(size(), out); + writeBinaryBigEndian(static_cast(fields_descr.size()), out); + for (const FieldDescription & field : fields_descr) + field.serialize(out); + } + + Int32 size() const override + { + Int32 sz = 4 + 2; // size of message + number of fields + for (const FieldDescription & field : fields_descr) + sz += field.size(); + return sz; + } + + MessageType getMessageType() const override + { + return MessageType::ROW_DESCRIPTION; + } +}; + +class StringField : public ISerializable +{ +private: + String str; +public: + StringField(String str_) : str(str_) {} + + void serialize(WriteBuffer & out) const override + { + writeString(str, out); + } + + Int32 size() const override + { + return str.size(); + } +}; + +class NullField : public ISerializable +{ +public: + void serialize(WriteBuffer & /* out */) const override {} + + Int32 size() const override + { + return -1; + } +}; + +class DataRow : BackendMessage +{ +private: + const std::vector> & row; + +public: + DataRow(const std::vector> & row_) : row(row_) {} + + void serialize(WriteBuffer 
& out) const override + { + out.write('D'); + writeBinaryBigEndian(size(), out); + writeBinaryBigEndian(static_cast(row.size()), out); + for (const std::shared_ptr & field : row) + { + Int32 sz = field->size(); + writeBinaryBigEndian(sz, out); + if (sz > 0) + field->serialize(out); + } + } + + Int32 size() const override + { + Int32 sz = 4 + 2; // size of message + number of fields + for (const std::shared_ptr & field : row) + sz += 4 + field->size(); + return sz; + } + + MessageType getMessageType() const override + { + return MessageType::DATA_ROW; + } +}; + +class CommandComplete : BackendMessage +{ +public: + enum Command {BEGIN = 0, COMMIT = 1, INSERT = 2, DELETE = 3, UPDATE = 4, SELECT = 5, MOVE = 6, FETCH = 7, COPY = 8}; +private: + String enum_to_string[9] = {"BEGIN", "COMMIT", "INSERT", "DELETE", "UPDATE", "SELECT", "MOVE", "FETCH", "COPY"}; + + String value; + +public: + CommandComplete(Command cmd_, Int32 rows_count_) + { + value = enum_to_string[cmd_]; + String add = " "; + if (cmd_ == Command::INSERT) + add = " 0 "; + value += add + std::to_string(rows_count_); + } + + void serialize(WriteBuffer & out) const override + { + out.write('C'); + writeBinaryBigEndian(size(), out); + writeNullTerminatedString(value, out); + } + + Int32 size() const override + { + return 4 + value.size() + 1; + } + + MessageType getMessageType() const override + { + return MessageType::COMMAND_COMPLETE; + } + + static Command classifyQuery(const String & query) + { + std::vector query_types({"BEGIN", "COMMIT", "INSERT", "DELETE", "UPDATE", "SELECT", "MOVE", "FETCH", "COPY"}); + for (size_t i = 0; i != query_types.size(); ++i) + { + String::const_iterator iter = std::search( + query.begin(), + query.end(), + query_types[i].begin(), + query_types[i].end(), + [](char a, char b){return std::toupper(a) == b;}); + + if (iter != query.end()) + return static_cast(i); + } + + return Command::SELECT; + } +}; + +} + +namespace PGAuthentication +{ + +class AuthenticationMethod +{ +protected: + void setPassword( + const String & user_name, + const String & password, + Context & context, + Messaging::MessageTransport & mt, + const Poco::Net::SocketAddress & address) + { + try { + context.setUser(user_name, password, address, ""); + } + catch (const Exception &) + { + mt.send( + Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), + true); + throw; + } + } + +public: + virtual void authenticate( + const String & user_name, + Context & context, + Messaging::MessageTransport & mt, + const Poco::Net::SocketAddress & address) = 0; + + virtual Authentication::Type getType() const = 0; + + virtual ~AuthenticationMethod() = default; +}; + +class NoPasswordAuth : public AuthenticationMethod +{ +public: + void authenticate( + const String & /* user_name */, + Context & /* context */, + Messaging::MessageTransport & /* mt */, + const Poco::Net::SocketAddress & /* address */) override {} + + Authentication::Type getType() const override + { + return Authentication::Type::NO_PASSWORD; + } +}; + +class CleartextPasswordAuth : public AuthenticationMethod +{ +public: + void authenticate( + const String & user_name, + Context & context, + Messaging::MessageTransport & mt, + const Poco::Net::SocketAddress & address) override + { + mt.send(Messaging::AuthenticationCleartextPassword(), true); + + Messaging::FrontMessageType type = mt.receiveMessageType(); + if (type == Messaging::FrontMessageType::PASSWORD_MESSAGE) + { + std::unique_ptr password = mt.receive(); + 
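+            // The password arrived in response to AuthenticationCleartextPassword; setPassword()
+            // below validates it against the server context and, on failure, replies with
+            // SQLSTATE 28P01 ("Invalid user or password") before rethrowing.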
setPassword(user_name, password->password, context, mt, address); + } + else + throw Exception( + Poco::format( + "Client sent wrong message or closed the connection. Message byte was %d.", + static_cast(type)), + ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + } + + Authentication::Type getType() const override + { + return Authentication::Type::PLAINTEXT_PASSWORD; + } +}; + +class AuthenticationManager +{ +private: + Poco::Logger * log = &Poco::Logger::get("AuthenticationManager"); + std::unordered_map> type_to_method = {}; + +public: + AuthenticationManager(const std::vector> & auth_methods) + { + for (const std::shared_ptr & method : auth_methods) + { + type_to_method[method->getType()] = method; + } + } + + void authenticate( + const String & user_name, + Context & context, + Messaging::MessageTransport & mt, + const Poco::Net::SocketAddress & address) + { + auto user = context.getAccessControlManager().read(user_name); + Authentication::Type user_auth_type = user->authentication.getType(); + + if (type_to_method.find(user_auth_type) != type_to_method.end()) + { + type_to_method[user_auth_type]->authenticate(user_name, context, mt, address); + mt.send(Messaging::AuthenticationOk(), true); + LOG_INFO(log, "Authentication for user " << user_name << " was successful."); + return; + } + + mt.send( + Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), + true); + + throw Exception(Poco::format("Authentication type %d is not supported.", user_auth_type), ErrorCodes::NOT_IMPLEMENTED); + } +}; +} + +} +} diff --git a/src/Core/ya.make b/src/Core/ya.make index 4999fe334bc..06fed2dc257 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -16,6 +16,7 @@ SRCS( Field.cpp iostream_debug_helpers.cpp MySQLProtocol.cpp + PostgreSQLProtocol.cpp NamesAndTypes.cpp Settings.cpp SettingsCollection.cpp diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 669baace2f5..9b4d7940efe 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -393,6 +394,7 @@ FormatFactory::FormatFactory() registerOutputFormatProcessorNull(*this); registerOutputFormatProcessorMySQLWrite(*this); registerOutputFormatProcessorMarkdown(*this); + registerOutputFormatProcessorPostgreSQLWrite(*this); } FormatFactory & FormatFactory::instance() diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index c8dd97aa940..87925ca1d75 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -201,6 +201,7 @@ void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); void registerOutputFormatProcessorNull(FormatFactory & factory); void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory); void registerOutputFormatProcessorMarkdown(FormatFactory & factory); +void registerOutputFormatProcessorPostgreSQLWrite(FormatFactory & factory); /// Input only formats. 
void registerInputFormatProcessorCapnProto(FormatFactory & factory); diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 10918fb7b61..ac326676511 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -998,4 +998,24 @@ inline String toString(const T & x) return buf.str(); } +inline void writeNullTerminatedString(const String & s, WriteBuffer & buffer) +{ + buffer.write(s.data(), s.size()); + buffer.write(0); +} + +template +inline std::enable_if_t && (sizeof(T) <= 8), void> +writeBinaryBigEndian(T x, WriteBuffer & buf) /// Assuming little endian architecture. +{ + if constexpr (sizeof(x) == 2) + x = __builtin_bswap16(x); + else if constexpr (sizeof(x) == 4) + x = __builtin_bswap32(x); + else if constexpr (sizeof(x) == 8) + x = __builtin_bswap64(x); + + writePODBinary(x, buf); +} + } diff --git a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp new file mode 100644 index 00000000000..f03656ec304 --- /dev/null +++ b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.cpp @@ -0,0 +1,79 @@ +#include +#include "PostgreSQLOutputFormat.h" + +namespace DB +{ + +PostgreSQLOutputFormat::PostgreSQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_) + : IOutputFormat(header_, out_) + , format_settings(settings_) + , message_transport(&out) +{ +} + +void PostgreSQLOutputFormat::doWritePrefix() +{ + if (initialized) + return; + + initialized = true; + const auto & header = getPort(PortKind::Main).getHeader(); + data_types = header.getDataTypes(); + + if (header.columns()) + { + std::vector columns; + columns.reserve(header.columns()); + + for (size_t i = 0; i < header.columns(); i++) + { + const auto & column_name = header.getColumnsWithTypeAndName()[i].name; + columns.emplace_back(column_name, data_types[i]->getTypeId()); + } + message_transport.send(PostgreSQLProtocol::Messaging::RowDescription(columns)); + } +} + +void PostgreSQLOutputFormat::consume(Chunk chunk) +{ + doWritePrefix(); + + for (size_t i = 0; i != chunk.getNumRows(); ++i) + { + const Columns & columns = chunk.getColumns(); + std::vector> row; + row.reserve(chunk.getNumColumns()); + + for (size_t j = 0; j != chunk.getNumColumns(); ++j) + { + if (columns[j]->isNullAt(i)) + row.push_back(std::make_shared()); + else + { + WriteBufferFromOwnString ostr; + data_types[j]->serializeAsText(*columns[j], i, ostr, format_settings); + row.push_back(std::make_shared(std::move(ostr.str()))); + } + } + + message_transport.send(PostgreSQLProtocol::Messaging::DataRow(row)); + } +} + +void PostgreSQLOutputFormat::finalize() {} + +void PostgreSQLOutputFormat::flush() +{ + message_transport.flush(); +} + +void registerOutputFormatProcessorPostgreSQLWrite(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor( + "PostgreSQLWire", + [](WriteBuffer & buf, + const Block & sample, + const FormatFactory::WriteCallback &, + const FormatSettings & settings) { return std::make_shared(buf, sample, settings); }); +} +} diff --git a/src/Processors/Formats/Impl/PostgreSQLOutputFormat.h b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.h new file mode 100644 index 00000000000..8ff5aae5067 --- /dev/null +++ b/src/Processors/Formats/Impl/PostgreSQLOutputFormat.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +#include +#include + +namespace DB +{ + +//// https://www.postgresql.org/docs/current/protocol-flow.html#id-1.10.5.7.4 +class PostgreSQLOutputFormat final : public IOutputFormat +{ +public: + 
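+    // Flow implemented in the .cpp above: doWritePrefix() sends a single RowDescription built from
+    // the header block (if it has columns), consume() emits one DataRow per input row (NULL values
+    // become NullField, everything else is serialized as text), and flush() drains the transport.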
PostgreSQLOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & settings_); + + String getName() const override {return "PostgreSQLOutputFormat";} + + void doWritePrefix() override; + void consume(Chunk) override; + void finalize() override; + void flush() override; + +private: + bool initialized = false; + + FormatSettings format_settings; + PostgreSQLProtocol::Messaging::MessageTransport message_transport; + DataTypes data_types; +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 62320f1c147..b709b0e44b7 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -38,6 +38,7 @@ SRCS( Formats/Impl/NullFormat.cpp Formats/Impl/ODBCDriver2BlockOutputFormat.cpp Formats/Impl/ODBCDriverBlockOutputFormat.cpp + Formats/Impl/PostgreSQLOutputFormat.cpp Formats/Impl/PrettyBlockOutputFormat.cpp Formats/Impl/PrettyCompactBlockOutputFormat.cpp Formats/Impl/PrettySpaceBlockOutputFormat.cpp diff --git a/tests/integration/test_postgresql_protocol/__init__.py b/tests/integration/test_postgresql_protocol/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_postgresql_protocol/clients/java/0.reference b/tests/integration/test_postgresql_protocol/clients/java/0.reference new file mode 100644 index 00000000000..3e3e20d1ebb --- /dev/null +++ b/tests/integration/test_postgresql_protocol/clients/java/0.reference @@ -0,0 +1,15 @@ +33jdbcnull +44cknull +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 diff --git a/tests/integration/test_postgresql_protocol/clients/java/Dockerfile b/tests/integration/test_postgresql_protocol/clients/java/Dockerfile new file mode 100644 index 00000000000..f08470ee805 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/clients/java/Dockerfile @@ -0,0 +1,18 @@ +FROM ubuntu:18.04 + +RUN apt-get update && \ + apt-get install -y software-properties-common build-essential openjdk-8-jdk curl + +RUN rm -rf \ + /var/lib/apt/lists/* \ + /var/cache/debconf \ + /tmp/* \ +RUN apt-get clean + +ARG ver=42.2.12 +RUN curl -L -o /postgresql-java-${ver}.jar https://repo1.maven.org/maven2/org/postgresql/postgresql/${ver}/postgresql-${ver}.jar +ENV CLASSPATH=$CLASSPATH:/postgresql-java-${ver}.jar + +WORKDIR /jdbc +COPY Test.java Test.java +RUN javac Test.java diff --git a/tests/integration/test_postgresql_protocol/clients/java/Test.java b/tests/integration/test_postgresql_protocol/clients/java/Test.java new file mode 100644 index 00000000000..772a749711a --- /dev/null +++ b/tests/integration/test_postgresql_protocol/clients/java/Test.java @@ -0,0 +1,83 @@ +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Properties; + +class JavaConnectorTest { + private static final String CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS default.test1 (`age` Int32, `name` String, `int_nullable` Nullable(Int32)) Engine = Memory"; + private static final String INSERT_SQL = "INSERT INTO default.test1(`age`, `name`) VALUES(33, 'jdbc'),(44, 'ck')"; + private static final String SELECT_SQL = "SELECT * FROM default.test1"; + private static final String SELECT_NUMBER_SQL = "SELECT * FROM system.numbers LIMIT 13"; + private static final String DROP_TABLE_SQL = "DROP TABLE default.test1"; + + public static void main(String[] args) { + int i = 0; + String host = "127.0.0.1"; + String port = "5432"; + String user = "default"; + String password = ""; + String database = "default"; + while (i < args.length) { + 
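+            // Minimal hand-rolled flag parser: each recognized option consumes the following
+            // argument; anything unrecognized is skipped.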
switch (args[i]) { + case "--host": + host = args[++i]; + break; + case "--port": + port = args[++i]; + break; + case "--user": + user = args[++i]; + break; + case "--password": + password = args[++i]; + break; + case "--database": + database = args[++i]; + break; + default: + i++; + break; + } + } + + String jdbcUrl = String.format("jdbc:postgresql://%s:%s/%s", host, port, database); + + Connection conn = null; + Statement stmt = null; + Properties props = new Properties(); + props.setProperty("user", user); + props.setProperty("password", password); + props.setProperty("preferQueryMode", "simple"); + props.setProperty("sslmode", "disable"); + try { + conn = DriverManager.getConnection(jdbcUrl, props); + stmt = conn.createStatement(); + stmt.executeUpdate(CREATE_TABLE_SQL); + stmt.executeUpdate(INSERT_SQL); + + ResultSet rs = stmt.executeQuery(SELECT_SQL); + while (rs.next()) { + System.out.print(rs.getString("age")); + System.out.print(rs.getString("name")); + System.out.print(rs.getString("int_nullable")); + System.out.println(); + } + + stmt.executeUpdate(DROP_TABLE_SQL); + + rs = stmt.executeQuery(SELECT_NUMBER_SQL); + while (rs.next()) { + System.out.print(rs.getString(1)); + System.out.println(); + } + + stmt.close(); + conn.close(); + } catch (SQLException e) { + e.printStackTrace(); + System.exit(1); + } + } +} diff --git a/tests/integration/test_postgresql_protocol/clients/java/docker_compose.yml b/tests/integration/test_postgresql_protocol/clients/java/docker_compose.yml new file mode 100644 index 00000000000..7094c8b2359 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/clients/java/docker_compose.yml @@ -0,0 +1,8 @@ +version: '2.2' +services: + java: + build: + context: ./ + network: host + # to keep container running + command: sleep infinity diff --git a/tests/integration/test_postgresql_protocol/clients/psql/docker_compose.yml b/tests/integration/test_postgresql_protocol/clients/psql/docker_compose.yml new file mode 100644 index 00000000000..984f5f97384 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/clients/psql/docker_compose.yml @@ -0,0 +1,14 @@ +version: '2.2' +services: + psql: + image: postgres:12.2-alpine + restart: always + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + ports: + - "5433:5433" + environment: + POSTGRES_HOST_AUTH_METHOD: "trust" \ No newline at end of file diff --git a/tests/integration/test_postgresql_protocol/configs/config.xml b/tests/integration/test_postgresql_protocol/configs/config.xml new file mode 100644 index 00000000000..678b48425b1 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/configs/config.xml @@ -0,0 +1,35 @@ + + + + trace + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + + + + + /etc/clickhouse-server/server.crt + /etc/clickhouse-server/server.key + + /etc/clickhouse-server/dhparam.pem + none + true + true + sslv2,sslv3 + true + + + + 9000 + 5433 + 127.0.0.1 + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_postgresql_protocol/configs/dhparam.pem b/tests/integration/test_postgresql_protocol/configs/dhparam.pem new file mode 100644 index 00000000000..a59693e9bfc --- /dev/null +++ b/tests/integration/test_postgresql_protocol/configs/dhparam.pem @@ -0,0 +1,25 @@ +-----BEGIN X9.42 DH PARAMETERS----- +MIIELAKCAgEAkX9p27H48x6pBuiT5i7yVvSXjMaCnGkViPCL/R6+FdSpv/MVs0WX 
+dBq1uQWjin2AL7T4uHOyhd1sD4MrzgzPGR5q7lJr6CjvRtqj5ZjBX/xbo/N4xeix +VL+UTCpvfPwwkve7UL6C4v79f7AIH34ie+Ew2H5Bvy8RraFL5zrfhDWjdMPVk+Kz +Y4+GAXKEzB6CaXzpXBv/s5w7vXO11+EIXgWn2z6lJ2rEkEdT7hCamzNGy+ajH8on +FIvxrvvEQ1oLcMYPu6OB6PEGxonAjTrwIwYth1+4lnG0A4X5Bn1Bx0DKEyCAZSHw +ByjDZ9ZCspqY4b/auRKRnWSWDLYPkW4YtNCVV/+5pJydcL511gQ2WQs7quZEsGem +4x14xpIM5qDvF3bzFuDpVMuuzlf6AB9dEMSms6iIwuWpSxck6AydII0okxUaxSlW +QJxZGQBE/2m9DwFmMHDBWUYBGvevX51RjQCsJsmgZPlwnY7hnZ29sB7MeVzqF26d +103byJBUq+rWUkxzKrYKbm+FjOz84/hv3ONxoxBI0FstKdaEr7PnpDLyLmZCheeL +tz0RzNM3h9AEno1SJNrzWaVI5s5L6QjM9QRRWfF2JB5QyhQjc++FgRGk3SDBbcW5 +IhHVcboq/MppZiE82FSwMtCkmPvB7KrPoYx8fmrTs7zfHtx+glsMguMCggIAY32m +/EZbhpvmmbq0G/zjh1Nkdvj0IOQdkxnz7FhnKviKNgqWTbgHSaE+pcubK8XVsuAj +NLOp5AMpccV9h02ABGRdaSSyMeJxfnYRUhuSWHN+i/rqL3Xtv7w/BQXsUZd3tQQ+ +I4UhnC/VUlGgndL5b8TbYOA/9CXPItGRMQb3S9SzijzEeKwWHu0n+j4Nwbl3nrtk +epWey/Wv0SU1d07es9vXxob/iPZSwM1E9SDjRFrqokLQCWFzaELzOF14TBXUn1RT +1agpxeux9UQpPS1ELjReh+c94BWQh5Soj/HJ2L76EgWkKM0se7uD6AhZee+b22YM +KKqbWWetStSjSSsLxR4yvPMct/eUS8V9UCQfPuY3DpLZi3+F5hAMcKqV3gGHJBrD +82MkQUj8eJaz3qEocG3zzYnxZ3sXze9HYpGCVIXX6b5p8yg9R1I8mNLo9w0IS2mU +5rmw2YdioZKUTN+jMVP79GFgsoGTPAf9sFDdswwD1ie1MYG/sw1K/Jxw3MPED4y5 +we+bBaaa2WLaSB32eEnyxZBd8OOQOmTunp/zw12BAC485mF9Innr1fAhic8t+LOB +CyVAF02HA0puj365kGsZDjcXn+EEuwK+VeStERTXApcbwL+78VW+DQ1J/vBjkt4Z +ustnEMN3HdfV3DTBBRxmEj34MuEhrz0WjhgRskACIQCU5YbOgiW+L9L/mDwyGARK +jZ/2Z6yJuWyeim3EVpWG2Q== +-----END X9.42 DH PARAMETERS----- diff --git a/tests/integration/test_postgresql_protocol/configs/server.crt b/tests/integration/test_postgresql_protocol/configs/server.crt new file mode 100644 index 00000000000..070d37f3b77 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/configs/server.crt @@ -0,0 +1,18 @@ +-----BEGIN CERTIFICATE----- +MIIC+zCCAeOgAwIBAgIJANhP897Se2gmMA0GCSqGSIb3DQEBCwUAMBQxEjAQBgNV +BAMMCWxvY2FsaG9zdDAeFw0yMDA0MTgyMTE2NDBaFw0yMTA0MTgyMTE2NDBaMBQx +EjAQBgNVBAMMCWxvY2FsaG9zdDCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC +ggEBAM92kcojQoMsjZ9YGhPMY6h/fDUsZeSKHLxgqE6wbmfU1oZKCPWqnvl+4n0J +pnT5h1ETxxYZLepimKq0DEVPUTmCl0xmcKbtUNiaTUKYKsdita6b2vZCX9wUPN9p +2Kjnm41l+aZNqIEBhIgHNWg9qowi20y0EIXR79jQLwwaInHAaJLZxVsqY2zjQ/D7 +1Zh82MXud7iqxBQiEfw9Cz35UFA239R8QTlPkVQfsN1gfLxnLk24QUX3o+hbUI1g +nlSpyYDHYQlOmwz8doDs6THHAZNJ4bPE9xHNFpw6dGZdbtH+IKQ/qRZIiOaiNuzJ +IOHl6XQDRDkW2LMTiCQ6fjC7Pz8CAwEAAaNQME4wHQYDVR0OBBYEFFvhaA/Eguyf +BXkMj8BkNLBqMnz2MB8GA1UdIwQYMBaAFFvhaA/EguyfBXkMj8BkNLBqMnz2MAwG +A1UdEwQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBACeU/oL48eVAKH7NQntHhRaJ +ZGeQzKIjrSBjFo8BGXD1nJZhUeFsylLrhCkC8/5/3grE3BNVX9bxcGjO81C9Mn4U +t0z13d6ovJjCZSQArtLwgeJGlpH7gNdD3DyT8DQmrqYVnmnB7UmBu45XH1LWGQZr +FAOhGRVs6s6mNj8QlLMgdmsOeOQnsGCMdoss8zV9vO2dc4A5SDSSL2mqGGY4Yjtt +X+XlEhXXnksGyx8NGVOZX4wcj8WeCAj/lihQ7Zh6XYwZH9i+E46ompUwoziZnNPu +2RH63tLNCxkOY2HF5VMlbMmzer3FkhlM6TAZZRPcvSphKPwXK4A33yqc6wnWvpc= +-----END CERTIFICATE----- diff --git a/tests/integration/test_postgresql_protocol/configs/server.key b/tests/integration/test_postgresql_protocol/configs/server.key new file mode 100644 index 00000000000..b3dee82dcda --- /dev/null +++ b/tests/integration/test_postgresql_protocol/configs/server.key @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDPdpHKI0KDLI2f +WBoTzGOof3w1LGXkihy8YKhOsG5n1NaGSgj1qp75fuJ9CaZ0+YdRE8cWGS3qYpiq +tAxFT1E5gpdMZnCm7VDYmk1CmCrHYrWum9r2Ql/cFDzfadio55uNZfmmTaiBAYSI +BzVoPaqMIttMtBCF0e/Y0C8MGiJxwGiS2cVbKmNs40Pw+9WYfNjF7ne4qsQUIhH8 +PQs9+VBQNt/UfEE5T5FUH7DdYHy8Zy5NuEFF96PoW1CNYJ5UqcmAx2EJTpsM/HaA 
+7OkxxwGTSeGzxPcRzRacOnRmXW7R/iCkP6kWSIjmojbsySDh5el0A0Q5FtizE4gk +On4wuz8/AgMBAAECggEAJ54J2yL+mZQRe2NUn4FBarTloDXZQ1pIgISov1Ybz0Iq +sTxEF728XAKp95y3J9Fa0NXJB+RJC2BGrRpy2W17IlNY1yMc0hOxg5t7s4LhcG/e +J/jlSG+GZL2MnlFVKXQJFWhq0yIzUmdayqstvLlB7z7cx/n+yb88YRfoVBRNjZEL +Tdrsw+087igDjrIxZJ3eMN5Wi434n9s4yAoRQC1bP5wcWx0gD4MzdmL8ip6suiRc +LRuBAhV/Op812xlxUhrF5dInUM9OLlGTXpUzexAS8Cyy7S4bfkW2BaCxTF7I7TFw +Whx28CKn/G49tIuU0m6AlxWbXpLVePTFyMb7RJz5cQKBgQD7VQd2u3HM6eE3PcXD +p6ObdLTUk8OAJ5BMmADFc71W0Epyo26/e8KXKGYGxE2W3fr13y+9b0fl5fxZPuhS +MgvXEO7rItAVsLcp0IzaqY0WUee2b4XWPAU0XuPqvjYMpx8H5OEHqFK6lhZysAqM +X7Ot3/Hux9X0MC4v5a/HNbDUOQKBgQDTUPaP3ADRrmpmE2sWuzWEnCSEz5f0tCLO +wTqhV/UraWUNlAbgK5NB790IjH/gotBSqqNPLJwJh0LUfClKM4LiaHsEag0OArOF +GhPMK1Ohps8c2RRsiG8+hxX2HEHeAVbkouEDPDiHdIW/92pBViDoETXL6qxDKbm9 +LkOcVeDfNwKBgQChh1xsqrvQ/t+IKWNZA/zahH9TwEP9sW/ESkz0mhYuHWA7nV4o +ItpFW+l2n+Nd+vy32OFN1p9W2iD9GrklWpTRfEiRRqaFyjVt4mMkhaPvnGRXlAVo +Utrldbb1v5ntN9txr2ARE9VXpe53dzzQSxGnxi4vUK/paK3GitAWMCOdwQKBgQCi +hmGsUXQb0P6qVYMGr6PAw2re7t8baLRguoMCdqjs45nCMLh9D2apzvb8TTtJJU/+ +VJlYGqJEPdDrpjcHh8jBo8QBqCM0RGWYGG9jl2syKB6hPGCV/PU6bSE58Y/DVNpk +7NUM7PM5UyhPddY2PC0A78Ole29UFLJzSzLa+b4DTwKBgH9Wh2k4YPnPcRrX89UL +eSwWa1CGq6HWX8Kd5qyz256aeHWuG5nv15+rBt+D7nwajUsqeVkAXz5H/dHuG1xz +jb7RW+pEjx0GVAmIbkM9vOLqEUfHHHPuk4AXCGGZ5sarPiKg4BHKBBsY1dpoO5UH +0j71fRA6zurHnTXDaCLWlUpZ +-----END PRIVATE KEY----- diff --git a/tests/integration/test_postgresql_protocol/configs/users.xml b/tests/integration/test_postgresql_protocol/configs/users.xml new file mode 100644 index 00000000000..86f5b6657c2 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/configs/users.xml @@ -0,0 +1,13 @@ + + + + + + + + + + 123 + + + diff --git a/tests/integration/test_postgresql_protocol/test.py b/tests/integration/test_postgresql_protocol/test.py new file mode 100644 index 00000000000..d9e2dfe3228 --- /dev/null +++ b/tests/integration/test_postgresql_protocol/test.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- + +from __future__ import print_function + +import datetime +import decimal +import docker +import psycopg2 as py_psql +import psycopg2.extras +import pytest +import os +import sys +import subprocess +import time +import uuid + +from helpers.cluster import ClickHouseCluster + +psycopg2.extras.register_uuid() + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +config_dir = os.path.join(SCRIPT_DIR, './configs') + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', config_dir=config_dir, env_variables={'UBSAN_OPTIONS': 'print_stacktrace=1'}) + +server_port = 5433 + + +@pytest.fixture(scope="module") +def server_address(): + cluster.start() + try: + yield cluster.get_instance_ip('node') + finally: + cluster.shutdown() + + +@pytest.fixture(scope='module') +def psql_client(): + docker_compose = os.path.join(SCRIPT_DIR, 'clients', 'psql', 'docker_compose.yml') + subprocess.check_call(['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d', '--build']) + yield docker.from_env().containers.get(cluster.project_name + '_psql_1') + + +@pytest.fixture(scope='module') +def psql_server(psql_client): + """Return PostgreSQL container when it is healthy.""" + retries = 30 + for i in range(retries): + info = psql_client.client.api.inspect_container(psql_client.name) + if info['State']['Health']['Status'] == 'healthy': + break + time.sleep(1) + else: + print(info['State']) + raise Exception('PostgreSQL server has not started after {} retries.'.format(retries)) + + return psql_client + + 
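+# For reference, a minimal client-side sketch of what these fixtures enable (the real assertions
+# live in test_python_client below; server_address and server_port come from this module):
+#
+#     ch = py_psql.connect(host=server_address, port=server_port,
+#                          user='default', password='123', database='')
+#     cur = ch.cursor()
+#     cur.execute('SELECT 1 AS a')
+#     assert cur.fetchall() == [(1,)]
+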
+@pytest.fixture(scope='module') +def java_container(): + docker_compose = os.path.join(SCRIPT_DIR, 'clients', 'java', 'docker_compose.yml') + subprocess.check_call(['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d', '--build']) + yield docker.from_env().containers.get(cluster.project_name + '_java_1') + + +def test_psql_is_ready(psql_server): + pass + + +def test_psql_client(psql_client, server_address): + cmd_prefix = 'psql "sslmode=require host={server_address} port={server_port} user=default dbname=default password=123" '\ + .format(server_address=server_address, server_port=server_port) + cmd_prefix += "--no-align --field-separator=' ' " + + code, (stdout, stderr) = psql_client.exec_run(cmd_prefix + '-c "SELECT 1 as a"', demux=True) + assert stdout == '\n'.join(['a', '1', '(1 row)', '']) + + code, (stdout, stderr) = psql_client.exec_run(cmd_prefix + '''-c "SELECT 'колонка' as a"''', demux=True) + assert stdout == '\n'.join(['a', 'колонка', '(1 row)', '']) + + code, (stdout, stderr) = psql_client.exec_run( + cmd_prefix + '-c ' + + ''' + "CREATE DATABASE x; + USE x; + CREATE TABLE table1 (column UInt32) ENGINE = Memory; + INSERT INTO table1 VALUES (0), (1), (5); + INSERT INTO table1 VALUES (0), (1), (5); + SELECT * FROM table1 ORDER BY column;" + ''', + demux=True + ) + assert stdout == '\n'.join(['column', '0', '0', '1', '1', '5', '5', '(6 rows)', '']) + + code, (stdout, stderr) = psql_client.exec_run( + cmd_prefix + '-c ' + + ''' + "DROP DATABASE x; + CREATE TEMPORARY TABLE tmp (tmp_column UInt32); + INSERT INTO tmp VALUES (0), (1); + SELECT * FROM tmp ORDER BY tmp_column;" + ''', + demux=True + ) + assert stdout == '\n'.join(['tmp_column', '0', '1', '(2 rows)', '']) + + +def test_python_client(server_address): + with pytest.raises(py_psql.InternalError) as exc_info: + ch = py_psql.connect(host=server_address, port=server_port, user='default', password='123', database='') + cur = ch.cursor() + cur.execute('select name from tables;') + + assert exc_info.value.args == ("Query execution failed.\nDB::Exception: Table default.tables doesn't exist.\nSSL connection has been closed unexpectedly\n",) + + ch = py_psql.connect(host=server_address, port=server_port, user='default', password='123', database='') + cur = ch.cursor() + + cur.execute('select 1 as a, 2 as b') + assert (cur.description[0].name, cur.description[1].name) == ('a', 'b') + assert cur.fetchall() == [(1, 2)] + + cur.execute('CREATE DATABASE x') + cur.execute('USE x') + cur.execute('CREATE TEMPORARY TABLE tmp2 (ch Int8, i64 Int64, f64 Float64, str String, date Date, dec Decimal(19, 10), uuid UUID) ENGINE = Memory') + cur.execute("insert into tmp2 (ch, i64, f64, str, date, dec, uuid) values (44, 534324234, 0.32423423, 'hello', '2019-01-23', 0.333333, '61f0c404-5cb3-11e7-907b-a6006ad3dba0')") + cur.execute('select * from tmp2') + assert cur.fetchall()[0] == ('44', 534324234, 0.32423423, 'hello', datetime.date(2019, 1, 23), decimal.Decimal('0.3333330000'), uuid.UUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')) + + +def test_java_client(server_address, java_container): + with open(os.path.join(SCRIPT_DIR, 'clients', 'java', '0.reference')) as fp: + reference = fp.read() + + # database not exists exception. + code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user default --database ' + 'abc'.format(host=server_address, port=server_port), demux=True) + assert code == 1 + + # non-empty password passed. 
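+    # Note that Test.java sets preferQueryMode=simple, so this exercises the simple-query flow of
+    # the protocol rather than the extended (parse/bind/execute) flow.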
+ code, (stdout, stderr) = java_container.exec_run('java JavaConnectorTest --host {host} --port {port} --user default --password 123 --database ' + 'default'.format(host=server_address, port=server_port), demux=True) + print(stdout, stderr, file=sys.stderr) + assert code == 0 + assert stdout == reference From 142c63e487c0bc0adacf64773113272fadff1875 Mon Sep 17 00:00:00 2001 From: MovElb Date: Sat, 30 May 2020 23:02:11 +0300 Subject: [PATCH 0288/2229] done rebase --- programs/server/Server.cpp | 16 ++++++++++++++++ src/Core/PostgreSQLProtocol.h | 5 +++-- .../server => src/Server}/PostgreSQLHandler.cpp | 0 .../server => src/Server}/PostgreSQLHandler.h | 1 + .../Server}/PostgreSQLHandlerFactory.cpp | 5 ++--- .../Server}/PostgreSQLHandlerFactory.h | 4 +++- src/Server/ya.make | 2 ++ 7 files changed, 27 insertions(+), 6 deletions(-) rename {programs/server => src/Server}/PostgreSQLHandler.cpp (100%) rename {programs/server => src/Server}/PostgreSQLHandler.h (98%) rename {programs/server => src/Server}/PostgreSQLHandlerFactory.cpp (81%) rename {programs/server => src/Server}/PostgreSQLHandlerFactory.h (91%) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 04324d34574..d991c096eca 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -935,6 +936,21 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Listening for MySQL compatibility protocol: {}", address.toString()); }); + create_server("postgresql_port", [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socket_bind_listen(socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + servers.emplace_back(std::make_unique( + new PostgreSQLHandlerFactory(*this), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for PostgreSQL compatibility protocol: " + address.toString()); + }); + /// Prometheus (if defined and not setup yet with http_port) create_server("prometheus.port", [&](UInt16 port) { diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index b5fa67d68ea..3acf0b02ce2 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -800,7 +801,7 @@ protected: const Poco::Net::SocketAddress & address) { try { - context.setUser(user_name, password, address, ""); + context.setUser(user_name, password, address); } catch (const Exception &) { @@ -897,7 +898,7 @@ public: { type_to_method[user_auth_type]->authenticate(user_name, context, mt, address); mt.send(Messaging::AuthenticationOk(), true); - LOG_INFO(log, "Authentication for user " << user_name << " was successful."); + LOG_INFO(log, "Authentication for user {} was successful.", user_name); return; } diff --git a/programs/server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp similarity index 100% rename from programs/server/PostgreSQLHandler.cpp rename to src/Server/PostgreSQLHandler.cpp diff --git a/programs/server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h similarity index 98% rename from programs/server/PostgreSQLHandler.h rename to src/Server/PostgreSQLHandler.h index 1062fed5cbb..28b9ef9a350 100644 --- a/programs/server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "IServer.h" #if USE_SSL diff 
--git a/programs/server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp similarity index 81% rename from programs/server/PostgreSQLHandlerFactory.cpp rename to src/Server/PostgreSQLHandlerFactory.cpp index 210d8558bfd..ce433188c04 100644 --- a/programs/server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -1,8 +1,7 @@ #include "PostgreSQLHandlerFactory.h" -#include "IServer.h" #include #include -#include "PostgreSQLHandler.h" +#include namespace DB { @@ -21,7 +20,7 @@ PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_) Poco::Net::TCPServerConnection * PostgreSQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket) { Int32 connection_id = last_connection_id++; - LOG_TRACE(log, "PostgreSQL connection. Id: " << connection_id << ". Address: " << socket.peerAddress().toString()); + LOG_TRACE(log, "PostgreSQL connection. Id: {}. Address: {}", connection_id, socket.peerAddress().toString()); return new PostgreSQLHandler(socket, server, ssl_enabled, connection_id, auth_methods); } diff --git a/programs/server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h similarity index 91% rename from programs/server/PostgreSQLHandlerFactory.h rename to src/Server/PostgreSQLHandlerFactory.h index a95a22c162c..0546b4ef8c2 100644 --- a/programs/server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -1,7 +1,9 @@ #pragma once -#include "IServer.h" #include +#include +#include +#include #include namespace DB diff --git a/src/Server/ya.make b/src/Server/ya.make index 1d689ee73b8..6e01e2599e4 100644 --- a/src/Server/ya.make +++ b/src/Server/ya.make @@ -11,6 +11,8 @@ SRCS( InterserverIOHTTPHandler.cpp MySQLHandler.cpp MySQLHandlerFactory.cpp + PostgreSQLHandler.cpp + PostgreSQLHandlerFactory.cpp NotFoundHandler.cpp PrometheusMetricsWriter.cpp PrometheusRequestHandler.cpp From 8266715c492749e035f4bd764f5c75d3620af73e Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 31 May 2020 08:39:22 +0000 Subject: [PATCH 0289/2229] Fix build & fix style --- src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp | 3 +-- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 3 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 ++++++---- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index d498a36f95b..86d760be54a 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -64,8 +64,7 @@ Block RabbitMQBlockInputStream::readImpl() MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); - auto input_format = FormatFactory::instance().getInputFormat( - storage.getFormatName(), *buffer, non_virtual_header, context, 1); + auto input_format = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, 1); InputPort port(input_format->getPort().getHeader(), input_format.get()); connect(input_format->getPort(), port); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 1f6e9ce1bb1..cebe8ee3c3a 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,7 +12,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void 
RabbitMQHandler::onError(AMQP::TcpConnection * , const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * /* connection */, const char * message) { LOG_ERROR(log, "Library error report: {}", message); stop(); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index d6da5850472..d6372dfe4d3 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -1,6 +1,9 @@ #include #include #include +#include +#include +#include #include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ed486e8e709..5f7570dd8c1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -45,10 +45,8 @@ namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -157,6 +155,7 @@ void StorageRabbitMQ::shutdown() popReadBuffer(); } + connection.close(); task->deactivate(); } @@ -201,8 +200,10 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() next_channel_id += num_queues; update_channel_id = true; + ChannelPtr consumer_channel = std::make_shared(&connection); + return std::make_shared( - std::make_shared(&connection), eventHandler, exchange_name, routing_key, next_channel_id, + consumer_channel, eventHandler, exchange_name, routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -460,7 +461,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - return StorageRabbitMQ::create(args.table_id, args.context, args.columns, host_port, routing_key, exchange, + return StorageRabbitMQ::create( + args.table_id, args.context, args.columns, host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); }; From 037ed3a02ce5bc0ed3c49b9dd24b13359286828b Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 31 May 2020 09:34:57 +0000 Subject: [PATCH 0290/2229] Code fix & style fix & merge fix --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 5 +-- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 8 ++++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +++++++++---------- 3 files changed, 25 insertions(+), 21 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 86d760be54a..1c6eaf6f2e9 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -64,8 +64,9 @@ Block RabbitMQBlockInputStream::readImpl() MutableColumns result_columns = non_virtual_header.cloneEmptyColumns(); MutableColumns virtual_columns = virtual_header.cloneEmptyColumns(); - auto input_format = FormatFactory::instance().getInputFormat(storage.getFormatName(), *buffer, non_virtual_header, context, 1); - + auto input_format = FormatFactory::instance().getInputFormat( + storage.getFormatName(), *buffer, non_virtual_header, context, 1); + InputPort port(input_format->getPort().getHeader(), input_format.get()); connect(input_format->getPort(), port); port.setNeeded(); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index cebe8ee3c3a..09398da73c7 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -12,9 +12,15 @@ 
RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * /* connection */, const char * message) +void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); + if (!connection->ready()) + { + std::cerr << "Connection lost, no recovery is possible"; + throw; + } + stop(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 5f7570dd8c1..b1060a59e00 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -32,17 +32,15 @@ #include #include - -enum - { - RESCHEDULE_WAIT = 500, - Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000 - }; - namespace DB { +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000 +}; + namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -77,8 +75,7 @@ StorageRabbitMQ::StorageRabbitMQ( , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) , eventHandler(evbase, log) - , connection(&eventHandler, - AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) { size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -136,9 +133,10 @@ void StorageRabbitMQ::startup() pushReadBuffer(createReadBuffer()); ++num_created_consumers; } - catch (const AMQP::Exception &) + catch (const AMQP::Exception & e) { - tryLogCurrentException(log); + std::cerr << e.what(); + throw; } } @@ -202,9 +200,8 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - return std::make_shared( - consumer_channel, eventHandler, exchange_name, routing_key, next_channel_id, - log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + return std::make_shared(consumer_channel, eventHandler, exchange_name, + routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } @@ -266,7 +263,7 @@ void StorageRabbitMQ::threadFunc() /// Wait for attached views if (!stream_cancelled) - task->scheduleAfter(RESCHEDULE_WAIT); + task->activateAndSchedule(); } @@ -462,8 +459,8 @@ void registerStorageRabbitMQ(StorageFactory & factory) } return StorageRabbitMQ::create( - args.table_id, args.context, args.columns, host_port, routing_key, exchange, - format, row_delimiter, num_consumers, num_queues, hash_exchange); + args.table_id, args.context, args.columns, + host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); From 165dc4e1094d3a757c51cb7579b1c0a9f4a29faf Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Sun, 31 May 2020 15:04:56 +0400 Subject: [PATCH 0291/2229] Disable system static OpenLDAP linking support (due to fPIC mismatch) --- cmake/Modules/FindOpenLDAP.cmake | 21 +-------------------- cmake/find/ldap.cmake | 13 +++++++++---- src/Access/LDAPClient.cpp | 4 ++-- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/cmake/Modules/FindOpenLDAP.cmake b/cmake/Modules/FindOpenLDAP.cmake index 6420678ad3c..9c6262fa245 100644 --- 
a/cmake/Modules/FindOpenLDAP.cmake +++ b/cmake/Modules/FindOpenLDAP.cmake @@ -8,8 +8,7 @@ # Sets values of: # OPENLDAP_FOUND - TRUE if found # OPENLDAP_INCLUDE_DIRS - paths to the include directories -# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries; libsasl2 (Cyrus SASL) and libgssapi (GSSAPI) libraries -# will be listed here too, if found, if static OpenLDAP libraries are requested +# OPENLDAP_LIBRARIES - paths to the libldap and liblber libraries # OPENLDAP_LDAP_LIBRARY - paths to the libldap library # OPENLDAP_LBER_LIBRARY - paths to the liblber library # @@ -32,34 +31,16 @@ if(OPENLDAP_ROOT_DIR) find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "include" NO_DEFAULT_PATH) find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) find_library(OPENLDAP_LBER_LIBRARY NAMES "lber" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) - if(OPENLDAP_USE_STATIC_LIBS) - find_library(_cyrus_sasl_lib NAMES "sasl2" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) - find_library(_gssapi_lib NAMES "gssapi" PATHS "${OPENLDAP_ROOT_DIR}" PATH_SUFFIXES "lib" NO_DEFAULT_PATH) - endif() else() find_path(OPENLDAP_INCLUDE_DIRS NAMES "ldap.h" "lber.h") find_library(OPENLDAP_LDAP_LIBRARY NAMES "ldap${_r_suffix}") find_library(OPENLDAP_LBER_LIBRARY NAMES "lber") - if(OPENLDAP_USE_STATIC_LIBS) - find_library(_cyrus_sasl_lib NAMES "sasl2") - find_library(_gssapi_lib NAMES "gssapi") - endif() endif() unset(_r_suffix) set(OPENLDAP_LIBRARIES ${OPENLDAP_LDAP_LIBRARY} ${OPENLDAP_LBER_LIBRARY}) -if(_cyrus_sasl_lib) - list(APPEND OPENLDAP_LIBRARIES ${_cyrus_sasl_lib}) - unset(_cyrus_sasl_lib) -endif() - -if(_gssapi_lib) - list(APPEND OPENLDAP_LIBRARIES ${_gssapi_lib}) - unset(_gssapi_lib) -endif() - include(FindPackageHandleStandardArgs) find_package_handle_standard_args( OpenLDAP DEFAULT_MSG diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake index 11594817e4f..99c9007d6b5 100644 --- a/cmake/find/ldap.cmake +++ b/cmake/find/ldap.cmake @@ -16,11 +16,16 @@ if (ENABLE_LDAP) set (OPENLDAP_USE_REENTRANT_LIBS 1) if (NOT USE_INTERNAL_LDAP_LIBRARY) - if (APPLE AND NOT OPENLDAP_ROOT_DIR) - set (OPENLDAP_ROOT_DIR "/usr/local/opt/openldap") - endif () + if (OPENLDAP_USE_STATIC_LIBS) + message (WARNING "Unable to use external static OpenLDAP libraries, falling back to the bundled version.") + set (USE_INTERNAL_LDAP_LIBRARY 1) + else () + if (APPLE AND NOT OPENLDAP_ROOT_DIR) + set (OPENLDAP_ROOT_DIR "/usr/local/opt/openldap") + endif () - find_package (OpenLDAP) + find_package (OpenLDAP) + endif () endif () if (NOT OPENLDAP_FOUND AND NOT MISSING_INTERNAL_LDAP_LIBRARY) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 60bf49ff24e..fc6ee697468 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -225,11 +225,11 @@ bool LDAPSimpleAuthClient::check() if (params.user.empty()) throw Exception("LDAP authentication of a user with an empty name is not allowed", ErrorCodes::BAD_ARGUMENTS); + SCOPE_EXIT({ closeConnection(); }); + const bool graceful_bind_failure = true; const auto rc = openConnection(graceful_bind_failure); - SCOPE_EXIT({ closeConnection(); }); - switch (rc) { case LDAP_SUCCESS: From fd5b1741a6e94522514975ca122d8fb90f6271a3 Mon Sep 17 00:00:00 2001 From: MovElb Date: Sun, 31 May 2020 17:05:41 +0300 Subject: [PATCH 0292/2229] build fix --- src/Server/PostgreSQLHandler.h | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index 28b9ef9a350..31a96f478ec 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -70,7 +70,7 @@ private: void processQuery(); - bool isEmptyQuery(const String & query); + static bool isEmptyQuery(const String & query); }; } From 3543da3ca463fc4deb1002e5b5bf91df13306931 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Sun, 31 May 2020 17:44:49 +0300 Subject: [PATCH 0293/2229] fix stylecheck --- src/AggregateFunctions/AggregateFunctionDistinct.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 57e17ffb13c..32f5df6d8f0 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -106,7 +106,8 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { SipHash hash; - for (size_t i = 0; i < num_arguments; ++i) { + for (size_t i = 0; i < num_arguments; ++i) + { columns[i]->updateHashWithValue(row_num, hash); } From fd3279f9f16e704d78a0fd425216679651bc45ac Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 00:02:08 +0300 Subject: [PATCH 0294/2229] trigger ci --- docs/en/operations/server-configuration-parameters/settings.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 194293d5a19..a3a6d1a0955 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -364,7 +364,6 @@ Keys: - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. 
- **Recommended way to use** ``` xml From 57df571e60b8f1a7cb0a0141a2129bedcbf3fae8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Jun 2020 01:40:41 +0300 Subject: [PATCH 0295/2229] Remove trailing whitespaces from formatted queries in some cases --- src/Parsers/ASTExpressionList.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 4f0b2d4cd6b..1395d8b15fe 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -37,10 +37,8 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For { if (separator) settings.ostr << separator; - settings.ostr << ' '; } - if (children.size() > 1) settings.ostr << indent_str; From 81989bd95a91f0e1d70fb49bcfb4167ecbdd59c1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 10:51:22 +0300 Subject: [PATCH 0296/2229] submodule via https --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index daa5d12a62c..4175eb223db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = git@github.com:getsentry/sentry-native.git + url = https://github.com/getsentry/sentry-native.git From ba112e84cb10891cfdfa561ef6da4fd40693693e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 13:30:11 +0300 Subject: [PATCH 0297/2229] trigger ci From fe170508bd37db85b6eb173fcb5f90d78ac5e55d Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Mon, 1 Jun 2020 15:10:32 +0300 Subject: [PATCH 0298/2229] devirtualize compareAt calls --- src/Columns/ColumnAggregateFunction.h | 5 +++ src/Columns/ColumnArray.cpp | 4 ++ src/Columns/ColumnArray.h | 1 + src/Columns/ColumnConst.h | 5 +++ src/Columns/ColumnDecimal.cpp | 6 +++ src/Columns/ColumnDecimal.h | 1 + src/Columns/ColumnFixedString.h | 5 +++ src/Columns/ColumnFunction.h | 5 +++ src/Columns/ColumnLowCardinality.cpp | 5 +++ src/Columns/ColumnLowCardinality.h | 2 + src/Columns/ColumnNullable.cpp | 5 +++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnString.h | 5 +++ src/Columns/ColumnTuple.cpp | 6 +++ src/Columns/ColumnTuple.h | 1 + src/Columns/ColumnUnique.h | 7 ++++ src/Columns/ColumnVector.h | 5 +++ src/Columns/IColumn.h | 8 +++- src/Columns/IColumnDummy.h | 1 + src/Columns/IColumnImpl.h | 14 +++++++ .../Transforms/PartialSortingTransform.cpp | 41 +++++++++++++++---- 21 files changed, 123 insertions(+), 10 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 40f73665ebe..7d5c62fc49c 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -192,6 +192,11 @@ public: return 0; } + std::vector compareAt(const IColumn &, size_t, const std::vector &, int) const override + { + return std::vector(getData().size(), 0); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 604381f0c16..9f27c4e9f18 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -309,6 +309,10 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir : 1); } +std::vector ColumnArray::compareAt(const IColumn & rhs, size_t rhs_row_num, 
const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); +} namespace { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 55935a91cde..85db887e324 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -72,6 +72,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 5fc96b14be8..3ffefd6cf6c 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -187,6 +187,11 @@ public: return data->compareAt(0, 0, *assert_cast(rhs).data, nan_direction_hint); } + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override + { + return data->compareAt(rhs, rhs_row_num, mask, nan_direction_hint); + } + MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; void gather(ColumnGathererStream &) override diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 3e6fb833b56..c287474c7d5 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -39,6 +39,12 @@ int ColumnDecimal::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c return decimalLess(b, a, other.scale, scale) ? 1 : (decimalLess(a, b, scale, other.scale) ? 
-1 : 0); } +template +std::vector ColumnDecimal::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl(static_cast(rhs), rhs_row_num, mask, nan_direction_hint); +} + template StringRef ColumnDecimal::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 86357dc8be7..006c5564761 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -107,6 +107,7 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_range) const override; diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 996a1f99ef1..01a00f8b2b9 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -116,6 +116,11 @@ public: return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n); } + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override + { + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 31cb8708a6e..9f26a520c79 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -116,6 +116,11 @@ public: throw Exception("compareAt is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + std::vector compareAt(const IColumn &, size_t, const std::vector &, int) const override + { + throw Exception("compareAt(const IColumn &, size_t, const std::vector &, int) is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void getPermutation(bool, size_t, int, Permutation &) const override { throw Exception("getPermutation is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 9e979a507ff..73281923877 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -279,6 +279,11 @@ int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +std::vector ColumnLowCardinality::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); +} + void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { if (limit == 0) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 
905d15f8167..ccf097938e7 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -109,6 +109,8 @@ public: int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index a3c4e77db0d..c753dc638e0 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -248,6 +248,11 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +std::vector ColumnNullable::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); +} + void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const { /// Cannot pass limit because of unknown amount of NULLs. diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 2cd8ff9f40f..2fcc6a98af3 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -77,6 +77,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index a0b3d259b67..3a4a15f1e4d 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -220,6 +220,11 @@ public: return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); } + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override + { + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); + } + /// Variant of compareAt for string comparison with respect of collation. 
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 78117b8e310..75e2c9600f9 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -278,6 +279,11 @@ int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_dire return 0; } +std::vector ColumnTuple::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); +} + template struct ColumnTuple::Less { diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 69b18e2fc0f..59c02a19a79 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -70,6 +70,7 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; void gather(ColumnGathererStream & gatherer_stream) override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 5bbac6baf4d..c161d738e1e 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -77,6 +77,7 @@ public: } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } @@ -375,6 +376,12 @@ int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); } +template +std::vector ColumnUnique::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + return compareImpl>(static_cast &>(rhs), rhs_row_num, mask, nan_direction_hint); +} + template void ColumnUnique::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index a6105034f1a..94c07084065 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -276,6 +276,11 @@ public: return typeid(rhs) == typeid(ColumnVector); } + std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override + { + return this->template compareImpl(static_cast(rhs), rhs_row_num, mask, nan_direction_hint); + } + /** More efficient methods of manipulation - to manipulate with data directly. 
*/ Container & getData() { diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 1d92ed1c3ab..ad5446f713b 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -244,6 +244,10 @@ public: */ virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + + virtual std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const = 0; + + /** Returns a permutation that sorts elements of this column, * i.e. perm[i]-th element of source column should be i-th element of sorted column. * reverse - reverse ordering (acsending). @@ -399,7 +403,6 @@ public: virtual bool lowCardinality() const { return false; } - virtual ~IColumn() = default; IColumn() = default; IColumn(const IColumn &) = default; @@ -414,6 +417,9 @@ protected: /// In derived classes (that use final keyword), implement scatter method as call to scatterImpl. template std::vector scatterImpl(ColumnIndex num_columns, const Selector & selector) const; + + template + std::vector compareImpl(const Derived & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const; }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index b0c479c46c7..38ae303e16f 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -35,6 +35,7 @@ public: size_t byteSize() const override { return 0; } size_t allocatedBytes() const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } + std::vector compareAt(const IColumn &, size_t, const std::vector &, int) const override { return std::vector(s, 0); } Field operator[](size_t) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void get(size_t, Field &) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 9c44c34844c..7eeb362f769 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -46,4 +46,18 @@ std::vector IColumn::scatterImpl(ColumnIndex num_columns, return columns; } +template +std::vector IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +{ + size_t rows_num = size(); + std::vector results(rows_num, 0); + + for (size_t i = 0; i < rows_num; ++i) + { + if (mask[i]) + results[i] = compareAt(i, rhs_row_num, rhs, nan_direction_hint); + } + return results; +} + } diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 23cf80439ef..b806317d1fa 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -44,6 +44,34 @@ bool less(const ColumnRawPtrs & lhs, UInt64 lhs_row_num, return false; } +IColumn::Filter getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_t rhs_row_num, const SortDescription & description, size_t rows_num) +{ + IColumn::Filter filter(rows_num, 1); + std::vector mask(rows_num, 1); + + size_t size = description.size(); + for (size_t i = 0; i < size; ++i) + { + std::vector compare_result = lhs[i]->compareAt(*rhs[i], rhs_row_num, mask, 1); + int direction = description[i].direction; + + for (size_t j = 0; j < rows_num; ++j) + { + if (mask[j]) + { + int res = direction * compare_result[j]; + if (res) + { + filter[j] = (res >= 0); + 
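// Once a more significant sort column has decided how row j compares to the threshold row, the verdict is final: + // clearing mask[j] excludes the row from the remaining, less significant columns' batched compareAt() calls, + // so each later column only compares rows that are still undecided. +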
mask[j] = 0; + } + } + } + } + + return filter; +} + void PartialSortingTransform::transform(Chunk & chunk) { if (read_rows) @@ -60,18 +88,13 @@ void PartialSortingTransform::transform(Chunk & chunk) */ if (!threshold_block_columns.empty()) { - IColumn::Filter filter(rows_num, 1); block_columns = extractColumns(block, description); size_t filtered_count = 0; - for (UInt64 i = 0; i < rows_num; ++i) - { - if (less(threshold_block_columns, limit - 1, block_columns, i, description)) - { - ++filtered_count; - filter[i] = 0; - } - } + IColumn::Filter filter = getFilterMask(block_columns, threshold_block_columns, limit - 1, description, rows_num); + + for (const auto & item : filter) + filtered_count += !item; if (filtered_count) { From 3e0811f297cfdc77c657aa0a8ce04eb387e55ec6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 17:15:14 +0300 Subject: [PATCH 0299/2229] Adapt to recent logging changes --- base/daemon/SentryWriter.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 2fd846b720a..f7edc8d1e93 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -44,6 +44,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); + auto logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { if (debug || (strlen(VERSION_OFFICIAL) > 0)) @@ -89,7 +90,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) anonymize = config.getBool("send_crash_reports.anonymize", false); const std::string& anonymize_status = anonymize ? " (anonymized)" : ""; LOG_INFO( - &Logger::get("SentryWriter"), + logger, "Sending crash reports is initialized with {} endpoint and {} temp folder{}", endpoint, temp_folder_path, @@ -97,12 +98,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) } else { - LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialized with {} status", init_status); + LOG_WARNING(logger, "Sending crash reports failed to initialized with {} status", init_status); } } else { - LOG_INFO(&Logger::get("SentryWriter"), "Sending crash reports is disabled"); + LOG_INFO(logger, "Sending crash reports is disabled"); } #endif } @@ -120,6 +121,7 @@ void SentryWriter::shutdown() void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY + auto logger = &Poco::Logger::get("SentryWriter"); if (initialized) { const std::string & error_message = signalToErrorMessage(sig, info, context); @@ -181,13 +183,13 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c sentry_value_set_by_key(event, "threads", threads); - LOG_INFO(&Logger::get("SentryWriter"), "Sending crash report"); + LOG_INFO(logger, "Sending crash report"); sentry_capture_event(event); shutdown(); } else { - LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); + LOG_INFO(logger, "Not sending crash report"); } #endif } From 9ad1bb8d9398cb83a95b39df8d36a17d340afc16 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 18:26:20 +0300 Subject: [PATCH 0300/2229] trigger ci From acf22bfb19292c5ae56e54dcedd06895577e2914 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 19:42:59 +0300 Subject: [PATCH 0301/2229] fix sanitizers build --- 
base/daemon/BaseDaemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 3aeebd369e5..9da8849342d 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -317,7 +317,7 @@ static void sanitizerDeathCallback() std::stringstream bare_stacktrace; bare_stacktrace << "Stack trace:"; for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - bare_stacktrace << ' ' << stack_trace.getFrames()[i]; + bare_stacktrace << ' ' << stack_trace.getFramePointers()[i]; LOG_FATAL(log, bare_stacktrace.str()); } From 1ce25238f80fc9435c82766f44da896639e97ee1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 19:49:11 +0300 Subject: [PATCH 0302/2229] try fix some more build issues --- cmake/find/sentry.cmake | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index f94b53ffb00..30b8b28f6f1 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,15 +5,17 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -option (USE_SENTRY "Use Sentry" ON) +if (NOT OS_FREEBSD AND NOT UNBUNDLED) + option (USE_SENTRY "Use Sentry" ON) -set (BUILD_SHARED_LIBS OFF) -set (SENTRY_PIC OFF) -set (SENTRY_BACKEND "none") -set (SENTRY_TRANSPORT "curl") -set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) -set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) + set (BUILD_SHARED_LIBS OFF) + set (SENTRY_PIC OFF) + set (SENTRY_BACKEND "none") + set (SENTRY_TRANSPORT "curl") + set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) + set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) -message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") + message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") -include_directories("${SENTRY_INCLUDE_DIR}") \ No newline at end of file + include_directories("${SENTRY_INCLUDE_DIR}") +endif () \ No newline at end of file From 5757dd1d57c68a8f03ddc5b9ba41e85d584f909c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 15:37:23 +0000 Subject: [PATCH 0303/2229] Add insert part --- src/Storages/RabbitMQ/Buffer_fwd.h | 3 + .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 57 ++++++ .../RabbitMQ/RabbitMQBlockOutputStream.h | 29 +++ src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 16 ++ src/Storages/RabbitMQ/StorageRabbitMQ.h | 7 + .../WriteBufferToRabbitMQProducer.cpp | 169 ++++++++++++++++++ .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 67 +++++++ 7 files changed, 348 insertions(+) create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp create mode 100644 src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h create mode 100644 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp create mode 100644 src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h diff --git a/src/Storages/RabbitMQ/Buffer_fwd.h b/src/Storages/RabbitMQ/Buffer_fwd.h index f0ef010c518..5be2c6fdf6a 100644 --- a/src/Storages/RabbitMQ/Buffer_fwd.h +++ b/src/Storages/RabbitMQ/Buffer_fwd.h @@ -8,4 +8,7 @@ namespace DB class ReadBufferFromRabbitMQConsumer; using ConsumerBufferPtr = std::shared_ptr; +class WriteBufferToRabbitMQProducer; +using ProducerBufferPtr = std::shared_ptr; + } diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp new file mode 100644 index 00000000000..3f940891c23 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -0,0 
+1,57 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_CREATE_IO_BUFFER; +} + + +RabbitMQBlockOutputStream::RabbitMQBlockOutputStream( + StorageRabbitMQ & storage_, const Context & context_) : storage(storage_), context(context_) +{ +} + + +Block RabbitMQBlockOutputStream::getHeader() const +{ + return storage.getSampleBlockNonMaterialized(); +} + + +void RabbitMQBlockOutputStream::writePrefix() +{ + buffer = storage.createWriteBuffer(); + if (!buffer) + throw Exception("Failed to create RabbitMQ producer!", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + + child = FormatFactory::instance().getOutput( + storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) + { + buffer->count_row(); + }); +} + + +void RabbitMQBlockOutputStream::write(const Block & block) +{ + child->write(block); + + if (buffer) + buffer->flush(); +} + + +void RabbitMQBlockOutputStream::writeSuffix() +{ + child->writeSuffix(); +} + +} diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h new file mode 100644 index 00000000000..2f7b89a2a30 --- /dev/null +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +class RabbitMQBlockOutputStream : public IBlockOutputStream +{ + +public: + explicit RabbitMQBlockOutputStream(StorageRabbitMQ & storage_, const Context & context_); + + Block getHeader() const override; + + void writePrefix() override; + void write(const Block & block) override; + void writeSuffix() override; + +private: + StorageRabbitMQ & storage; + Context context; + ProducerBufferPtr buffer; + BlockOutputStreamPtr child; +}; +} diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index fb705e4d1bc..ee5dede5261 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -124,6 +126,12 @@ Pipes StorageRabbitMQ::read( } + +BlockOutputStreamPtr StorageRabbitMQ::write(const ASTPtr &, const Context & context) +{ + return std::make_shared(*this, context); +} + + void StorageRabbitMQ::startup() { for (size_t i = 0; i < num_consumers; ++i) @@ -205,6 +213,14 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() } + +ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() +{ + return std::make_shared(parsed_address, routing_key, exchange_name, + log, num_consumers, bind_by_id, hash_exchange, + row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } + + bool StorageRabbitMQ::checkDependencies(const StorageID & table_id) { // Check if all dependencies are attached diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index fc098b168f1..5aa77a9a732 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -25,6 +25,7 @@ public: std::string getName() const override { return "RabbitMQ"; } bool supportsSettings() const override { return true; } + bool noPushingToViews() const override { return true; } void startup() override; void shutdown() override; @@ -37,10 +38,16 @@ public: size_t max_block_size, unsigned num_streams) override; + BlockOutputStreamPtr write( + const ASTPtr & query, + const Context & context) override; + void pushReadBuffer(ConsumerBufferPtr buf); ConsumerBufferPtr popReadBuffer(); ConsumerBufferPtr popReadBuffer(std::chrono::milliseconds timeout); + ProducerBufferPtr createWriteBuffer(); + const String & getExchangeName() const { return exchange_name; } const String & getRoutingKey() const { return routing_key; } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp new file mode 100644 index 00000000000..529cc5bd93b --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -0,0 +1,169 @@ +#include +#include "Core/Block.h" +#include "Columns/ColumnString.h" +#include "Columns/ColumnsNumber.h" +#include +#include +#include +#include + + +namespace DB +{ + +enum +{ + Connection_setup_sleep = 200, + Connection_setup_retries_max = 1000, + Buffer_limit_to_flush = 50000 +}; + +WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + const size_t num_queues_, + const bool bind_by_id_, + const bool hash_exchange_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_) + : WriteBuffer(nullptr, 0) + , routing_key(routing_key_) + , exchange_name(exchange_) + , log(log_) + , num_queues(num_queues_) + , bind_by_id(bind_by_id_) + , hash_exchange(hash_exchange_) + , delim(delimiter) + , max_rows(rows_per_message) + , chunk_size(chunk_size_) + , producerEvbase(event_base_new()) + , eventHandler(producerEvbase, log) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) +{ + /* The reason behind making a separate connection for each concurrent producer is explained here: + * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 + * - publishing from different threads (as outputStreams are asynchronous) leads to internal library errors. 
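+ * As a consequence, each producer buffer owns its own event_base, TCP connection and channel; + * the trade-off is one extra TCP connection per concurrent INSERT stream.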
+ */ + size_t cnt_retries = 0; + while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + { + event_base_loop(producerEvbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); + std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); + } + + if (!connection.ready()) + { + LOG_ERROR(log, "Cannot set up connection for producer!"); + } + + producer_channel = std::make_shared(&connection); +} + + +WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() +{ + flush(); + connection.close(); + + assert(rows == 0 && chunks.empty()); +} + + +void WriteBufferToRabbitMQProducer::count_row() +{ + if (++rows % max_rows == 0) + { + const std::string & last_chunk = chunks.back(); + size_t last_chunk_size = offset(); + + if (delim && last_chunk[last_chunk_size - 1] == delim) + --last_chunk_size; + + std::string payload; + payload.reserve((chunks.size() - 1) * chunk_size + last_chunk_size); + + for (auto i = chunks.begin(), e = --chunks.end(); i != e; ++i) + payload.append(*i); + + payload.append(last_chunk, 0, last_chunk_size); + + rows = 0; + chunks.clear(); + set(nullptr, 0); + + messages.emplace_back(payload); + ++message_counter; + + if (messages.size() >= Buffer_limit_to_flush) + { + flush(); + } + } +} + + +void WriteBufferToRabbitMQProducer::flush() +{ + /* Why accumulating payloads and not publishing each of them at once in count_row()? Because publishing needs to + * be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it each time + * we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to + * exchange becoming inactive at some point and part of messages is lost as a result. + */ + std::atomic exchange_declared = false, exchange_error = false; + + producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) + .onSuccess([&]() + { + for (auto & payload : messages) + { + if (!message_counter) + return; + + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + + --message_counter; + } + + exchange_declared = true; + messages.clear(); + }) + .onError([&](const char * message) + { + exchange_error = true; + exchange_declared = false; + LOG_ERROR(log, "Exchange was not declared: {}", message); + }); + + while (!exchange_declared && !exchange_error) + { + startEventLoop(exchange_declared); + } +} + + +void WriteBufferToRabbitMQProducer::nextImpl() +{ + chunks.push_back(std::string()); + chunks.back().resize(chunk_size); + set(chunks.back().data(), chunk_size); +} + + +void WriteBufferToRabbitMQProducer::startEventLoop(std::atomic & check_param) +{ + eventHandler.start(check_param); +} + +} diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h new file mode 100644 index 00000000000..d7a1715d491 --- /dev/null +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -0,0 +1,67 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using ProducerPtr = std::shared_ptr; +using Messages = std::vector; + +class WriteBufferToRabbitMQProducer : public WriteBuffer +{ +public: + WriteBufferToRabbitMQProducer( + std::pair & parsed_address, + const String & routing_key_, + const String & exchange_, + Poco::Logger * log_, + const size_t num_queues_, + const bool 
bind_by_id_, + const bool hash_exchange_, + std::optional delimiter, + size_t rows_per_message, + size_t chunk_size_ + ); + + ~WriteBufferToRabbitMQProducer() override; + + void count_row(); + void flush(); + +private: + void nextImpl() override; + void checkExchange(); + void startEventLoop(std::atomic & check_param); + + const String routing_key; + const String exchange_name; + const bool bind_by_id; + const bool hash_exchange; + const size_t num_queues; + + event_base * producerEvbase; + RabbitMQHandler eventHandler; + AMQP::TcpConnection connection; + ProducerPtr producer_channel; + + size_t next_queue = 0; + UInt64 message_counter = 0; + String channel_id; + + Messages messages; + + Poco::Logger * log; + const std::optional delim; + const size_t max_rows; + const size_t chunk_size; + size_t count_mes = 0; + size_t rows = 0; + std::list chunks; +}; + +} From 5939422b85c44038f2345b99810d95ed5bb090a3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 16:19:59 +0000 Subject: [PATCH 0304/2229] Add tests for insert part --- .../integration/test_storage_rabbitmq/test.py | 234 ++++++++++++++++++ 1 file changed, 234 insertions(+) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 821c5a19e68..0533dd7e2f4 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -838,6 +838,240 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(180) +def test_rabbitmq_insert(rabbitmq_cluster): + instance.query(''' + CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'insert1', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + ''') + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + consumer_connection = pika.BlockingConnection(parameters) + + consumer = consumer_connection.channel() + consumer.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + result = consumer.queue_declare(queue='') + queue_name = result.method.queue + consumer.queue_bind(exchange='clickhouse-exchange', queue=queue_name, routing_key='insert1') + + values = [] + for i in range(50): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + insert_messages = [] + def onReceived(channel, method, properties, body): + i = 0 + insert_messages.append(body.decode()) + if (len(insert_messages) == 50): + channel.stop_consuming() + + consumer.basic_qos(prefetch_count=50) + consumer.basic_consume(onReceived, queue_name) + consumer.start_consuming() + consumer_connection.close() + + result = '\n'.join(insert_messages) + rabbitmq_check_result(result, True) + + +@pytest.mark.timeout(240) +def test_rabbitmq_many_inserts(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.view_many; + DROP TABLE IF EXISTS test.consumer_many; + CREATE TABLE test.rabbitmq_many (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_routing_key = 'insert2', + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_many (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_many TO test.view_many AS + SELECT * FROM test.rabbitmq_many; + ''') + + messages_num = 1000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_many VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_many') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_many; + DROP TABLE test.view_many; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(240) +def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + CREATE TABLE test.rabbitmq_sharding (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_sharding (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_sharding TO test.view_sharding AS + SELECT * FROM test.rabbitmq_sharding; + ''') + + messages_num = 10000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_sharding VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' 
in str(e): + continue + else: + raise + + threads = [] + threads_num = 20 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_sharding') + time.sleep(1) + print result + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_sharding; + DROP TABLE test.view_sharding; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_overloaded_insert(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.view_overload; + DROP TABLE IF EXISTS test.consumer_overload; + CREATE TABLE test.rabbitmq_overload (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_format = 'TSV', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_overload (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.consumer_overload TO test.view_overload AS + SELECT * FROM test.rabbitmq_overload; + ''') + + messages_num = 100000 + def insert(): + values = [] + for i in range(messages_num): + values.append("({i}, {i})".format(i=i)) + values = ','.join(values) + + while True: + try: + instance.query("INSERT INTO test.rabbitmq_overload VALUES {}".format(values)) + break + except QueryRuntimeException as e: + if 'Local: Timed out.' in str(e): + continue + else: + raise + + threads = [] + threads_num = 5 + for _ in range(threads_num): + threads.append(threading.Thread(target=insert)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_overload') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + instance.query(''' + DROP TABLE test.consumer_overload; + DROP TABLE test.view_overload; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From 386dc4d95ee269c289fa338bedc4f9cb1d0b9149 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 16:56:16 +0000 Subject: [PATCH 0305/2229] Fixes --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 1 - src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp | 8 ++------ src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 09398da73c7..547851f349a 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -44,7 +44,6 @@ void RabbitMQHandler::start(std::atomic & check_param) void RabbitMQHandler::stop() { - std::lock_guard lock(mutex); event_base_loopbreak(evbase); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index d6372dfe4d3..27c5ab800f0 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -207,9 +207,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if 
(row_delimiter != '\0') message_received += row_delimiter; - /* Needed because this vector can be used at the same time by another thread in nextImpl() (below). - * So we lock mutex here and there so that they do not use it asynchronosly. - */ + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). std::lock_guard lock(mutex); received.push_back(message_received); } @@ -255,9 +253,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() messages.clear(); - /* Needed because this vector can be used at the same time by another thread in onReceived callback (above). - * So we lock mutex here and there so that they do not use it asynchronosly. - */ + /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback (above). std::lock_guard lock(mutex); messages.swap(received); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 529cc5bd93b..12d6c2b0fb8 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -15,7 +15,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 50000 + Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( From aade00130c2a1e76b83ae595be8c1ae9c0c0e39b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 1 Jun 2020 20:35:30 +0300 Subject: [PATCH 0306/2229] add raii wrappers --- .../CassandraBlockInputStream.cpp | 88 ++++--------------- src/Dictionaries/CassandraBlockInputStream.h | 16 ++-- .../CassandraDictionarySource.cpp | 27 ++---- src/Dictionaries/CassandraDictionarySource.h | 24 +---- src/Dictionaries/CassandraHelpers.cpp | 38 ++++++++ src/Dictionaries/CassandraHelpers.h | 72 +++++++++++++++ src/Dictionaries/ya.make | 1 + 7 files changed, 140 insertions(+), 126 deletions(-) create mode 100644 src/Dictionaries/CassandraHelpers.cpp create mode 100644 src/Dictionaries/CassandraHelpers.h diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 6d8a45508ce..e00fd5ec3e9 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -23,26 +23,17 @@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession * session_, - const std::string &query_str_, - const DB::Block &sample_block, + const CassClusterPtr & cluster, + const String & query_str, + const Block & sample_block, const size_t max_block_size_) - : session(session_) - , statement(cass_statement_new(query_str_.c_str(), 0)) - , query_str(query_str_) + : statement(query_str.c_str(), /*parameters count*/ 0) , max_block_size(max_block_size_) + , has_more_pages(cass_true) { - cass_statement_set_paging_size(statement, max_block_size); - this->has_more_pages = cass_true; - description.init(sample_block); -} - -CassandraBlockInputStream::~CassandraBlockInputStream() { - if (iterator != nullptr) - cass_iterator_free(iterator); - if (result) - cass_result_free(result); + cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); + cassandraWaitAndCheck(cass_session_connect(session, cluster)); } namespace @@ -51,7 +42,7 @@ namespace void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { - /// Cassandra 
does not support unsigned integers + /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) switch (type) { case ValueType::vtUInt8: @@ -159,19 +150,17 @@ namespace } } - // void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } - Block CassandraBlockInputStream::readImpl() { if (!has_more_pages) return {}; MutableColumns columns = description.sample_block.cloneEmptyColumns(); - CassFuture* query_future = cass_session_execute(session, statement); + CassFuturePtr query_future = cass_session_execute(session, statement); - result = cass_future_get_result(query_future); + CassResultPtr result = cass_future_get_result(query_future); - if (result == nullptr) { + if (!result) { const char* error_message; size_t error_message_length; cass_future_error_message(query_future, &error_message, &error_message_length); @@ -181,7 +170,7 @@ namespace [[maybe_unused]] size_t row_count = 0; assert(cass_result_column_count(result) == columns.size()); - CassIterator * rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] while (cass_iterator_next(rows_iter)) { const CassRow * row = cass_iterator_get_row(rows_iter); @@ -192,7 +181,7 @@ namespace columns[col_idx]->insertDefault(); else if (description.types[col_idx].second) { - ColumnNullable & column_nullable = static_cast(*columns[col_idx]); + ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); column_nullable.getNullMapData().emplace_back(0); } @@ -202,61 +191,14 @@ namespace ++row_count; } assert(cass_result_row_count(result) == row_count); - cass_iterator_free(rows_iter); - - //const CassRow* row = cass_result_first_row(result); - //const CassValue* map = cass_row_get_column(row, 0); - //const CassValue* map = cass_row_get_column(row, 0); - //iterator = cass_iterator_from_map(map); - //while (cass_iterator_next(iterator)) { - // const CassValue* cass_key = cass_iterator_get_map_key(iterator); - // const CassValue* cass_value = cass_iterator_get_map_value(iterator); - // auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; - // for (const auto &[value, idx]: pair_values) { - // if (description.types[idx].second) { - // ColumnNullable & column_nullable = static_cast(*columns[idx]); - // insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); - // column_nullable.getNullMapData().emplace_back(0); - // } else { - // insertValue(*columns[idx], description.types[idx].first, value); - // } - // } - //} has_more_pages = cass_result_has_more_pages(result); - if (has_more_pages) { - cass_statement_set_paging_state(statement, result); - } - - cass_result_free(result); + if (has_more_pages) + cassandraCheck(cass_statement_set_paging_state(statement, result)); return description.sample_block.cloneWithColumns(std::move(columns)); } - -void cassandraCheck(CassError code) -{ - if (code != CASS_OK) - throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code), - ErrorCodes::CASSANDRA_INTERNAL_ERROR); -} - -void cassandraWaitAndCheck(CassFuture * future) -{ - auto code = cass_future_error_code(future); /// Waits if not ready - if (code == CASS_OK) - { - cass_future_free(future); - return; - } - const char * message; - size_t message_len; - cass_future_error_message(future, &message, & message_len); 
- String full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message; - cass_future_free(future); /// Frees message - throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); -} - } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 22e4429343d..700211ebb3e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -9,8 +9,6 @@ namespace DB { -void cassandraCheck(CassError code); -void cassandraWaitAndCheck(CassFuture * future); /// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining @@ -18,11 +16,10 @@ void cassandraWaitAndCheck(CassFuture * future); { public: CassandraBlockInputStream( - CassSession * session, - const std::string & query_str, + const CassClusterPtr & cluster, + const String & query_str, const Block & sample_block, const size_t max_block_size); - ~CassandraBlockInputStream() override; String getName() const override { return "Cassandra"; } @@ -31,14 +28,11 @@ void cassandraWaitAndCheck(CassFuture * future); private: Block readImpl() override; - CassSession * session; - CassStatement * statement; - String query_str; + CassSessionPtr session; + CassStatementPtr statement; const size_t max_block_size; ExternalResultDescription description; - const CassResult * result = nullptr; cass_bool_t has_more_pages; - CassIterator * iterator = nullptr; }; } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 2aee5d44b6b..a58dd383b46 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -34,7 +34,6 @@ namespace DB #if USE_CASSANDRA -#include #include #include "CassandraBlockInputStream.h" @@ -54,28 +53,23 @@ CassandraDictionarySource::CassandraDictionarySource( UInt16 port_, const String & user_, const String & password_, - //const std::string & method_, const String & db_, const String & table_, const DB::Block & sample_block_) - : log(&Logger::get("CassandraDictionarySource")) + : log(&Poco::Logger::get("CassandraDictionarySource")) , dict_struct(dict_struct_) , host(host_) , port(port_) , user(user_) , password(password_) - //, method(method_) , db(db_) , table(table_) , sample_block(sample_block_) - , cluster(cass_cluster_new()) //FIXME will not be freed in case of exception - , session(cass_session_new()) { cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); if (port) cassandraCheck(cass_cluster_set_port(cluster, port)); cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); - cassandraWaitAndCheck(cass_session_connect_keyspace(session, cluster, db.c_str())); } CassandraDictionarySource::CassandraDictionarySource( @@ -89,7 +83,6 @@ CassandraDictionarySource::CassandraDictionarySource( config.getUInt(config_prefix + ".port", 0), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), - //config.getString(config_prefix + ".method", ""), config.getString(config_prefix + ".keyspace", ""), config.getString(config_prefix + ".column_family"), sample_block_) @@ -102,22 +95,12 @@ CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySo other.port, other.user, other.password, - //other.method, other.db, other.table, other.sample_block} { } 
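// With the RAII holders introduced by this patch (see CassandraHelpers.h below), the hand-written destructor removed here becomes unnecessary: // CassClusterPtr and CassSessionPtr call cass_cluster_free()/cass_session_free() automatically on destruction, even on exception paths, // and a CassFuturePtr likewise frees its CassFuture once it goes out of scope.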
-CassandraDictionarySource::~CassandraDictionarySource() { - cass_session_free(session); - cass_cluster_free(cluster); -} - -//std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { -// return host + (port != 0 ? ":" + std::to_string(port) : ""); -//} - BlockInputStreamPtr CassandraDictionarySource::loadAll() { ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; @@ -125,11 +108,11 @@ BlockInputStreamPtr CassandraDictionarySource::loadAll() query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading all using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { - return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? " " : " " + user + '@') + */ host + ':' + DB::toString(port); + return "Cassandra: " + db + '.' + table; } BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) @@ -139,7 +122,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading ids using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -150,7 +133,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading keys using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 400481d0a95..564fa75c3a2 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) -#include -#endif +#include #if USE_CASSANDRA @@ -10,10 +8,10 @@ #include "IDictionarySource.h" #include #include -#include namespace DB { + class CassandraDictionarySource final : public IDictionarySource { CassandraDictionarySource( const DictionaryStructure & dict_struct, @@ -21,7 +19,6 @@ class CassandraDictionarySource final : public IDictionarySource { UInt16 port, const String & user, const String & password, - //const std::string & method, const String & db, const String & table, const Block & sample_block); @@ -35,29 +32,20 @@ public: CassandraDictionarySource(const CassandraDictionarySource & other); - ~CassandraDictionarySource() override; - BlockInputStreamPtr loadAll() override; bool supportsSelectiveLoad() const override { return true; } bool isModified() const override { return true; } - ///Not yet supported bool hasUpdateField() const override { return false; } DictionarySourcePtr clone() const override { return std::make_unique(*this); } BlockInputStreamPtr loadIds(const std::vector & ids) override; - //{ - // throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - //} BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; - //{ - // throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; 
-    //}
-
+
     BlockInputStreamPtr loadUpdatedAll() override
     {
         throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED};
@@ -66,21 +54,17 @@ public:
     std::string toString() const override;

 private:
-    //static std::string toConnectionString(const std::string & host, const UInt16 port);
-
     Poco::Logger * log;
     const DictionaryStructure dict_struct;
     const String host;
     const UInt16 port;
     const String user;
     const String password;
-    //const std::string method;
     const String db;
     const String table;
     Block sample_block;

-    CassCluster * cluster;
-    CassSession * session;
+    CassClusterPtr cluster;
 };

 }
diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp
new file mode 100644
index 00000000000..9c181abcf43
--- /dev/null
+++ b/src/Dictionaries/CassandraHelpers.cpp
@@ -0,0 +1,38 @@
+#include
+
+#if USE_CASSANDRA
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int CASSANDRA_INTERNAL_ERROR;
+}
+
+void cassandraCheck(CassError code)
+{
+    if (code != CASS_OK)
+        throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code),
+            ErrorCodes::CASSANDRA_INTERNAL_ERROR);
+}
+
+
+void cassandraWaitAndCheck(CassFuturePtr && future)
+{
+    auto code = cass_future_error_code(future); /// Waits if not ready
+    if (code == CASS_OK)
+        return;
+
+    /// `future` owns `message` and will free it on destruction
+    const char * message;
+    size_t message_len;
+    cass_future_error_message(future, &message, &message_len);
+    std::string full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message;
+    throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR);
+}
+
+}
+
+#endif
diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h
new file mode 100644
index 00000000000..48573c1080f
--- /dev/null
+++ b/src/Dictionaries/CassandraHelpers.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#if !defined(ARCADIA_BUILD)
+#include
+#endif
+
+#if USE_CASSANDRA
+#include
+#include
+
+namespace DB
+{
+
+namespace Cassandra
+{
+
+template <typename CassT>
+CassT * defaultCtor() { return nullptr; }
+
+/// RAII wrapper for raw pointers to objects from cassandra driver library
+template<typename CassT, auto Dtor, auto Ctor = defaultCtor<CassT>>
+class ObjectHolder
+{
+    CassT * ptr = nullptr;
+public:
+    template <typename... Args>
+    ObjectHolder(Args &&... args) : ptr(Ctor(std::forward<Args>(args)...)) {}
+    ObjectHolder(CassT * ptr_) : ptr(ptr_) {}
+
+    ObjectHolder(const ObjectHolder &) = delete;
+    ObjectHolder & operator = (const ObjectHolder &) = delete;
+
+    ObjectHolder(ObjectHolder && rhs) noexcept : ptr(rhs.ptr) { rhs.ptr = nullptr; }
+    ObjectHolder & operator = (ObjectHolder && rhs) noexcept
+    {
+        if (ptr)
+            Dtor(ptr);
+        ptr = rhs.ptr;
+        rhs.ptr = nullptr;
+        return *this;
+    }
+
+    ~ObjectHolder()
+    {
+        if (ptr)
+            Dtor(ptr);
+    }
+
+    /// For implicit conversion when passing object to driver library functions
+    operator CassT * () { return ptr; }
+    operator const CassT * () const { return ptr; }
+};
+
+}
+
+/// These objects are created on pointer construction
+using CassClusterPtr = Cassandra::ObjectHolder<CassCluster, cass_cluster_free, cass_cluster_new>;
+using CassSessionPtr = Cassandra::ObjectHolder<CassSession, cass_session_free, cass_session_new>;
+using CassStatementPtr = Cassandra::ObjectHolder<CassStatement, cass_statement_free, cass_statement_new>;
+
+/// The following objects are created inside Cassandra driver library,
+/// but must be freed by user code
+using CassFuturePtr = Cassandra::ObjectHolder<CassFuture, cass_future_free>;
+using CassResultPtr = Cassandra::ObjectHolder<const CassResult, cass_result_free>;
+using CassIteratorPtr = Cassandra::ObjectHolder<CassIterator, cass_iterator_free>;
+
+/// Checks return code, throws exception on error
+void cassandraCheck(CassError code);
+void cassandraWaitAndCheck(CassFuturePtr && future);
+
+}
+
+#endif
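A minimal usage sketch of these holders, assuming the DataStax cpp-driver C API (the contact point and query string are illustrative, not taken from the patch):

    #include <Dictionaries/CassandraHelpers.h>

    void connectAndQuery()
    {
        DB::CassClusterPtr cluster;   /// default construction invokes the Ctor argument, cass_cluster_new()
        /// The holder converts implicitly to CassCluster * when passed to driver functions
        DB::cassandraCheck(cass_cluster_set_contact_points(cluster, "localhost"));

        DB::CassSessionPtr session;   /// invokes cass_session_new()
        /// The returned CassFuture * converts implicitly to a temporary CassFuturePtr,
        /// which frees the future at the end of the full expression
        DB::cassandraWaitAndCheck(cass_session_connect(session, cluster));

        /// Constructor arguments are forwarded to cass_statement_new(query, parameter_count)
        DB::CassStatementPtr statement("SELECT release_version FROM system.local", 0);
        DB::cassandraWaitAndCheck(cass_session_execute(session, statement));
    }   /// statement, session and cluster are freed here by ~ObjectHolder, via their Dtor
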
diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make
index 5e1af27a165..3de623a9a8b 100644
--- a/src/Dictionaries/ya.make
+++ b/src/Dictionaries/ya.make
@@ -19,6 +19,7 @@ SRCS(
     CacheDictionary_generate3.cpp
     CassandraBlockInputStream.cpp
     CassandraDictionarySource.cpp
+    CassandraHelpers.cpp
     ClickHouseDictionarySource.cpp
     ComplexKeyCacheDictionary.cpp
     ComplexKeyCacheDictionary_createAttributeWithType.cpp

From e1970f6d28540e7baa6a5694a0d866ace3887d1b Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Mon, 1 Jun 2020 20:52:09 +0300
Subject: [PATCH 0307/2229] in-memory parts: fix reading of nested

---
 src/Storages/MergeTree/IMergeTreeReader.cpp   | 18 +++++-
 src/Storages/MergeTree/IMergeTreeReader.h     |  5 +-
 .../MergeTree/MergeTreeReaderCompact.cpp      | 20 +------
 .../MergeTree/MergeTreeReaderCompact.h        |  3 -
 .../MergeTree/MergeTreeReaderInMemory.cpp     | 56 ++++++++++++++-----
 .../MergeTree/MergeTreeReaderInMemory.h       |  2 +
 .../01130_in_memory_parts_nested.reference    | 15 +++++
 .../01130_in_memory_parts_nested.sql          | 16 ++++++
 8 files changed, 96 insertions(+), 39 deletions(-)
 create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_nested.reference
 create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_nested.sql

diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp
index a2984421c2a..624de2886a8 100644
--- a/src/Storages/MergeTree/IMergeTreeReader.cpp
+++ b/src/Storages/MergeTree/IMergeTreeReader.cpp
@@ -248,7 +248,23 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns)
     }
 }

-void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read)
+IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const String & column_name) const
+{
+    String table_name = Nested::extractTableName(column_name);
+    for (const auto & part_column : data_part->getColumns())
+    {
+        if (typeid_cast<const DataTypeArray *>(part_column.type.get()))
+        {
+            auto position = data_part->getColumnPosition(part_column.name);
+            if (position && Nested::extractTableName(part_column.name) == table_name)
+                return position;
+        }
+    }
+
+    return {};
+}
+
+void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const
 {
     if (num_columns_to_read != columns.size())
         throw Exception("invalid number of
columns passed to MergeTreeReader::readRows. " diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 79f7860d1cc..90a43a61536 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -61,7 +61,7 @@ protected: /// Returns actual column type in part, which can differ from table metadata. NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const; - void checkNumberOfColumns(size_t columns_num_to_read); + void checkNumberOfColumns(size_t columns_num_to_read) const; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. ValueSizeMap avg_value_size_hints; @@ -79,6 +79,9 @@ protected: const MergeTreeData & storage; MarkRanges all_mark_ranges; + using ColumnPosition = std::optional; + ColumnPosition findColumnForOffsets(const String & column_name) const; + friend class MergeTreeRangeReader::DelayedStream; private: diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a63397b9b9c..5b84069cc2c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -84,12 +84,11 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( if (!position && typeid_cast(type.get())) { /// If array of Nested column is missing in part, - /// we have to read it's offsets if they exists. + /// we have to read its offsets if they exist. position = findColumnForOffsets(name); read_only_offsets[i] = (position != std::nullopt); } - column_positions[i] = std::move(position); } @@ -168,23 +167,6 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, return read_rows; } -MergeTreeReaderCompact::ColumnPosition MergeTreeReaderCompact::findColumnForOffsets(const String & column_name) -{ - String table_name = Nested::extractTableName(column_name); - for (const auto & part_column : data_part->getColumns()) - { - if (typeid_cast(part_column.type.get())) - { - auto position = data_part->getColumnPosition(part_column.name); - if (position && Nested::extractTableName(part_column.name) == table_name) - return position; - } - } - - return {}; -} - - void MergeTreeReaderCompact::readData( const String & name, IColumn & column, const IDataType & type, size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets) diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index 827306cd983..75d1da342fb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -40,7 +40,6 @@ private: MergeTreeMarksLoader marks_loader; - using ColumnPosition = std::optional; /// Positions of columns in part structure. 
std::vector<ColumnPosition> column_positions;
     /// Should we read full column or only its offsets
@@ -53,8 +52,6 @@ private:
     void readData(const String & name, IColumn & column, const IDataType & type,
         size_t from_mark, size_t column_position, size_t rows_to_read, bool only_offsets = false);
-
-    ColumnPosition findColumnForOffsets(const String & column_name);
 };

 }
diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp
index 5e4c3e49e3b..ce6eb44a50b 100644
--- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp
+++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include
 #include

 namespace DB
@@ -24,10 +25,20 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
         std::move(settings_), {})
     , part_in_memory(std::move(data_part_))
 {
+    for (const auto & name_and_type : columns)
+    {
+        auto [name, type] = getColumnFromPart(name_and_type);
+        if (!part_in_memory->block.has(name) && typeid_cast<const DataTypeArray *>(type.get()))
+            if (auto offset_position = findColumnForOffsets(name))
+                positions_for_offsets[name] = *offset_position;
+    }
 }

-size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool /* continue_reading */, size_t max_rows_to_read, Columns & res_columns)
+size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
 {
+    if (!continue_reading)
+        total_rows_read = 0;
+
     size_t total_marks = data_part->index_granularity.getMarksCount();
     if (from_mark >= total_marks)
         throw Exception("Mark " + toString(from_mark) + " is out of bound. Max mark: "
@@ -41,34 +52,49 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool /* continue_read
         throw Exception("Cannot read data in MergeTreeReaderInMemory. Rows already read: "
             + toString(total_rows_read) + ". Rows in part: " + toString(part_rows), ErrorCodes::CANNOT_READ_ALL_DATA);

+    size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read);
     auto column_it = columns.begin();
-    size_t rows_read = 0;
     for (size_t i = 0; i < num_columns; ++i, ++column_it)
     {
         auto [name, type] = getColumnFromPart(*column_it);
-        if (!part_in_memory->block.has(name))
-            continue;
-
-        const auto & block_column = part_in_memory->block.getByName(name).column;
-        if (total_rows_read == 0 && part_rows <= max_rows_to_read)
-        {
-            res_columns[i] = block_column;
-            rows_read = part_rows;
-        }
-        else
+
+        auto offsets_it = positions_for_offsets.find(name);
+        if (offsets_it != positions_for_offsets.end())
         {
+            const auto & source_offsets = assert_cast<const ColumnArray &>(
+                *part_in_memory->block.getByPosition(offsets_it->second).column).getOffsets();
+
             if (res_columns[i] == nullptr)
                 res_columns[i] = type->createColumn();

             auto mutable_column = res_columns[i]->assumeMutable();
-            rows_read = std::min(max_rows_to_read, part_rows - total_rows_read);
-            mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_read);
+            auto & res_offsets = assert_cast<ColumnArray &>(*mutable_column).getOffsets();
+            for (size_t row = 0; row < rows_to_read; ++row)
+                res_offsets.push_back(source_offsets[total_rows_read + row]);
+
             res_columns[i] = std::move(mutable_column);
         }
+        else if (part_in_memory->block.has(name))
+        {
+            const auto & block_column = part_in_memory->block.getByName(name).column;
+            if (rows_to_read == part_rows)
+            {
+                res_columns[i] = block_column;
+            }
+            else
+            {
+                if (res_columns[i] == nullptr)
+                    res_columns[i] = type->createColumn();
+
+                auto mutable_column = res_columns[i]->assumeMutable();
+                mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_to_read);
+                res_columns[i] = std::move(mutable_column);
+            }
+        }
     }

-    total_rows_read += rows_read;
-    return rows_read;
+    total_rows_read += rows_to_read;
+    return rows_to_read;
 }

 }
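The essence of the hunk above, in isolation: when a Nested subcolumn is missing from the in-memory block (for example, it was added by ALTER after the part was written), its array offsets are copied from a sibling column of the same Nested table, so the reader still produces a correctly shaped array column. A self-contained sketch of that offsets copy, using plain STL stand-ins rather than the ClickHouse column API:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using Offsets = std::vector<uint64_t>;  /// cumulative end positions, one entry per row

    /// Append the offsets for rows [total_rows_read, total_rows_read + rows_to_read)
    /// of an existing sibling array to the result offsets, mirroring the loop above.
    void copyOffsets(const Offsets & source_offsets, Offsets & res_offsets,
                     size_t total_rows_read, size_t rows_to_read)
    {
        for (size_t row = 0; row < rows_to_read; ++row)
            res_offsets.push_back(source_offsets[total_rows_read + row]);
    }
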
diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.h b/src/Storages/MergeTree/MergeTreeReaderInMemory.h
index 6d64801682e..196fc53725a 100644
--- a/src/Storages/MergeTree/MergeTreeReaderInMemory.h
+++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.h
@@ -29,6 +29,8 @@ public:
 private:
     size_t total_rows_read = 0;
     DataPartInMemoryPtr part_in_memory;
+
+    std::unordered_map<String, size_t> positions_for_offsets;
 };

 }
diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.reference b/tests/queries/0_stateless/01130_in_memory_parts_nested.reference
new file mode 100644
index 00000000000..abc233c46f4
--- /dev/null
+++ b/tests/queries/0_stateless/01130_in_memory_parts_nested.reference
@@ -0,0 +1,15 @@
+[0]
+[0,0,0]
+[0,0,0,0,0]
+[0,0,0,0,0,0,0]
+[0,0,0,0,0,0,0,0,0]
+[0]
+[0,2,4]
+[0,2,4,6,8]
+[0,2,4,6,8,10,12]
+[0,2,4,6,8,10,12,14,16]
+[0] [0]
+[0,1,2] [0,2,4]
+[0,1,2,3,4] [0,2,4,6,8]
+[0,1,2,3,4,5,6] [0,2,4,6,8,10,12]
+[0,1,2,3,4,5,6,7,8] [0,2,4,6,8,10,12,14,16]
diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.sql b/tests/queries/0_stateless/01130_in_memory_parts_nested.sql
new file mode 100644
index 00000000000..c09593d01bc
--- /dev/null
+++ b/tests/queries/0_stateless/01130_in_memory_parts_nested.sql
@@ -0,0 +1,16 @@
+-- Test 00576_nested_and_prewhere, but with in-memory parts.
+DROP TABLE IF EXISTS nested; + +CREATE TABLE nested (x UInt64, filter UInt8, n Nested(a UInt64)) ENGINE = MergeTree ORDER BY x + SETTINGS min_rows_for_compact_part = 200000, min_rows_for_wide_part = 300000; + +INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.numbers LIMIT 100000; + +ALTER TABLE nested ADD COLUMN n.b Array(UInt64); +SELECT DISTINCT n.b FROM nested PREWHERE filter; + +ALTER TABLE nested ADD COLUMN n.c Array(UInt64) DEFAULT arrayMap(x -> x * 2, n.a); +SELECT DISTINCT n.c FROM nested PREWHERE filter; +SELECT DISTINCT n.a, n.c FROM nested PREWHERE filter; + +DROP TABLE nested; From f6e69355faa1f131fd22bfca93bc1cee0c1aca1e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:10:19 +0300 Subject: [PATCH 0308/2229] experiment --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aa78ab62f9b..526edd7792f 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include #include #include From 1797a47a9f26d4a97e8aa044af5a45a6c34e6d4f Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:25:25 +0300 Subject: [PATCH 0309/2229] fix clang warnings --- base/daemon/SentryWriter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index f7edc8d1e93..6bfc07ea2fb 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -17,8 +17,8 @@ namespace { -static bool initialized = false; -static bool anonymize = false; +bool initialized = false; +bool anonymize = false; void setExtras() { @@ -44,7 +44,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); - auto logger = &Poco::Logger::get("SentryWriter"); + auto * logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { if (debug || (strlen(VERSION_OFFICIAL) > 0)) @@ -121,7 +121,7 @@ void SentryWriter::shutdown() void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY - auto logger = &Poco::Logger::get("SentryWriter"); + auto * logger = &Poco::Logger::get("SentryWriter"); if (initialized) { const std::string & error_message = signalToErrorMessage(sig, info, context); From 8babd4d18c093f44d96e616a31d5551b24e73de2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:36:33 +0300 Subject: [PATCH 0310/2229] experiment --- cmake/find/sentry.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 30b8b28f6f1..06312b64495 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -6,6 +6,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") endif () if (NOT OS_FREEBSD AND NOT UNBUNDLED) + cmake_policy (SET CMP0077 NEW) option (USE_SENTRY "Use Sentry" ON) set (BUILD_SHARED_LIBS OFF) From 965204dfb161953616b6fa5168bde03375d87205 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:48:34 +0300 Subject: [PATCH 0311/2229] Try to fix the msan build --- .gitmodules | 2 +- contrib/sentry-native | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4175eb223db..ff4e644f657 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ 
url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/getsentry/sentry-native.git + url = https://github.com/blinkov/sentry-native.git diff --git a/contrib/sentry-native b/contrib/sentry-native index 3bfce2d17c1..9e214a1265a 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit 3bfce2d17c1b80fbbaae83bb5ef41c1b290d34fb +Subproject commit 9e214a1265a4ea628c21045b7f43d1aec15e385d From 65ff11aeac99061b53de62ab120d9ff75ae0dc03 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 22:49:00 +0300 Subject: [PATCH 0312/2229] old cmake compatibility --- cmake/find/sentry.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 06312b64495..d10c15cd334 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -6,7 +6,9 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") endif () if (NOT OS_FREEBSD AND NOT UNBUNDLED) - cmake_policy (SET CMP0077 NEW) + if (POLICY CMP0077) + cmake_policy (SET CMP0077 NEW) + endif () option (USE_SENTRY "Use Sentry" ON) set (BUILD_SHARED_LIBS OFF) From 806fd2739567562f62fae565fea980bdcaea051b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 1 Jun 2020 20:48:24 +0000 Subject: [PATCH 0313/2229] Fix build & fix style & fix --- src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 3 ++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp | 13 +++++++------ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 3f940891c23..0858e2101df 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -10,7 +10,7 @@ namespace DB namespace ErrorCodes { - extern int CANNOT_CREATE_IO_BUFFER; + extern const int CANNOT_CREATE_IO_BUFFER; } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 547851f349a..6308e2e0089 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -15,7 +15,8 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * message) { LOG_ERROR(log, "Library error report: {}", message); - if (!connection->ready()) + + if (!connection->usable() || !connection->ready()) { std::cerr << "Connection lost, no recovery is possible"; throw; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index ee5dede5261..147d3ba2115 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -72,7 +72,7 @@ StorageRabbitMQ::StorageRabbitMQ( , num_consumers(num_consumers_) , num_queues(num_queues_) , hash_exchange(hash_exchange_) - , log(&Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) + , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 12d6c2b0fb8..73434bc0ea6 100644 --- 
a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
+++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include

 namespace DB
@@ -15,7 +16,7 @@ enum
 {
     Connection_setup_sleep = 200,
     Connection_setup_retries_max = 1000,
-    Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers
+    Buffer_limit_to_flush = 5000 /// It is important to keep it low in order not to kill consumers
 };

 WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
@@ -44,8 +45,8 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer(
     , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/"))
 {
     /* The reason behind making a separate connection for each concurrent producer is explained here:
-     * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086
-     * - publishing from different threads (as outputStreams are asynchronous) leads to internal libary errors.
+     * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from
+     * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors.
      */
     size_t cnt_retries = 0;
     while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max)
@@ -107,9 +108,9 @@ void WriteBufferToRabbitMQProducer::count_row()

 void WriteBufferToRabbitMQProducer::flush()
 {
-    /* Why accumulating payloads and not publishing each of them at once in count_row()? Because publishing needs to
-     * be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it each time
-     * we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to
+    /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing
+     * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it
+     * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to
      * exchange becoming inactive at some point and part of messages is lost as a result.
      */
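A condensed sketch of the declare-once, publish-in-callback pattern the comment describes, using the AMQP-CPP API (the exchange name and payload container are illustrative, not the storage's actual members):

    #include <amqpcpp.h>
    #include <string>
    #include <vector>

    void publishAll(AMQP::TcpChannel & channel, const std::vector<std::string> & payloads)
    {
        /// Declare the exchange once and publish only after the broker confirms it;
        /// the event loop must keep running until one of the callbacks fires.
        channel.declareExchange("clickhouse-exchange", AMQP::fanout)
            .onSuccess([&channel, &payloads]()
            {
                for (const auto & payload : payloads)
                    channel.publish("clickhouse-exchange", "", payload);  /// fanout ignores the routing key
            })
            .onError([](const char * /* message */)
            {
                /// The exchange could not be declared; nothing was published.
            });
    }
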
    std::atomic<bool> exchange_declared = false, exchange_error = false;

From e9a04f7741550a48d6c963fe4a225bf3ce616141 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Mon, 1 Jun 2020 23:48:42 +0300
Subject: [PATCH 0314/2229] more build fixes

---
 base/daemon/SentryWriter.cpp | 4 ++--
 src/Common/StackTrace.cpp    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 6bfc07ea2fb..95189b72e81 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -145,8 +145,8 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
         {
             const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i];
             sentry_value_t frame = sentry_value_new_object();
-            unsigned long long frame_ptr = reinterpret_cast<unsigned long long>(current_frame.virtual_addr);
-            snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr);
+            UInt64 frame_ptr = reinterpret_cast<UInt64>(current_frame.virtual_addr);
+            std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr);
             sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr));

             if (current_frame.symbol.has_value())
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 526edd7792f..e71ce1e1139 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -192,7 +192,7 @@ static void * getCallerAddress(const ucontext_t & context)

 static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames)
 {
-#if defined(__ELF__) && !defined(__FreeBSD__)
+#if defined(__ELF__) && !defined(__FreeBSD__) && !defined(ARCADIA_BUILD)
     const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
     std::unordered_map<std::string, DB::Dwarf> dwarfs;

From 5a32d7913524a139ce43b835f53f73e1f0b42943 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Mon, 1 Jun 2020 23:55:32 +0300
Subject: [PATCH 0315/2229] experiment

---
 cmake/find/sentry.cmake   | 2 +-
 src/Common/StackTrace.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index d10c15cd334..309f63e9165 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()

-if (NOT OS_FREEBSD AND NOT UNBUNDLED)
+if (NOT OS_FREEBSD AND NOT UNBUNDLED AND NOT SPLITTED AND NOT (COMPILER_CLANG AND OS_DARWIN))
     if (POLICY CMP0077)
         cmake_policy (SET CMP0077 NEW)
     endif ()
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index e71ce1e1139..dbe3d005be7 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const
 {
     if (!frames.has_value())
     {
-        frames = {{}};
+        frames = std::array();
         symbolize(frame_pointers.data(), offset, size, frames);
     }
     return frames;

From 6eb6d8f3fd8beacbcb8ea536366e6c53ea833ff5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 2 Jun 2020 00:11:08 +0300
Subject: [PATCH 0316/2229] Get rid of annoying trailing whitespaces in CREATE query

---
 src/DataTypes/DataTypeTuple.cpp          |  1 +
 src/Parsers/ASTColumnDeclaration.cpp     |  3 --
 src/Parsers/ASTConstraintDeclaration.cpp |  4 ---
 src/Parsers/ASTCreateQuery.cpp           | 26 ++++++++---------
 src/Parsers/ASTIndexDeclaration.cpp      | 35 ++++++++++++++++++++++
 src/Parsers/ASTIndexDeclaration.h        | 37 ++----------------------
 src/Parsers/ASTNameTypePair.cpp          | 35 ++++++++++++++++++++++
 src/Parsers/ASTNameTypePair.h            | 24 ++-------------
src/Parsers/ya.make | 2 ++ 9 files changed, 89 insertions(+), 78 deletions(-) create mode 100644 src/Parsers/ASTIndexDeclaration.cpp create mode 100644 src/Parsers/ASTNameTypePair.cpp diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 29db2a49b99..b69c4c31ca4 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index b281315f555..15bf1d59574 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -47,9 +47,6 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta { frame.need_parens = false; - if (!settings.one_line) - settings.ostr << settings.nl_or_ws << std::string(4 * frame.indent, ' '); - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. settings.ostr << backQuote(name); diff --git a/src/Parsers/ASTConstraintDeclaration.cpp b/src/Parsers/ASTConstraintDeclaration.cpp index f268141f619..371bfa40f54 100644 --- a/src/Parsers/ASTConstraintDeclaration.cpp +++ b/src/Parsers/ASTConstraintDeclaration.cpp @@ -19,10 +19,6 @@ ASTPtr ASTConstraintDeclaration::clone() const void ASTConstraintDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - frame.need_parens = false; - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - - s.ostr << s.nl_or_ws << indent_str; s.ostr << backQuoteIfNeed(name); s.ostr << (s.hilite ? hilite_keyword : "") << " CHECK " << (s.hilite ? hilite_none : ""); expr->formatImpl(s, state, frame); diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index d5942753f78..f7481ac3c09 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -108,17 +108,9 @@ void ASTColumnsElement::formatImpl(const FormatSettings & s, FormatState & state return; } - frame.need_parens = false; - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - - s.ostr << s.nl_or_ws << indent_str; s.ostr << (s.hilite ? hilite_keyword : "") << prefix << (s.hilite ? hilite_none : ""); - - FormatSettings nested_settings = s; - nested_settings.one_line = true; - nested_settings.nl_or_ws = ' '; - - elem->formatImpl(nested_settings, state, frame); + s.ostr << ' '; + elem->formatImpl(s, state, frame); } @@ -172,7 +164,12 @@ void ASTColumns::formatImpl(const FormatSettings & s, FormatState & state, Forma } if (!list.children.empty()) - list.formatImpl(s, state, frame); + { + if (s.one_line) + list.formatImpl(s, state, frame); + else + list.formatImplMultiline(s, state, frame); + } } @@ -277,7 +274,6 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat { settings.ostr << (settings.one_line ? " (" : "\n("); FormatStateStacked frame_nested = frame; - ++frame_nested.indent; columns_list->formatImpl(settings, state, frame_nested); settings.ostr << (settings.one_line ? ")" : "\n)"); } @@ -286,8 +282,10 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat { settings.ostr << (settings.one_line ? 
" (" : "\n("); FormatStateStacked frame_nested = frame; - ++frame_nested.indent; - dictionary_attributes_list->formatImpl(settings, state, frame_nested); + if (settings.one_line) + dictionary_attributes_list->formatImpl(settings, state, frame_nested); + else + dictionary_attributes_list->formatImplMultiline(settings, state, frame_nested); settings.ostr << (settings.one_line ? ")" : "\n)"); } diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp new file mode 100644 index 00000000000..e89f9bf26ed --- /dev/null +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -0,0 +1,35 @@ +#include +#include + + +namespace DB +{ + +ASTPtr ASTIndexDeclaration::clone() const +{ + auto res = std::make_shared(); + + res->name = name; + res->granularity = granularity; + + if (expr) + res->set(res->expr, expr->clone()); + if (type) + res->set(res->type, type->clone()); + return res; +} + + +void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const +{ + s.ostr << backQuoteIfNeed(name); + s.ostr << " "; + expr->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); + type->formatImpl(s, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); + s.ostr << granularity; +} + +} + diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index c71ab21cf57..64ef6eb2db1 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -1,15 +1,8 @@ #pragma once -#include -#include -#include -#include -#include #include #include -#include - namespace DB { @@ -27,34 +20,8 @@ public: /** Get the text that identifies this element. */ String getID(char) const override { return "Index"; } - ASTPtr clone() const override - { - auto res = std::make_shared(); - - res->name = name; - res->granularity = granularity; - - if (expr) - res->set(res->expr, expr->clone()); - if (type) - res->set(res->type, type->clone()); - return res; - } - - void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override - { - frame.need_parens = false; - std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - - s.ostr << s.nl_or_ws << indent_str; - s.ostr << backQuoteIfNeed(name); - s.ostr << " "; - expr->formatImpl(s, state, frame); - s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); - type->formatImpl(s, state, frame); - s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : ""); - s.ostr << granularity; - } + ASTPtr clone() const override; + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; }; } diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp new file mode 100644 index 00000000000..6c41d35315c --- /dev/null +++ b/src/Parsers/ASTNameTypePair.cpp @@ -0,0 +1,35 @@ +#include +#include + + +namespace DB +{ + +ASTPtr ASTNameTypePair::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (type) + { + res->type = type; + res->children.push_back(res->type); + } + + return res; +} + + +void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + std::string indent_str = settings.one_line ? 
"" : std::string(4 * frame.indent, ' '); + + settings.ostr << '#'; + settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; + type->formatImpl(settings, state, frame); + settings.ostr << '#'; +} + +} + + diff --git a/src/Parsers/ASTNameTypePair.h b/src/Parsers/ASTNameTypePair.h index 48dd7ae1ac9..638e980cbdc 100644 --- a/src/Parsers/ASTNameTypePair.h +++ b/src/Parsers/ASTNameTypePair.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB @@ -19,29 +18,10 @@ public: /** Get the text that identifies this element. */ String getID(char delim) const override { return "NameTypePair" + (delim + name); } - - ASTPtr clone() const override - { - auto res = std::make_shared(*this); - res->children.clear(); - - if (type) - { - res->type = type; - res->children.push_back(res->type); - } - - return res; - } + ASTPtr clone() const override; protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override - { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name) << " "; - type->formatImpl(settings, state, frame); - } + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 8c7e4ff68af..c1cca094518 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -26,9 +26,11 @@ SRCS( ASTFunctionWithKeyValueArguments.cpp ASTGrantQuery.cpp ASTIdentifier.cpp + ASTIndexDeclaration.cpp ASTInsertQuery.cpp ASTKillQueryQuery.cpp ASTLiteral.cpp + ASTNameTypePair.cpp ASTOptimizeQuery.cpp ASTOrderByElement.cpp ASTPartition.cpp From 2a0da608fd34ad5d35e4ed14a6797451e548c718 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jun 2020 02:31:50 +0300 Subject: [PATCH 0317/2229] Update tests --- .../00597_push_down_predicate.reference | 50 +++++++++---------- ...51_default_databasename_for_view.reference | 8 +-- .../00826_cross_to_inner_join.reference | 16 +++--- .../00849_multiple_comma_join.reference | 24 ++++----- .../00849_multiple_comma_join_2.reference | 18 +++---- .../0_stateless/00908_analyze_query.reference | 2 +- ...0957_format_with_clashed_aliases.reference | 2 +- ...58_format_of_tuple_array_element.reference | 14 +++--- .../01056_predicate_optimizer_bugs.reference | 10 ++-- ...76_predicate_optimizer_with_view.reference | 8 +-- .../01083_cross_to_inner_with_like.reference | 6 +-- .../01278_format_multiple_queries.reference | 6 +-- 12 files changed, 82 insertions(+), 82 deletions(-) diff --git a/tests/queries/0_stateless/00597_push_down_predicate.reference b/tests/queries/0_stateless/00597_push_down_predicate.reference index 480b1c4525c..829c5a1577e 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -4,12 +4,12 @@ 1 2000-01-01 1 test string 1 1 -------Forbid push down------- -SELECT count()\nFROM \n(\n SELECT \n [number] AS a, \n [number * 2] AS b\n FROM system.numbers\n LIMIT 1\n) AS t\nARRAY JOIN \n a, \n b\nWHERE NOT ignore(a + b) +SELECT count()\nFROM \n(\n SELECT \n [number] AS a,\n [number * 2] AS b\n FROM system.numbers\n LIMIT 1\n) AS t\nARRAY JOIN \n a,\n b\nWHERE NOT ignore(a + b) 1 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY LEFT JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) 
USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a, \n 1 AS b\n) USING (a)\nWHERE b = 0 -SELECT \n a, \n b\nFROM \n(\n SELECT \n 1 AS a, \n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a,\n b\nFROM \n(\n SELECT 1 AS a\n)\nANY LEFT JOIN \n(\n SELECT \n 1 AS a,\n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a,\n b\nFROM \n(\n SELECT \n 1 AS a,\n 1 AS b\n)\nANY RIGHT JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 +SELECT \n a,\n b\nFROM \n(\n SELECT 1 AS a\n)\nANY FULL OUTER JOIN \n(\n SELECT \n 1 AS a,\n 1 AS b\n) USING (a)\nWHERE b = 0 +SELECT \n a,\n b\nFROM \n(\n SELECT \n 1 AS a,\n 1 AS b\n)\nANY FULL OUTER JOIN \n(\n SELECT 1 AS a\n) USING (a)\nWHERE b = 0 -------Need push down------- SELECT toString(value) AS value\nFROM \n(\n SELECT 1 AS value\n) 1 @@ -19,46 +19,46 @@ SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\ 1 SELECT id\nFROM \n(\n SELECT arrayJoin([1, 2, 3]) AS id\n WHERE id = 1\n)\nWHERE id = 1 1 -SELECT \n id, \n subquery\nFROM \n(\n SELECT \n 1 AS id, \n CAST(1, \'UInt8\') AS subquery\n) +SELECT \n id,\n subquery\nFROM \n(\n SELECT \n 1 AS id,\n CAST(1, \'UInt8\') AS subquery\n) 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597\n HAVING a = 3\n)\nWHERE a = 3 +SELECT \n a,\n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a,\n b\n FROM test_00597\n HAVING a = 3\n)\nWHERE a = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n name, \n value, \n min(id) AS id\n FROM test_00597\n GROUP BY \n date, \n name, \n value\n HAVING id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n name,\n value,\n min(id) AS id\n FROM test_00597\n GROUP BY \n date,\n name,\n value\n HAVING id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n a, \n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a, \n b\n FROM test_00597 AS table_alias\n HAVING b = 3\n) AS outer_table_alias\nWHERE b = 3 +SELECT \n a,\n b\nFROM \n(\n SELECT \n toUInt64(sum(id) AS b) AS a,\n b\n FROM test_00597 AS table_alias\n HAVING b = 3\n) AS outer_table_alias\nWHERE b = 3 3 3 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS b\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n ) AS b\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n 
name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n) AS b\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n WHERE id = 1\n) AS b\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n WHERE id = 1\n) AS b\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n id, \n date, \n value\nFROM \n(\n SELECT \n id, \n date, \n min(value) AS value\n FROM test_00597\n WHERE id = 1\n GROUP BY \n id, \n date\n)\nWHERE id = 1 +SELECT \n id,\n date,\n value\nFROM \n(\n SELECT \n id,\n date,\n min(value) AS value\n FROM test_00597\n WHERE id = 1\n GROUP BY \n id,\n date\n)\nWHERE id = 1 1 2000-01-01 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n UNION ALL\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n UNION ALL\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n)\nANY LEFT JOIN \n(\n SELECT id\n FROM test_00597\n) USING (id)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value,\n date,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n)\nANY LEFT JOIN \n(\n SELECT id\n FROM test_00597\n) USING (id)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n) AS test_00597 USING (id)\nWHERE value = 1 +SELECT \n id,\n date,\n name,\n value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n) AS test_00597 USING (id)\nWHERE value = 1 1 2000-01-01 test string 1 1 SELECT value\nFROM \n(\n SELECT toInt8(1) AS id\n)\nANY LEFT JOIN test_00597 AS b USING (id)\nWHERE value = 1 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n date, \n name, \n value\n FROM \n (\n 
SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n ANY LEFT JOIN \n (\n SELECT id\n FROM test_00597\n ) USING (id)\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value,\n date,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n )\n ANY LEFT JOIN \n (\n SELECT id\n FROM test_00597\n ) USING (id)\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.name, \n b.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n)\nANY LEFT JOIN \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n) AS b USING (id)\nWHERE b.id = 1 +SELECT \n date,\n id,\n name,\n value,\n b.date,\n b.name,\n b.value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n)\nANY LEFT JOIN \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n) AS b USING (id)\nWHERE b.id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 -SELECT \n id, \n date, \n name, \n value\nFROM \n(\n SELECT \n toInt8(1) AS id, \n toDate(\'2000-01-01\') AS date\n FROM system.numbers\n LIMIT 1\n)\nANY LEFT JOIN \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') +SELECT \n id,\n date,\n name,\n value\nFROM \n(\n SELECT \n toInt8(1) AS id,\n toDate(\'2000-01-01\') AS date\n FROM system.numbers\n LIMIT 1\n)\nANY LEFT JOIN \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n) AS b USING (date, id)\nWHERE b.date = toDate(\'2000-01-01\') 1 2000-01-01 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n `b.date`, \n `b.id`, \n `b.name`, \n `b.value`\nFROM \n(\n SELECT \n date, \n id, \n name, \n value, \n b.date, \n b.id, \n b.name, \n b.value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n ANY LEFT JOIN \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n ) AS b ON id = b.id\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value,\n `b.date`,\n `b.id`,\n `b.name`,\n `b.value`\nFROM \n(\n SELECT \n date,\n id,\n name,\n value,\n b.date,\n b.id,\n b.name,\n b.value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n ) AS a\n ANY LEFT JOIN \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n ) AS b ON id = b.id\n WHERE id = 1\n)\nWHERE id = 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -SELECT \n date, \n id, \n name, \n value, \n r.date, \n r.name, \n r.value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n)\nSEMI LEFT JOIN \n(\n SELECT \n date, \n id, \n name, \n value\n FROM \n (\n SELECT \n date, \n id, \n name, \n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n) AS r USING (id)\nWHERE r.id = 1 +SELECT \n date,\n id,\n name,\n value,\n r.date,\n r.name,\n r.value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n)\nSEMI LEFT JOIN \n(\n SELECT \n date,\n id,\n name,\n value\n FROM \n (\n SELECT \n date,\n id,\n name,\n value\n FROM test_00597\n WHERE id = 1\n )\n WHERE id = 1\n) AS r USING (id)\nWHERE r.id = 1 2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 diff --git a/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 5ba1861e3ef..4814cc77b37 100644 --- 
a/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,15 +1,15 @@ CREATE MATERIALIZED VIEW test_00751.t_mv_00751 ( - `date` Date, - `platform` Enum8('a' = 0, 'b' = 1), + `date` Date, + `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1) ) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT - date, - platform, + date, + platform, app FROM test_00751.t_00751 WHERE (app = diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference index 32b1c42ca2c..2a4b1487f20 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -35,18 +35,18 @@ comma nullable 1 1 1 1 2 2 1 2 cross -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a cross nullable vs not nullable -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b cross self -SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) +SELECT \n a,\n b,\n y.a,\n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b) cross one table expr -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b cross multiple ands -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) cross and inside and -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction -SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) +SELECT \n a,\n b,\n t2_00826.a,\n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0) 
diff --git a/tests/queries/0_stateless/00849_multiple_comma_join.reference b/tests/queries/0_stateless/00849_multiple_comma_join.reference index 829a5d25e54..0f7d28b65a0 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join.reference @@ -1,18 +1,18 @@ SELECT a\nFROM t1_00849\nCROSS JOIN t2_00849 SELECT a\nFROM t1_00849\nALL INNER JOIN t2_00849 ON a = t2_00849.a\nWHERE a = t2_00849.a SELECT a\nFROM t1_00849\nALL INNER JOIN t2_00849 ON b = t2_00849.b\nWHERE b = t2_00849.b -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) AS `--.s`\nALL INNER JOIN t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n) AS `--.s`\nALL INNER JOIN t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n `--t1_00849.b`, \n `t2_00849.a`, \n `--t2_00849.b`, \n a, \n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b AS `--t1_00849.b`, \n t2_00849.a, \n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) 
-SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nALL INNER JOIN t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `--t2_00849.a`, \n `t2_00849.b`, \n a AS `--t3_00849.a`, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`, \n b, \n `t2_00849.a`, \n `t2_00849.b`, \n a, \n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a, \n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n) AS `--.s`\nCROSS JOIN t3_00849 -SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`, \n b, \n t2_00849.a AS `--t2_00849.a`, \n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) AS `--.s`\nCROSS JOIN t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) AS `--.s`\nALL INNER JOIN t3_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`,\n b AS `--t1_00849.b`,\n t2_00849.a,\n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n) AS `--.s`\nALL INNER JOIN t3_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `--t2_00849.a`,\n `t2_00849.b`,\n a AS `--t3_00849.a`,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t1_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t1_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t1_00849.a` = `--t3_00849.a`) AND (`--t1_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n `--t1_00849.b`,\n `t2_00849.a`,\n 
`--t2_00849.b`,\n a,\n b AS `--t3_00849.b`\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b AS `--t1_00849.b`,\n t2_00849.a,\n t2_00849.b AS `--t2_00849.b`\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.b` = `--t2_00849.b`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t1_00849.b` = `--t3_00849.b`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t1_00849.b` = b\nWHERE (`--t1_00849.b` = `--t2_00849.b`) AND (`--t1_00849.b` = `--t3_00849.b`) AND (`--t1_00849.b` = b) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `--t2_00849.a`,\n `t2_00849.b`,\n a AS `--t3_00849.a`,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t2_00849.a` = `--t1_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t2_00849.a` = a\nWHERE (`--t2_00849.a` = `--t1_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t2_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `--t2_00849.a`,\n `t2_00849.b`,\n a AS `--t3_00849.a`,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`)\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t3_00849.a` = `--t1_00849.a`) AND (`--t3_00849.a` = `--t2_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `--t2_00849.a`,\n `t2_00849.b`,\n a AS `--t3_00849.a`,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nALL INNER JOIN t4_00849 ON (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`)\nWHERE (a = `--t1_00849.a`) AND (a = `--t2_00849.a`) AND (a = `--t3_00849.a`) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `--t2_00849.a`,\n `t2_00849.b`,\n a AS `--t3_00849.a`,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n ) AS `--.s`\n ALL INNER JOIN t3_00849 ON `--t2_00849.a` = `--t3_00849.a`\n) AS `--.s`\nALL INNER JOIN t4_00849 ON `--t3_00849.a` = a\nWHERE (`--t1_00849.a` = `--t2_00849.a`) AND (`--t2_00849.a` = `--t3_00849.a`) AND (`--t3_00849.a` = a) +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `t2_00849.a`,\n `t2_00849.b`,\n a,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a,\n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n `--t1_00849.a`,\n b,\n `t2_00849.a`,\n `t2_00849.b`,\n a,\n t3_00849.b\n FROM \n (\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a,\n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n ) AS `--.s`\n CROSS JOIN t3_00849\n) AS `--.s`\nCROSS JOIN t4_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n a AS `--t1_00849.a`,\n b,\n t2_00849.a,\n t2_00849.b\n FROM t1_00849\n CROSS JOIN t2_00849\n) AS `--.s`\nCROSS JOIN t3_00849 +SELECT `--t1_00849.a` AS `t1_00849.a`\nFROM \n(\n SELECT \n 
a AS `--t1_00849.a`,\n b,\n t2_00849.a AS `--t2_00849.a`,\n t2_00849.b\n FROM t1_00849\n ALL INNER JOIN t2_00849 ON `--t1_00849.a` = `--t2_00849.a`\n) AS `--.s`\nCROSS JOIN t3_00849 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference index 7875c1e9e86..f2e832123e0 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference @@ -1,18 +1,18 @@ SELECT a\nFROM t1\nCROSS JOIN t2 SELECT a\nFROM t1\nALL INNER JOIN t2 ON a = t2.a\nWHERE a = t2.a SELECT a\nFROM t1\nALL INNER JOIN t2 ON b = t2.b\nWHERE b = t2.b -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n) AS `--.s`\nALL INNER JOIN t3 ON `--t1.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = a) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n b AS `--t1.b`, \n a AS `--t1.a`, \n t2.b AS `--t2.b`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.b` = `--t2.b`\n) AS `--.s`\nALL INNER JOIN t3 ON `--t1.b` = b\nWHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = b) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t1.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t1.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = `--t3.a`) AND (`--t1.a` = a) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.b`, \n `--t1.a`, \n `--t2.b`, \n b AS `--t3.b`\n FROM \n (\n SELECT \n b AS `--t1.b`, \n a AS `--t1.a`, \n t2.b AS `--t2.b`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.b` = `--t2.b`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t1.b` = `--t3.b`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t1.b` = b\nWHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = `--t3.b`) AND (`--t1.b` = b) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t2.a` = `--t1.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t2.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t2.a` = a\nWHERE (`--t2.a` = `--t1.a`) AND (`--t2.a` = `--t3.a`) AND (`--t2.a` = a) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n ALL INNER JOIN t3 ON (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`)\n) AS `--.s`\nALL INNER JOIN t4 ON `--t3.a` = a\nWHERE (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`) AND (`--t3.a` = a) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`)\nWHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`, \n `--t2.a`, \n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t2.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t3.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t2.a` = `--t3.a`) AND (`--t3.a` = a) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n a AS 
`--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n) AS `--.s`\nALL INNER JOIN t3 ON `--t1.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = a) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n b AS `--t1.b`,\n a AS `--t1.a`,\n t2.b AS `--t2.b`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.b` = `--t2.b`\n) AS `--.s`\nALL INNER JOIN t3 ON `--t1.b` = b\nWHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = b) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`,\n `--t2.a`,\n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t1.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t1.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t1.a` = `--t3.a`) AND (`--t1.a` = a) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.b`,\n `--t1.a`,\n `--t2.b`,\n b AS `--t3.b`\n FROM \n (\n SELECT \n b AS `--t1.b`,\n a AS `--t1.a`,\n t2.b AS `--t2.b`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.b` = `--t2.b`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t1.b` = `--t3.b`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t1.b` = b\nWHERE (`--t1.b` = `--t2.b`) AND (`--t1.b` = `--t3.b`) AND (`--t1.b` = b) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`,\n `--t2.a`,\n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t2.a` = `--t1.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t2.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t2.a` = a\nWHERE (`--t2.a` = `--t1.a`) AND (`--t2.a` = `--t3.a`) AND (`--t2.a` = a) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`,\n `--t2.a`,\n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n ALL INNER JOIN t3 ON (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`)\n) AS `--.s`\nALL INNER JOIN t4 ON `--t3.a` = a\nWHERE (`--t3.a` = `--t1.a`) AND (`--t3.a` = `--t2.a`) AND (`--t3.a` = a) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`,\n `--t2.a`,\n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`)\nWHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n `--t1.a`,\n `--t2.a`,\n a AS `--t3.a`\n FROM \n (\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n ) AS `--.s`\n ALL INNER JOIN t3 ON `--t2.a` = `--t3.a`\n) AS `--.s`\nALL INNER JOIN t4 ON `--t3.a` = a\nWHERE (`--t1.a` = `--t2.a`) AND (`--t2.a` = `--t3.a`) AND (`--t3.a` = a) SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT `--t1.a`\n FROM \n (\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nCROSS JOIN t4 SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT `--t1.a`\n FROM \n (\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n ) AS `--.s`\n CROSS JOIN t3\n) AS `--.s`\nCROSS JOIN t4 SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT a AS `--t1.a`\n FROM t1\n CROSS JOIN t2\n) AS `--.s`\nCROSS JOIN t3 -SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n a AS `--t1.a`, \n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n) AS `--.s`\nCROSS JOIN t3 +SELECT `--t1.a` AS `t1.a`\nFROM \n(\n SELECT \n a AS `--t1.a`,\n t2.a AS `--t2.a`\n FROM t1\n ALL INNER JOIN t2 ON `--t1.a` = `--t2.a`\n) AS `--.s`\nCROSS JOIN t3 SELECT * FROM t1, t2 1 1 1 1 1 1 1 \N diff --git 
a/tests/queries/0_stateless/00908_analyze_query.reference b/tests/queries/0_stateless/00908_analyze_query.reference index a8619cfcd4b..66db6f5a2e4 100644 --- a/tests/queries/0_stateless/00908_analyze_query.reference +++ b/tests/queries/0_stateless/00908_analyze_query.reference @@ -1 +1 @@ -SELECT \n a, \n b\nFROM a +SELECT \n a,\n b\nFROM a diff --git a/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference b/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference index d3f7a9aa18b..d1c8033b363 100644 --- a/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference +++ b/tests/queries/0_stateless/00957_format_with_clashed_aliases.reference @@ -1,5 +1,5 @@ SELECT - 1 AS x, + 1 AS x, x.y FROM ( diff --git a/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference b/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference index 7265311960f..eaea02ba40b 100644 --- a/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference +++ b/tests/queries/0_stateless/00958_format_of_tuple_array_element.reference @@ -1,9 +1,9 @@ SELECT - (x.1)[1], - (((x[1]).1)[1]).1, - (NOT x)[1], - -(x[1]), - (-x)[1], - (NOT x).1, - -(x.1), + (x.1)[1], + (((x[1]).1)[1]).1, + (NOT x)[1], + -(x[1]), + (-x)[1], + (NOT x).1, + -(x.1), (-x).1 diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference index bd132202979..c797226d832 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.reference @@ -1,10 +1,10 @@ -SELECT \n k, \n v, \n d, \n i\nFROM \n(\n SELECT \n t.1 AS k, \n t.2 AS v, \n runningDifference(v) AS d, \n runningDifference(cityHash64(t.1)) AS i\n FROM \n (\n SELECT arrayJoin([(\'a\', 1), (\'a\', 2), (\'a\', 3), (\'b\', 11), (\'b\', 13), (\'b\', 15)]) AS t\n )\n)\nWHERE i = 0 +SELECT \n k,\n v,\n d,\n i\nFROM \n(\n SELECT \n t.1 AS k,\n t.2 AS v,\n runningDifference(v) AS d,\n runningDifference(cityHash64(t.1)) AS i\n FROM \n (\n SELECT arrayJoin([(\'a\', 1), (\'a\', 2), (\'a\', 3), (\'b\', 11), (\'b\', 13), (\'b\', 15)]) AS t\n )\n)\nWHERE i = 0 a 1 0 0 a 2 1 0 a 3 1 0 b 13 2 0 b 15 2 0 -SELECT \n co, \n co2, \n co3, \n num\nFROM \n(\n SELECT \n co, \n co2, \n co3, \n count() AS num\n FROM \n (\n SELECT \n 1 AS co, \n 2 AS co2, \n 3 AS co3\n )\n GROUP BY \n co, \n co2, \n co3\n WITH CUBE\n HAVING (co2 != 2) AND (co != 0)\n)\nWHERE (co != 0) AND (co2 != 2) +SELECT \n co,\n co2,\n co3,\n num\nFROM \n(\n SELECT \n co,\n co2,\n co3,\n count() AS num\n FROM \n (\n SELECT \n 1 AS co,\n 2 AS co2,\n 3 AS co3\n )\n GROUP BY \n co,\n co2,\n co3\n WITH CUBE\n HAVING (co2 != 2) AND (co != 0)\n)\nWHERE (co != 0) AND (co2 != 2) 1 0 3 1 1 0 0 1 SELECT alias AS name\nFROM \n(\n SELECT name AS alias\n FROM system.settings\n WHERE alias = \'enable_optimize_predicate_expression\'\n)\nANY INNER JOIN \n(\n SELECT name\n FROM system.settings\n) USING (name)\nWHERE name = \'enable_optimize_predicate_expression\' @@ -12,8 +12,8 @@ enable_optimize_predicate_expression 1 val11 val21 val31 SELECT ccc\nFROM \n(\n SELECT 1 AS ccc\n WHERE 0\n UNION ALL\n SELECT ccc\n FROM \n (\n SELECT 2 AS ccc\n )\n ANY INNER JOIN \n (\n SELECT 2 AS ccc\n ) USING (ccc)\n WHERE ccc > 1\n)\nWHERE ccc > 1 2 -SELECT \n ts, \n id, \n id_b, \n b.ts, \n b.id, \n id_c\nFROM \n(\n SELECT \n ts, \n id, \n id_b\n FROM A\n WHERE ts <= toDateTime(\'1970-01-01 03:00:00\')\n) AS a\nALL LEFT JOIN B AS b ON b.id = 
id_b\nWHERE ts <= toDateTime(\'1970-01-01 03:00:00\') -SELECT \n ts AS `--a.ts`, \n id AS `--a.id`, \n id_b AS `--a.id_b`, \n b.ts AS `--b.ts`, \n b.id AS `--b.id`, \n id_c AS `--b.id_c`\nFROM \n(\n SELECT \n ts, \n id, \n id_b\n FROM A\n WHERE ts <= toDateTime(\'1970-01-01 03:00:00\')\n) AS a\nALL LEFT JOIN B AS b ON `--b.id` = `--a.id_b`\nWHERE `--a.ts` <= toDateTime(\'1970-01-01 03:00:00\') +SELECT \n ts,\n id,\n id_b,\n b.ts,\n b.id,\n id_c\nFROM \n(\n SELECT \n ts,\n id,\n id_b\n FROM A\n WHERE ts <= toDateTime(\'1970-01-01 03:00:00\')\n) AS a\nALL LEFT JOIN B AS b ON b.id = id_b\nWHERE ts <= toDateTime(\'1970-01-01 03:00:00\') +SELECT \n ts AS `--a.ts`,\n id AS `--a.id`,\n id_b AS `--a.id_b`,\n b.ts AS `--b.ts`,\n b.id AS `--b.id`,\n id_c AS `--b.id_c`\nFROM \n(\n SELECT \n ts,\n id,\n id_b\n FROM A\n WHERE ts <= toDateTime(\'1970-01-01 03:00:00\')\n) AS a\nALL LEFT JOIN B AS b ON `--b.id` = `--a.id_b`\nWHERE `--a.ts` <= toDateTime(\'1970-01-01 03:00:00\') 2 3 3 4 4 5 @@ -24,6 +24,6 @@ SELECT \n ts AS `--a.ts`, \n id AS `--a.id`, \n id_b AS `--a.id_b`, \n 4 5 SELECT dummy\nFROM \n(\n SELECT dummy\n FROM system.one\n WHERE arrayMap(x -> (x + 1), [dummy]) = [1]\n)\nWHERE arrayMap(x -> (x + 1), [dummy]) = [1] 0 -SELECT \n id, \n value, \n value_1\nFROM \n(\n SELECT \n 1 AS id, \n 2 AS value\n)\nALL INNER JOIN \n(\n SELECT \n 1 AS id, \n 3 AS value_1\n) USING (id)\nWHERE arrayMap(x -> ((x + value) + value_1), [1]) = [6] +SELECT \n id,\n value,\n value_1\nFROM \n(\n SELECT \n 1 AS id,\n 2 AS value\n)\nALL INNER JOIN \n(\n SELECT \n 1 AS id,\n 3 AS value_1\n) USING (id)\nWHERE arrayMap(x -> ((x + value) + value_1), [1]) = [6] 1 2 3 SELECT dummy\nFROM system.one\nWHERE (dummy > 0) AND (dummy < 0) diff --git a/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference b/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference index 1e92e7b8596..e2c3b5dab4a 100644 --- a/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference +++ b/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference @@ -1,4 +1,4 @@ -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 2\n)\nWHERE id = 2 -SELECT id\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 -SELECT id\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n) AS s\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 +SELECT \n date,\n id,\n name,\n value\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM default.test\n WHERE id = 2\n)\nWHERE id = 2 +SELECT id\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT \n date,\n id,\n name,\n value\n FROM default.test\n WHERE id = 1\n) AS s\nWHERE id = 1 diff --git a/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference b/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference index 92b51afb544..e6ebffcae9c 100644 --- a/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference +++ b/tests/queries/0_stateless/01083_cross_to_inner_with_like.reference @@ -1,3 +1,3 @@ -SELECT \n k, \n r.k, \n name\nFROM n\nALL INNER JOIN r ON k = 
r.k\nWHERE (k = r.k) AND (name = \'A\') -SELECT \n k, \n r.k, \n name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name LIKE \'A%\') -SELECT \n k, \n r.k, \n name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name NOT LIKE \'A%\') +SELECT \n k,\n r.k,\n name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name = \'A\') +SELECT \n k,\n r.k,\n name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name LIKE \'A%\') +SELECT \n k,\n r.k,\n name\nFROM n\nALL INNER JOIN r ON k = r.k\nWHERE (k = r.k) AND (name NOT LIKE \'A%\') diff --git a/tests/queries/0_stateless/01278_format_multiple_queries.reference b/tests/queries/0_stateless/01278_format_multiple_queries.reference index cba2cc7b320..b12e3b30f0c 100644 --- a/tests/queries/0_stateless/01278_format_multiple_queries.reference +++ b/tests/queries/0_stateless/01278_format_multiple_queries.reference @@ -1,5 +1,5 @@ SELECT - a, + a, b AS x FROM table AS t INNER JOIN table2 AS t2 ON t.id = t2.t_id @@ -7,8 +7,8 @@ WHERE 1 = 1 ; SELECT - a, - b AS x, + a, + b AS x, if(x = 0, a, b) FROM table2 AS t WHERE t.id != 0 From 9f8c156fd23132d419618bbd8668f83245dc49fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jun 2020 02:35:44 +0300 Subject: [PATCH 0318/2229] Remove debug output --- src/Parsers/ASTNameTypePair.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp index 6c41d35315c..35493eb77d1 100644 --- a/src/Parsers/ASTNameTypePair.cpp +++ b/src/Parsers/ASTNameTypePair.cpp @@ -24,10 +24,8 @@ void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - settings.ostr << '#'; settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; type->formatImpl(settings, state, frame); - settings.ostr << '#'; } } From 36c23e240df0c70917abfe38f1d9b910e8ed64c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 2 Jun 2020 02:41:41 +0300 Subject: [PATCH 0319/2229] Update some tests --- .../00061_merge_tree_alter.reference | 20 ++++++------ .../queries/0_stateless/00642_cast.reference | 2 +- .../00643_cast_zookeeper.reference | 2 +- .../00725_comment_columns.reference | 12 +++---- .../00725_ipv4_ipv6_domains.reference | 4 +-- .../00753_comment_columns_zookeeper.reference | 4 +-- ...4_alter_modify_column_partitions.reference | 4 +-- .../00754_alter_modify_order_by.reference | 2 +- ...fy_order_by_replicated_zookeeper.reference | 4 +-- ...4_test_custom_compression_codecs.reference | 4 +-- ...m_compression_codes_log_storages.reference | 8 ++--- .../0_stateless/00836_indices_alter.reference | 10 +++--- ...dices_alter_replicated_zookeeper.reference | 24 +++++++------- .../0_stateless/00933_alter_ttl.reference | 2 +- .../0_stateless/00933_ttl_simple.reference | 8 ++--- .../00980_merge_alter_settings.reference | 10 +++--- ...keeper_merge_tree_alter_settings.reference | 12 +++---- .../00998_constraints_all_tables.reference | 4 +-- ...age_odbc_parsing_exception_check.reference | 2 +- .../01055_compact_parts_1.reference | 4 +-- .../01069_database_memory.reference | 2 +- .../01070_alter_with_ttl.reference | 4 +-- .../01079_alter_default_zookeeper.reference | 16 +++++----- .../01079_bad_alters_zookeeper.reference | 4 +-- ..._expressions_in_engine_arguments.reference | 14 ++++---- ...1135_default_and_alter_zookeeper.reference | 2 +- ...13_alter_rename_column_zookeeper.reference | 4 +-- .../01213_alter_rename_nested.reference | 6 ++-- 
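Note on the reference churn in this commit: it follows from a formatter change earlier in the series — multiline expression lists are now printed with ",\n" between items instead of ", \n", i.e. the trailing space before the line break is gone. (The ASTNameTypePair hunk above is separate cleanup: stray '#' debug markers around the formatted name/type pair.) Below is a minimal standalone sketch of the separator logic, illustrative only — not the actual ClickHouse formatImpl code, and formatColumnList is a hypothetical helper:

    #include <iostream>
    #include <string>
    #include <vector>

    // Sketch: join column names the way the updated .reference files expect,
    // with ",\n" (no space before the newline) and a 4-space-per-level indent.
    static std::string formatColumnList(const std::vector<std::string> & items, size_t indent)
    {
        std::string indent_str(4 * indent, ' ');   // mirrors the indent_str idiom in formatImpl above
        std::string result;
        for (size_t i = 0; i < items.size(); ++i)
        {
            if (i)
                result += ",\n";                   // previously ", \n" — hence the one-character diffs below
            result += indent_str + items[i];
        }
        return result;
    }

    int main()
    {
        // Prints "    a,\n    b", matching the updated "SELECT \n    a,\n    b\nFROM a" expectation.
        std::cout << formatColumnList({"a", "b"}, 1) << '\n';
    }
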
...er_rename_with_default_zookeeper.reference | 10 +++--- .../01213_alter_table_rename_nested.reference | 4 +-- ...01224_no_superfluous_dict_reload.reference | 2 +- ...how_create_table_from_dictionary.reference | 2 +- ...9_bad_arguments_for_bloom_filter.reference | 6 ++-- .../01272_suspicious_codecs.reference | 32 +++++++++---------- ...alter_rename_column_default_expr.reference | 4 +-- ..._rename_column_materialized_expr.reference | 4 +-- ...7_alter_rename_column_constraint.reference | 4 +-- ...name_column_constraint_zookeeper.reference | 4 +-- .../01278_alter_rename_combination.reference | 8 ++--- ...1_alter_rename_and_other_renames.reference | 12 +++---- 40 files changed, 143 insertions(+), 143 deletions(-) diff --git a/tests/queries/0_stateless/00061_merge_tree_alter.reference b/tests/queries/0_stateless/00061_merge_tree_alter.reference index b609bc257f1..dcc44b9bd81 100644 --- a/tests/queries/0_stateless/00061_merge_tree_alter.reference +++ b/tests/queries/0_stateless/00061_merge_tree_alter.reference @@ -1,14 +1,14 @@ d Date k UInt64 i32 Int32 -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 8 40 [1,2,3] ['12','13','14'] 2015-01-01 10 42 [] [] d Date @@ -17,7 +17,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] [] @@ -28,7 +28,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -39,7 +39,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n 
`n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 @@ -51,7 +51,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -65,7 +65,7 @@ k UInt64 i32 Int32 n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 ['120','130','140'] 0 2015-01-01 8 40 ['12','13','14'] 0 @@ -74,7 +74,7 @@ d Date k UInt64 i32 Int32 s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 @@ -85,7 +85,7 @@ i32 Int32 s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 [] [] 2015-01-01 7 39 0 [] [] 2015-01-01 8 40 0 [] [] @@ -94,7 +94,7 @@ d Date k UInt64 i32 Int32 s UInt32 DEFAULT \'0\' -CREATE TABLE default.alter_00061\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) +CREATE TABLE default.alter_00061\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 diff --git a/tests/queries/0_stateless/00642_cast.reference b/tests/queries/0_stateless/00642_cast.reference index 907861c1784..7f5333f590e 100644 --- a/tests/queries/0_stateless/00642_cast.reference +++ b/tests/queries/0_stateless/00642_cast.reference @@ -9,7 +9,7 @@ hello 1970-01-01 00:00:01 CREATE TABLE default.cast ( - `x` UInt8, + `x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = MergeTree diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.reference b/tests/queries/0_stateless/00643_cast_zookeeper.reference index b79eb07aee3..226390d8510 100644 --- 
a/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,6 +1,6 @@ CREATE TABLE test.cast1 ( - `x` UInt8, + `x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') diff --git a/tests/queries/0_stateless/00725_comment_columns.reference b/tests/queries/0_stateless/00725_comment_columns.reference index 86794581daf..7543f5854d7 100644 --- a/tests/queries/0_stateless/00725_comment_columns.reference +++ b/tests/queries/0_stateless/00725_comment_columns.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3\', \n `fourth_column` UInt8 COMMENT \'comment 4\', \n `fifth_column` UInt8\n)\nENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1\',\n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2\',\n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3\',\n `fourth_column` UInt8 COMMENT \'comment 4\',\n `fifth_column` UInt8\n)\nENGINE = TinyLog first_column UInt8 DEFAULT 1 comment 1 second_column UInt8 MATERIALIZED first_column comment 2 third_column UInt8 ALIAS second_column comment 3 @@ -11,7 +11,7 @@ fifth_column UInt8 │ check_query_comment_column │ fourth_column │ comment 4 │ │ check_query_comment_column │ fifth_column │ │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_1\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_1\', \n `fourth_column` UInt8 COMMENT \'comment 4_1\', \n `fifth_column` UInt8 COMMENT \'comment 5_1\'\n)\nENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_1\',\n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\',\n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_1\',\n `fourth_column` UInt8 COMMENT \'comment 4_1\',\n `fifth_column` UInt8 COMMENT \'comment 5_1\'\n)\nENGINE = TinyLog ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_2 │ │ check_query_comment_column │ second_column │ comment 2_2 │ @@ -19,8 +19,8 @@ CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEF │ check_query_comment_column │ fourth_column │ comment 4_2 │ │ check_query_comment_column │ fifth_column │ comment 5_2 │ └────────────────────────────┴───────────────┴─────────────┘ -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_2\', \n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', \n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_2\', \n `fourth_column` UInt8 COMMENT \'comment 4_2\', \n `fifth_column` UInt8 COMMENT \'comment 5_2\'\n)\nENGINE = TinyLog -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1\', \n `second_column` UInt8 COMMENT \'comment 2\', \n `third_column` UInt8 COMMENT \'comment 3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY 
first_column\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_2\',\n `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\',\n `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_2\',\n `fourth_column` UInt8 COMMENT \'comment 4_2\',\n `fifth_column` UInt8 COMMENT \'comment 5_2\'\n)\nENGINE = TinyLog +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1\',\n `second_column` UInt8 COMMENT \'comment 2\',\n `third_column` UInt8 COMMENT \'comment 3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 first_column UInt8 comment 1 second_column UInt8 comment 2 third_column UInt8 comment 3 @@ -29,8 +29,8 @@ third_column UInt8 comment 3 │ check_query_comment_column │ second_column │ comment 2 │ │ check_query_comment_column │ third_column │ comment 3 │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_2\', \n `second_column` UInt8 COMMENT \'comment 2_2\', \n `third_column` UInt8 COMMENT \'comment 3_2\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 -CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_3\', \n `second_column` UInt8 COMMENT \'comment 2_3\', \n `third_column` UInt8 COMMENT \'comment 3_3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_2\',\n `second_column` UInt8 COMMENT \'comment 2_2\',\n `third_column` UInt8 COMMENT \'comment 3_2\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_query_comment_column\n(\n `first_column` UInt8 COMMENT \'comment 1_3\',\n `second_column` UInt8 COMMENT \'comment 2_3\',\n `third_column` UInt8 COMMENT \'comment 3_3\'\n)\nENGINE = MergeTree()\nPARTITION BY second_column\nORDER BY first_column\nSAMPLE BY first_column\nSETTINGS index_granularity = 8192 ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_3 │ │ check_query_comment_column │ second_column │ comment 2_3 │ diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index 28051d15f65..69804e6cd24 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ipv4_test\n(\n `ipv4_` IPv4\n)\nENGINE = Memory +CREATE TABLE default.ipv4_test\n(`ipv4_` IPv4\n)\nENGINE = Memory 0.0.0.0 00 8.8.8.8 08080808 127.0.0.1 7F000001 @@ -10,7 +10,7 @@ CREATE TABLE default.ipv4_test\n(\n `ipv4_` IPv4\n)\nENGINE = Memory > 127.0.0.1 255.255.255.255 = 127.0.0.1 127.0.0.1 euqality of IPv4-mapped IPv6 value and IPv4 promoted to IPv6 with function: 1 -CREATE TABLE default.ipv6_test\n(\n `ipv6_` IPv6\n)\nENGINE = Memory +CREATE TABLE default.ipv6_test\n(`ipv6_` IPv6\n)\nENGINE = Memory :: 00000000000000000000000000000000 :: 00000000000000000000000000000000 ::ffff:8.8.8.8 00000000000000000000FFFF08080808 diff --git 
a/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference b/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference index d2705135440..5d8c5dc9f72 100644 --- a/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference +++ b/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference @@ -1,6 +1,6 @@ -CREATE TABLE default.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'comment\', \n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'comment\',\n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 comment column_name2 UInt8 non default comment -CREATE TABLE default.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'another comment\', \n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 +CREATE TABLE default.check_comments\n(\n `column_name1` UInt8 DEFAULT 1 COMMENT \'another comment\',\n `column_name2` UInt8 COMMENT \'non default comment\'\n)\nENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\')\nORDER BY column_name1\nSETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 another comment column_name2 UInt8 non default comment diff --git a/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference b/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference index a1493508b61..900a3200467 100644 --- a/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference +++ b/tests/queries/0_stateless/00754_alter_modify_column_partitions.reference @@ -1,5 +1,5 @@ *** Check SHOW CREATE TABLE *** -CREATE TABLE default.alter_column\n(\n `x` UInt32, \n `y` Int32\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_column\n(\n `x` UInt32,\n `y` Int32\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 *** Check parts *** 0 0 10 -10 @@ -52,7 +52,7 @@ CREATE TABLE default.alter_column\n(\n `x` UInt32, \n `y` Int32\n)\nENGINE 8 -8 9 -9 *** Check SHOW CREATE TABLE after ALTER MODIFY *** -CREATE TABLE default.alter_column\n(\n `x` UInt32, \n `y` Int64\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_column\n(\n `x` UInt32,\n `y` Int64\n)\nENGINE = MergeTree\nPARTITION BY x\nORDER BY x\nSETTINGS index_granularity = 8192 *** Check parts after ALTER MODIFY *** 0 0 10 -10 diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.reference b/tests/queries/0_stateless/00754_alter_modify_order_by.reference index f0dc413a186..0279e5ca11b 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by.reference +++ b/tests/queries/0_stateless/00754_alter_modify_order_by.reference @@ -9,4 +9,4 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE default.summing\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `val` UInt32\n)\nENGINE = SummingMergeTree\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.summing\n(\n `x` UInt32,\n `y` UInt32,\n `z` 
UInt32,\n `val` UInt32\n)\nENGINE = SummingMergeTree\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference index 938a90a27b4..9303d45ea7d 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference @@ -9,6 +9,6 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE test.summing_r2\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2\n(\n `x` UInt32,\n `y` UInt32,\n `z` UInt32,\n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, -z)\nSETTINGS index_granularity = 8192 *** Check SHOW CREATE TABLE after offline ALTER *** -CREATE TABLE test.summing_r2\n(\n `x` UInt32, \n `y` UInt32, \n `z` UInt32, \n `t` UInt32, \n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, t * t)\nSETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2\n(\n `x` UInt32,\n `y` UInt32,\n `z` UInt32,\n `t` UInt32,\n `val` UInt32\n)\nENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\')\nPRIMARY KEY (x, y)\nORDER BY (x, y, t * t)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index f778c4f5d90..00556b0f8c9 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -9,10 +9,10 @@ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), \n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), \n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), \n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec_multiple_more_types\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 
222 !ZSTD -CREATE TABLE default.test_default_delta\n(\n `id` UInt64 CODEC(Delta(8)), \n `data` String CODEC(Delta(1)), \n `somedate` Date CODEC(Delta(2)), \n `somenum` Float64 CODEC(Delta(8)), \n `somestr` FixedString(3) CODEC(Delta(1)), \n `othernum` Int64 CODEC(Delta(8)), \n `yetothernum` Float32 CODEC(Delta(4)), \n `ddd.age` Array(UInt8) CODEC(Delta(1)), \n `ddd.Name` Array(String) CODEC(Delta(1)), \n `ddd.OName` Array(String) CODEC(Delta(1)), \n `ddd.BName` Array(String) CODEC(Delta(1))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test_default_delta\n(\n `id` UInt64 CODEC(Delta(8)),\n `data` String CODEC(Delta(1)),\n `somedate` Date CODEC(Delta(2)),\n `somenum` Float64 CODEC(Delta(8)),\n `somestr` FixedString(3) CODEC(Delta(1)),\n `othernum` Int64 CODEC(Delta(8)),\n `yetothernum` Float32 CODEC(Delta(4)),\n `ddd.age` Array(UInt8) CODEC(Delta(1)),\n `ddd.Name` Array(String) CODEC(Delta(1)),\n `ddd.OName` Array(String) CODEC(Delta(1)),\n `ddd.BName` Array(String) CODEC(Delta(1))\n)\nENGINE = MergeTree()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference index b33535364e5..113e413bfac 100644 --- a/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference +++ b/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference @@ -1,9 +1,9 @@ -CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4), \n `data` String CODEC(ZSTD(1)), \n `ddd` Date CODEC(NONE), \n `somenum` Float64 CODEC(ZSTD(2)), \n `somestr` FixedString(3) CODEC(LZ4HC(7)), \n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = Log() +CREATE TABLE default.compression_codec_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = Log() 1 hello 2018-12-14 1.1 aaa 5 2 world 2018-12-15 2.2 bbb 6 3 ! 
2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), \n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), \n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), \n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = Log() +CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = Log() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 @@ -11,12 +11,12 @@ CREATE TABLE default.compression_codec_multiple_log\n(\n `id` UInt64 CODEC(LZ 10003 274972506.6 9175437371954010821 -CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4), \n `data` String CODEC(ZSTD(1)), \n `ddd` Date CODEC(NONE), \n `somenum` Float64 CODEC(ZSTD(2)), \n `somestr` FixedString(3) CODEC(LZ4HC(7)), \n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = TinyLog() +CREATE TABLE default.compression_codec_tiny_log\n(\n `id` UInt64 CODEC(LZ4),\n `data` String CODEC(ZSTD(1)),\n `ddd` Date CODEC(NONE),\n `somenum` Float64 CODEC(ZSTD(2)),\n `somestr` FixedString(3) CODEC(LZ4HC(7)),\n `othernum` Int64 CODEC(Delta(8))\n)\nENGINE = TinyLog() 1 hello 2018-12-14 1.1 aaa 5 2 world 2018-12-15 2.2 bbb 6 3 ! 2018-12-16 3.3 ccc 7 2 -CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), \n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), \n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), \n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = TinyLog() +CREATE TABLE default.compression_codec_multiple_tiny_log\n(\n `id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)),\n `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)),\n `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)),\n `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))\n)\nENGINE = TinyLog() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 diff --git a/tests/queries/0_stateless/00836_indices_alter.reference b/tests/queries/0_stateless/00836_indices_alter.reference index 6efa25f47b7..7fd63a45d31 100644 --- a/tests/queries/0_stateless/00836_indices_alter.reference +++ b/tests/queries/0_stateless/00836_indices_alter.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -6,15 +6,15 @@ CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n I 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n 
INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -23,6 +23,6 @@ CREATE TABLE default.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n I 1 2 1 2 1 2 -CREATE TABLE default.minmax_idx2\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.minmax_idx2\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 diff --git a/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference b/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference index ec9de160fcc..ce03d1e7de6 100644 --- a/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference @@ -1,5 +1,5 @@ -CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,8 +14,8 @@ CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n IN 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX 
idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -28,10 +28,10 @@ CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n IN 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -44,14 +44,14 @@ CREATE TABLE test.minmax_idx_r\n(\n `u64` UInt64, \n `i32` Int32, \n IN 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64, \n `i32` Int32, \n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, \n INDEX idx2 u64 * 
i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64,\n `i32` Int32,\n INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10,\n INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 3 1 2 1 3 -CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\')\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r\n(\n `u64` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\')\nORDER BY u64\nSETTINGS index_granularity = 8192 1 2 1 3 1 2 diff --git a/tests/queries/0_stateless/00933_alter_ttl.reference b/tests/queries/0_stateless/00933_alter_ttl.reference index 9b5cec0f773..545f5644e94 100644 --- a/tests/queries/0_stateless/00933_alter_ttl.reference +++ b/tests/queries/0_stateless/00933_alter_ttl.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ttl\n(\n `d` Date, \n `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.ttl\n(\n `d` Date,\n `a` Int32\n)\nENGINE = MergeTree\nPARTITION BY toDayOfMonth(d)\nORDER BY a\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 2100-10-10 3 2100-10-10 4 d Date diff --git a/tests/queries/0_stateless/00933_ttl_simple.reference b/tests/queries/0_stateless/00933_ttl_simple.reference index 102639947a3..a4ef8033328 100644 --- a/tests/queries/0_stateless/00933_ttl_simple.reference +++ b/tests/queries/0_stateless/00933_ttl_simple.reference @@ -6,11 +6,11 @@ 2000-10-10 00:00:00 0 2100-10-10 00:00:00 3 2100-10-10 2 -CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL now() - 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32,\n `a` Int32 TTL now() - 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 0 -CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL now() + 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32,\n `a` Int32 TTL now() + 1000\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 1 -CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL today() - 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE 
default.ttl_00933_1\n(\n `b` Int32,\n `a` Int32 TTL today() - 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 0 -CREATE TABLE default.ttl_00933_1\n(\n `b` Int32, \n `a` Int32 TTL today() + 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.ttl_00933_1\n(\n `b` Int32,\n `a` Int32 TTL today() + 1\n)\nENGINE = MergeTree\nPARTITION BY tuple()\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 1 diff --git a/tests/queries/0_stateless/00980_merge_alter_settings.reference b/tests/queries/0_stateless/00980_merge_alter_settings.reference index 340cf29ce89..7f8aa23b722 100644 --- a/tests/queries/0_stateless/00980_merge_alter_settings.reference +++ b/tests/queries/0_stateless/00980_merge_alter_settings.reference @@ -1,6 +1,6 @@ -CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096 -CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 -CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100 2 -CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 -CREATE TABLE default.table_for_alter\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 15 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 30 +CREATE TABLE default.table_for_alter\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = MergeTree()\nORDER BY id\nSETTINGS index_granularity = 4096, parts_to_throw_insert = 100, parts_to_delay_insert = 100, check_delay_period = 15 diff --git a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference index ab006ea6931..2682051751b 100644 --- a/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference +++ b/tests/queries/0_stateless/00980_zookeeper_merge_tree_alter_settings.reference @@ -1,12 +1,12 @@ -CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 -CREATE TABLE 
default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 4 4 4 4 6 6 -CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1 -CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64, \n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 -CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1, check_delay_period = 15 -CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64, \n `Data` String, \n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1 +CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64,\n `Data` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 +CREATE TABLE default.replicated_table_for_alter1\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192, use_minimalistic_part_header_in_zookeeper = 1, check_delay_period = 15 +CREATE TABLE default.replicated_table_for_alter2\n(\n `id` UInt64,\n `Data` String,\n `Data2` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/replicated_table_for_alter\', \'2\')\nORDER BY id\nSETTINGS index_granularity = 8192, parts_to_throw_insert = 1, parts_to_delay_insert = 1 diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.reference b/tests/queries/0_stateless/00998_constraints_all_tables.reference index 3de251daa71..0ec8b004ae4 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.reference +++ b/tests/queries/0_stateless/00998_constraints_all_tables.reference @@ -10,5 +10,5 @@ 0 0 3 -CREATE TABLE default.constrained\n(\n `URL` String, \n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\', \n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log -CREATE TABLE 
default.constrained2\n(\n `URL` String, \n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\', \n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference b/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference index c2d7d849fae..548952c3a6a 100644 --- a/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference +++ b/tests/queries/0_stateless/01033_storage_odbc_parsing_exception_check.reference @@ -1 +1 @@ -CREATE TABLE default.BannerDict\n(\n `BannerID` UInt64, \n `CompaignID` UInt64\n)\nENGINE = ODBC(\'DSN=pgconn;Database=postgres\', \'somedb\', \'bannerdict\') +CREATE TABLE default.BannerDict\n(\n `BannerID` UInt64,\n `CompaignID` UInt64\n)\nENGINE = ODBC(\'DSN=pgconn;Database=postgres\', \'somedb\', \'bannerdict\') diff --git a/tests/queries/0_stateless/01055_compact_parts_1.reference b/tests/queries/0_stateless/01055_compact_parts_1.reference index b99f336d3b0..c5311a0b479 100644 --- a/tests/queries/0_stateless/01055_compact_parts_1.reference +++ b/tests/queries/0_stateless/01055_compact_parts_1.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.mt_compact\n(\n `a` Int32, \n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, index_granularity = 8192 -CREATE TABLE default.mt_compact\n(\n `a` Int32, \n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, index_granularity = 8192, parts_to_delay_insert = 300 +CREATE TABLE default.mt_compact\n(\n `a` Int32,\n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, index_granularity = 8192 +CREATE TABLE default.mt_compact\n(\n `a` Int32,\n `s` String\n)\nENGINE = MergeTree\nPARTITION BY a\nORDER BY a\nSETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, index_granularity = 8192, parts_to_delay_insert = 300 diff --git a/tests/queries/0_stateless/01069_database_memory.reference b/tests/queries/0_stateless/01069_database_memory.reference index e7486d57276..cfccf5b1757 100644 --- a/tests/queries/0_stateless/01069_database_memory.reference +++ b/tests/queries/0_stateless/01069_database_memory.reference @@ -5,4 +5,4 @@ CREATE DATABASE memory_01069\nENGINE = Memory() 4 3 4 -CREATE TABLE memory_01069.file\n(\n `n` UInt8\n)\nENGINE = File(\'CSV\') +CREATE TABLE memory_01069.file\n(`n` UInt8\n)\nENGINE = File(\'CSV\') diff --git a/tests/queries/0_stateless/01070_alter_with_ttl.reference b/tests/queries/0_stateless/01070_alter_with_ttl.reference index de7833472a1..202ac2ac10f 100644 --- a/tests/queries/0_stateless/01070_alter_with_ttl.reference +++ b/tests/queries/0_stateless/01070_alter_with_ttl.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.alter_ttl\n(\n `i` Int32, \n `s` String TTL toDate(\'2020-01-01\')\n)\nENGINE = MergeTree\nORDER BY i\nTTL toDate(\'2020-05-05\')\nSETTINGS index_granularity = 8192 -CREATE TABLE default.alter_ttl\n(\n `d` Date, \n `s` String TTL d + toIntervalDay(1)\n)\nENGINE = MergeTree\nORDER BY d\nTTL d + toIntervalMonth(1)\nSETTINGS 
index_granularity = 8192 +CREATE TABLE default.alter_ttl\n(\n `i` Int32,\n `s` String TTL toDate(\'2020-01-01\')\n)\nENGINE = MergeTree\nORDER BY i\nTTL toDate(\'2020-05-05\')\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_ttl\n(\n `d` Date,\n `s` String TTL d + toIntervalDay(1)\n)\nENGINE = MergeTree\nORDER BY d\nTTL d + toIntervalMonth(1)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01079_alter_default_zookeeper.reference b/tests/queries/0_stateless/01079_alter_default_zookeeper.reference index 62d26bc9b4b..758150a7799 100644 --- a/tests/queries/0_stateless/01079_alter_default_zookeeper.reference +++ b/tests/queries/0_stateless/01079_alter_default_zookeeper.reference @@ -1,11 +1,11 @@ -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` String DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` String DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 1000 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt64 DEFAULT \'10\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt64 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 1000 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt64 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt16 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt64 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt16 DEFAULT 100\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 10000 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10, \n `better_column` UInt8 DEFAULT \'1\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE 
default.alter_default\n(\n `date` Date, \n `key` UInt64, \n `value` UInt8 DEFAULT 10, \n `better_column` UInt8 DEFAULT \'1\', \n `other_date` String DEFAULT 1\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt8 DEFAULT 10\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt8 DEFAULT 10,\n `better_column` UInt8 DEFAULT \'1\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.alter_default\n(\n `date` Date,\n `key` UInt64,\n `value` UInt8 DEFAULT 10,\n `better_column` UInt8 DEFAULT \'1\',\n `other_date` String DEFAULT 1\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/alter_default\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference b/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference index ea3fbec34a8..deb26676f39 100644 --- a/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference +++ b/tests/queries/0_stateless/01079_bad_alters_zookeeper.reference @@ -1,6 +1,6 @@ Wrong column name. -CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64, \n `value1` UInt8, \n `value2` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64, \n `value1` UInt8, \n `value2` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64,\n `value1` UInt8,\n `value2` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_bad_alters\n(\n `key` UInt64,\n `value1` UInt8,\n `value2` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_bad_alters\', \'1\')\nORDER BY key\nSETTINGS index_granularity = 8192 syntax error at begin of string. 
7 Hello diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference index 2007eda0f07..138f09f2634 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference @@ -1,11 +1,11 @@ -CREATE TABLE test_01083.file\n(\n `n` Int8\n)\nENGINE = File(\'TSVWithNamesAndTypes\') -CREATE TABLE test_01083.buffer\n(\n `n` Int8\n)\nENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) -CREATE TABLE test_01083.merge\n(\n `n` Int8\n)\nENGINE = Merge(\'test_01083\', \'distributed\') +CREATE TABLE test_01083.file\n(`n` Int8\n)\nENGINE = File(\'TSVWithNamesAndTypes\') +CREATE TABLE test_01083.buffer\n(`n` Int8\n)\nENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) +CREATE TABLE test_01083.merge\n(`n` Int8\n)\nENGINE = Merge(\'test_01083\', \'distributed\') CREATE TABLE test_01083.merge_tf AS merge(\'test_01083\', \'.*\') -CREATE TABLE test_01083.distributed\n(\n `n` Int8\n)\nENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') +CREATE TABLE test_01083.distributed\n(`n` Int8\n)\nENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') CREATE TABLE test_01083.distributed_tf AS cluster(\'test_shard_localhost\', \'test_01083\', \'buffer\') -CREATE TABLE test_01083.url\n(\n `n` UInt64, \n `col` String\n)\nENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+test_01083.merge+format+CSV\', \'CSV\') +CREATE TABLE test_01083.url\n(\n `n` UInt64,\n `col` String\n)\nENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+test_01083.merge+format+CSV\', \'CSV\') CREATE TABLE test_01083.rich_syntax AS remote(\'localhos{x|y|t}\', cluster(\'test_shard_localhost\', remote(\'127.0.0.{1..4}\', \'test_01083\', \'view\'))) -CREATE VIEW test_01083.view\n(\n `n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM \n(\n SELECT toString(n) AS n\n FROM test_01083.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM test_01083.file -CREATE DICTIONARY test_01083.dict\n(\n `n` UInt64, \n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) +CREATE VIEW test_01083.view\n(`n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM \n(\n SELECT toString(n) AS n\n FROM test_01083.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM test_01083.file +CREATE DICTIONARY test_01083.dict\n(\n \n `n` UInt64,\n \n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) 16 diff --git a/tests/queries/0_stateless/01135_default_and_alter_zookeeper.reference b/tests/queries/0_stateless/01135_default_and_alter_zookeeper.reference index f7c4a48b4bc..6a5dd7223bd 100644 --- a/tests/queries/0_stateless/01135_default_and_alter_zookeeper.reference +++ b/tests/queries/0_stateless/01135_default_and_alter_zookeeper.reference @@ -1,2 +1,2 @@ 4 -CREATE TABLE default.default_table\n(\n `id` UInt64, \n `enum_column` Enum8(\'undefined\' = 0, \'fox\' = 1, \'index\' = 2) DEFAULT \'fox\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/default_table\', \'1\')\nORDER BY 
tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.default_table\n(\n `id` UInt64,\n `enum_column` Enum8(\'undefined\' = 0, \'fox\' = 1, \'index\' = 2) DEFAULT \'fox\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/default_table\', \'1\')\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference b/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference index e2d6007c57f..5457becfeda 100644 --- a/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference +++ b/tests/queries/0_stateless/01213_alter_rename_column_zookeeper.reference @@ -1,6 +1,6 @@ 1 -CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date, \n `key` UInt64, \n `renamed_value1` String, \n `value2` String, \n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String,\n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_replicated\n(\n `date` Date,\n `key` UInt64,\n `renamed_value1` String,\n `value2` String,\n `value3` String\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/table_for_rename_replicated\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 1 date key renamed_value1 value2 value3 2019-10-02 1 1 1 1 diff --git a/tests/queries/0_stateless/01213_alter_rename_nested.reference b/tests/queries/0_stateless/01213_alter_rename_nested.reference index 2641df46aeb..403e87256fe 100644 --- a/tests/queries/0_stateless/01213_alter_rename_nested.reference +++ b/tests/queries/0_stateless/01213_alter_rename_nested.reference @@ -1,10 +1,10 @@ [8,9,10] ['a','b','c'] -CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.x` Array(UInt32), \n `n.y` Array(String), \n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date,\n `key` UInt64,\n `n.x` Array(UInt32),\n `n.y` Array(String),\n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date,\n `key` UInt64,\n `n.renamed_x` Array(UInt32),\n `n.renamed_y` Array(String),\n `value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 7 [8,9,10] 7 ['a','b','c'] [['7']] -CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n 
`n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `renamed_value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date,\n `key` UInt64,\n `n.renamed_x` Array(UInt32),\n `n.renamed_y` Array(String),\n `renamed_value1` Array(Array(LowCardinality(String)))\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 date key n.renamed_x n.renamed_y renamed_value1 2019-10-01 7 [8,9,10] ['a','b','c'] [['7']] diff --git a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference index 251e664b522..a4759ecb0f7 100644 --- a/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference +++ b/tests/queries/0_stateless/01213_alter_rename_with_default_zookeeper.reference @@ -1,17 +1,17 @@ date key value1 value2 2019-10-02 1 1 Hello 1 -CREATE TABLE default.table_rename_with_default\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String DEFAULT concat(\'Hello \', value1), \n `value3` String ALIAS concat(\'Word \', value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_default\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String DEFAULT concat(\'Hello \', value1),\n `value3` String ALIAS concat(\'Word \', value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 date key renamed_value1 value2 2019-10-02 1 1 Hello 1 -CREATE TABLE default.table_rename_with_default\n(\n `date` Date, \n `key` UInt64, \n `renamed_value1` String, \n `value2` String DEFAULT concat(\'Hello \', renamed_value1), \n `value3` String ALIAS concat(\'Word \', renamed_value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_default\n(\n `date` Date,\n `key` UInt64,\n `renamed_value1` String,\n `value2` String DEFAULT concat(\'Hello \', renamed_value1),\n `value3` String ALIAS concat(\'Word \', renamed_value1)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 Hello 1 Word 1 date1 date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl\n(\n `date1` Date, \n `date2` Date, \n `value1` String, \n `value2` String TTL date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `date1` Date,\n `date2` Date,\n `value1` String,\n `value2` String TTL date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 renamed_date1 date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date, \n `date2` Date, \n `value1` String, \n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date,\n `date2` Date,\n 
`value1` String,\n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 renamed_date1 renamed_date2 value1 value2 2019-10-02 2018-10-02 1 1 -CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date, \n `renamed_date2` Date, \n `value1` String, \n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL renamed_date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_rename_with_ttl\n(\n `renamed_date1` Date,\n `renamed_date2` Date,\n `value1` String,\n `value2` String TTL renamed_date1 + toIntervalMonth(10000)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/test/table_rename_with_ttl\', \'1\')\nORDER BY tuple()\nTTL renamed_date2 + toIntervalMonth(10000)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01213_alter_table_rename_nested.reference b/tests/queries/0_stateless/01213_alter_table_rename_nested.reference index 8e6d93dbcce..1b89cf8f461 100644 --- a/tests/queries/0_stateless/01213_alter_table_rename_nested.reference +++ b/tests/queries/0_stateless/01213_alter_table_rename_nested.reference @@ -1,6 +1,6 @@ [8,9,10] ['a','b','c'] -CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.x` Array(UInt32), \n `n.y` Array(String), \n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 -CREATE TABLE default.table_for_rename_nested\n(\n `date` Date, \n `key` UInt64, \n `n.renamed_x` Array(UInt32), \n `n.renamed_y` Array(String), \n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date,\n `key` UInt64,\n `n.x` Array(UInt32),\n `n.y` Array(String),\n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename_nested\n(\n `date` Date,\n `key` UInt64,\n `n.renamed_x` Array(UInt32),\n `n.renamed_y` Array(String),\n `value1` String\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 7 [8,9,10] 7 ['a','b','c'] diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference index 524fbdd26fc..96d4393e06b 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -12,7 +12,7 @@ LAYOUT(FLAT()) NOT_LOADED CREATE TABLE dict_db_01224_dictionary.`dict_db_01224.dict` ( - `key` UInt64, + `key` UInt64, `val` UInt64 ) ENGINE = Dictionary(`dict_db_01224.dict`) diff --git a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference index 14ddc093143..3363df5fb98 100644 --- a/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference +++ b/tests/queries/0_stateless/01225_show_create_table_from_dictionary.reference @@ -1,6 +1,6 @@ CREATE TABLE dict_db_01225_dictionary.`dict_db_01225.dict` ( - `key` UInt64, + `key` UInt64, `val` UInt64 ) ENGINE = Dictionary(`dict_db_01225.dict`) diff --git 
a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference index 04ae001675f..dfff8c7be00 100644 --- a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference +++ b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference @@ -1,3 +1,3 @@ -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(0.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(0.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(1.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(0.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(0.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(1.) 
GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.reference b/tests/queries/0_stateless/01272_suspicious_codecs.reference index 559b6df2693..de91a1ddb25 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.reference +++ b/tests/queries/0_stateless/01272_suspicious_codecs.reference @@ -1,16 +1,16 @@ -CREATE TABLE default.codecs1\n(\n `a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs2\n(\n `a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs3\n(\n `a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs4\n(\n `a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs1\n(\n `a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs2\n(\n `a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs3\n(\n `a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs4\n(\n `a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs1\n(`a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs2\n(`a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs3\n(`a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs4\n(`a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs5\n(`a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs6\n(`a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs7\n(`a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = 
MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs8\n(`a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs1\n(`a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs2\n(`a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs3\n(`a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs4\n(`a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs5\n(`a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs6\n(`a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs7\n(`a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs8\n(`a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01275_alter_rename_column_default_expr.reference b/tests/queries/0_stateless/01275_alter_rename_column_default_expr.reference index d81601b92c5..e1ea5a778da 100644 --- a/tests/queries/0_stateless/01275_alter_rename_column_default_expr.reference +++ b/tests/queries/0_stateless/01275_alter_rename_column_default_expr.reference @@ -7,7 +7,7 @@ 2019-10-01 6 6 7 6 + 7 2019-10-02 7 7 8 7 + 8 2019-10-03 8 8 9 8 + 9 -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value4` String, \n `value5` String, \n `value3` String DEFAULT concat(value4, \' + \', value5)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value4` String,\n `value5` String,\n `value3` String DEFAULT concat(value4, \' + \', value5)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 0 + 1 2019-10-02 1 1 2 1 + 2 2019-10-03 2 2 3 2 + 3 @@ -36,7 +36,7 @@ CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \ 2019-10-03 17 17 18 17 + 18 2019-10-01 18 18 19 18 + 19 2019-10-02 19 19 20 19 + 20 -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String DEFAULT concat(value1, \' + \', value2)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String,\n `value3` String DEFAULT concat(value1, \' + \', value2)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 0 + 1 2019-10-02 1 1 2 1 + 2 2019-10-03 2 2 3 2 + 3 diff --git a/tests/queries/0_stateless/01276_alter_rename_column_materialized_expr.reference b/tests/queries/0_stateless/01276_alter_rename_column_materialized_expr.reference index 5d721230db3..c430b6a28af 100644 --- a/tests/queries/0_stateless/01276_alter_rename_column_materialized_expr.reference +++ b/tests/queries/0_stateless/01276_alter_rename_column_materialized_expr.reference @@ -7,7 +7,7 @@ 2019-10-01 6 6 7 2019-10-02 7 7 8 
2019-10-03 8 8 9 -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value4` String, \n `value5` String, \n `value3` String MATERIALIZED concat(value4, \' + \', value5)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value4` String,\n `value5` String,\n `value3` String MATERIALIZED concat(value4, \' + \', value5)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2019-10-02 1 1 2 2019-10-03 2 2 3 @@ -38,7 +38,7 @@ CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \ 2019-10-01 18 18 19 2019-10-02 19 19 20 -- rename columns back -- -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String MATERIALIZED concat(value1, \' + \', value2)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String,\n `value3` String MATERIALIZED concat(value1, \' + \', value2)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2019-10-02 1 1 2 2019-10-03 2 2 3 diff --git a/tests/queries/0_stateless/01277_alter_rename_column_constraint.reference b/tests/queries/0_stateless/01277_alter_rename_column_constraint.reference index cb1842f95da..4316c7fa1b9 100644 --- a/tests/queries/0_stateless/01277_alter_rename_column_constraint.reference +++ b/tests/queries/0_stateless/01277_alter_rename_column_constraint.reference @@ -7,7 +7,7 @@ 2019-10-01 6 6 7 8 2019-10-02 7 7 8 9 2019-10-03 8 8 9 10 -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value4` String, \n `value5` String, \n `value3` String, \n CONSTRAINT cs_value1 CHECK toInt64(value4) < toInt64(value5), \n CONSTRAINT cs_value2 CHECK toInt64(value5) < toInt64(value3)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value4` String,\n `value5` String,\n `value3` String,\n CONSTRAINT cs_value1 CHECK toInt64(value4) < toInt64(value5),\n CONSTRAINT cs_value2 CHECK toInt64(value5) < toInt64(value3)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2 2019-10-02 1 1 2 3 2019-10-03 2 2 3 4 @@ -38,7 +38,7 @@ CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \ 2019-10-01 18 18 19 20 2019-10-02 19 19 20 21 -- rename columns back -- -CREATE TABLE default.table_for_rename\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String, \n CONSTRAINT cs_value1 CHECK toInt64(value1) < toInt64(value2), \n CONSTRAINT cs_value2 CHECK toInt64(value2) < toInt64(value3)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String,\n `value3` String,\n CONSTRAINT cs_value1 CHECK toInt64(value1) < toInt64(value2),\n CONSTRAINT cs_value2 CHECK toInt64(value2) < toInt64(value3)\n)\nENGINE = MergeTree()\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2 2019-10-02 1 1 2 3 2019-10-03 2 2 3 4 diff --git 
a/tests/queries/0_stateless/01277_alter_rename_column_constraint_zookeeper.reference b/tests/queries/0_stateless/01277_alter_rename_column_constraint_zookeeper.reference index 9ca17dbbc0a..6a9d5a3bdcf 100644 --- a/tests/queries/0_stateless/01277_alter_rename_column_constraint_zookeeper.reference +++ b/tests/queries/0_stateless/01277_alter_rename_column_constraint_zookeeper.reference @@ -7,7 +7,7 @@ 2019-10-01 6 6 7 8 2019-10-02 7 7 8 9 2019-10-03 8 8 9 10 -CREATE TABLE default.table_for_rename1\n(\n `date` Date, \n `key` UInt64, \n `value4` String, \n `value5` String, \n `value3` String, \n CONSTRAINT cs_value1 CHECK toInt64(value4) < toInt64(value5), \n CONSTRAINT cs_value2 CHECK toInt64(value5) < toInt64(value3)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_for_rename\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename1\n(\n `date` Date,\n `key` UInt64,\n `value4` String,\n `value5` String,\n `value3` String,\n CONSTRAINT cs_value1 CHECK toInt64(value4) < toInt64(value5),\n CONSTRAINT cs_value2 CHECK toInt64(value5) < toInt64(value3)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_for_rename\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2 2019-10-02 1 1 2 3 2019-10-03 2 2 3 4 @@ -38,7 +38,7 @@ CREATE TABLE default.table_for_rename1\n(\n `date` Date, \n `key` UInt64, 2019-10-01 18 18 19 20 2019-10-02 19 19 20 21 -- rename columns back -- -CREATE TABLE default.table_for_rename1\n(\n `date` Date, \n `key` UInt64, \n `value1` String, \n `value2` String, \n `value3` String, \n CONSTRAINT cs_value1 CHECK toInt64(value1) < toInt64(value2), \n CONSTRAINT cs_value2 CHECK toInt64(value2) < toInt64(value3)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_for_rename\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 +CREATE TABLE default.table_for_rename1\n(\n `date` Date,\n `key` UInt64,\n `value1` String,\n `value2` String,\n `value3` String,\n CONSTRAINT cs_value1 CHECK toInt64(value1) < toInt64(value2),\n CONSTRAINT cs_value2 CHECK toInt64(value2) < toInt64(value3)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_for_rename\', \'1\')\nPARTITION BY date\nORDER BY key\nSETTINGS index_granularity = 8192 2019-10-01 0 0 1 2 2019-10-02 1 1 2 3 2019-10-03 2 2 3 4 diff --git a/tests/queries/0_stateless/01278_alter_rename_combination.reference b/tests/queries/0_stateless/01278_alter_rename_combination.reference index 3f00378b4b7..cc912e9b265 100644 --- a/tests/queries/0_stateless/01278_alter_rename_combination.reference +++ b/tests/queries/0_stateless/01278_alter_rename_combination.reference @@ -1,15 +1,15 @@ -CREATE TABLE default.rename_table\n(\n `key` Int32, \n `old_value1` Int32, \n `value1` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table\n(\n `key` Int32,\n `old_value1` Int32,\n `value1` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 key old_value1 value1 1 2 3 -CREATE TABLE default.rename_table\n(\n `k` Int32, \n `v1` Int32, \n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table\n(\n `k` Int32,\n `v1` Int32,\n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 k v1 v2 1 2 3 4 5 6 ---polymorphic--- -CREATE TABLE default.rename_table_polymorphic\n(\n `key` Int32, \n `old_value1` Int32, \n `value1` 
Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 10000, index_granularity = 8192 +CREATE TABLE default.rename_table_polymorphic\n(\n `key` Int32,\n `old_value1` Int32,\n `value1` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 10000, index_granularity = 8192 key old_value1 value1 1 2 3 -CREATE TABLE default.rename_table_polymorphic\n(\n `k` Int32, \n `v1` Int32, \n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 10000, index_granularity = 8192 +CREATE TABLE default.rename_table_polymorphic\n(\n `k` Int32,\n `v1` Int32,\n `v2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 10000, index_granularity = 8192 k v1 v2 1 2 3 4 5 6 diff --git a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference index f0a906147ac..bf3358aea60 100644 --- a/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference +++ b/tests/queries/0_stateless/01281_alter_rename_and_other_renames.reference @@ -1,23 +1,23 @@ -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32, \n `value1_string` String, \n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 key value1_string value2 1 2 3 -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32, \n `value1_string` String, \n `value2_old` Int32, \n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int32,\n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 key value1_string value2_old value2 1 2 3 7 4 5 6 7 -CREATE TABLE default.rename_table_multiple\n(\n `key` Int32, \n `value1_string` String, \n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.rename_table_multiple\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 key value1_string value2_old 1 2 7 4 5 7 7 8 10 -CREATE TABLE default.rename_table_multiple_compact\n(\n `key` Int32, \n `value1_string` String, \n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.rename_table_multiple_compact\n(\n `key` Int32,\n `value1_string` String,\n `value2` Int32\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 key value1_string value2 1 2 3 -CREATE TABLE default.rename_table_multiple_compact\n(\n `key` Int32, \n `value1_string` String, \n `value2_old` Int32, \n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.rename_table_multiple_compact\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int32,\n `value2` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 key value1_string value2_old value2 1 2 3 7 4 5 6 7 -CREATE TABLE 
default.rename_table_multiple_compact\n(\n `key` Int32, \n `value1_string` String, \n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.rename_table_multiple_compact\n(\n `key` Int32,\n `value1_string` String,\n `value2_old` Int64 DEFAULT 7\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, index_granularity = 8192 key value1_string value2_old 1 2 7 4 5 7
From d717919c0cb499cd54a3124ed9fbd44b4ca1580f Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Tue, 2 Jun 2020 03:23:41 +0300 Subject: [PATCH 0320/2229] some improvements of vectorization compare --- src/Columns/ColumnAggregateFunction.h | 3 +- src/Columns/ColumnArray.cpp | 7 +++- src/Columns/ColumnArray.h | 4 +- src/Columns/ColumnConst.h | 7 +++- src/Columns/ColumnDecimal.cpp | 7 +++- src/Columns/ColumnDecimal.h | 4 +- src/Columns/ColumnFixedString.h | 7 +++- src/Columns/ColumnFunction.h | 4 +- src/Columns/ColumnLowCardinality.cpp | 7 +++- src/Columns/ColumnLowCardinality.h | 4 +- src/Columns/ColumnNullable.cpp | 7 +++- src/Columns/ColumnNullable.h | 4 +- src/Columns/ColumnString.h | 7 +++- src/Columns/ColumnTuple.cpp | 7 +++- src/Columns/ColumnTuple.h | 4 +- src/Columns/ColumnUnique.h | 12 ++++-- src/Columns/ColumnVector.h | 13 +++--- src/Columns/IColumn.h | 10 +++-- src/Columns/IColumnDummy.h | 4 +- src/Columns/IColumnImpl.h | 30 ++++++++++--- .../Transforms/PartialSortingTransform.cpp | 42 ++++++++----------- 21 files changed, 126 insertions(+), 68 deletions(-)
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 7d5c62fc49c..91cdb58fb70 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -192,9 +192,8 @@ public: return 0; } - std::vector<Int8> compareAt(const IColumn &, size_t, const std::vector<UInt8> &, int) const override + void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> &, PaddedPODArray<Int8> &, int, int) const override { - return std::vector<Int8>(getData().size(), 0); } void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 9f27c4e9f18..3940dbf2b2d 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -309,9 +309,12 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir : 1); } -std::vector<Int8> ColumnArray::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const +void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl(assert_cast<const ColumnArray &>(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast<const ColumnArray &>(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } namespace
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 85db887e324..6315efe34db 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -72,7 +72,9 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; - std::vector<Int8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override;
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 3ffefd6cf6c..02bf81b4fca 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -187,9 +187,12 @@ public: return data->compareAt(0, 0, *assert_cast<const ColumnConst &>(rhs).data, nan_direction_hint); } - std::vector<Int8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const override { - return data->compareAt(rhs, rhs_row_num, mask, nan_direction_hint); + return data->compareColumn(rhs, rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index f3dc9750c52..672f185b0a0 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -40,9 +40,12 @@ int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c } template <typename T> -std::vector<Int8> ColumnDecimal<T>::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const +void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl(static_cast<const ColumnDecimal<T> &>(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(static_cast<const ColumnDecimal<T> &>(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } template <typename T>
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index caee9ffdadb..a32c56ab9db 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -107,7 +107,9 @@ public: void updateHashWithValue(size_t n, SipHash & hash) const override; void updateWeakHash32(WeakHash32 & hash) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; - std::vector<Int8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_range) const override;
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 01a00f8b2b9..b81faff92ba 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -116,9 +116,12 @@ public: return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n); } - std::vector<Int8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const override { - return compareImpl(assert_cast<const ColumnFixedString &>(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast<const ColumnFixedString &>(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 9f26a520c79..903de86d5c9 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -116,9 +116,9 @@ public: throw Exception("compareAt is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - std::vector<Int8> compareAt(const IColumn &, size_t, const std::vector<UInt8> &, int) const override + void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> &, PaddedPODArray<Int8> &, int, int) const override { - throw Exception("compareAt(const IColumn &, size_t, const std::vector<UInt8> &, int) is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception("compareColumn is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void getPermutation(bool, size_t, int, Permutation &) const override
diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 73281923877..5b03f39ae0d 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -279,9 +279,12 @@ int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } -std::vector<Int8> ColumnLowCardinality::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const +void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl(assert_cast<const ColumnLowCardinality &>(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast<const ColumnLowCardinality &>(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index ccf097938e7..7edd9d1e445 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -109,7 +109,9 @@ public: int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; - std::vector<Int8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results, + int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index c753dc638e0..0818eccad1c 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -248,9 +248,12 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null return getNestedColumn().compareAt(n, m, nested_rhs,
null_direction_hint); } -std::vector ColumnNullable::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 2fcc6a98af3..e42d1508bc6 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -77,7 +77,9 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; - std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 3a4a15f1e4d..fa3ee96105f 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -220,9 +220,12 @@ public: return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1); } - std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override { - return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } /// Variant of compareAt for string comparison with respect of collation. 
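All the per-type overrides above share one new calling convention. The old batch variant took a mask of rows to compare and returned a freshly allocated vector of results for every sort column; compareColumn() instead receives a shrinking work set of row indexes plus a caller-owned result array, and folds the sort direction into the signed result. A caller walks the sort key column by column, and each column resolves only the rows that previous columns left tied. The following is a minimal standalone sketch of that driving loop, not the actual ClickHouse code: ToyColumn and the use of std::vector in place of PaddedPODArray are illustration-only assumptions.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>

/// Toy stand-in for one sort column: compares row n of a block against the
/// single threshold row and returns -1/0/1 with the direction already applied.
using ToyColumn = std::function<int(size_t)>;

std::vector<int8_t> compareWithThreshold(const std::vector<ToyColumn> & sort_columns, size_t rows)
{
    std::vector<int8_t> compare_results(rows, 0);
    std::vector<uint64_t> row_indexes(rows);
    for (size_t i = 0; i < rows; ++i)
        row_indexes[i] = i;

    for (const auto & column : sort_columns)
    {
        /// One compareColumn() call: decided rows are swapped to the tail of
        /// row_indexes and dropped; ties stay in the work set for the next column.
        size_t remaining = row_indexes.size();
        for (size_t i = 0; i < remaining;)
        {
            uint64_t row = row_indexes[i];
            compare_results[row] = static_cast<int8_t>(column(row));
            if (compare_results[row] != 0)
                std::swap(row_indexes[i], row_indexes[--remaining]);
            else
                ++i;
        }
        row_indexes.resize(remaining);

        if (row_indexes.empty())
            break; /// early exit: later sort columns are never touched
    }
    return compare_results;
}

int main()
{
    std::vector<int> a{1, 2, 2, 3}, b{9, 8, 7, 6};
    std::vector<ToyColumn> keys{
        [&](size_t n) { return (a[n] > 2) - (a[n] < 2); }, /// primary key vs threshold value 2
        [&](size_t n) { return (b[n] > 8) - (b[n] < 8); }, /// tie-breaker vs threshold value 8
    };
    for (int8_t r : compareWithThreshold(keys, a.size()))
        std::cout << int(r) << ' '; /// prints: -1 0 -1 1
    std::cout << '\n';
}

The point of the redesign shows in the inner loop: the mask-based version scanned every row for every sort column, while the index-set version touches only the still-tied rows and can stop after the first column on mostly-distinct data.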
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 75e2c9600f9..b350e204bb6 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -279,9 +279,12 @@ int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_dire return 0; } -std::vector ColumnTuple::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl(assert_cast(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl(assert_cast(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } template diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 59c02a19a79..d6ef0e5c70a 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -70,7 +70,9 @@ public: MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override; void gather(ColumnGathererStream & gatherer_stream) override; int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; - std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index c161d738e1e..13d93004128 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -77,7 +78,9 @@ public: } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; - std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override; + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } @@ -377,9 +380,12 @@ int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, } template -std::vector ColumnUnique::compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +void ColumnUnique::compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const { - return compareImpl>(static_cast &>(rhs), rhs_row_num, mask, nan_direction_hint); + return compareImpl>(static_cast &>(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); } template diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 1b91b23257a..527d0a36826 
100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -188,6 +188,14 @@ public: return CompareHelper::compare(data[n], static_cast(rhs_).data[m], nan_direction_hint); } + void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const override + { + return this->template compareImpl(static_cast(rhs), rhs_row_num, row_indexes, + compare_results, direction, nan_direction_hint); + } + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, IColumn::SpecialSort) const override; @@ -276,11 +284,6 @@ public: return typeid(rhs) == typeid(ColumnVector); } - std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const override - { - return this->template compareImpl(static_cast(rhs), rhs_row_num, mask, nan_direction_hint); - } - /** More efficient methods of manipulation - to manipulate with data directly. */ Container & getData() { diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index ad5446f713b..d6fcb35b826 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -244,9 +244,9 @@ public: */ virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; - - virtual std::vector compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const = 0; - + virtual void compareColumn(const IColumn & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const = 0; /** Returns a permutation that sorts elements of this column, * i.e. perm[i]-th element of source column should be i-th element of sorted column. 
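One detail of the ColumnVector override above is the spelling this->template compareImpl<Self>(...). Both qualifiers are required because compareImpl is inherited through a dependent base (the COW helper class template): this-> defers name lookup into that base until instantiation time, and the template keyword tells the parser that the following < opens a template argument list rather than a less-than comparison. The "Fix build" commit further down applies the same spelling to ColumnDecimal. A reduced sketch, in which IToyColumn, ToyCOW and ToyVector are hypothetical stand-ins for IColumn, COWHelper and ColumnVector:

#include <cstddef>

struct IToyColumn
{
    /// Member template that derived columns reach through a dependent base.
    template <typename Derived>
    int compareImpl(size_t n) const { return static_cast<const Derived &>(*this).data[n]; }
};

/// Dependent base, like COWHelper in ClickHouse.
template <typename Base, typename Derived>
struct ToyCOW : Base {};

template <typename T>
struct ToyVector : ToyCOW<IToyColumn, ToyVector<T>>
{
    T data[4] = {1, 2, 3, 4};

    int compareAt(size_t n) const
    {
        /// Dropping either `this->` or `template` here fails to compile:
        /// unqualified lookup does not see members of a dependent base.
        return this->template compareImpl<ToyVector<T>>(n);
    }
};

int main() { return ToyVector<int>{}.compareAt(2) == 3 ? 0 : 1; }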
@@ -419,7 +419,9 @@ protected: std::vector scatterImpl(ColumnIndex num_columns, const Selector & selector) const; template - std::vector compareImpl(const Derived & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const; + void compareImpl(const Derived & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const; }; using ColumnPtr = IColumn::Ptr; diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 38ae303e16f..55ac9f0a8d0 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -35,7 +35,9 @@ public: size_t byteSize() const override { return 0; } size_t allocatedBytes() const override { return 0; } int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; } - std::vector compareAt(const IColumn &, size_t, const std::vector &, int) const override { return std::vector(s, 0); } + void compareColumn(const IColumn &, size_t, PaddedPODArray &, PaddedPODArray &, int, int) const override + { + } Field operator[](size_t) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void get(size_t, Field &) const override { throw Exception("Cannot get value from " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 7eeb362f769..cb4c2ddb776 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -47,17 +47,35 @@ std::vector IColumn::scatterImpl(ColumnIndex num_columns, } template -std::vector IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, const std::vector & mask, int nan_direction_hint) const +void IColumn::compareImpl(const Derived & rhs, size_t rhs_row_num, + PaddedPODArray & row_indexes, PaddedPODArray & compare_results, + int direction, int nan_direction_hint) const { size_t rows_num = size(); - std::vector results(rows_num, 0); + size_t row_indexes_size = row_indexes.size(); - for (size_t i = 0; i < rows_num; ++i) + if (compare_results.empty()) + compare_results.resize(rows_num, 0); + else if (compare_results.size() != rows_num) + throw Exception( + "Size of compare_results: " + std::to_string(compare_results.size()) + " doesn't match rows_num: " + std::to_string(rows_num), + ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + + for (size_t i = 0; i < row_indexes_size;) { - if (mask[i]) - results[i] = compareAt(i, rhs_row_num, rhs, nan_direction_hint); + UInt64 index = row_indexes[i]; + if (compare_results[index] = direction * compareAt(index, rhs_row_num, rhs, nan_direction_hint); compare_results[index] != 0) + { + std::swap(row_indexes[i], row_indexes[row_indexes_size - 1]); + --row_indexes_size; + } + else + { + ++i; + } } - return results; + + row_indexes.resize(row_indexes_size); } } diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index b806317d1fa..92dffcc6f2b 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -44,32 +44,30 @@ bool less(const ColumnRawPtrs & lhs, UInt64 lhs_row_num, return false; } -IColumn::Filter getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_t rhs_row_num, const SortDescription & description, size_t rows_num) +size_t getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_t rhs_row_num, + const SortDescription & description, size_t rows_num, IColumn::Filter & filter) { - 
IColumn::Filter filter(rows_num, 1); - std::vector mask(rows_num, 1); + filter.resize_fill(rows_num, 0); + PaddedPODArray rows_to_compare(rows_num); + PaddedPODArray compare_results(rows_num, 0); + size_t filtered_count = rows_num; + + for (size_t i = 0; i < rows_num; ++i) + rows_to_compare[i] = i; size_t size = description.size(); for (size_t i = 0; i < size; ++i) { - std::vector compare_result = lhs[i]->compareAt(*rhs[i], rhs_row_num, mask, 1); - int direction = description[i].direction; + lhs[i]->compareColumn(*rhs[i], rhs_row_num, rows_to_compare, compare_results, description[i].direction, 1); - for (size_t j = 0; j < rows_num; ++j) - { - if (mask[j]) - { - int res = direction * compare_result[j]; - if (res) - { - filter[j] = (res >= 0); - mask[j] = 0; - } - } - } + if (rows_to_compare.empty()) + break; } - return filter; + for (size_t i = 0; i != rows_num; ++i) + filtered_count -= filter[i] = (compare_results[i] >= 0); + + return filtered_count; } void PartialSortingTransform::transform(Chunk & chunk) @@ -89,12 +87,8 @@ void PartialSortingTransform::transform(Chunk & chunk) if (!threshold_block_columns.empty()) { block_columns = extractColumns(block, description); - size_t filtered_count = 0; - - IColumn::Filter filter = getFilterMask(block_columns, threshold_block_columns, limit - 1, description, rows_num); - - for (const auto & item : filter) - filtered_count += !item; + IColumn::Filter filter; + size_t filtered_count = getFilterMask(block_columns, threshold_block_columns, limit - 1, description, rows_num, filter); if (filtered_count) { From 35734aadde1b7cd0f68cc6a513bab1e8497229f8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:15:11 +0300 Subject: [PATCH 0321/2229] apply comment --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index dbe3d005be7..793de7709cc 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const { if (!frames.has_value()) { - frames = std::array(); + frames.emplace({}); symbolize(frame_pointers.data(), offset, size, frames); } return frames; From 03f4aa19aa64e10e8433e938931e95400db7fdf6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:16:41 +0300 Subject: [PATCH 0322/2229] apply comment --- src/Common/StackTrace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 793de7709cc..819f74f37cb 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const { if (!frames.has_value()) { - frames.emplace({}); + frames.emplace(); symbolize(frame_pointers.data(), offset, size, frames); } return frames; @@ -357,7 +357,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs { std::stringstream out; StackTrace::Frames frames{}; - frames = {{}}; + frames.emplace(); symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); From 40f6e559e2d2424e29604d5191dee6b941bc3d6e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:29:13 +0300 Subject: [PATCH 0323/2229] fix compiling when disabled --- base/daemon/SentryWriter.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp 
index 95189b72e81..15602be2581 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #if !defined(ARCADIA_BUILD) @@ -105,6 +106,8 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { LOG_INFO(logger, "Sending crash reports is disabled"); } +#else + UNUSED(config); #endif } @@ -191,5 +194,10 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c { LOG_INFO(logger, "Not sending crash report"); } +#else + UNUSED(sig); + UNUSED(info); + UNUSED(context); + UNUSED(stack_trace); #endif } From 9c1ac2f1c1af35b20bb7ea031370a3c8e347f4df Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 09:46:36 +0300 Subject: [PATCH 0324/2229] experiment --- cmake/find/sentry.cmake | 9 +-------- contrib/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 309f63e9165..94c4f4a6e93 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,16 +5,9 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD AND NOT UNBUNDLED AND NOT SPLITTED AND NOT (COMPILER_CLANG AND OS_DARWIN)) - if (POLICY CMP0077) - cmake_policy (SET CMP0077 NEW) - endif () +if (NOT OS_FREEBSD) option (USE_SENTRY "Use Sentry" ON) - set (BUILD_SHARED_LIBS OFF) - set (SENTRY_PIC OFF) - set (SENTRY_BACKEND "none") - set (SENTRY_TRANSPORT "curl") set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ea13969db16..d9af4bc0ac5 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -321,6 +321,8 @@ if (USE_FASTOPS) endif() if (USE_SENTRY) + set (SENTRY_BACKEND "none") + set (SENTRY_TRANSPORT "curl") add_subdirectory (sentry-native) endif() From 280eea1e12fa4770f114ad952efa0be0eecc3e34 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 10:33:11 +0300 Subject: [PATCH 0325/2229] fix compiling when disabled --- base/daemon/SentryWriter.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 15602be2581..c8197d8a160 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -16,14 +16,16 @@ #endif +#if USE_SENTRY namespace { + bool initialized = false; bool anonymize = false; void setExtras() { -#if USE_SENTRY + if (!anonymize) { sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str())); @@ -36,9 +38,9 @@ void setExtras() sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); +} +} #endif -} -} void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { From fded020cdc28aae4529798d3626b51bb5927f372 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Jun 2020 10:53:55 +0300 Subject: [PATCH 0326/2229] Fix build. 
--- src/Columns/ColumnDecimal.cpp | 2 +- src/Columns/ColumnUnique.h | 12 ------------ src/Columns/IColumnUnique.h | 5 +++++ .../Transforms/PartialSortingTransform.cpp | 3 +++ 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 672f185b0a0..b4923619d3a 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -44,7 +44,7 @@ void ColumnDecimal::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray & row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const { - return compareImpl(static_cast(rhs), rhs_row_num, row_indexes, + return this->template compareImpl>(static_cast(rhs), rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint); } diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 13d93004128..d2d93647538 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -78,9 +78,6 @@ public: } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; - void compareColumn(const IColumn & rhs, size_t rhs_row_num, - PaddedPODArray & row_indexes, PaddedPODArray & compare_results, - int direction, int nan_direction_hint) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } @@ -379,15 +376,6 @@ int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); } -template -void ColumnUnique::compareColumn(const IColumn & rhs, size_t rhs_row_num, - PaddedPODArray & row_indexes, PaddedPODArray & compare_results, - int direction, int nan_direction_hint) const -{ - return compareImpl>(static_cast &>(rhs), rhs_row_num, row_indexes, - compare_results, direction, nan_direction_hint); -} - template void ColumnUnique::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const { diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index af5d9878a3b..648fce6288d 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -141,6 +141,11 @@ public: { throw Exception("Method updateWeakHash32 is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); } + + void compareColumn(const IColumn &, size_t, PaddedPODArray &, PaddedPODArray &, int, int) const override + { + throw Exception("Method compareColumn is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); + } }; using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr; diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 92dffcc6f2b..ebfdef3eaea 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -90,6 +90,9 @@ void PartialSortingTransform::transform(Chunk & chunk) IColumn::Filter filter; size_t filtered_count = getFilterMask(block_columns, threshold_block_columns, limit - 1, description, rows_num, filter); + if (filtered_count == rows_num) + return; + if (filtered_count) { for (auto & column : block.getColumns()) From cc3abb12d3b7e61151cd8f2a81aeb89d06c1a782 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Jun 2020 11:28:57 +0300 
Subject: [PATCH 0327/2229] Disable filtration for PartialSortingTransform --- src/Processors/Transforms/PartialSortingTransform.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index ebfdef3eaea..9ebe276219d 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -80,6 +80,7 @@ void PartialSortingTransform::transform(Chunk & chunk) ColumnRawPtrs block_columns; UInt64 rows_num = block.rows(); + auto block_limit = limit; /** If we've saved columns from previously blocks we could filter all rows from current block * which are unnecessary for sortBlock(...) because they obviously won't be in the top LIMIT rows. @@ -93,16 +94,19 @@ void PartialSortingTransform::transform(Chunk & chunk) if (filtered_count == rows_num) return; - if (filtered_count) + if (rows_num - filtered_count < block_limit) { + block_limit = rows_num - filtered_count; +/* for (auto & column : block.getColumns()) { column = column->filter(filter, rows_num - filtered_count); } +*/ } } - sortBlock(block, description, limit); + sortBlock(block, description, block_limit); if (!threshold_block_columns.empty()) { From e41062172af6a6f7e6d600892384a8b127615f0a Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Tue, 2 Jun 2020 13:35:40 +0400 Subject: [PATCH 0328/2229] Stylistic changes/renaming --- src/Parsers/ASTCreateUserQuery.cpp | 14 +++++++------- src/Parsers/ParserCreateUserQuery.cpp | 14 +++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index 087c8160bad..8782c112060 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -32,7 +32,7 @@ namespace } String authentication_type_name = Authentication::TypeInfo::get(authentication_type).name; - std::optional password; + std::optional by_value; if (show_password || authentication_type == Authentication::LDAP_SERVER) { @@ -40,24 +40,24 @@ namespace { case Authentication::PLAINTEXT_PASSWORD: { - password = authentication.getPassword(); + by_value = authentication.getPassword(); break; } case Authentication::SHA256_PASSWORD: { authentication_type_name = "sha256_hash"; - password = authentication.getPasswordHashHex(); + by_value = authentication.getPasswordHashHex(); break; } case Authentication::DOUBLE_SHA1_PASSWORD: { authentication_type_name = "double_sha1_hash"; - password = authentication.getPasswordHashHex(); + by_value = authentication.getPasswordHashHex(); break; } case Authentication::LDAP_SERVER: { - password = authentication.getServerName(); + by_value = authentication.getServerName(); break; } @@ -69,8 +69,8 @@ namespace settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << authentication_type_name << (settings.hilite ? IAST::hilite_none : ""); - if (password) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " BY " << quoteString(*password); + if (by_value) + settings.ostr << (settings.hilite ? 
IAST::hilite_keyword : "") << " BY " << quoteString(*by_value); } diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 1317f18ca65..00288709cd8 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -51,7 +51,7 @@ namespace std::optional type; bool expect_password = false; bool expect_hash = false; - bool expect_server = false; + bool expect_server_name = false; if (ParserKeyword{"WITH"}.ignore(pos, expected)) { @@ -60,8 +60,12 @@ namespace if (ParserKeyword{Authentication::TypeInfo::get(check_type).raw_name}.ignore(pos, expected)) { type = check_type; - expect_password = (check_type != Authentication::NO_PASSWORD && check_type != Authentication::LDAP_SERVER); - expect_server = (check_type == Authentication::LDAP_SERVER); + + if (check_type == Authentication::LDAP_SERVER) + expect_server_name = true; + else if (check_type != Authentication::NO_PASSWORD) + expect_password = true; + break; } } @@ -90,7 +94,7 @@ namespace } String value; - if (expect_password || expect_hash || expect_server) + if (expect_password || expect_hash || expect_server_name) { ASTPtr ast; if (!ParserKeyword{"BY"}.ignore(pos, expected) || !ParserStringLiteral{}.parse(pos, ast, expected)) @@ -104,7 +108,7 @@ namespace authentication->setPassword(value); else if (expect_hash) authentication->setPasswordHashHex(value); - else if (expect_server) + else if (expect_server_name) authentication->setServerName(value); return true; From 6ff0550e4eb4764edc20e860c5b07c273e4d4a84 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Tue, 2 Jun 2020 13:37:02 +0400 Subject: [PATCH 0329/2229] Move parseExternalAuthenticators functionality into the c-tor of ExternalAuthenticators --- src/Access/AccessControlManager.cpp | 2 +- src/Access/ExternalAuthenticators.cpp | 12 +++++------- src/Access/ExternalAuthenticators.h | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 07002971734..940ddc5dcdc 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -101,7 +101,7 @@ void AccessControlManager::setLocalDirectory(const String & directory_path) void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config) { - external_authenticators = parseExternalAuthenticators(users_config, getLogger()); + external_authenticators = std::make_unique(users_config, getLogger()); auto & users_config_access_storage = dynamic_cast(getStorageByIndex(USERS_CONFIG_ACCESS_STORAGE_INDEX)); users_config_access_storage.setConfiguration(users_config); } diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 97f2f24a72e..5d46f5e96ee 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -115,6 +115,11 @@ void parseAndAddLDAPServers(ExternalAuthenticators & external_authenticators, co } +ExternalAuthenticators::ExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) +{ + parseAndAddLDAPServers(*this, config, log); +} + void ExternalAuthenticators::setLDAPServerParams(const String & server, const LDAPServerParams & params) { std::scoped_lock lock(mutex); @@ -131,11 +136,4 @@ LDAPServerParams ExternalAuthenticators::getLDAPServerParams(const String & serv return it->second; } -std::unique_ptr parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log) -{ - auto 
external_authenticators = std::make_unique(); - parseAndAddLDAPServers(*external_authenticators, config, log); - return external_authenticators; -} - } diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index dfd4e7061bd..50d9e68f91f 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -25,6 +25,8 @@ namespace DB class ExternalAuthenticators { public: + explicit ExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log); + void setLDAPServerParams(const String & server, const LDAPServerParams & params); LDAPServerParams getLDAPServerParams(const String & server) const; @@ -33,6 +35,4 @@ private: std::map ldap_server_params; }; -std::unique_ptr parseExternalAuthenticators(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log); - } From 920e792f3392f295a44fba41040c3684c76e46ab Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Tue, 2 Jun 2020 13:38:20 +0400 Subject: [PATCH 0330/2229] Add USE_LDAP to table system.build_options --- src/Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 9f73c00d22b..275342996ef 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -54,6 +54,7 @@ const char * auto_config_build[] "USE_HYPERSCAN", "@USE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_GRPC", "@USE_GRPC@", + "USE_LDAP", "@USE_LDAP@", nullptr, nullptr }; From e75bb335e304ffe9ef0f90491f5381c7d6a40471 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Tue, 2 Jun 2020 13:40:32 +0400 Subject: [PATCH 0331/2229] Fix: set auth_params column value for each row, by default to "{}" empty JSON object --- src/Storages/System/StorageSystemUsers.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index a1e7eba7232..604831df425 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -90,10 +90,11 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & column_storage.insertData(storage_name.data(), storage_name.length()); column_auth_type.push_back(static_cast(authentication.getType())); - if (authentication.getType() == Authentication::Type::LDAP_SERVER) { Poco::JSON::Object auth_params_json; - auth_params_json.set("server", authentication.getServerName()); + + if (authentication.getType() == Authentication::Type::LDAP_SERVER) + auth_params_json.set("server", authentication.getServerName()); std::ostringstream oss; Poco::JSON::Stringifier::stringify(auth_params_json, oss); From 0e8d559d832df40c23942673aac254728b0e77b1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 13:13:21 +0300 Subject: [PATCH 0332/2229] disable for splitted --- cmake/find/sentry.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 94c4f4a6e93..4a5fe6f2478 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD) +if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES) option (USE_SENTRY "Use Sentry" ON) set (CURL_LIBRARY 
${ClickHouse_SOURCE_DIR}/contrib/curl/lib) From 5036ad7c6afad995da38bf38de76f1e7134a4137 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 13:13:29 +0300 Subject: [PATCH 0333/2229] back to upstream --- .gitmodules | 2 +- contrib/sentry-native | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index ff4e644f657..4175eb223db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/blinkov/sentry-native.git + url = https://github.com/getsentry/sentry-native.git diff --git a/contrib/sentry-native b/contrib/sentry-native index 9e214a1265a..aed9c18536d 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit 9e214a1265a4ea628c21045b7f43d1aec15e385d +Subproject commit aed9c18536dff1851b1240f84263a55ef716acb6 From 46493b987837bad7642f501dca3c0b9379ffeb21 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 2 Jun 2020 14:56:58 +0300 Subject: [PATCH 0334/2229] Fix PartialSortingTransform. --- src/Processors/Transforms/PartialSortingTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 9ebe276219d..d42c8c4233b 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -65,7 +65,7 @@ size_t getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_ } for (size_t i = 0; i != rows_num; ++i) - filtered_count -= filter[i] = (compare_results[i] >= 0); + filtered_count -= filter[i] = (compare_results[i] <= 0); return filtered_count; } From 862693d78dc1924f472646584fbd70f955239c0c Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 16:59:45 +0300 Subject: [PATCH 0335/2229] change sentry-native commit --- contrib/sentry-native | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sentry-native b/contrib/sentry-native index aed9c18536d..b48c21d2440 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit aed9c18536dff1851b1240f84263a55ef716acb6 +Subproject commit b48c21d244092658d6e2d1bb243b705fd968b9f7 From 786874e86754a60f711ed2fc93399edf51eb4f35 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Jun 2020 13:15:53 +0000 Subject: [PATCH 0336/2229] Better publish & some fixes --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../WriteBufferToRabbitMQProducer.cpp | 70 +++++++++++++++---- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 7 +- 6 files changed, 69 insertions(+), 16 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 0858e2101df..17e4db3fb89 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -35,7 +35,7 @@ void RabbitMQBlockOutputStream::writePrefix() child = FormatFactory::instance().getOutput( storage.getFormatName(), *buffer, getHeader(), context, [this](const Columns & /* columns */, size_t /* rows */) { - buffer->count_row(); + buffer->countRow(); }); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 
6308e2e0089..775db87a1f8 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -16,7 +16,7 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes { LOG_ERROR(log, "Library error report: {}", message); - if (!connection->usable() || !connection->ready()) + if (connection->closed()) { std::cerr << "Connection lost, no recovery is possible"; throw; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index a70b08aba55..117f80d26f8 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include #include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 147d3ba2115..7cbfb164a2d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -216,7 +216,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { return std::make_shared(parsed_address, routing_key, exchange_name, - log, num_consumers, bind_by_id, hash_exchange, + log, num_consumers * num_queues, bind_by_id, hash_exchange, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 73434bc0ea6..86d3b32925a 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -46,7 +46,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( { /* The reason behind making a separate connection for each concurrent producer is explained here: * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from - * different threads (as outputStreams are asynchronous) with the same connection leads to internal libary errors. + * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. */ size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -73,7 +73,7 @@ WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() } -void WriteBufferToRabbitMQProducer::count_row() +void WriteBufferToRabbitMQProducer::countRow() { if (++rows % max_rows == 0) { @@ -108,42 +108,88 @@ void WriteBufferToRabbitMQProducer::count_row() void WriteBufferToRabbitMQProducer::flush() { - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing - * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it - * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to - * exchange becoming inactive at some point and part of messages is lost as a result. - */ std::atomic exchange_declared = false, exchange_error = false; + /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name + * and makes it visible from current producer_channel. 
+ */ + producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) .onSuccess([&]() { + exchange_declared = true; + + /// The case that should not normally happen: message was not delivered to queue (queue TTL exceeded) / not forwarded to consumer + if (flush_returned) + { + /// Needed to avoid data race because two different threads may access this vector + std::lock_guard lock(mutex); + + LOG_TRACE(log, "Redelivering returned messages"); + for (auto & payload : returned) + { + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + + --message_counter; + } + + returned.clear(); + } + + /* The reason for accumulating payloads and not publishing each of them at once in countRow() is that publishing + * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it + * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to + * exchange becoming inactive at some point and part of messages is lost as a result. + */ for (auto & payload : messages) { if (!message_counter) - return; + break; next_queue = next_queue % num_queues + 1; if (bind_by_id || hash_exchange) { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + producer_channel->publish(exchange_name, std::to_string(next_queue), payload, AMQP::mandatory || AMQP::immediate) + .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) + { + flush_returned = true; + + /// Needed to avoid data race because two different threads may access this variable + std::lock_guard lock(mutex); + returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); + }); } else { - producer_channel->publish(exchange_name, routing_key, payload); + producer_channel->publish(exchange_name, routing_key, payload, AMQP::mandatory || AMQP::immediate) + .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) + { + flush_returned = true; + + /// Needed to avoid data race because two different threads may access this vector + std::lock_guard lock(mutex); + returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); + }); } --message_counter; } - exchange_declared = true; messages.clear(); }) .onError([&](const char * message) { exchange_error = true; - exchange_declared = false; LOG_ERROR(log, "Exchange was not declared: {}", message); });
diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index d7a1715d491..146be0c5796 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include @@ -30,7 +32,7 @@ public: ~WriteBufferToRabbitMQProducer() override; - void count_row(); + void countRow(); void flush(); private: @@ -52,8 +54,11 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; String channel_id; + std::atomic flush_returned = false; + std::mutex mutex; Messages messages; + Messages returned; Poco::Logger * log; const std::optional delim;
From 711e7d101da19c8364b761ee09ca042bf9c680f8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun
2020 21:50:55 +0300 Subject: [PATCH 0337/2229] experiment --- base/CMakeLists.txt | 5 +++++ base/daemon/CMakeLists.txt | 10 +++------- cmake/find/sentry.cmake | 2 ++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index cfa54fe2ca4..ad3bf56cd00 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -11,3 +11,8 @@ add_subdirectory (widechar_width) if (USE_MYSQL) add_subdirectory (mysqlxx) endif () + +if (USE_SENTRY) + target_link_libraries (daemon PRIVATE curl) + target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) +endif () \ No newline at end of file diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 0b6a7188c83..8f70f30aeb1 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -1,12 +1,8 @@ add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp - SentryWriter.cpp) + SentryWriter.cpp +) target_include_directories (daemon PUBLIC ..) -target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) - -if (USE_SENTRY) - target_link_libraries (daemon PRIVATE curl) - target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) -endif () \ No newline at end of file +target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) \ No newline at end of file diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 4a5fe6f2478..449d995935d 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -8,6 +8,8 @@ endif () if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES) option (USE_SENTRY "Use Sentry" ON) + set (SENTRY_TRANSPORT "url") + set (SENTRY_BACKEND "none") set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) From 921b7c748000cc4a33c9db618716922fc34f1f17 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 22:25:34 +0300 Subject: [PATCH 0338/2229] partial revert --- base/CMakeLists.txt | 5 ----- base/daemon/CMakeLists.txt | 7 ++++++- cmake/find/sentry.cmake | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index ad3bf56cd00..a8dedec9269 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -10,9 +10,4 @@ add_subdirectory (widechar_width) if (USE_MYSQL) add_subdirectory (mysqlxx) -endif () - -if (USE_SENTRY) - target_link_libraries (daemon PRIVATE curl) - target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) endif () \ No newline at end of file diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 8f70f30aeb1..36de193bccd 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -5,4 +5,9 @@ add_library (daemon ) target_include_directories (daemon PUBLIC ..) 
-target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) \ No newline at end of file +target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) + +if (USE_SENTRY) + target_link_libraries (daemon PRIVATE curl) + target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) +endif () \ No newline at end of file diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 449d995935d..6848dc00b43 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES) +if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (USE_SENTRY "Use Sentry" ON) set (SENTRY_TRANSPORT "url") From b7caa154e29beacdf1d2b6bad3c21573b7e7fb58 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 00:37:14 +0400 Subject: [PATCH 0339/2229] Add ARCADIA_BUILD check --- src/Access/LDAPClient.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index a07dc93f4b1..d166a59aac2 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -1,6 +1,6 @@ #pragma once -#if __has_include("config_core.h") +#if !defined(ARCADIA_BUILD) && __has_include("config_core.h") #include "config_core.h" #endif From 979fa13b269cde9842d1409b955b818e29ff172d Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 00:38:00 +0400 Subject: [PATCH 0340/2229] Simplify condition --- src/Server/MySQLHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index a947a322488..c5107b22e4d 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -222,7 +222,7 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl // For compatibility with JavaScript MySQL client, Native41 authentication plugin is used when possible (if password is specified using double SHA1). Otherwise SHA256 plugin is used. 
auto user = connection_context.getAccessControlManager().read(user_name); const DB::Authentication::Type user_auth_type = user->authentication.getType(); - if (user_auth_type != DB::Authentication::LDAP_SERVER && user_auth_type != DB::Authentication::DOUBLE_SHA1_PASSWORD && user_auth_type != DB::Authentication::PLAINTEXT_PASSWORD && user_auth_type != DB::Authentication::NO_PASSWORD) + if (user_auth_type == DB::Authentication::SHA256_PASSWORD) { authPluginSSL(); }
From 2f74c58b0598a14db9583b660f2316b01013f052 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 23:50:18 +0300 Subject: [PATCH 0341/2229] experiment with BUILD_SHARED_LIBS --- cmake/find/sentry.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 6848dc00b43..08f712d5574 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -12,8 +12,11 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE set (SENTRY_BACKEND "none") set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) + if (NOT_UNBUNDLED) + set (BUILD_SHARED_LIBS OFF) + endif() message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") include_directories("${SENTRY_INCLUDE_DIR}") -endif () \ No newline at end of file +endif ()
From c427524bc81f90e061cbe32c6d1b6386cd3823e4 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 01:02:31 +0400 Subject: [PATCH 0342/2229] Simplify ExternalAuthenticators exposure to isCorrectPassword() --- src/Access/ContextAccess.cpp | 4 ++-- src/Access/ContextAccess.h | 2 +- src/Interpreters/Context.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 09f613b6851..9e02ab5be48 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -176,12 +176,12 @@ void ContextAccess::setRolesInfo(const std::shared_ptr & } -bool ContextAccess::isCorrectPassword(const String & password, const ExternalAuthenticators & external_authenticators) const +bool ContextAccess::isCorrectPassword(const String & password) const { std::lock_guard lock{mutex}; if (!user) return false; - return user->authentication.isCorrectPassword(password, user_name, external_authenticators); + return user->authentication.isCorrectPassword(password, user_name, manager->getExternalAuthenticators()); } bool ContextAccess::isClientHostAllowed() const
diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 062ab37e414..19a799e49d4 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -64,7 +64,7 @@ public: String getUserName() const; /// External authenticators may be used by only some of the authentication mechanisms. - bool isCorrectPassword(const String & password, const ExternalAuthenticators & external_authenticators) const; + bool isCorrectPassword(const String & password) const; bool isClientHostAllowed() const; /// Returns information about current and enabled roles.
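The ContextAccess hunks above narrow isCorrectPassword() to a single password argument: the ExternalAuthenticators instance is now fetched through the AccessControlManager pointer that ContextAccess already holds, so the Context.cpp call site in the next diff drops the extra parameter. A reduced sketch of the resulting shape; every type here is a hypothetical stand-in and the password check is deliberately trivial:

#include <iostream>
#include <string>
#include <utility>

struct ExternalAuthenticators { /* LDAP server registry, elided */ };

struct AccessControlManager
{
    const ExternalAuthenticators & getExternalAuthenticators() const { return external; }
private:
    ExternalAuthenticators external;
};

class ContextAccess
{
public:
    ContextAccess(const AccessControlManager & manager_, std::string password_)
        : manager(&manager_), stored_password(std::move(password_)) {}

    /// The collaborator is resolved internally; call sites no longer mention it.
    bool isCorrectPassword(const std::string & password) const
    {
        const auto & authenticators = manager->getExternalAuthenticators();
        (void)authenticators; /// would be consulted for LDAP_SERVER users
        return password == stored_password;
    }

private:
    const AccessControlManager * manager;
    std::string stored_password;
};

int main()
{
    AccessControlManager manager;
    ContextAccess access(manager, "secret");
    std::cout << access.isCorrectPassword("secret") << '\n'; /// prints 1
}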
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 8212513650a..5e2f4ecadab 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -665,7 +665,7 @@ void Context::setUser(const String & name, const String & password, const Poco:: if (new_user_id) { new_access = getAccessControlManager().getContextAccess(*new_user_id, {}, true, settings, current_database, client_info); - if (!new_access->isClientHostAllowed() || !new_access->isCorrectPassword(password, getAccessControlManager().getExternalAuthenticators())) + if (!new_access->isClientHostAllowed() || !new_access->isCorrectPassword(password)) { new_user_id = {}; new_access = nullptr; From 48f3d4094a02fd094c88d40c4c8dfbc718633122 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 01:06:44 +0400 Subject: [PATCH 0343/2229] Remove irrelevant stuff --- src/Access/ContextAccess.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h index 19a799e49d4..27bb29a878c 100644 --- a/src/Access/ContextAccess.h +++ b/src/Access/ContextAccess.h @@ -26,7 +26,6 @@ struct QuotaUsage; struct Settings; class SettingsConstraints; class AccessControlManager; -class ExternalAuthenticators; class IAST; using ASTPtr = std::shared_ptr; @@ -63,7 +62,6 @@ public: UserPtr getUser() const; String getUserName() const; - /// External authenticators may be used by only some of the authentication mechanisms. bool isCorrectPassword(const String & password) const; bool isClientHostAllowed() const; From ca1381d47560a9ad1883f2f7b712b64ffe32cf19 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 01:17:59 +0400 Subject: [PATCH 0344/2229] Write empty json in more efficient way --- src/Storages/System/StorageSystemUsers.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 604831df425..780542677c9 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -90,11 +90,11 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & column_storage.insertData(storage_name.data(), storage_name.length()); column_auth_type.push_back(static_cast(authentication.getType())); + if (authentication.getType() == Authentication::Type::LDAP_SERVER) { Poco::JSON::Object auth_params_json; - if (authentication.getType() == Authentication::Type::LDAP_SERVER) - auth_params_json.set("server", authentication.getServerName()); + auth_params_json.set("server", authentication.getServerName()); std::ostringstream oss; Poco::JSON::Stringifier::stringify(auth_params_json, oss); @@ -102,6 +102,11 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & column_auth_params.insertData(str.data(), str.size()); } + else + { + static constexpr std::string_view empty_json{"{}"}; + column_auth_params.insertData(empty_json.data(), empty_json.length()); + } if (allowed_hosts.containsAnyHost()) { From caf1e4e8cc2261c8fef3b3b9d2e809029e00e92d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jun 2020 12:51:23 +0300 Subject: [PATCH 0345/2229] in-memory-parts: fixes --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 9 +++++++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 9 +++++---- src/Storages/MergeTree/MergeTreeData.h | 3 ++- src/Storages/MergeTree/MergeTreeReaderInMemory.cpp | 4 ++++ 
.../0_stateless/01130_in_memory_parts_nested.reference | 5 +++++ .../queries/0_stateless/01130_in_memory_parts_nested.sql | 1 + 7 files changed, 26 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a0370dfad16..3cb9a4c5924 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -766,7 +766,7 @@ void IMergeTreeDataPart::remove() const } } -String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const +String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix) const { /// Do not allow underscores in the prefix because they are used as separators. @@ -780,7 +780,7 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) */ for (int try_no = 0; try_no < 10; try_no++) { - res = "detached/" + (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); + res = (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + DB::toString(try_no) : ""); if (!disk->exists(getFullRelativePath() + res)) return res; @@ -792,6 +792,11 @@ String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) return res; } +String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const +{ + return "detached/" + getRelativePathForPrefix(prefix); +} + void IMergeTreeDataPart::renameToDetached(const String & prefix) const { renameTo(getRelativePathForDetachedPart(prefix)); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 5ce8c854156..3158419a16f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -301,6 +301,8 @@ public: static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk_, const String & from); void calculateColumnsSizesOnDisk(); + String getRelativePathForPrefix(const String & prefix) const; + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk ColumnSize total_columns_size; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6b6f3a2f45d..df7d484f751 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1700,7 +1700,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( else { /// Didn't find any mark file, suppose that part is empty. 
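For reference, the naming scheme that the extracted getRelativePathForPrefix() implements can be sketched in isolation (the part name below is made up; the real method additionally probes the disk and retries with _tryN suffixes on collision):

#include <string>

/// Candidates are tried in order: prefix_name, prefix_name_try1, ... (up to 10 attempts);
/// getRelativePathForDetachedPart() then simply prepends "detached/".
std::string relativePathForPrefix(const std::string & prefix, const std::string & name, int try_no)
{
    return (prefix.empty() ? "" : prefix + "_") + name + (try_no ? "_try" + std::to_string(try_no) : "");
}

/// relativePathForPrefix("tmp_clone", "201901_1_1_0", 0) == "tmp_clone_201901_1_1_0"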
- type = choosePartType(0, 0); + type = choosePartTypeOnDisk(0, 0); } return createPart(name, type, part_info, disk, relative_path); @@ -3323,9 +3323,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( /// If source part is in memory, flush it to disk and clone it already in on-disk format if (auto * src_part_in_memory = dynamic_cast(src_part.get())) { - auto flushed_part_path = tmp_part_prefix + src_part_in_memory->name; - src_part_in_memory->flushToDisk(relative_data_path, flushed_part_path); - src_part_path = src_part_in_memory->storage.relative_data_path + flushed_part_path + "/"; + const auto & src_relative_data_path = src_part_in_memory->storage.relative_data_path; + auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); + src_part_in_memory->flushToDisk(src_relative_data_path, flushed_part_path); + src_part_path = src_relative_data_path + flushed_part_path + "/"; } LOG_DEBUG(log, "Cloning part " << fullPath(disk, src_part_path) << " to " << fullPath(disk, dst_part_path)); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index d56f6d57283..31a05bf46b1 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -199,7 +199,8 @@ public: MergeTreeDataPartType type, const MergeTreePartInfo & part_info, const DiskPtr & disk, const String & relative_path) const; - /// After this methods 'loadColumnsChecksumsIndexes' must be called + /// Create part, that already exists on filesystem. + /// After this methods 'loadColumnsChecksumsIndexes' must be called. MutableDataPartPtr createPart(const String & name, const DiskPtr & disk, const String & relative_path) const; diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index ce6eb44a50b..924198b49f1 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -28,6 +28,9 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( for (const auto & name_and_type : columns) { auto [name, type] = getColumnFromPart(name_and_type); + + /// If array of Nested column is missing in part, + /// we have to read its offsets if they exist. if (!part_in_memory->block.has(name) && typeid_cast(type.get())) if (auto offset_position = findColumnForOffsets(name)) positions_for_offsets[name] = *offset_position; @@ -58,6 +61,7 @@ size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading { auto [name, type] = getColumnFromPart(*column_it); + /// Copy offsets, if array of Nested column is missing in part. 
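In effect, the reader synthesizes the missing array column from a sibling's offsets. A simplified sketch of that construction (not the exact reader code, which goes through positions_for_offsets and the part's in-memory block):

#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Common/assert_cast.h>
#include <DataTypes/DataTypeArray.h>

/// Build Array(T) filled with default values of T, with per-row sizes taken
/// from `offsets_column` (e.g. the offsets of n.a when n.b is missing), so
/// SELECT DISTINCT n.b in the test below yields zero-filled arrays sized like n.a.
DB::ColumnPtr buildMissingNestedColumn(const DB::DataTypeArray & type, const DB::ColumnPtr & offsets_column)
{
    const auto & offsets = assert_cast<const DB::ColumnUInt64 &>(*offsets_column).getData();
    const size_t total_values = offsets.empty() ? 0 : offsets.back();
    auto nested = type.getNestedType()->createColumn()->cloneResized(total_values);
    return DB::ColumnArray::create(std::move(nested), offsets_column->assumeMutable());
}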
auto offsets_it = positions_for_offsets.find(name); if (offsets_it != positions_for_offsets.end()) { diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.reference b/tests/queries/0_stateless/01130_in_memory_parts_nested.reference index abc233c46f4..23e93d4be90 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_nested.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts_nested.reference @@ -4,6 +4,11 @@ [0,0,0,0,0,0,0] [0,0,0,0,0,0,0,0,0] [0] +[0,0,0] +[0,0,0,0,0] +[0,0,0,0,0,0,0] +[0,0,0,0,0,0,0,0,0] +[0] [0,2,4] [0,2,4,6,8] [0,2,4,6,8,10,12] diff --git a/tests/queries/0_stateless/01130_in_memory_parts_nested.sql b/tests/queries/0_stateless/01130_in_memory_parts_nested.sql index c09593d01bc..f643a65e041 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts_nested.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts_nested.sql @@ -8,6 +8,7 @@ INSERT INTO nested SELECT number, number % 2, range(number % 10) FROM system.num ALTER TABLE nested ADD COLUMN n.b Array(UInt64); SELECT DISTINCT n.b FROM nested PREWHERE filter; +SELECT DISTINCT n.b FROM nested PREWHERE filter SETTINGS max_block_size = 10; ALTER TABLE nested ADD COLUMN n.c Array(UInt64) DEFAULT arrayMap(x -> x * 2, n.a); SELECT DISTINCT n.c FROM nested PREWHERE filter; From 2f2275c3ba5090031c506e1a93bd72d2cdbdc3d5 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 15:16:58 +0400 Subject: [PATCH 0346/2229] Add "-D ENABLE_LDAP=0" --- utils/ci/jobs/quick-build/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/ci/jobs/quick-build/run.sh b/utils/ci/jobs/quick-build/run.sh index 56f0950c717..013f31ca46d 100755 --- a/utils/ci/jobs/quick-build/run.sh +++ b/utils/ci/jobs/quick-build/run.sh @@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse BUILD_TYPE=Debug ENABLE_EMBEDDED_COMPILER=0 -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0" +CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_LDAP=0" [[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin From b28def8f4ce2be0a57cfff4b719e5344187a3299 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 15:20:53 +0400 Subject: [PATCH 0347/2229] Stylistic changes --- src/Access/LDAPClient.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index d166a59aac2..5aad2ed3061 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -1,17 +1,17 @@ #pragma once -#if !defined(ARCADIA_BUILD) && __has_include("config_core.h") -#include "config_core.h" +#if !defined(ARCADIA_BUILD) +# include "config_core.h" #endif #include #include #if USE_LDAP -#include -#define MAYBE_NORETURN +# include +# define MAYBE_NORETURN #else -#define MAYBE_NORETURN [[noreturn]] +# define MAYBE_NORETURN [[noreturn]] #endif From 73926b2cf9c71093dac41b9fadaaa866122add1a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Jun 2020 14:54:26 +0300 Subject: [PATCH 0348/2229] support 
loading from multiple partitions with complex key --- .../CassandraDictionarySource.cpp | 161 +++++++++++------- src/Dictionaries/CassandraDictionarySource.h | 52 ++++-- src/Dictionaries/CassandraHelpers.cpp | 30 ++++ src/Dictionaries/CassandraHelpers.h | 6 + src/Dictionaries/ExternalQueryBuilder.cpp | 76 ++++++--- src/Dictionaries/ExternalQueryBuilder.h | 17 +- .../external_sources.py | 6 +- 7 files changed, 236 insertions(+), 112 deletions(-) diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index a58dd383b46..fec60fe3d83 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,8 +1,10 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "ExternalQueryBuilder.h" #include +#include +#include +#include namespace DB { @@ -21,6 +23,7 @@ namespace DB bool /*check_config*/) -> DictionarySourcePtr { #if USE_CASSANDRA + setupCassandraDriverLibraryLogging(CASS_LOG_TRACE); return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", @@ -35,107 +38,149 @@ namespace DB #if USE_CASSANDRA #include +#include +#include #include "CassandraBlockInputStream.h" namespace DB { namespace ErrorCodes { - extern const int UNSUPPORTED_METHOD; - extern const int WRONG_PASSWORD; + extern const int LOGICAL_ERROR; + extern const int INVALID_CONFIG_PARAMETER; +} + +CassandraSettings::CassandraSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix) + : host(config.getString(config_prefix + ".host")) + , port(config.getUInt(config_prefix + ".port", 0)) + , user(config.getString(config_prefix + ".user", "")) + , password(config.getString(config_prefix + ".password", "")) + , db(config.getString(config_prefix + ".keyspace", "")) + , table(config.getString(config_prefix + ".column_family")) + , allow_filtering(config.getBool(config_prefix + ".allow_filtering", false)) + , partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1)) + , max_threads(config.getUInt(config_prefix + ".max_threads", 8)) + , where(config.getString(config_prefix + ".where", "")) +{ + setConsistency(config.getString(config_prefix + ".consistency", "One")); +} + +void CassandraSettings::setConsistency(const String & config_str) +{ + if (config_str == "One") + consistency = CASS_CONSISTENCY_ONE; + else if (config_str == "Two") + consistency = CASS_CONSISTENCY_TWO; + else if (config_str == "Three") + consistency = CASS_CONSISTENCY_THREE; + else if (config_str == "All") + consistency = CASS_CONSISTENCY_ALL; + else if (config_str == "EachQuorum") + consistency = CASS_CONSISTENCY_EACH_QUORUM; + else if (config_str == "Quorum") + consistency = CASS_CONSISTENCY_QUORUM; + else if (config_str == "LocalQuorum") + consistency = CASS_CONSISTENCY_LOCAL_QUORUM; + else if (config_str == "LocalOne") + consistency = CASS_CONSISTENCY_LOCAL_ONE; + else if (config_str == "Serial") + consistency = CASS_CONSISTENCY_SERIAL; + else if (config_str == "LocalSerial") + consistency = CASS_CONSISTENCY_LOCAL_SERIAL; + else /// CASS_CONSISTENCY_ANY is only valid for writes + throw Exception("Unsupported consistency level: " + config_str, ErrorCodes::INVALID_CONFIG_PARAMETER); } static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( - const 
DB::DictionaryStructure & dict_struct_, - const String & host_, - UInt16 port_, - const String & user_, - const String & password_, - const String & db_, - const String & table_, - const DB::Block & sample_block_) + const DictionaryStructure & dict_struct_, + const CassandraSettings & settings_, + const Block & sample_block_) : log(&Poco::Logger::get("CassandraDictionarySource")) , dict_struct(dict_struct_) - , host(host_) - , port(port_) - , user(user_) - , password(password_) - , db(db_) - , table(table_) + , settings(settings_) , sample_block(sample_block_) + , query_builder(dict_struct, settings.db, settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) { - cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); - if (port) - cassandraCheck(cass_cluster_set_port(cluster, port)); - cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); + cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str())); + if (settings.port) + cassandraCheck(cass_cluster_set_port(cluster, settings.port)); + cass_cluster_set_credentials(cluster, settings.user.c_str(), settings.password.c_str()); + cassandraCheck(cass_cluster_set_consistency(cluster, settings.consistency)); } CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure & dict_struct_, + const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DB::Block & sample_block_) + const String & config_prefix, + Block & sample_block_) : CassandraDictionarySource( dict_struct_, - config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port", 0), - config.getString(config_prefix + ".user", ""), - config.getString(config_prefix + ".password", ""), - config.getString(config_prefix + ".keyspace", ""), - config.getString(config_prefix + ".column_family"), + CassandraSettings(config, config_prefix), sample_block_) { } -CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySource & other) - : CassandraDictionarySource{other.dict_struct, - other.host, - other.port, - other.user, - other.password, - other.db, - other.table, - other.sample_block} +void CassandraDictionarySource::maybeAllowFiltering(String & query) { + if (!settings.allow_filtering) + return; + query.pop_back(); /// remove semicolon + query += " ALLOW FILTERING;"; } BlockInputStreamPtr CassandraDictionarySource::loadAll() { - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadAllQuery(); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading all using query: ", query); + String query = query_builder.composeLoadAllQuery(); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading all using query: {}", query); return std::make_shared(cluster, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { - return "Cassandra: " + db + '.' + table; + return "Cassandra: " + settings.db + '.' 
+ settings.table; } BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) { - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadIdsQuery(ids); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading ids using query: ", query); + String query = query_builder.composeLoadIdsQuery(ids); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading ids using query: {}", query); return std::make_shared(cluster, query, sample_block, max_block_size); } BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { - //FIXME split conditions on partition key and clustering key - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading keys using query: ", query); - return std::make_shared(cluster, query, sample_block, max_block_size); -} + if (requested_rows.empty()) + throw Exception("No rows requested", ErrorCodes::LOGICAL_ERROR); + /// TODO is there a better way to load data by complex keys? + std::unordered_map> partitions; + for (const auto & row : requested_rows) + { + SipHash partition_key; + for (const auto i : ext::range(0, settings.partition_key_prefix)) + key_columns[i]->updateHashWithValue(row, partition_key); + partitions[partition_key.get64()].push_back(row); + } + + BlockInputStreams streams; + for (const auto & partition : partitions) + { + String query = query_builder.composeLoadKeysQuery(key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query); + streams.push_back(std::make_shared(cluster, query, sample_block, max_block_size)); + } + + if (streams.size() == 1) + return streams.front(); + + return std::make_shared(streams, nullptr, settings.max_threads); +} } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 564fa75c3a2..dff93fcd029 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -6,32 +6,47 @@ #include "DictionaryStructure.h" #include "IDictionarySource.h" +#include "ExternalQueryBuilder.h" #include #include namespace DB { +struct CassandraSettings +{ + String host; + UInt16 port; + String user; + String password; + String db; + String table; + + CassConsistency consistency; + bool allow_filtering; + /// TODO get information about key from the driver + size_t partition_key_prefix; + size_t max_threads; + String where; + + CassandraSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + + void setConsistency(const String & config_str); +}; + class CassandraDictionarySource final : public IDictionarySource { +public: CassandraDictionarySource( const DictionaryStructure & dict_struct, - const String & host, - UInt16 port, - const String & user, - const String & password, - const String & db, - const String & table, + const CassandraSettings & settings_, const Block & sample_block); -public: CassandraDictionarySource( const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, - const std::string & 
config_prefix, + const String & config_prefix, Block & sample_block); - CassandraDictionarySource(const CassandraDictionarySource & other); - BlockInputStreamPtr loadAll() override; bool supportsSelectiveLoad() const override { return true; } @@ -40,7 +55,10 @@ public: bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override + { + return std::make_unique(dict_struct, settings, sample_block); + } BlockInputStreamPtr loadIds(const std::vector & ids) override; @@ -51,18 +69,16 @@ public: throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; } - std::string toString() const override; + String toString() const override; private: + void maybeAllowFiltering(String & query); + Poco::Logger * log; const DictionaryStructure dict_struct; - const String host; - const UInt16 port; - const String user; - const String password; - const String db; - const String table; + const CassandraSettings settings; Block sample_block; + ExternalQueryBuilder query_builder; CassClusterPtr cluster; }; diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp index 9c181abcf43..4f92a75a1f3 100644 --- a/src/Dictionaries/CassandraHelpers.cpp +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -2,6 +2,8 @@ #if USE_CASSANDRA #include +#include +#include namespace DB { @@ -33,6 +35,34 @@ void cassandraWaitAndCheck(CassFuturePtr && future) throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); } +static std::once_flag setup_logging_flag; + +void setupCassandraDriverLibraryLogging(CassLogLevel level) +{ + std::call_once(setup_logging_flag, [level]() + { + Poco::Logger * logger = &Poco::Logger::get("CassandraDriverLibrary"); + cass_log_set_level(level); + if (level != CASS_LOG_DISABLED) + cass_log_set_callback(cassandraLogCallback, logger); + }); +} + +void cassandraLogCallback(const CassLogMessage * message, void * data) +{ + Poco::Logger * logger = static_cast(data); + if (message->severity == CASS_LOG_CRITICAL || message->severity == CASS_LOG_ERROR) + LOG_ERROR(logger, message->message); + else if (message->severity == CASS_LOG_WARN) + LOG_WARNING(logger, message->message); + else if (message->severity == CASS_LOG_INFO) + LOG_INFO(logger, message->message); + else if (message->severity == CASS_LOG_DEBUG) + LOG_DEBUG(logger, message->message); + else if (message->severity == CASS_LOG_TRACE) + LOG_TRACE(logger, message->message); +} + } #endif diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 48573c1080f..2a91815e37d 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -67,6 +67,12 @@ using CassIteratorPtr = Cassandra::ObjectHolder std::string -ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method) +ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method, size_t partition_key_prefix) { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; @@ -307,25 +307,30 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st writeString(" OR ", out); first = false; - composeKeyCondition(key_columns, row, out); + + writeString("(", out); + composeKeyCondition(key_columns, row, out, 0, key_columns.size()); + 
writeString(")", out); } } - else /* if (method == IN_WITH_TUPLES) */ + else if (method == IN_WITH_TUPLES) { - composeKeyTupleDefinition(out); - writeString(" IN (", out); - - first = true; - for (const auto row : requested_rows) - { - if (!first) - writeString(", ", out); - - first = false; - composeKeyTuple(key_columns, row, out); - } - - writeString(")", out); + composeInWithTuples(key_columns, requested_rows, out, 0, key_columns.size()); + } + else /* if (method == CASSANDRA_SEPARATE_PARTITION_KEY) */ + { + /// CQL does not allow using OR conditions + /// and does not allow using multi-column IN expressions with partition key columns. + /// So we have to use multiple queries with conditions like + /// (partition_key_1 = val1 AND partition_key_2 = val2 ...) AND (clustering_key_1, ...) IN ((val3, ...), ...) + /// for each partition key. + /// `partition_key_prefix` is a number of columns from partition key. + /// All `requested_rows` must have the same values of partition key. + composeKeyCondition(key_columns, requested_rows.at(0), out, 0, partition_key_prefix); + if (partition_key_prefix && partition_key_prefix < key_columns.size()) + writeString(" AND ", out); + if (partition_key_prefix < key_columns.size()) + composeInWithTuples(key_columns, requested_rows, out, partition_key_prefix, key_columns.size()); } if (!where.empty()) @@ -339,13 +344,11 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st } -void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out, + size_t beg, size_t end) const { - writeString("(", out); - - const auto keys_size = key_columns.size(); auto first = true; - for (const auto i : ext::range(0, keys_size)) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(" AND ", out); @@ -359,12 +362,30 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons writeString("=", out); key_description.type->serializeAsTextQuoted(*key_columns[i], row, out, format_settings); } +} + + +void ExternalQueryBuilder::composeInWithTuples(const Columns & key_columns, const std::vector & requested_rows, + WriteBuffer & out, size_t beg, size_t end) +{ + composeKeyTupleDefinition(out, beg, end); + writeString(" IN (", out); + + bool first = true; + for (const auto row : requested_rows) + { + if (!first) + writeString(", ", out); + + first = false; + composeKeyTuple(key_columns, row, out, beg, end); + } writeString(")", out); } -void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; @@ -372,26 +393,25 @@ void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const writeChar('(', out); auto first = true; - for (const auto & key : *dict_struct.key) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(", ", out); first = false; - writeQuoted(key.name, out); + writeQuoted((*dict_struct.key)[i].name, out); } writeChar(')', out); } -void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const { 
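    /// For orientation, the WHERE shapes produced by the three methods for a
    /// composite key (p, c) and requested rows (1, 10) and (1, 20) look like
    /// this (illustrative values, identifier quoting omitted):
    ///   AND_OR_CHAIN:     (p=1 AND c=10) OR (p=1 AND c=20)
    ///   IN_WITH_TUPLES:   (p, c) IN ((1, 10), (1, 20))
    ///   CASSANDRA_SEPARATE_PARTITION_KEY with partition_key_prefix = 1:
    ///                     p=1 AND (c) IN ((10), (20))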
writeString("(", out); - const auto keys_size = key_columns.size(); auto first = true; - for (const auto i : ext::range(0, keys_size)) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(", ", out); diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 2ffc6a475ee..3011efbc895 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -42,17 +42,19 @@ struct ExternalQueryBuilder std::string composeLoadIdsQuery(const std::vector & ids); /** Generate a query to load data by set of composite keys. - * There are two methods of specification of composite keys in WHERE: + * There are three methods of specification of composite keys in WHERE: * 1. (x = c11 AND y = c12) OR (x = c21 AND y = c22) ... * 2. (x, y) IN ((c11, c12), (c21, c22), ...) + * 3. (x = c1 AND (y, z) IN ((c2, c3), ...)) */ enum LoadKeysMethod { AND_OR_CHAIN, IN_WITH_TUPLES, + CASSANDRA_SEPARATE_PARTITION_KEY, }; - std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method); + std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0); private: @@ -60,14 +62,19 @@ private: void composeLoadAllQuery(WriteBuffer & out) const; + /// In the following methods `beg` and `end` specifies which columns to write in expression + /// Expression in form (x = c1 AND y = c2 ...) - void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const; + void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const; + + /// Expression in form (x, y, ...) IN ((c1, c2, ...), ...) + void composeInWithTuples(const Columns & key_columns, const std::vector & requested_rows, WriteBuffer & out, size_t beg, size_t end); /// Expression in form (x, y, ...) - void composeKeyTupleDefinition(WriteBuffer & out) const; + void composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const; /// Expression in form (c1, c2, ...) - void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const; + void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const; /// Write string with specified quoting style. 
void writeQuoted(const std::string & s, WriteBuffer & out) const; diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 336f3ddc28b..04fe83414e7 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -436,6 +436,7 @@ class SourceCassandra(ExternalSource): {port} test {table} + 1 '''.format( host=self.docker_hostname, @@ -451,9 +452,8 @@ class SourceCassandra(ExternalSource): self.structure[table_name] = structure columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] keys = ['"' + col.name + '"' for col in structure.keys] - # FIXME use partition key - query = 'create table test."{name}" ({columns}, primary key ("{some_col}", {pk}));'.format( - name=table_name, columns=', '.join(columns), some_col=structure.ordinary_fields[0].name, pk=', '.join(keys)) + query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format( + name=table_name, columns=', '.join(columns), pk=', '.join(keys)) self.session.execute(query) self.prepared = True From f44449f6d6e0829ca689176ad60b342f2269de3e Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 17:31:53 +0400 Subject: [PATCH 0349/2229] Fix the test --- tests/integration/test_grant_and_revoke/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 7054ce28e59..f01f61f4e22 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -156,9 +156,9 @@ def test_introspection(): assert instance.query("SHOW ENABLED ROLES", user='B') == TSV([[ "R2", 1, 1, 1 ]]) assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ - TSV([[ "A", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], - [ "B", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) - + TSV([[ "A", "disk", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], + [ "B", "disk", "no_password", "{}", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) + assert instance.query("SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name") ==\ TSV([[ "R1", "disk" ], [ "R2", "disk" ]]) From 0197627f3fc989b533a06ea195a349faf3a38bd8 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Wed, 3 Jun 2020 22:52:12 +0400 Subject: [PATCH 0350/2229] Added checks for empty server name --- src/Access/ExternalAuthenticators.cpp | 3 +++ src/Access/UsersConfigAccessStorage.cpp | 2 ++ 2 files changed, 5 insertions(+) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 5d46f5e96ee..8d0487bfd31 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -18,6 +18,9 @@ namespace auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const String & ldap_server_name) { + if (ldap_server_name.empty()) + throw Exception("LDAP server name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + LDAPServerParams params; const String ldap_server_config = "ldap_servers." 
+ ldap_server_name; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 38925142460..4560c3f41ba 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -88,6 +88,8 @@ namespace throw Exception("Missing mandatory 'server' in 'ldap', with LDAP server name, for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); const auto ldap_server_name = config.getString(user_config + ".ldap.server"); + if (ldap_server_name.empty()) + throw Exception("LDAP server name cannot be empty for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); user->authentication = Authentication{Authentication::LDAP_SERVER}; user->authentication.setServerName(ldap_server_name); From 11c4e9dde3c248e66692860337eb3ac92c618bb3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 3 Jun 2020 21:59:18 +0300 Subject: [PATCH 0351/2229] in-memory parts: fix 'check' query --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 +++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 10 +++++----- src/Storages/MergeTree/MergeTreeDataPartInMemory.h | 7 +------ src/Storages/MergeTree/checkDataPart.cpp | 3 +++ src/Storages/StorageMergeTree.cpp | 2 +- .../0_stateless/01130_in_memory_parts_check.reference | 1 + .../0_stateless/01130_in_memory_parts_check.sql | 10 ++++++++++ 9 files changed, 28 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_check.reference create mode 100644 tests/queries/0_stateless/01130_in_memory_parts_check.sql diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index ca5ad352564..6ef4cc15032 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -289,7 +289,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA); NativeBlockInputStream block_in(in, 0); - auto block = block_in.read(); + auto block = block_in.read(); auto volume = std::make_shared("volume_" + part_name, reservation->getDisk()); MergeTreeData::MutableDataPartPtr new_data_part = diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e06ab8eed44..22accdbe190 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -889,6 +889,11 @@ void IMergeTreeDataPart::checkConsistencyBase() const } } +void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) const +{ + throw Exception("Method 'checkConsistency' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED); +} + void IMergeTreeDataPart::calculateColumnsSizesOnDisk() { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 0921b24673c..f9f1965aa57 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -332,7 +332,7 @@ protected: void removeIfNeeded(); - virtual void checkConsistency(bool require_part_metadata) const = 0; + virtual void checkConsistency(bool require_part_metadata) const; void checkConsistencyBase() const; /// Fill each_columns_size and total_size with sizes from columns files on diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp 
b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 9abfd44e42f..fac7e4982bb 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -108,14 +108,14 @@ void MergeTreeDataPartInMemory::notifyMerged() const is_merged.notify_one(); } +void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) const +{ + relative_path = new_relative_path; +} + void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName & /*each_columns_size*/, ColumnSize & /*total_size*/) const { // throw Exception("calculateEachColumnSizesOnDisk of in memory part", ErrorCodes::NOT_IMPLEMENTED); } -void MergeTreeDataPartInMemory::loadIndexGranularity() -{ - throw Exception("loadIndexGranularity of in memory part", ErrorCodes::NOT_IMPLEMENTED); -} - } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index ad9a583f5c8..3e2ec82b038 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -40,7 +40,7 @@ public: bool isStoredOnDisk() const override { return false; } bool hasColumnFiles(const String & column_name, const IDataType & /* type */) const override { return !!getColumnPosition(column_name); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } - void renameTo(const String & /*new_relative_path*/, bool /*remove_new_dir_if_exists*/) const override {} + void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const override; void makeCloneInDetached(const String & prefix) const override; void flushToDisk(const String & base_path, const String & new_relative_path) const; @@ -53,11 +53,6 @@ public: private: mutable std::condition_variable is_merged; - void checkConsistency(bool /* require_part_metadata */) const override {} - - /// Loads marks index granularity into memory - void loadIndexGranularity() override; - /// Compact parts doesn't support per column size, only total size void calculateEachColumnSizesOnDisk(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; }; diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 58ff2af9466..8395a7461a7 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -166,6 +166,9 @@ IMergeTreeDataPart::Checksums checkDataPart( bool require_checksums, std::function is_cancelled) { + if (!data_part->isStoredOnDisk()) + return data_part->checksums; + return checkDataPart( data_part->volume->getDisk(), data_part->getFullRelativePath(), diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index b32fcc1d188..44a942551fb 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1315,7 +1315,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, const Context & c /// If the checksums file is not present, calculate the checksums and write them to disk. 
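/// Note: parts that are not stored on disk never reach this branch. For them
/// checkDataPart() above returns the stored in-memory checksums unchanged
/// (there are no files to re-read and hash), and the part->isStoredOnDisk()
/// guard below keeps checkData() from writing a checksums.txt for such parts.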
String checksums_path = part_path + "checksums.txt"; String tmp_checksums_path = part_path + "checksums.txt.tmp"; - if (!disk->exists(checksums_path)) + if (part->isStoredOnDisk() && !disk->exists(checksums_path)) { try { diff --git a/tests/queries/0_stateless/01130_in_memory_parts_check.reference b/tests/queries/0_stateless/01130_in_memory_parts_check.reference new file mode 100644 index 00000000000..15f72836ff1 --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_check.reference @@ -0,0 +1 @@ +201901_1_1_0 1 diff --git a/tests/queries/0_stateless/01130_in_memory_parts_check.sql b/tests/queries/0_stateless/01130_in_memory_parts_check.sql new file mode 100644 index 00000000000..57cd1c83528 --- /dev/null +++ b/tests/queries/0_stateless/01130_in_memory_parts_check.sql @@ -0,0 +1,10 @@ +-- Part of 00961_check_table test, but with in-memory parts +SET check_query_single_value_result = 0; +DROP TABLE IF EXISTS mt_table; +CREATE TABLE mt_table (d Date, key UInt64, data String) ENGINE = MergeTree() PARTITION BY toYYYYMM(d) ORDER BY key + SETTINGS min_rows_for_compact_part = 1000, min_rows_for_compact_part = 1000; + +CHECK TABLE mt_table; +INSERT INTO mt_table VALUES (toDate('2019-01-02'), 1, 'Hello'), (toDate('2019-01-02'), 2, 'World'); +CHECK TABLE mt_table; +DROP TABLE mt_table; From 83b6467308ad9cf0ca21d2873ee42bbf23ea6d9f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Jun 2020 22:50:11 +0300 Subject: [PATCH 0352/2229] Added RemoteSource. --- src/DataStreams/RemoteQueryExecutor.h | 4 +- src/Processors/QueryPipeline.cpp | 11 +- src/Processors/RowsBeforeLimitCounter.h | 6 + src/Processors/Sources/DelayedSource.cpp | 6 + src/Processors/Sources/DelayedSource.h | 23 ++++ src/Processors/Sources/RemoteSource.cpp | 107 ++++++++++++++++++ src/Processors/Sources/RemoteSource.h | 80 +++++++++++++ src/Processors/Sources/SourceWithProgress.cpp | 7 +- src/Processors/Sources/SourceWithProgress.h | 5 + src/Processors/ya.make | 2 + 10 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 src/Processors/Sources/DelayedSource.cpp create mode 100644 src/Processors/Sources/DelayedSource.h create mode 100644 src/Processors/Sources/RemoteSource.cpp create mode 100644 src/Processors/Sources/RemoteSource.h diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index e39a7ccc94b..ce6c46d5a2a 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -61,8 +61,8 @@ public: void cancel(); /// Get totals and extremes if any. - Block getTotals() const { return totals; } - Block getExtremes() const { return extremes; } + Block getTotals() const { return std::move(totals); } + Block getExtremes() const { return std::move(extremes); } /// Set callback for progress. It will be called on Progress packet. void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 92c91a81b8a..5b6109440d5 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB { @@ -673,8 +674,10 @@ void QueryPipeline::initRowsBeforeLimit() { RowsBeforeLimitCounterPtr rows_before_limit_at_least; + /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
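    /// Two update styles now coexist: LimitTransform and local sources
    /// accumulate into the shared counter via add(), while RemoteSource calls
    /// set() once (added below in RowsBeforeLimitCounter.h) with the exact
    /// value taken from the remote server's ProfileInfo packet.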
std::vector limits; std::vector sources; + std::vector remote_sources; std::unordered_set visited; @@ -705,6 +708,9 @@ void QueryPipeline::initRowsBeforeLimit() if (auto * source = typeid_cast(processor)) sources.emplace_back(source); + + if (auto * source = typeid_cast(processor)) + remote_sources.emplace_back(source); } else if (auto * sorting = typeid_cast(processor)) { @@ -735,7 +741,7 @@ void QueryPipeline::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); @@ -744,6 +750,9 @@ void QueryPipeline::initRowsBeforeLimit() for (auto & source : sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : remote_sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } /// If there is a limit, then enable rows_before_limit_at_least diff --git a/src/Processors/RowsBeforeLimitCounter.h b/src/Processors/RowsBeforeLimitCounter.h index 36ea4a557a8..f5eb40ff84a 100644 --- a/src/Processors/RowsBeforeLimitCounter.h +++ b/src/Processors/RowsBeforeLimitCounter.h @@ -15,6 +15,12 @@ public: rows_before_limit.fetch_add(rows, std::memory_order_release); } + void set(uint64_t rows) + { + setAppliedLimit(); + rows_before_limit.store(rows, std::memory_order_release); + } + uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } void setAppliedLimit() { has_applied_limit.store(true, std::memory_order_release); } diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp new file mode 100644 index 00000000000..e5931c75489 --- /dev/null +++ b/src/Processors/Sources/DelayedSource.cpp @@ -0,0 +1,6 @@ +#include + +namespace DB +{ + +} diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h new file mode 100644 index 00000000000..28cad6bc816 --- /dev/null +++ b/src/Processors/Sources/DelayedSource.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +class DelayedSource : public IProcessor +{ +public: + using Creator = std::function; + + DelayedSource(Block header, Creator processors_creator); + String getName() const override { return "Delayed"; } + + Status prepare() override; + void work() override; + +private: + Creator creator; +}; + +} diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp new file mode 100644 index 00000000000..090f3743709 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include + +namespace DB +{ + +RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_) + : SourceWithProgress(executor->getHeader(), false) + , add_aggregation_info(add_aggregation_info_), query_executor(std::move(executor)) +{ + /// Add AggregatedChunkInfo if we expect DataTypeAggregateFunction as a result. + const auto & sample = getPort().getHeader(); + for (auto & type : sample.getDataTypes()) + if (typeid_cast(type.get())) + add_aggregation_info = true; +} + +RemoteSource::~RemoteSource() = default; + +Chunk RemoteSource::generate() +{ + if (!was_query_sent) + { + /// Progress method will be called on Progress packet. + query_executor->setProgressCallback([this](const Progress & value) { progress(value); }); + + /// Get rows_before_limit result for remote query from ProfileInfo packet. 
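    /// (The callback below fires when the ProfileInfo packet arrives from the
    /// remote server; info.hasAppliedLimit() guards against overwriting the
    /// counter when no LIMIT was applied remotely.)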
+ query_executor->setProfileInfoCallback([this](const BlockStreamProfileInfo & info) + { + if (rows_before_limit && info.hasAppliedLimit()) + rows_before_limit->set(info.getRowsBeforeLimit()); + }); + + query_executor->sendQuery(); + + was_query_sent = true; + } + + auto block = query_executor->read(); + + if (!block) + { + query_executor->finish(); + return {}; + } + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + + if (add_aggregation_info) + { + auto info = std::make_shared(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + chunk.setChunkInfo(std::move(info)); + } + + return chunk; +} + +void RemoteSource::onCancel() +{ + query_executor->cancel(); +} + + +RemoteTotalsSource::RemoteTotalsSource(Block header) : ISource(std::move(header)) {} +RemoteTotalsSource::~RemoteTotalsSource() = default; + +Chunk RemoteTotalsSource::generate() +{ + /// Check use_count instead of comparing with nullptr just in case. + /// setQueryExecutor() may be called from other thread, but there shouldn't be any race, + /// because totals end extremes are always read after main data. + if (query_executor.use_count()) + { + if (auto block = query_executor->getTotals()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + } + + return {}; +} + + +RemoteExtremesSource::RemoteExtremesSource(Block header) : ISource(std::move(header)) {} +RemoteExtremesSource::~RemoteExtremesSource() = default; + +Chunk RemoteExtremesSource::generate() +{ + if (query_executor.use_count()) + { + if (auto block = query_executor->getExtremes()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + } + + return {}; +} + +} diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h new file mode 100644 index 00000000000..9cc3ea9c459 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class RemoteQueryExecutor; +using RemoteQueryExecutorPtr = std::shared_ptr; + +/// Source from RemoteQueryExecutor. Executes remote query and returns query result chunks. +class RemoteSource : public SourceWithProgress +{ +public: + /// Flag add_aggregation_info tells if AggregatedChunkInfo should be added to result chunk. + /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. + /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. + RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); + ~RemoteSource(); + + String getName() const override { return "Remote"; } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + + /// Stop reading from stream if output port is finished. + void onUpdatePorts() override + { + if (getPort().isFinished()) + cancel(); + } + +protected: + Chunk generate() override; + void onCancel() override; + +private: + bool was_query_sent = false; + bool add_aggregation_info = false; + RemoteQueryExecutorPtr query_executor; + RowsBeforeLimitCounterPtr rows_before_limit; +}; + +/// Totals source from RemoteQueryExecutor. 
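/// Unlike RemoteSource it is constructed from a header only; the shared
/// RemoteQueryExecutor is attached later via setQueryExecutor(), possibly from
/// another thread, which is why generate() checks use_count() rather than
/// comparing the pointer with nullptr. Totals are read only after the main
/// data stream has finished.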
+class RemoteTotalsSource : public ISource +{ +public: + explicit RemoteTotalsSource(Block header); + ~RemoteTotalsSource(); + + String getName() const override { return "RemoteTotals"; } + + void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +/// Extremes source from RemoteQueryExecutor. +class RemoteExtremesSource : public ISource +{ +public: + explicit RemoteExtremesSource(Block header); + ~RemoteExtremesSource(); + + String getName() const override { return "RemoteExtremes"; } + + void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +} diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 8d7a0a3d946..6488289d5ce 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -12,6 +12,11 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; } +SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress) + : ISourceWithProgress(header), auto_progress(enable_auto_progress) +{ +} + void SourceWithProgress::work() { if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode)) @@ -24,7 +29,7 @@ void SourceWithProgress::work() ISourceWithProgress::work(); - if (!was_progress_called && has_input) + if (auto_progress && !was_progress_called && has_input) progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() }); } } diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index 4778c50e49d..34810045143 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -44,6 +44,8 @@ class SourceWithProgress : public ISourceWithProgress { public: using ISourceWithProgress::ISourceWithProgress; + /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk. + SourceWithProgress(Block header, bool enable_auto_progress); using LocalLimits = IBlockInputStream::LocalLimits; using LimitsMode = IBlockInputStream::LimitsMode; @@ -76,6 +78,9 @@ private: /// This flag checks if progress() was manually called at generate() call. /// If not, it will be called for chunk after generate() was finished. bool was_progress_called = false; + + /// If enabled, progress() will be automatically called on each generated chunk. 
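/// RemoteSource makes use of this by passing enable_auto_progress = false:
/// for remote queries, progress values arrive in Progress packets and are
/// reported through the progress callback, so counting each generated chunk
/// again would report the same rows twice.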
+ bool auto_progress = true; }; } diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 62320f1c147..af5ebbcf3e9 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -106,9 +106,11 @@ SRCS( Port.cpp QueryPipeline.cpp ResizeProcessor.cpp + Sources/DelayedSource.cpp Sources/SinkToOutputStream.cpp Sources/SourceFromInputStream.cpp Sources/SourceWithProgress.cpp + Sources/RemoteSource.cpp Transforms/AddingMissedTransform.cpp Transforms/AddingSelectorTransform.cpp Transforms/AggregatingTransform.cpp From e67837bc4ae82dcb5d7c23067f952d24d60252fb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Jun 2020 18:07:37 +0300 Subject: [PATCH 0353/2229] cleanup --- .gitmodules | 6 +- CMakeLists.txt | 1 + cmake/find/cassandra.cmake | 12 +- contrib/CMakeLists.txt | 6 +- contrib/cassandra | 2 +- contrib/cassandra-cmake/CMakeLists.txt | 0 contrib/libuv | 2 +- contrib/libuv-cmake/CMakeLists.txt | 441 ------------------ .../compose}/docker_compose_cassandra.yml | 0 .../external-dicts-dict-sources.md | 28 +- .../CassandraBlockInputStream.cpp | 390 +++++++++------- src/Dictionaries/CassandraBlockInputStream.h | 43 +- .../CassandraDictionarySource.cpp | 86 ++-- src/Dictionaries/CassandraDictionarySource.h | 13 +- src/Dictionaries/CassandraHelpers.cpp | 2 +- src/Dictionaries/CassandraHelpers.h | 10 +- src/Dictionaries/ExternalQueryBuilder.cpp | 10 +- src/Dictionaries/registerDictionaries.h | 1 - tests/integration/helpers/cluster.py | 4 +- .../external_sources.py | 1 + .../test.py | 5 +- 21 files changed, 381 insertions(+), 682 deletions(-) delete mode 100644 contrib/cassandra-cmake/CMakeLists.txt delete mode 100644 contrib/libuv-cmake/CMakeLists.txt rename {tests/integration/helpers => docker/test/integration/compose}/docker_compose_cassandra.yml (100%) diff --git a/.gitmodules b/.gitmodules index a4c84301fc9..c05da0c9ff9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -159,11 +159,11 @@ url = https://github.com/openldap/openldap.git [submodule "contrib/cassandra"] path = contrib/cassandra - url = https://github.com/tavplubix/cpp-driver.git - branch = ch-tmp + url = https://github.com/ClickHouse-Extras/cpp-driver.git + branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/tavplubix/libuv.git + url = https://github.com/ClickHouse-Extras/libuv.git branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib diff --git a/CMakeLists.txt b/CMakeLists.txt index 54a88404579..27005bd8d87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -328,6 +328,7 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include (GNUInstallDirs) include (cmake/contrib_finder.cmake) include (cmake/lib_name.cmake) + find_contrib_lib(double-conversion) # Must be before parquet include (cmake/find/ssl.cmake) include (cmake/find/ldap.cmake) # after ssl diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake index b1d76702cfa..f41e0f645f4 100644 --- a/cmake/find/cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -1,8 +1,10 @@ -if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) +option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES}) + +if (ENABLE_CASSANDRA) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/libuv is missing. 
to fix try run: \n git submodule update --init --recursive") elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message (WARNING "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") else() set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR @@ -17,6 +19,8 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) set (USE_CASSANDRA 1) set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") endif() endif() + +message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}") +message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0a907f01573..99b94d04473 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -291,7 +291,7 @@ if (USE_INTERNAL_AWS_S3_LIBRARY) endif () if (USE_BASE64) - add_subdirectory(base64-cmake) + add_subdirectory (base64-cmake) endif() if (USE_INTERNAL_HYPERSCAN_LIBRARY) @@ -315,8 +315,8 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) - add_subdirectory(libuv-cmake) - add_subdirectory(cassandra) + add_subdirectory (libuv) + add_subdirectory (cassandra) endif() add_subdirectory (fmtlib-cmake) diff --git a/contrib/cassandra b/contrib/cassandra index 58a71947d9d..a49b4e0e269 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 58a71947d9dd8412f5aeb38275fa81417ea27ee0 +Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509 diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/contrib/libuv b/contrib/libuv index 379988fef9b..84438304f41 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 379988fef9b0c6ac706a624dbac6be8924a3a0da +Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28 diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt deleted file mode 100644 index 65aeabdd9cb..00000000000 --- a/contrib/libuv-cmake/CMakeLists.txt +++ /dev/null @@ -1,441 +0,0 @@ -cmake_minimum_required(VERSION 3.4) -project(libuv LANGUAGES C) - -include(CMakePackageConfigHelpers) -include(CMakeDependentOption) -include(GNUInstallDirs) -include(CTest) - -#cmake_dependent_option(LIBUV_BUILD_TESTS -# "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON -# "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) - -if(MSVC) - list(APPEND uv_cflags /W4) -elseif(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND uv_cflags -fvisibility=hidden --std=gnu89) - list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes) - list(APPEND uv_cflags -Wno-unused-parameter) -endif() - -set(uv_sources - src/fs-poll.c - src/idna.c - src/inet.c - src/random.c - src/strscpy.c - src/threadpool.c - src/timer.c - src/uv-common.c - src/uv-data-getter-setters.c - src/version.c) - -set(uv_test_sources - test/blackhole-server.c - test/echo-server.c - test/run-tests.c - test/runner.c - test/test-active.c - test/test-async-null-cb.c - test/test-async.c - test/test-barrier.c - test/test-callback-order.c - test/test-callback-stack.c - test/test-close-fd.c - test/test-close-order.c - test/test-condvar.c - test/test-connect-unspecified.c - test/test-connection-fail.c - 
test/test-cwd-and-chdir.c - test/test-default-loop-close.c - test/test-delayed-accept.c - test/test-dlerror.c - test/test-eintr-handling.c - test/test-embed.c - test/test-emfile.c - test/test-env-vars.c - test/test-error.c - test/test-fail-always.c - test/test-fork.c - test/test-fs-copyfile.c - test/test-fs-event.c - test/test-fs-poll.c - test/test-fs.c - test/test-fs-readdir.c - test/test-fs-fd-hash.c - test/test-fs-open-flags.c - test/test-get-currentexe.c - test/test-get-loadavg.c - test/test-get-memory.c - test/test-get-passwd.c - test/test-getaddrinfo.c - test/test-gethostname.c - test/test-getnameinfo.c - test/test-getsockname.c - test/test-getters-setters.c - test/test-gettimeofday.c - test/test-handle-fileno.c - test/test-homedir.c - test/test-hrtime.c - test/test-idle.c - test/test-idna.c - test/test-ip4-addr.c - test/test-ip6-addr.c - test/test-ipc-heavy-traffic-deadlock-bug.c - test/test-ipc-send-recv.c - test/test-ipc.c - test/test-loop-alive.c - test/test-loop-close.c - test/test-loop-configure.c - test/test-loop-handles.c - test/test-loop-stop.c - test/test-loop-time.c - test/test-multiple-listen.c - test/test-mutexes.c - test/test-osx-select.c - test/test-pass-always.c - test/test-ping-pong.c - test/test-pipe-bind-error.c - test/test-pipe-close-stdout-read-stdin.c - test/test-pipe-connect-error.c - test/test-pipe-connect-multiple.c - test/test-pipe-connect-prepare.c - test/test-pipe-getsockname.c - test/test-pipe-pending-instances.c - test/test-pipe-sendmsg.c - test/test-pipe-server-close.c - test/test-pipe-set-fchmod.c - test/test-pipe-set-non-blocking.c - test/test-platform-output.c - test/test-poll-close-doesnt-corrupt-stack.c - test/test-poll-close.c - test/test-poll-closesocket.c - test/test-poll-oob.c - test/test-poll.c - test/test-process-priority.c - test/test-process-title-threadsafe.c - test/test-process-title.c - test/test-queue-foreach-delete.c - test/test-random.c - test/test-ref.c - test/test-run-nowait.c - test/test-run-once.c - test/test-semaphore.c - test/test-shutdown-close.c - test/test-shutdown-eof.c - test/test-shutdown-twice.c - test/test-signal-multiple-loops.c - test/test-signal-pending-on-close.c - test/test-signal.c - test/test-socket-buffer-size.c - test/test-spawn.c - test/test-stdio-over-pipes.c - test/test-strscpy.c - test/test-tcp-alloc-cb-fail.c - test/test-tcp-bind-error.c - test/test-tcp-bind6-error.c - test/test-tcp-close-accept.c - test/test-tcp-close-while-connecting.c - test/test-tcp-close.c - test/test-tcp-close-reset.c - test/test-tcp-connect-error-after-write.c - test/test-tcp-connect-error.c - test/test-tcp-connect-timeout.c - test/test-tcp-connect6-error.c - test/test-tcp-create-socket-early.c - test/test-tcp-flags.c - test/test-tcp-oob.c - test/test-tcp-open.c - test/test-tcp-read-stop.c - test/test-tcp-shutdown-after-write.c - test/test-tcp-try-write.c - test/test-tcp-try-write-error.c - test/test-tcp-unexpected-read.c - test/test-tcp-write-after-connect.c - test/test-tcp-write-fail.c - test/test-tcp-write-queue-order.c - test/test-tcp-write-to-half-open-connection.c - test/test-tcp-writealot.c - test/test-thread-equal.c - test/test-thread.c - test/test-threadpool-cancel.c - test/test-threadpool.c - test/test-timer-again.c - test/test-timer-from-check.c - test/test-timer.c - test/test-tmpdir.c - test/test-tty-duplicate-key.c - test/test-tty.c - test/test-udp-alloc-cb-fail.c - test/test-udp-bind.c - test/test-udp-connect.c - test/test-udp-create-socket-early.c - test/test-udp-dgram-too-big.c - test/test-udp-ipv6.c - 
test/test-udp-multicast-interface.c - test/test-udp-multicast-interface6.c - test/test-udp-multicast-join.c - test/test-udp-multicast-join6.c - test/test-udp-multicast-ttl.c - test/test-udp-open.c - test/test-udp-options.c - test/test-udp-send-and-recv.c - test/test-udp-send-hang-loop.c - test/test-udp-send-immediate.c - test/test-udp-send-unreachable.c - test/test-udp-try-send.c - test/test-uname.c - test/test-walk-handles.c - test/test-watcher-cross-stop.c) - -#if(WIN32) -# list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) -# list(APPEND uv_libraries -# advapi32 -# iphlpapi -# psapi -# shell32 -# user32 -# userenv -# ws2_32) -# list(APPEND uv_sources -# src/win/async.c -# src/win/core.c -# src/win/detect-wakeup.c -# src/win/dl.c -# src/win/error.c -# src/win/fs.c -# src/win/fs-event.c -# src/win/getaddrinfo.c -# src/win/getnameinfo.c -# src/win/handle.c -# src/win/loop-watcher.c -# src/win/pipe.c -# src/win/thread.c -# src/win/poll.c -# src/win/process.c -# src/win/process-stdio.c -# src/win/signal.c -# src/win/snprintf.c -# src/win/stream.c -# src/win/tcp.c -# src/win/tty.c -# src/win/udp.c -# src/win/util.c -# src/win/winapi.c -# src/win/winsock.c) -# list(APPEND uv_test_libraries ws2_32) -# list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) -#else() - -if(CMAKE_SIZEOF_VOID_P EQUAL 4) - list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) -endif() - -if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") - # Android has pthread as part of its c library, not as a separate - # libpthread.so. - list(APPEND uv_libraries pthread) -endif() -list(APPEND uv_sources - src/unix/async.c - src/unix/core.c - src/unix/dl.c - src/unix/fs.c - src/unix/getaddrinfo.c - src/unix/getnameinfo.c - src/unix/loop-watcher.c - src/unix/loop.c - src/unix/pipe.c - src/unix/poll.c - src/unix/process.c - src/unix/random-devurandom.c - src/unix/signal.c - src/unix/stream.c - src/unix/tcp.c - src/unix/thread.c - src/unix/tty.c - src/unix/udp.c) -list(APPEND uv_test_sources test/runner-unix.c) -#endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "AIX") - list(APPEND uv_defines - _ALL_SOURCE - _LINUX_SOURCE_COMPAT - _THREAD_SAFE - _XOPEN_SOURCE=500) - list(APPEND uv_libraries perfstat) - list(APPEND uv_sources src/unix/aix.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "Android") - list(APPEND uv_libs dl) - list(APPEND uv_sources - src/unix/android-ifaddrs.c - src/unix/linux-core.c - src/unix/linux-inotify.c - src/unix/linux-syscalls.c - src/unix/procfs-exepath.c - src/unix/pthread-fixes.c - src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c - src/unix/sysinfo-loadavg.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS/390") - list(APPEND uv_sources src/unix/proctitle.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD") - list(APPEND uv_sources src/unix/freebsd.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") - list(APPEND uv_sources src/unix/posix-hrtime.c src/unix/bsd-proctitle.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") - list(APPEND uv_sources src/unix/bsd-ifaddrs.c src/unix/kqueue.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - list(APPEND uv_sources src/unix/random-getrandom.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") - list(APPEND uv_sources src/unix/random-getentropy.c) -endif() - -if(APPLE) - list(APPEND uv_defines _DARWIN_UNLIMITED_SELECT=1 _DARWIN_USE_64_BIT_INODE=1) - list(APPEND uv_sources - src/unix/darwin-proctitle.c - src/unix/darwin.c - 
src/unix/fsevents.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) - list(APPEND uv_libraries dl rt) - list(APPEND uv_sources - src/unix/linux-core.c - src/unix/linux-inotify.c - src/unix/linux-syscalls.c - src/unix/procfs-exepath.c - src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c - src/unix/sysinfo-loadavg.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") - list(APPEND uv_sources src/unix/netbsd.c) - list(APPEND uv_libraries kvm) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") - list(APPEND uv_sources src/unix/openbsd.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OS/390") - list(APPEND uv_defines PATH_MAX=255) - list(APPEND uv_defines _AE_BIMODAL) - list(APPEND uv_defines _ALL_SOURCE) - list(APPEND uv_defines _LARGE_TIME_API) - list(APPEND uv_defines _OPEN_MSGQ_EXT) - list(APPEND uv_defines _OPEN_SYS_FILE_EXT) - list(APPEND uv_defines _OPEN_SYS_IF_EXT) - list(APPEND uv_defines _OPEN_SYS_SOCK_EXT3) - list(APPEND uv_defines _OPEN_SYS_SOCK_IPV6) - list(APPEND uv_defines _UNIX03_SOURCE) - list(APPEND uv_defines _UNIX03_THREADS) - list(APPEND uv_defines _UNIX03_WITHDRAWN) - list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) - list(APPEND uv_sources - src/unix/pthread-fixes.c - src/unix/pthread-barrier.c - src/unix/os390.c - src/unix/os390-syscalls.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - list(APPEND uv_defines __EXTENSIONS__ _XOPEN_SOURCE=500) - list(APPEND uv_libraries kstat nsl sendfile socket) - list(APPEND uv_sources src/unix/no-proctitle.c src/unix/sunos.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|Linux|NetBSD|OpenBSD") - list(APPEND uv_test_libraries util) -endif() - -set(uv_sources_tmp "") -foreach(file ${uv_sources}) - list(APPEND uv_sources_tmp "${LIBUV_ROOT_DIR}/${file}") -endforeach(file) -set(uv_sources "${uv_sources_tmp}") - -list(APPEND uv_defines CLICKHOUSE_GLIBC_COMPATIBILITY) - -add_library(uv SHARED ${uv_sources}) -target_compile_definitions(uv - INTERFACE USING_UV_SHARED=1 - PRIVATE ${uv_defines} BUILDING_UV_SHARED=1) -target_compile_options(uv PRIVATE ${uv_cflags}) -target_include_directories(uv PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) -target_link_libraries(uv ${uv_libraries}) - -add_library(uv_a STATIC ${uv_sources}) -target_compile_definitions(uv_a PRIVATE ${uv_defines}) -target_compile_options(uv_a PRIVATE ${uv_cflags}) -target_include_directories(uv_a PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) -target_link_libraries(uv_a ${uv_libraries}) - -#if(LIBUV_BUILD_TESTS) -# add_executable(uv_run_tests ${uv_test_sources}) -# target_compile_definitions(uv_run_tests -# PRIVATE ${uv_defines} USING_UV_SHARED=1) -# target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_tests uv ${uv_test_libraries}) -# add_test(NAME uv_test -# COMMAND uv_run_tests -# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -# add_executable(uv_run_tests_a ${uv_test_sources}) -# target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) -# target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) -# add_test(NAME uv_test_a -# COMMAND uv_run_tests_a -# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -#endif() - -if(UNIX) - # Now for some gibbering horrors from beyond the stars... 
-  foreach(x ${uv_libraries})
-    set(LIBS "${LIBS} -l${x}")
-  endforeach(x)
-  file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT)
-  string(REGEX MATCH [0-9]+[.][0-9]+[.][0-9]+ PACKAGE_VERSION "${configure_ac}")
-  string(REGEX MATCH ^[0-9]+ UV_VERSION_MAJOR "${PACKAGE_VERSION}")
-  # The version in the filename is mirroring the behaviour of autotools.
-  set_target_properties(uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 SOVERSION ${UV_VERSION_MAJOR})
-  set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
-  set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
-  set(prefix ${CMAKE_INSTALL_PREFIX})
-  configure_file(${LIBUV_ROOT_DIR}/libuv.pc.in ${LIBUV_ROOT_DIR}/libuv.pc @ONLY)
-
-  install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
-  install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR})
-  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libuv.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
-  install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
-  install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
-endif()
-
-#if(WIN32)
-# install(DIRECTORY include/ DESTINATION include)
-# install(FILES LICENSE DESTINATION .)
-# install(TARGETS uv uv_a
-#         RUNTIME DESTINATION lib/$<CONFIG>
-#         ARCHIVE DESTINATION lib/$<CONFIG>)
-#endif()
diff --git a/tests/integration/helpers/docker_compose_cassandra.yml b/docker/test/integration/compose/docker_compose_cassandra.yml
similarity index 100%
rename from tests/integration/helpers/docker_compose_cassandra.yml
rename to docker/test/integration/compose/docker_compose_cassandra.yml
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
index 650630701be..71b719ce996 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@@ -633,9 +633,35 @@ Example of settings:
 ``` xml
 <source>
     <cassandra>
        <host>localhost</host>
-       <port>6349</port>
+       <port>9042</port>
+       <user>username</user>
+       <password>qwerty123</password>
+       <keyspace>database_name</keyspace>
+       <column_family>table_name</column_family>
+       <allow_filtering>1</allow_filtering>
+       <partition_key_prefix>1</partition_key_prefix>
+       <consistency>One</consistency>
+       <where>"SomeColumn" = 42</where>
+       <max_threads>8</max_threads>
     </cassandra>
 </source>
 ```
+
+Setting fields:
+- `host` – The Cassandra host or a comma-separated list of hosts.
+- `port` – The port on the Cassandra servers. If not specified, the default port 9042 is used.
+- `user` – Name of the Cassandra user.
+- `password` – Password of the Cassandra user.
+- `keyspace` – Name of the keyspace (database).
+- `column_family` – Name of the column family (table).
+- `allow_filtering` – Flag that allows or disallows potentially expensive conditions on clustering key columns. Default value is 1.
+- `partition_key_prefix` – Number of partition key columns in the primary key of the Cassandra table.
+  Required for composite key dictionaries. The order of key columns in the dictionary definition must be the same as in Cassandra.
+  Default value is 1 (the first key column is the partition key and the other key columns are the clustering key).
+- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`,
+  `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`.
+- `where` – Optional selection criteria.
+- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries.
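For illustration only (this DDL form is not part of the patch): assuming the usual one-to-one mapping between XML setting fields and `SOURCE(...)` key-value pairs that the other dictionary sources in this file follow, the same source should be expressible in DDL roughly as:

``` sql
SOURCE(CASSANDRA(
    host 'localhost'
    port 9042
    user 'username'
    password 'qwerty123'
    keyspace 'database_name'
    column_family 'table_name'
    allow_filtering 1
    partition_key_prefix 1
    consistency 'One'
    where '"SomeColumn" = 42'
    max_threads 8
))
```

Field names and quoting above simply mirror the XML example; treat it as a sketch rather than documented syntax.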
+ + [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index e00fd5ec3e9..8fb9eb3f93c 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -16,189 +16,261 @@ namespace DB { + namespace ErrorCodes { - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; extern const int CASSANDRA_INTERNAL_ERROR; } CassandraBlockInputStream::CassandraBlockInputStream( - const CassClusterPtr & cluster, + const CassSessionShared & session_, const String & query_str, const Block & sample_block, - const size_t max_block_size_) - : statement(query_str.c_str(), /*parameters count*/ 0) + size_t max_block_size_) + : session(session_) + , statement(query_str.c_str(), /*parameters count*/ 0) , max_block_size(max_block_size_) , has_more_pages(cass_true) { description.init(sample_block); cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); - cassandraWaitAndCheck(cass_session_connect(session, cluster)); } -namespace +void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const { - using ValueType = ExternalResultDescription::ValueType; - - void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) + switch (type) { - /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) - switch (type) + case ValueType::vtUInt8: { - case ValueType::vtUInt8: - { - cass_int8_t value; - cass_value_get_int8(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt16: - { - cass_int16_t value; - cass_value_get_int16(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt32: - { - cass_int32_t value; - cass_value_get_int32(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt64: - { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt8: - { - cass_int8_t value; - cass_value_get_int8(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt16: - { - cass_int16_t value; - cass_value_get_int16(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt32: - { - cass_int32_t value; - cass_value_get_int32(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt64: - { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtFloat32: - { - cass_float_t value; - cass_value_get_float(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtFloat64: - { - cass_double_t value; - cass_value_get_double(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtString: - { - const char * value; - size_t value_length; - cass_value_get_string(cass_value, &value, &value_length); - assert_cast(column).insertData(value, value_length); - break; - } - case ValueType::vtDate: - { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - assert_cast(column).insertValue(static_cast(value)); - break; - } - case ValueType::vtDateTime: - { - cass_int64_t value; - 
cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(static_cast(value / 1000)); - break; - } - case ValueType::vtUUID: - { - CassUuid value; - cass_value_get_uuid(cass_value, &value); - std::array uuid_str; - cass_uuid_string(value, uuid_str.data()); - assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); - break; - } + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt16: + { + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt32: + { + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt64: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtInt8: + { + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt16: + { + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt32: + { + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt64: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtFloat32: + { + cass_float_t value; + cass_value_get_float(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtFloat64: + { + cass_double_t value; + cass_value_get_double(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtString: + { + const char * value = nullptr; + size_t value_length; + cass_value_get_string(cass_value, &value, &value_length); + assert_cast(column).insertData(value, value_length); + break; + } + case ValueType::vtDate: + { + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtDateTime: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(static_cast(value / 1000)); + break; + } + case ValueType::vtUUID: + { + CassUuid value; + cass_value_get_uuid(cass_value, &value); + std::array uuid_str; + cass_uuid_string(value, uuid_str.data()); + assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); + break; } } } - Block CassandraBlockInputStream::readImpl() +void CassandraBlockInputStream::readPrefix() +{ + result_future = cass_session_execute(*session, statement); +} + +Block CassandraBlockInputStream::readImpl() +{ + if (!has_more_pages) + return {}; + + MutableColumns columns = description.sample_block.cloneEmptyColumns(); + + cassandraWaitAndCheck(result_future); + CassResultPtr result = cass_future_get_result(result_future); + + assert(cass_result_column_count(result) == columns.size()); + + assertTypes(result); + + has_more_pages = cass_result_has_more_pages(result); + if (has_more_pages) { - if (!has_more_pages) - return {}; - - MutableColumns columns = description.sample_block.cloneEmptyColumns(); - CassFuturePtr query_future = cass_session_execute(session, statement); - - CassResultPtr result = cass_future_get_result(query_future); - - 
if (!result) { - const char* error_message; - size_t error_message_length; - cass_future_error_message(query_future, &error_message, &error_message_length); - - throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; - } - - [[maybe_unused]] size_t row_count = 0; - assert(cass_result_column_count(result) == columns.size()); - CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] - while (cass_iterator_next(rows_iter)) - { - const CassRow * row = cass_iterator_get_row(rows_iter); - for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) - { - const CassValue * val = cass_row_get_column(row, col_idx); - if (cass_value_is_null(val)) - columns[col_idx]->insertDefault(); - else if (description.types[col_idx].second) - { - ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); - insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); - column_nullable.getNullMapData().emplace_back(0); - } - else - insertValue(*columns[col_idx], description.types[col_idx].first, val); - } - ++row_count; - } - assert(cass_result_row_count(result) == row_count); - - has_more_pages = cass_result_has_more_pages(result); - - if (has_more_pages) - cassandraCheck(cass_statement_set_paging_state(statement, result)); - - return description.sample_block.cloneWithColumns(std::move(columns)); + cassandraCheck(cass_statement_set_paging_state(statement, result)); + result_future = cass_session_execute(*session, statement); } + CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + while (cass_iterator_next(rows_iter)) + { + const CassRow * row = cass_iterator_get_row(rows_iter); + for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) + { + const CassValue * val = cass_row_get_column(row, col_idx); + if (cass_value_is_null(val)) + columns[col_idx]->insertDefault(); + else if (description.types[col_idx].second) + { + ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); + insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[col_idx], description.types[col_idx].first, val); + } + } + + assert(cass_result_row_count(result) == columns.front()->size()); + + return description.sample_block.cloneWithColumns(std::move(columns)); +} + +void CassandraBlockInputStream::assertTypes(const CassResultPtr & result) +{ + if (!assert_types) + return; + + size_t column_count = cass_result_column_count(result); + for (size_t i = 0; i < column_count; ++i) + { + CassValueType expected; + String expected_text; + + /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) + switch (description.types[i].first) + { + case ExternalResultDescription::ValueType::vtInt8: + case ExternalResultDescription::ValueType::vtUInt8: + expected = CASS_VALUE_TYPE_TINY_INT; + expected_text = "tinyint"; + break; + case ExternalResultDescription::ValueType::vtInt16: + case ExternalResultDescription::ValueType::vtUInt16: + expected = CASS_VALUE_TYPE_SMALL_INT; + expected_text = "smallint"; + break; + case ExternalResultDescription::ValueType::vtUInt32: + case ExternalResultDescription::ValueType::vtInt32: + expected = CASS_VALUE_TYPE_INT; + expected_text = "int"; + break; + case ExternalResultDescription::ValueType::vtInt64: + case ExternalResultDescription::ValueType::vtUInt64: + expected = CASS_VALUE_TYPE_BIGINT; + expected_text = "bigint"; + break; + case 
ExternalResultDescription::ValueType::vtFloat32: + expected = CASS_VALUE_TYPE_FLOAT; + expected_text = "float"; + break; + case ExternalResultDescription::ValueType::vtFloat64: + expected = CASS_VALUE_TYPE_DOUBLE; + expected_text = "double"; + break; + case ExternalResultDescription::ValueType::vtString: + expected = CASS_VALUE_TYPE_TEXT; + expected_text = "text, ascii or varchar"; + break; + case ExternalResultDescription::ValueType::vtDate: + expected = CASS_VALUE_TYPE_DATE; + expected_text = "date"; + break; + case ExternalResultDescription::ValueType::vtDateTime: + expected = CASS_VALUE_TYPE_TIMESTAMP; + expected_text = "timestamp"; + break; + case ExternalResultDescription::ValueType::vtUUID: + expected = CASS_VALUE_TYPE_UUID; + expected_text = "uuid"; + break; + } + + CassValueType got = cass_result_column_type(result, i); + + if (got != expected) + { + if (expected == CASS_VALUE_TYPE_TEXT && (got == CASS_VALUE_TYPE_ASCII || got == CASS_VALUE_TYPE_VARCHAR)) + continue; + + const auto & column_name = description.sample_block.getColumnsWithTypeAndName()[i].name; + throw Exception("Type mismatch for column " + column_name + ": expected Cassandra type " + expected_text, + ErrorCodes::TYPE_MISMATCH); + } + } + + assert_types = false; +} + } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 700211ebb3e..5208e516a0e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -9,30 +9,35 @@ namespace DB { +class CassandraBlockInputStream final : public IBlockInputStream +{ +public: + CassandraBlockInputStream( + const CassSessionShared & session_, + const String & query_str, + const Block & sample_block, + size_t max_block_size); + String getName() const override { return "Cassandra"; } -/// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining - class CassandraBlockInputStream final : public IBlockInputStream - { - public: - CassandraBlockInputStream( - const CassClusterPtr & cluster, - const String & query_str, - const Block & sample_block, - const size_t max_block_size); + Block getHeader() const override { return description.sample_block.cloneEmpty(); } - String getName() const override { return "Cassandra"; } + void readPrefix() override; - Block getHeader() const override { return description.sample_block.cloneEmpty(); } +private: + using ValueType = ExternalResultDescription::ValueType; - private: - Block readImpl() override; + Block readImpl() override; + void insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const; + void assertTypes(const CassResultPtr & result); - CassSessionPtr session; - CassStatementPtr statement; - const size_t max_block_size; - ExternalResultDescription description; - cass_bool_t has_more_pages; - }; + CassSessionShared session; + CassStatementPtr statement; + CassFuturePtr result_future; + const size_t max_block_size; + ExternalResultDescription description; + cass_bool_t has_more_pages; + bool assert_types = true; +}; } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index fec60fe3d83..c41f528db91 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,37 +1,35 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include -#include -#include -#include namespace DB { - namespace ErrorCodes - { - 
extern const int SUPPORT_IS_DISABLED;
-    }
-    void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
+namespace ErrorCodes
+{
+    extern const int SUPPORT_IS_DISABLED;
+    extern const int NOT_IMPLEMENTED;
+}
+
+void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
+{
+    auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
+                                   [[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
+                                   [[maybe_unused]] const std::string & config_prefix,
+                                   [[maybe_unused]] Block & sample_block,
+                                   const Context & /* context */,
+                                   bool /*check_config*/) -> DictionarySourcePtr
     {
-        auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct,
-                                       [[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
-                                       [[maybe_unused]] const std::string & config_prefix,
-                                       [[maybe_unused]] Block & sample_block,
-                                       const Context & /* context */,
-                                       bool /*check_config*/) -> DictionarySourcePtr
-        {
 #if USE_CASSANDRA
-            setupCassandraDriverLibraryLogging(CASS_LOG_TRACE);
-            return std::make_unique<CassandraDictionarySource>(dict_struct, config, config_prefix + ".cassandra", sample_block);
+        setupCassandraDriverLibraryLogging(CASS_LOG_INFO);
+        return std::make_unique<CassandraDictionarySource>(dict_struct, config, config_prefix + ".cassandra", sample_block);
 #else
-            throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.",
-                            ErrorCodes::SUPPORT_IS_DISABLED};
+        throw Exception{"Dictionary source of type `cassandra` is disabled because ClickHouse was built without cassandra support.",
+                        ErrorCodes::SUPPORT_IS_DISABLED};
 #endif
-        };
-        factory.registerSource("cassandra", create_table_source);
-    }
+    };
+    factory.registerSource("cassandra", create_table_source);
+}
 
 }
@@ -39,8 +37,9 @@ namespace DB
 
 #include
 #include
-#include <ext/range.h>
 #include "CassandraBlockInputStream.h"
+#include
+#include
 
 namespace DB
 {
@@ -57,7 +56,7 @@ CassandraSettings::CassandraSettings(
     , port(config.getUInt(config_prefix + ".port", 0))
     , user(config.getString(config_prefix + ".user", ""))
     , password(config.getString(config_prefix + ".password", ""))
-    , db(config.getString(config_prefix + ".keyspace", ""))
+    , db(config.getString(config_prefix + ".keyspace"))
     , table(config.getString(config_prefix + ".column_family"))
     , allow_filtering(config.getBool(config_prefix + ".allow_filtering", false))
     , partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1))
@@ -124,7 +123,7 @@ CassandraDictionarySource::CassandraDictionarySource(
 {
 }
 
-void CassandraDictionarySource::maybeAllowFiltering(String & query)
+void CassandraDictionarySource::maybeAllowFiltering(String & query) const
 {
     if (!settings.allow_filtering)
         return;
@@ -137,10 +136,11 @@ BlockInputStreamPtr CassandraDictionarySource::loadAll()
     String query = query_builder.composeLoadAllQuery();
     maybeAllowFiltering(query);
     LOG_INFO(log, "Loading all using query: {}", query);
-    return std::make_shared<CassandraBlockInputStream>(cluster, query, sample_block, max_block_size);
+    return std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size);
 }
 
-std::string CassandraDictionarySource::toString() const {
+std::string CassandraDictionarySource::toString() const
+{
     return "Cassandra: " + settings.db + '.' + settings.table;
 }
 
@@ -149,7 +149,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector<UInt64> & ids)
     String query = query_builder.composeLoadIdsQuery(ids);
     maybeAllowFiltering(query);
     LOG_INFO(log, "Loading ids using query: {}", query);
-    return std::make_shared<CassandraBlockInputStream>(cluster, query, sample_block, max_block_size);
+    return std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size);
 }
 
 BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
@@ -162,7 +162,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu
     for (const auto & row : requested_rows)
     {
         SipHash partition_key;
-        for (const auto i : ext::range(0, settings.partition_key_prefix))
+        for (size_t i = 0; i < settings.partition_key_prefix; ++i)
             key_columns[i]->updateHashWithValue(row, partition_key);
         partitions[partition_key.get64()].push_back(row);
     }
@@ -173,7 +173,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu
         String query = query_builder.composeLoadKeysQuery(key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix);
         maybeAllowFiltering(query);
         LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query);
-        streams.push_back(std::make_shared<CassandraBlockInputStream>(cluster, query, sample_block, max_block_size));
+        streams.push_back(std::make_shared<CassandraBlockInputStream>(getSession(), query, sample_block, max_block_size));
     }
 
     if (streams.size() == 1)
@@ -182,6 +182,30 @@
     return std::make_shared<UnionBlockInputStream>(streams, nullptr, settings.max_threads);
 }
 
+BlockInputStreamPtr CassandraDictionarySource::loadUpdatedAll()
+{
+    throw Exception("Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED);
+}
+
+CassSessionShared CassandraDictionarySource::getSession()
+{
+    /// Reuse connection if exists, create new one if not
+    auto session = maybe_session.lock();
+    if (session)
+        return session;
+
+    std::lock_guard lock(connect_mutex);
+    session = maybe_session.lock();
+    if (session)
+        return session;
+
+    session = std::make_shared<CassSessionPtr>();
+    CassFuturePtr future = cass_session_connect(*session, cluster);
+    cassandraWaitAndCheck(future);
+    maybe_session = session;
+    return session;
+}
+
 }
 
 #endif
diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h
index dff93fcd029..18db66b94c2 100644
--- a/src/Dictionaries/CassandraDictionarySource.h
+++ b/src/Dictionaries/CassandraDictionarySource.h
@@ -34,7 +34,8 @@ struct CassandraSettings
     void setConsistency(const String & config_str);
 };
 
-class CassandraDictionarySource final : public IDictionarySource {
+class CassandraDictionarySource final : public IDictionarySource
+{
 public:
     CassandraDictionarySource(
         const DictionaryStructure & dict_struct,
@@ -64,15 +65,13 @@ public:
 
     BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
 
-    BlockInputStreamPtr loadUpdatedAll() override
-    {
-        throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED};
-    }
+    BlockInputStreamPtr loadUpdatedAll() override;
 
     String toString() const override;
 
 private:
-    void maybeAllowFiltering(String & query);
+    void maybeAllowFiltering(String & query) const;
+    CassSessionShared getSession();
 
     Poco::Logger * log;
     const DictionaryStructure dict_struct;
@@ -80,7 +79,9 @@ private:
     Block sample_block;
ExternalQueryBuilder query_builder; + std::mutex connect_mutex; CassClusterPtr cluster; + CassSessionWeak maybe_session; }; } diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp index 4f92a75a1f3..6de80a455c7 100644 --- a/src/Dictionaries/CassandraHelpers.cpp +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -21,7 +21,7 @@ void cassandraCheck(CassError code) } -void cassandraWaitAndCheck(CassFuturePtr && future) +void cassandraWaitAndCheck(CassFuturePtr & future) { auto code = cass_future_error_code(future); /// Waits if not ready if (code == CASS_OK) diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 2a91815e37d..70b38acf15c 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -7,6 +7,7 @@ #if USE_CASSANDRA #include #include +#include namespace DB { @@ -37,6 +38,7 @@ public: Dtor(ptr); ptr = rhs.ptr; rhs.ptr = nullptr; + return *this; } ~ObjectHolder() @@ -54,8 +56,12 @@ public: /// These object are created on pointer construction using CassClusterPtr = Cassandra::ObjectHolder; -using CassSessionPtr = Cassandra::ObjectHolder; using CassStatementPtr = Cassandra::ObjectHolder; +using CassSessionPtr = Cassandra::ObjectHolder; + +/// Share connections between streams. Executing statements in one session object is thread-safe +using CassSessionShared = std::shared_ptr; +using CassSessionWeak = std::weak_ptr; /// The following objects are created inside Cassandra driver library, /// but must be freed by user code @@ -65,7 +71,7 @@ using CassIteratorPtr = Cassandra::ObjectHoldertest {table} 1 + "Int64_" < 1000000000000000000 '''.format( host=self.docker_hostname, diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 98ba191c948..8cd6940d587 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -3,12 +3,10 @@ import os from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout - from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed -from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra import math -import time + SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') @@ -212,7 +210,6 @@ def get_dictionaries(fold, total_folds, all_dicts): return all_dicts[fold * chunk_len : (fold + 1) * chunk_len] -#@pytest.mark.timeout(3000) @pytest.mark.parametrize("fold", list(range(10))) def test_simple_dictionaries(started_cluster, fold): fields = FIELDS["simple"] From df3dfd5b81311ab5a3555a91de9f6d46127358d0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jun 2020 01:00:02 +0300 Subject: [PATCH 0354/2229] fix clang-tidy build --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 7 +++---- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/IMergeTreeReader.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 7 files changed, 12 insertions(+), 13 
deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6ef4cc15032..61561a8e3cf 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -135,7 +135,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out) { - auto part_in_memory = dynamic_cast(part.get()); + const auto * part_in_memory = dynamic_cast(part.get()); if (!part_in_memory) throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 22accdbe190..adefa52fc13 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -773,9 +773,6 @@ void IMergeTreeDataPart::remove() const String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix) const { - /// Do not allow underscores in the prefix because they are used as separators. - - assert(prefix.find_first_of('_') == String::npos); String res; /** If you need to detach a part, and directory into which we want to rename it already exists, @@ -798,12 +795,14 @@ String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix) const String IMergeTreeDataPart::getRelativePathForDetachedPart(const String & prefix) const { + /// Do not allow underscores in the prefix because they are used as separators. + assert(prefix.find_first_of('_') == String::npos); return "detached/" + getRelativePathForPrefix(prefix); } void IMergeTreeDataPart::renameToDetached(const String & prefix) const { - renameTo(getRelativePathForDetachedPart(prefix)); + renameTo(getRelativePathForDetachedPart(prefix), true); } void IMergeTreeDataPart::makeCloneInDetached(const String & prefix) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f9f1965aa57..f091d8ec519 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -297,7 +297,7 @@ public: String getFullPath() const; void renameToDetached(const String & prefix) const; - virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists = false) const; + virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) const; virtual void makeCloneInDetached(const String & prefix) const; /// Makes full clone of part in detached/ on another disk diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 90a43a61536..98dbe9a6834 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -61,7 +61,7 @@ protected: /// Returns actual column type in part, which can differ from table metadata. NameAndTypePair getColumnFromPart(const NameAndTypePair & required_column) const; - void checkNumberOfColumns(size_t columns_num_to_read) const; + void checkNumberOfColumns(size_t num_columns_to_read) const; /// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size. 
ValueSizeMap avg_value_size_hints; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d0659a2bcdd..ef526552e12 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1318,7 +1318,7 @@ void MergeTreeData::clearOldWriteAheadLogs() auto is_range_on_disk = [&block_numbers_on_disk](Int64 min_block, Int64 max_block) { - auto lower = std::lower_bound(block_numbers_on_disk.begin(), block_numbers_on_disk.end(), std::make_pair(min_block, -1L)); + auto lower = std::lower_bound(block_numbers_on_disk.begin(), block_numbers_on_disk.end(), std::make_pair(min_block, Int64(-1L))); if (lower != block_numbers_on_disk.end() && min_block >= lower->first && max_block <= lower->second) return true; @@ -1919,7 +1919,7 @@ void MergeTreeData::renameTempPartAndReplace( part->info = part_info; part->is_temp = false; part->state = DataPartState::PreCommitted; - part->renameTo(part_name); + part->renameTo(part_name, true); auto part_it = data_parts_indexes.insert(part).first; @@ -3271,7 +3271,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( throw Exception("Part in " + fullPath(disk, dst_part_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); /// If source part is in memory, flush it to disk and clone it already in on-disk format - if (auto * src_part_in_memory = dynamic_cast(src_part.get())) + if (const auto * src_part_in_memory = dynamic_cast(src_part.get())) { const auto & src_relative_data_path = src_part_in_memory->storage.relative_data_path; auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); @@ -3367,7 +3367,7 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path); String backup_part_path = backup_path + relative_data_path + part->relative_path; - if (auto part_in_memory = dynamic_cast(part.get())) + if (const auto * part_in_memory = dynamic_cast(part.get())) part_in_memory->flushToDisk(backup_path + relative_data_path, part->relative_path); else localBackup(part->volume->getDisk(), part->getFullRelativePath(), backup_part_path); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2a11f465805..11c12d47823 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1007,7 +1007,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); if (part) { - if (auto * part_in_memory = dynamic_cast(part.get())) + if (const auto * part_in_memory = dynamic_cast(part.get())) sum_parts_size_in_bytes += part_in_memory->block.bytes(); else sum_parts_size_in_bytes += part->getBytesOnDisk(); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a1e203a9769..9babf9476d1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2896,7 +2896,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, } else { - part->renameTo("detached/" + part_name); + part->renameTo("detached/" + part_name, true); } } catch (...) 
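A side note on the clang-tidy patch above: it removes the default argument from the virtual `renameTo(const String &, bool remove_new_dir_if_exists)` and makes every caller pass the flag explicitly. The patch does not state the motivation, but this matches clang-tidy's `google-default-arguments` warning: default arguments of virtual functions are resolved against the static type of the object expression, so an override can be called with a default it never declared. A minimal, self-contained illustration (hypothetical types, not ClickHouse code):

```cpp
#include <iostream>

struct Base
{
    virtual ~Base() = default;

    /// The default argument lives in the declaration the compiler sees,
    /// i.e. it is chosen by the static type, not by virtual dispatch.
    virtual void renameTo(bool remove_new_dir_if_exists = false) const
    {
        std::cout << "Base: " << remove_new_dir_if_exists << '\n';
    }
};

struct Derived : Base
{
    void renameTo(bool remove_new_dir_if_exists = true) const override
    {
        std::cout << "Derived: " << remove_new_dir_if_exists << '\n';
    }
};

int main()
{
    Derived d;
    const Base & b = d;
    b.renameTo(); /// Virtual dispatch runs Derived::renameTo, but with Base's default: prints "Derived: 0"
    d.renameTo(); /// Prints "Derived: 1"
}
```

Dropping the default and writing `renameTo(path, true)` at every call site removes that ambiguity.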
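Separately, the `CassandraDictionarySource::getSession()` hunk a few patches above caches the driver session in a `std::weak_ptr` and uses double-checked locking, so concurrent streams share one live connection while the cache itself never keeps it open. A minimal sketch of that idiom, with a hypothetical `Connection` type standing in for the driver session (an illustration, not the actual ClickHouse classes):

```cpp
#include <memory>
#include <mutex>

/// Hypothetical stand-in for an expensive-to-open driver session.
struct Connection
{
    void connect() { /* expensive handshake elided */ }
};

class ConnectionCache
{
public:
    std::shared_ptr<Connection> get()
    {
        /// Fast path: some consumer still holds the connection alive.
        if (auto alive = cached.lock())
            return alive;

        std::lock_guard lock(mutex);
        /// Re-check under the lock: another thread may have connected
        /// between our first lock() attempt and acquiring the mutex.
        if (auto alive = cached.lock())
            return alive;

        auto fresh = std::make_shared<Connection>();
        fresh->connect();
        /// Only a weak_ptr is stored, so once the last consumer releases
        /// its shared_ptr the connection is destroyed and the next get()
        /// transparently reconnects.
        cached = fresh;
        return fresh;
    }

private:
    std::mutex mutex;
    std::weak_ptr<Connection> cached;
};
```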
From 64c013610a852211148e5a9ef1694a7fa13c7a28 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 4 Jun 2020 01:03:49 +0300
Subject: [PATCH 0355/2229] fix

---
 src/Dictionaries/CassandraBlockInputStream.cpp                | 2 --
 src/Dictionaries/CassandraBlockInputStream.h                  | 4 ++++
 .../test_dictionaries_all_layouts_and_sources/test.py         | 2 --
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp
index 8fb9eb3f93c..8a14add868e 100644
--- a/src/Dictionaries/CassandraBlockInputStream.cpp
+++ b/src/Dictionaries/CassandraBlockInputStream.cpp
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include "CassandraBlockInputStream.h"
 
@@ -20,7 +19,6 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int TYPE_MISMATCH;
-    extern const int CASSANDRA_INTERNAL_ERROR;
 }
 
 CassandraBlockInputStream::CassandraBlockInputStream(
diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h
index 5208e516a0e..667d686fd31 100644
--- a/src/Dictionaries/CassandraBlockInputStream.h
+++ b/src/Dictionaries/CassandraBlockInputStream.h
@@ -1,6 +1,8 @@
 #pragma once
 
 #include
+
+#if USE_CASSANDRA
 #include
 #include
 #include
@@ -41,3 +43,5 @@ private:
 };
 
 }
+
+#endif
diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py
index 8cd6940d587..0a812ea2a8b 100644
--- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py
+++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py
@@ -225,8 +225,6 @@ def test_simple_dictionaries(started_cluster, fold):
 
     node.query("system reload dictionaries")
 
-    #time.sleep(3000)
-
     queries_with_answers = []
     for dct in simple_dicts:
         for row in data:

From ae7bad368b3595e0e99a858d27bcc2d7950d8ae2 Mon Sep 17 00:00:00 2001
From: Albert Kidrachev
Date: Thu, 4 Jun 2020 01:50:58 +0300
Subject: [PATCH 0356/2229] run optimization for limit >= 1500 and hint for
 comparing ColumnString

---
 src/Columns/ColumnString.cpp                       | 35 +++++++++++++++++++
 src/Columns/ColumnString.h                         |  6 +---
 .../Transforms/PartialSortingTransform.cpp         | 21 +++++------
 3 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 136a30d475a..4067f7acbff 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -260,6 +260,40 @@ ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit)
     return res;
 }
 
+void ColumnString::compareColumn(
+    const IColumn & rhs_, size_t rhs_row_num,
+    PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results,
+    int direction, int) const
+{
+    size_t rows_num = size();
+    size_t row_indexes_size = row_indexes.size();
+
+    if (compare_results.empty())
+        compare_results.resize(rows_num, 0);
+    else if (compare_results.size() != rows_num)
+        throw Exception(
+            "Size of compare_results: " + std::to_string(compare_results.size()) + " doesn't match rows_num: " + std::to_string(rows_num),
+            ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
+
+    const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
+    const auto * rhs_data = rhs.chars.data() + rhs.offsetAt(rhs_row_num);
+    auto rhs_size = rhs.sizeAt(rhs_row_num) - 1;
+
+    size_t cur_row = 0;
+    for (size_t i = 0; i < row_indexes_size; ++i)
+    {
+        UInt64 index = row_indexes[i];
+        compare_results[index] = direction * memcmpSmallAllowOverflow15(chars.data() + offsetAt(index), sizeAt(index) - 1, rhs_data, rhs_size);
+        if (compare_results[index] == 0)
+        {
+            row_indexes[cur_row] = index;
+            ++cur_row;
+        }
+    }
+
+    row_indexes.resize(cur_row);
+}
 
 template <bool positive>
 struct ColumnString::less
@@ -268,6 +302,7 @@ struct ColumnString::less
     explicit less(const ColumnString & parent_) : parent(parent_) {}
     bool operator()(size_t lhs, size_t rhs) const
     {
         int res = memcmpSmallAllowOverflow15(
             parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
             parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index fa3ee96105f..1d0ec0116c1 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -222,11 +222,7 @@ public:
 
     void compareColumn(const IColumn & rhs, size_t rhs_row_num,
                        PaddedPODArray<UInt64> & row_indexes, PaddedPODArray<Int8> & compare_results,
-                       int direction, int nan_direction_hint) const override
-    {
-        return compareImpl(assert_cast<const ColumnString &>(rhs), rhs_row_num, row_indexes,
-                           compare_results, direction, nan_direction_hint);
-    }
+                       int direction, int nan_direction_hint) const override;
 
     /// Variant of compareAt for string comparison with respect of collation.
     int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp
index d42c8c4233b..d461f968f2e 100644
--- a/src/Processors/Transforms/PartialSortingTransform.cpp
+++ b/src/Processors/Transforms/PartialSortingTransform.cpp
@@ -65,7 +65,7 @@ size_t getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_
     }
 
     for (size_t i = 0; i != rows_num; ++i)
-        filtered_count -= filter[i] = (compare_results[i] <= 0);
+        filtered_count -= (filter[i] = (compare_results[i] >= 0));
 
     return filtered_count;
 }
@@ -80,7 +80,6 @@ void PartialSortingTransform::transform(Chunk & chunk)
 
     ColumnRawPtrs block_columns;
     UInt64 rows_num = block.rows();
-    auto block_limit = limit;
 
     /** If we've saved columns from previous blocks we could filter all rows from the current block
      * which are unnecessary for sortBlock(...) because they obviously won't be in the top LIMIT rows.
      */
     if (!threshold_block_columns.empty())
     {
         IColumn::Filter filter;
         size_t filtered_count = getFilterMask(block_columns, threshold_block_columns, limit - 1, description, rows_num, filter);
 
-        if (filtered_count == rows_num)
-            return;
-
-        if (rows_num - filtered_count < block_limit)
+        if (filtered_count)
         {
-            block_limit = rows_num - filtered_count;
-/*
+            auto expected_size = rows_num - filtered_count;
+            size_t i = 0;
+            Columns new_columns(block.columns());
             for (auto & column : block.getColumns())
             {
-                column = column->filter(filter, rows_num - filtered_count);
+                new_columns[i++] = column->filter(filter, expected_size);
             }
-*/
+            block.setColumns(new_columns);
         }
     }
 
-    sortBlock(block, description, block_limit);
+    sortBlock(block, description, limit);
 
     if (!threshold_block_columns.empty())
     {
@@ -116,7 +113,7 @@
 
     /** If this is the first processed block or (limit - 1)'th row of the current block
      * is less than current threshold row then we could update threshold.
*/ - if (limit && limit <= block.rows() && + if (1500 <= limit && limit <= block.rows() && (threshold_block_columns.empty() || less(block_columns, limit - 1, threshold_block_columns, limit - 1, description))) { threshold_block = block.cloneWithColumns(block.getColumns()); From b312ac9786be81f59779372590d26078b699a8bf Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Jun 2020 01:52:21 +0300 Subject: [PATCH 0357/2229] in-memory parts: fix columns sizes --- src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 10 ++++++++-- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++-- src/Storages/MergeTree/MergedBlockOutputStream.cpp | 2 +- .../0_stateless/01130_in_memory_parts.reference | 3 +++ tests/queries/0_stateless/01130_in_memory_parts.sql | 3 +++ 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index fac7e4982bb..0d930eba4e8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -113,9 +113,15 @@ void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool relative_path = new_relative_path; } -void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName & /*each_columns_size*/, ColumnSize & /*total_size*/) const +/// Calculates uncompressed sizes in memory. +void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const { - // throw Exception("calculateEachColumnSizesOnDisk of in memory part", ErrorCodes::NOT_IMPLEMENTED); + auto it = checksums.files.find("data.bin"); + if (it != checksums.files.end()) + total_size.data_uncompressed += it->second.uncompressed_size; + + for (const auto & column : columns) + each_columns_size[column.name].data_uncompressed += block.getByName(column.name).column->byteSize(); } } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 6dd48dc2bba..ad70a541611 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -33,8 +33,8 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, min_rows_for_wide_part, 0, "Minimal number of rows to create part in wide format instead of compact", 0) \ M(SettingUInt64, min_bytes_for_compact_part, 0, "Minimal uncompressed size in bytes to create part in compact format instead of saving it in RAM", 0) \ M(SettingUInt64, min_rows_for_compact_part, 0, "Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ - M(SettingBool, in_memory_parts_enable_wal, 1, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ - M(SettingBool, in_memory_parts_insert_sync, 0, "", 0) \ + M(SettingBool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ + M(SettingBool, in_memory_parts_insert_sync, false, "If true and in-memory parts are enabled, insert will wait while part will persist on disk in result of merge", 0) \ M(SettingUInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ \ /** Merge settings. 
*/ \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index de5d8599fb7..af083901985 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -112,7 +112,7 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); - // new_part->calculateColumnsSizesOnDisk(); // TODO: Fix + new_part->calculateColumnsSizesOnDisk(); } void MergedBlockOutputStream::finalizePartOnDisk( diff --git a/tests/queries/0_stateless/01130_in_memory_parts.reference b/tests/queries/0_stateless/01130_in_memory_parts.reference index ae32d3ea7a3..4a22f17c644 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts.reference @@ -1,4 +1,7 @@ +system.parts InMemory 2 +1 +1 Simple selects 0 0 1 1 diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql index d6471cfb35f..21665faefd6 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts.sql @@ -4,7 +4,10 @@ CREATE TABLE in_memory (a UInt32, b UInt32) SETTINGS min_rows_for_compact_part = 1000, min_rows_for_compact_part = 1000; INSERT INTO in_memory SELECT number, number % 3 FROM numbers(100); +SELECT 'system.parts'; SELECT DISTINCT part_type, marks FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; +SELECT DISTINCT data_uncompressed_bytes > 0 FROM system.parts WHERE database = currentDatabase() AND table = 'in_memory' AND active; +SELECT DISTINCT column_data_uncompressed_bytes > 0 FROM system.parts_columns WHERE database = currentDatabase() AND table = 'in_memory' AND active; SELECT 'Simple selects'; From 5624066195f94f78a876926ff88db0f1aad4ff72 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jun 2020 06:14:09 +0000 Subject: [PATCH 0358/2229] Fix producer --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 2 + src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 +- .../WriteBufferToRabbitMQProducer.cpp | 49 ++----------------- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 -- 4 files changed, 8 insertions(+), 50 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 17e4db3fb89..8e867db6de9 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -46,6 +46,8 @@ void RabbitMQBlockOutputStream::write(const Block & block) if (buffer) buffer->flush(); + + storage.pingConnection(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 5aa77a9a732..635d53e6cf0 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -54,6 +54,8 @@ public: const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; + const void pingConnection() { connection.heartbeat(); } + protected: StorageRabbitMQ( const StorageID & table_id_, @@ -88,7 +90,7 @@ private: event_base * evbase; RabbitMQHandler eventHandler; - AMQP::TcpConnection connection; + AMQP::TcpConnection connection; /// Connection for all consumers Poco::Semaphore semaphore; std::mutex mutex; diff --git 
a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 86d3b32925a..e61a8e1ccd8 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -16,7 +16,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 5000 /// It is important to keep it low in order not to kill consumers + Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( @@ -113,38 +113,11 @@ void WriteBufferToRabbitMQProducer::flush() /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name * and makes it visible from current producer_channel. */ - producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) .onSuccess([&]() { exchange_declared = true; - /// The case that should not normally happen: message was not delivered to queue (queue ttl exceeded) / not forwareded to consumer - if (flush_returned) - { - /// Needed to avoid data race because two different threads may access this vector - std::lock_guard lock(mutex); - - LOG_TRACE(log, "Redelivering returned messages"); - for (auto & payload : returned) - { - next_queue = next_queue % num_queues + 1; - - if (bind_by_id || hash_exchange) - { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); - } - else - { - producer_channel->publish(exchange_name, routing_key, payload); - } - - --message_counter; - } - - returned.clear(); - } - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it * each time we publish. 
Declaring it once and then publishing without wrapping inside onSuccess callback leads to @@ -159,27 +132,11 @@ void WriteBufferToRabbitMQProducer::flush() if (bind_by_id || hash_exchange) { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload, AMQP::mandatory || AMQP::immediate) - .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) - { - flush_returned = true; - - /// Needed to avoid data race because two different threads may access this variable - std::lock_guard lock(mutex); - returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); - }); + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); } else { - producer_channel->publish(exchange_name, routing_key, payload, AMQP::mandatory || AMQP::immediate) - .onReturned([&](const AMQP::Message & message, int16_t /* code */, const std::string & /* description */) - { - flush_returned = true; - - /// Needed to avoid data race because two different threads may access this vector - std::lock_guard lock(mutex); - returned.emplace_back(std::string(message.body(), message.body() + message.bodySize())); - }); + producer_channel->publish(exchange_name, routing_key, payload); } --message_counter; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 146be0c5796..c61a76a3e74 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -54,11 +54,8 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; String channel_id; - std::atomic flush_returned = false; - std::mutex mutex; Messages messages; - Messages returned; Poco::Logger * log; const std::optional delim; From 972611e31b3c6f1ad18f94898372590eafd8e509 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Jun 2020 06:22:53 +0000 Subject: [PATCH 0359/2229] Fix consumer --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 54 +++++++++++++------ src/Storages/RabbitMQ/RabbitMQHandler.h | 4 +- .../ReadBufferFromRabbitMQConsumer.cpp | 50 +++++++++++++++-- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 3 ++ 4 files changed, 91 insertions(+), 20 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 775db87a1f8..1a3ede79420 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -4,6 +4,13 @@ namespace DB { +enum +{ + Lock_timeout = 50, + Max_threads_to_pass = 10 +}; + + RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : LibEventHandler(evbase_), evbase(evbase_), @@ -16,10 +23,9 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes { LOG_ERROR(log, "Library error report: {}", message); - if (connection->closed()) + if (!connection->usable() || !connection->ready()) { - std::cerr << "Connection lost, no recovery is possible"; - throw; + LOG_ERROR(log, "Connection lost completely"); } stop(); @@ -28,24 +34,42 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes void RabbitMQHandler::start(std::atomic & check_param) { - /* The object of this class is shared between concurrent consumers, who call this method repeatedly at the same time. - * But the loop should not be attempted to start if it is already running. Also note that the loop is blocking to - * the thread that has started it. 
+ /* The object of this class is shared between concurrent consumers (who share the same connection and thus the same + * event loop). But the loop must not be started if it is already running. */ - std::lock_guard lock(mutex); + if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) + { + /* The callback, which changes this variable, could have already been activated by another thread while we waited + * for the mutex to unlock (as it runs all active events on the connection). This means that there is no need to + * start the event loop again. + */ + if (!check_param) + { + event_base_loop(evbase, EVLOOP_NONBLOCK); + } - /* The callback, which changes this variable, could have already been activated by another thread while we waited for the - * mutex to unlock (as it runs all active events on the connection). This means that there is no need to start event loop again. - */ - if (check_param) - return; - - event_base_loop(evbase, EVLOOP_NONBLOCK); + mutex_before_event_loop.unlock(); + } + else + { + if (++count_passed == Max_threads_to_pass) + { + /* The event loop blocks the thread that started it, and it is bad to keep one single thread blocked while the loop + * runs until there are no active events: there may be too many of them for one thread to stay blocked for so long. + */ + stop(); + count_passed = 0; + } + } } void RabbitMQHandler::stop() { - event_base_loopbreak(evbase); + if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + { + event_base_loopbreak(evbase); + mutex_before_loop_stop.unlock(); + } } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 117f80d26f8..39fccd4dace 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -26,7 +26,9 @@ private: event_base * evbase; Poco::Logger * log; - std::mutex mutex; + size_t count_passed = 0; + std::timed_mutex mutex_before_event_loop; + std::timed_mutex mutex_before_loop_stop; }; } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 27c5ab800f0..f8259ce8c4c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,6 +13,11 @@ namespace DB { +enum +{ + Received_max_to_stop_loop = 10000 // Explained below +}; + ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -117,7 +122,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) std::atomic bindings_created = false, bindings_error = false; - consumer_channel->declareQueue(AMQP::exclusive) + consumer_channel->declareQueue(AMQP::durable) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); @@ -145,6 +150,12 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onSuccess([&] { bindings_created = true; + + /// Unblock current thread so that it does not continue to execute all callbacks on the connection + if (++count_bound_queues == num_queues) + { + stopEventLoop(); + } }) .onError([&](const char * message) { @@ -196,6 +207,12 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) consumer_created = true; LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name); + + /// Unblock current thread so that it does
not continue to execute all callbacks on the connection + if (++count_subscribed == queues.size()) + { + stopEventLoop(); + } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -207,15 +224,34 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (row_delimiter != '\0') message_received += row_delimiter; + //LOG_TRACE(log, "Consumer {} received a message", channel_id); + + bool stop_loop = false; + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). - std::lock_guard lock(mutex); - received.push_back(message_received); + { + std::lock_guard lock(mutex); + received.push_back(message_received); + + /* As the event loop blocks the thread that started it, and a single thread should not stay blocked while + * executing all callbacks on the connection (not only its own), there must be some point at which the loop is stopped. + */ + if (received.size() >= Received_max_to_stop_loop) + { + stop_loop = true; + } + } + + if (stop_loop) + { + stopEventLoop(); + } } }) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer failed: {}", message); + LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); while (!consumer_created && !consumer_error) @@ -226,6 +262,12 @@ } +void ReadBufferFromRabbitMQConsumer::stopEventLoop() +{ + eventHandler.stop(); +} + + void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param) { eventHandler.start(check_param); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 31babc5033f..55adb39bdce 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -64,6 +64,8 @@ private: Queues queues; bool subscribed = false; String current_exchange_name; + size_t count_subscribed = 0; + size_t count_bound_queues = 0; Messages received; Messages messages; @@ -77,6 +79,7 @@ private: void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); void startEventLoop(std::atomic & check_param); + void stopEventLoop(); }; } From be95da34b38560dba495fa644b1cb3fda84cb1c7 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Thu, 4 Jun 2020 09:39:08 +0300 Subject: [PATCH 0360/2229] fix --- src/Columns/ColumnString.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 4067f7acbff..c47827b2917 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -302,7 +302,6 @@ struct ColumnString::less explicit less(const ColumnString & parent_) : parent(parent_) {} bool operator()(size_t lhs, size_t rhs) const { - ++countLess; int res = memcmpSmallAllowOverflow15( parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); From 6f0e754f1e6bfd5753c04b1f81b231eece4f82fa Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 4 Jun 2020 11:57:01 +0300 Subject: [PATCH 0361/2229] try to fix the glibc compatibility --- .gitmodules | 2 +- cmake/find/sentry.cmake | 9 ++++----- contrib/CMakeLists.txt | 2 -- contrib/sentry-native | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4175eb223db..ff4e644f657 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4
+162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/getsentry/sentry-native.git + url = https://github.com/blinkov/sentry-native.git diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 08f712d5574..e1cd28c1d59 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -7,15 +7,14 @@ endif () if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (USE_SENTRY "Use Sentry" ON) - - set (SENTRY_TRANSPORT "url") - set (SENTRY_BACKEND "none") set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) - if (NOT_UNBUNDLED) + set (SENTRY_TRANSPORT "curl" CACHE STRING "") + set (SENTRY_BACKEND "none" CACHE STRING "") + set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "") + if (OS_LINUX AND NOT_UNBUNDLED) set (BUILD_SHARED_LIBS OFF) endif() - message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") include_directories("${SENTRY_INCLUDE_DIR}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index d9af4bc0ac5..ea13969db16 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -321,8 +321,6 @@ if (USE_FASTOPS) endif() if (USE_SENTRY) - set (SENTRY_BACKEND "none") - set (SENTRY_TRANSPORT "curl") add_subdirectory (sentry-native) endif() diff --git a/contrib/sentry-native b/contrib/sentry-native index b48c21d2440..18835dd8c49 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit b48c21d244092658d6e2d1bb243b705fd968b9f7 +Subproject commit 18835dd8c496f22859bd6a1a7054a2bd4762e7ed From f54f9481621b8d9deb8f36f3333220c3be725347 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 15:17:35 +0300 Subject: [PATCH 0362/2229] Added DelayedSource. 
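A minimal usage sketch of the new helper (a hypothetical caller, not part of this patch): the creator callback runs only when the main output port is first pulled, so the expensive work is deferred to execution time. The names `header` and `executor` below are placeholders; createDelayedPipe and createRemoteSourcePipe are the helpers this patch introduces.

    Pipe makeLazyRemotePipe(const Block & header, RemoteQueryExecutorPtr executor)
    {
        return createDelayedPipe(header, [=]() -> Pipe
        {
            /// Executed lazily, inside DelayedSource::work(), on first pull of the main port.
            return createRemoteSourcePipe(executor,
                /* add_aggregation_info = */ false,
                /* add_totals = */ false,
                /* add_extremes = */ false);
        });
    }

After the callback returns, DelayedSource::expandPipeline() splices the created processors into the running pipeline and connects the pipe's main/totals/extremes outputs to freshly added inputs.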
--- .../ClusterProxy/SelectStreamFactory.cpp | 53 +++----- src/Processors/Sources/DelayedSource.cpp | 113 ++++++++++++++++++ src/Processors/Sources/DelayedSource.h | 26 +++- src/Processors/Sources/RemoteSource.cpp | 59 ++++++--- src/Processors/Sources/RemoteSource.h | 14 ++- 5 files changed, 205 insertions(+), 60 deletions(-) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 5d41b0e87ce..bfa6fae0977 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -13,9 +11,8 @@ #include #include #include -#include -#include -#include +#include +#include namespace ProfileEvents { @@ -118,13 +115,13 @@ void SelectStreamFactory::createForShard( const SelectQueryInfo &, Pipes & res) { - bool force_add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - bool add_totals_port = false; - bool add_extremes_port = false; + bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + bool add_totals = false; + bool add_extremes = false; if (processed_stage == QueryProcessingStage::Complete) { - add_totals_port = query_ast->as().group_by_with_totals; - add_extremes_port = context.getSettingsRef().extremes; + add_totals = query_ast->as().group_by_with_totals; + add_extremes = context.getSettingsRef().extremes; } auto modified_query_ast = query_ast->clone(); @@ -140,20 +137,13 @@ void SelectStreamFactory::createForShard( auto emplace_remote_stream = [&]() { - auto stream = std::make_shared( + auto remote_query_executor = std::make_shared( shard_info.pool, modified_query, header, context, nullptr, throttler, scalars, external_tables, processed_stage); - stream->setPoolMode(PoolMode::GET_MANY); + remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - stream->setMainTable(main_table); + remote_query_executor->setMainTable(main_table); - auto source = std::make_shared(std::move(stream), force_add_agg_info); - - if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes)); }; const auto & settings = context.getSettingsRef(); @@ -246,8 +236,8 @@ void SelectStreamFactory::createForShard( auto lazily_create_stream = [ pool = shard_info.pool, shard_num = shard_info.shard_num, modified_query, header = header, modified_query_ast, context, throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, - stage = processed_stage, local_delay]() - -> BlockInputStreamPtr + stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes]() + -> Pipe { auto current_settings = context.getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( @@ -277,8 +267,7 @@ void SelectStreamFactory::createForShard( } if (try_results.empty() || local_delay < max_remote_delay) - return std::make_shared( - createLocalStream(modified_query_ast, header, context, stage)); + return createLocalStream(modified_query_ast, header, context, stage).getPipe(); else { std::vector connections; @@ -286,20 +275,14 @@ void SelectStreamFactory::createForShard( for (auto & try_result : try_results) connections.emplace_back(std::move(try_result.entry)); - return std::make_shared( 
+ auto remote_query_executor = std::make_shared( std::move(connections), modified_query, header, context, nullptr, throttler, scalars, external_tables, stage); + + return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes); } }; - auto lazy_stream = std::make_shared("LazyShardWithLocalReplica", header, lazily_create_stream); - auto source = std::make_shared(std::move(lazy_stream), force_add_agg_info); - - if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createDelayedPipe(header, lazily_create_stream)); } else emplace_remote_stream(); diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index e5931c75489..9f7f26ab141 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -1,6 +1,119 @@ #include +#include "NullSource.h" namespace DB { +DelayedSource::DelayedSource(const Block & header, Creator processors_creator) + : IProcessor({}, OutputPorts(3, header)) + , creator(std::move(processors_creator)) +{ +} + +IProcessor::Status DelayedSource::prepare() +{ + /// At first, wait for main input is needed and expand pipeline. + if (inputs.empty()) + { + auto & first_output = outputs.front(); + + /// If main port was finished before callback was called, stop execution. + if (first_output.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (!first_output.isNeeded()) + return Status::PortFull; + + /// Call creator callback to get processors. + if (processors.empty()) + return Status::Ready; + + return Status::ExpandPipeline; + } + + /// Process ports in order: main, totals, extremes + auto output = outputs.begin(); + for (auto & input : inputs) + { + if (output->isFinished()) + { + input.close(); + continue; + } + + if (!output->isNeeded()) + return Status::PortFull; + + if (input.isFinished()) + { + output->finish(); + continue; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::PortFull; + + output->pushData(input.pullData(true)); + return Status::PortFull; + } + + return Status::Finished; +} + +void DelayedSource::work() +{ + auto pipe = creator(); + + main_output = &pipe.getPort(); + totals_output = pipe.getTotalsPort(); + extremes_output = pipe.getExtremesPort(); + + processors = std::move(pipe).detachProcessors(); + + if (!totals_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + totals_output = &processors.back()->getOutputs().back(); + } + + if (!extremes_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + extremes_output = &processors.back()->getOutputs().back(); + } +} + +Processors DelayedSource::expandPipeline() +{ + /// Add new inputs. They must have the same header as output. + for (const auto & output : {main_output, totals_output, extremes_output}) + { + inputs.emplace_back(outputs.front().getHeader(), this); + /// Connect checks that header is same for ports. + connect(*output, inputs.back()); + inputs.back().setNeeded(); + } + + /// Executor will check that all processors are connected. 
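+ /// Ownership of the lazily created processors is handed over to the executor here; + /// after the move this processor keeps only the raw port pointers saved in work().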
+ return std::move(processors); +} + +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator) +{ + auto source = std::make_shared(header, std::move(processors_creator)); + + Pipe pipe(&source->getPort(DelayedSource::Main)); + pipe.setTotalsPort(&source->getPort(DelayedSource::Totals)); + pipe.setExtremesPort(&source->getPort(DelayedSource::Extremes)); + + pipe.addProcessors({std::move(source)}); + return pipe; +} + } diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index 28cad6bc816..31ec1e054fe 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -1,23 +1,45 @@ #pragma once #include +#include namespace DB { +/// DelayedSource delays pipeline calculation until it starts execution. +/// It accepts callback which creates a new pipe. +/// +/// First time when DelayedSource's main output port needs data, callback is called. +/// Then, DelayedSource expands pipeline: adds new inputs and connects pipe with it. +/// Then, DelayedSource just move data from inputs to outputs until finished. +/// +/// It main output port of DelayedSource is never needed, callback won't be called. class DelayedSource : public IProcessor { public: - using Creator = std::function; + using Creator = std::function; - DelayedSource(Block header, Creator processors_creator); + DelayedSource(const Block & header, Creator processors_creator); String getName() const override { return "Delayed"; } Status prepare() override; void work() override; + Processors expandPipeline() override; + + enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; + OutputPort & getPort(PortKind kind) { return *std::next(outputs.begin(), kind); } private: Creator creator; + Processors processors; + + /// Outputs from returned pipe. + OutputPort * main_output = nullptr; + OutputPort * totals_output = nullptr; + OutputPort * extremes_output = nullptr; }; +/// Creates pipe from DelayedSource. +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator); + } diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 090f3743709..2f76e0c87d4 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -66,42 +66,67 @@ void RemoteSource::onCancel() } -RemoteTotalsSource::RemoteTotalsSource(Block header) : ISource(std::move(header)) {} +RemoteTotalsSource::RemoteTotalsSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + RemoteTotalsSource::~RemoteTotalsSource() = default; Chunk RemoteTotalsSource::generate() { - /// Check use_count instead of comparing with nullptr just in case. - /// setQueryExecutor() may be called from other thread, but there shouldn't be any race, - /// because totals end extremes are always read after main data. 
- if (query_executor.use_count()) + if (auto block = query_executor->getTotals()) { - if (auto block = query_executor->getTotals()) - { - UInt64 num_rows = block.rows(); - return Chunk(block.getColumns(), num_rows); - } + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); } return {}; } -RemoteExtremesSource::RemoteExtremesSource(Block header) : ISource(std::move(header)) {} +RemoteExtremesSource::RemoteExtremesSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + RemoteExtremesSource::~RemoteExtremesSource() = default; Chunk RemoteExtremesSource::generate() { - if (query_executor.use_count()) + if (auto block = query_executor->getExtremes()) { - if (auto block = query_executor->getExtremes()) - { - UInt64 num_rows = block.rows(); - return Chunk(block.getColumns(), num_rows); - } + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); } return {}; } + +Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes) +{ + Pipe pipe(std::make_shared(query_executor, add_aggregation_info)); + + if (add_totals) + { + auto totals_source = std::make_shared(query_executor); + pipe.setTotalsPort(&totals_source->getPort()); + pipe.addProcessors({std::move(totals_source)}); + } + + if (add_extremes) + { + auto extremes_source = std::make_shared(query_executor); + pipe.setExtremesPort(&extremes_source->getPort()); + pipe.addProcessors({std::move(extremes_source)}); + } + + return pipe; +} + } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 9cc3ea9c459..85ac1d756c8 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -45,13 +46,11 @@ private: class RemoteTotalsSource : public ISource { public: - explicit RemoteTotalsSource(Block header); + explicit RemoteTotalsSource(RemoteQueryExecutorPtr executor); ~RemoteTotalsSource(); String getName() const override { return "RemoteTotals"; } - void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } - protected: Chunk generate() override; @@ -63,13 +62,11 @@ private: class RemoteExtremesSource : public ISource { public: - explicit RemoteExtremesSource(Block header); + explicit RemoteExtremesSource(RemoteQueryExecutorPtr executor); ~RemoteExtremesSource(); String getName() const override { return "RemoteExtremes"; } - void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } - protected: Chunk generate() override; @@ -77,4 +74,9 @@ private: RemoteQueryExecutorPtr query_executor; }; +/// Create pipe with remote sources. 
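+/// add_aggregation_info keeps AggregatedChunkInfo (the two-level aggregation bucket number) +/// on chunks, which matters for queries executed till WithMergeableState; +/// add_totals / add_extremes attach RemoteTotalsSource / RemoteExtremesSource to the pipe.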
+Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes); + } From 4dae169216c7721739bf843780d777af4c8bae16 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jun 2020 16:03:06 +0300 Subject: [PATCH 0363/2229] fix gcc warnings --- src/Dictionaries/CassandraBlockInputStream.cpp | 4 ++-- src/Dictionaries/CassandraBlockInputStream.h | 2 +- src/Dictionaries/CassandraDictionarySource.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 8a14add868e..4f6a62a0eea 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -35,7 +35,7 @@ CassandraBlockInputStream::CassandraBlockInputStream( cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); } -void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const +void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) { switch (type) { @@ -202,7 +202,7 @@ void CassandraBlockInputStream::assertTypes(const CassResultPtr & result) size_t column_count = cass_result_column_count(result); for (size_t i = 0; i < column_count; ++i) { - CassValueType expected; + CassValueType expected = CASS_VALUE_TYPE_UNKNOWN; String expected_text; /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 667d686fd31..3b0e583e3ad 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -30,7 +30,7 @@ private: using ValueType = ExternalResultDescription::ValueType; Block readImpl() override; - void insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const; + static void insertValue(IColumn & column, ValueType type, const CassValue * cass_value); void assertTypes(const CassResultPtr & result); CassSessionShared session; diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 18db66b94c2..c0a4e774d23 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -9,6 +9,7 @@ #include "ExternalQueryBuilder.h" #include #include +#include namespace DB { From dcf9b9ef39f6106c69feb69d76325e4e9168eb6b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:08:09 +0300 Subject: [PATCH 0364/2229] Fix build. 
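Presumably two of the warnings were: `expected` was assigned only inside a switch over the value type, so gcc could not prove it is initialized on every path (-Wmaybe-uninitialized); and insertValue() never touches member state, so it can be static. A reduced sketch of the first pattern (illustrative only, not code from this patch):

    // Before: gcc warns that 'expected' may be used uninitialized.
    CassValueType expected;
    switch (type)
    {
        case ValueType::vtUInt8: expected = CASS_VALUE_TYPE_TINY_INT; break;
        // ... not every path assigns ...
    }
    // expected is then compared against the actual column type.

    // After: start from a defined sentinel value.
    CassValueType expected = CASS_VALUE_TYPE_UNKNOWN;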
--- src/Processors/Sources/RemoteSource.cpp | 2 ++ src/Processors/Sources/RemoteSource.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 2f76e0c87d4..29946e7322c 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -79,6 +79,7 @@ Chunk RemoteTotalsSource::generate() if (auto block = query_executor->getTotals()) { UInt64 num_rows = block.rows(); +std::cerr << "Got toals " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } @@ -99,6 +100,7 @@ Chunk RemoteExtremesSource::generate() if (auto block = query_executor->getExtremes()) { UInt64 num_rows = block.rows(); +std::cerr << "Got extrees " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 85ac1d756c8..0b4405a0905 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -18,7 +18,7 @@ public: /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); - ~RemoteSource(); + ~RemoteSource() override; String getName() const override { return "Remote"; } @@ -47,7 +47,7 @@ class RemoteTotalsSource : public ISource { public: explicit RemoteTotalsSource(RemoteQueryExecutorPtr executor); - ~RemoteTotalsSource(); + ~RemoteTotalsSource() override; String getName() const override { return "RemoteTotals"; } @@ -63,7 +63,7 @@ class RemoteExtremesSource : public ISource { public: explicit RemoteExtremesSource(RemoteQueryExecutorPtr executor); - ~RemoteExtremesSource(); + ~RemoteExtremesSource() override; String getName() const override { return "RemoteExtremes"; } From b419d73880776f831bdf0dcb3ce8cfa3d4ab9642 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:16:58 +0300 Subject: [PATCH 0365/2229] Fix build. --- src/DataStreams/RemoteQueryExecutor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index ce6c46d5a2a..0db0e0218be 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -61,8 +61,8 @@ public: void cancel(); /// Get totals and extremes if any. - Block getTotals() const { return std::move(totals); } - Block getExtremes() const { return std::move(extremes); } + Block getTotals() { return std::move(totals); } + Block getExtremes() { return std::move(extremes); } /// Set callback for progress. It will be called on Progress packet. void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } From 563fe4ea359295f9aef30abb234ecc150483f3fe Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:45:23 +0300 Subject: [PATCH 0366/2229] Fix DelayedSource. 
--- src/Processors/Sources/DelayedSource.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 9f7f26ab141..267eb78f77b 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -38,25 +38,25 @@ IProcessor::Status DelayedSource::prepare() /// Process ports in order: main, totals, extremes auto output = outputs.begin(); - for (auto & input : inputs) + for (auto input = inputs.begin(); input != inputs.end(); ++input, ++output) { if (output->isFinished()) { - input.close(); + input->close(); continue; } if (!output->isNeeded()) return Status::PortFull; - if (input.isFinished()) + if (input->isFinished()) { output->finish(); continue; } - input.setNeeded(); - if (!input.hasData()) + input->setNeeded(); + if (!input->hasData()) return Status::PortFull; output->pushData(input.pullData(true)); From 18516ba09f9f51156fb6cc8400e91ec426ab278b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:46:27 +0300 Subject: [PATCH 0367/2229] Fix DelayedSource. --- src/Processors/Sources/DelayedSource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 267eb78f77b..42a33d00196 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -59,7 +59,7 @@ IProcessor::Status DelayedSource::prepare() if (!input->hasData()) return Status::PortFull; - output->pushData(input.pullData(true)); + output->pushData(input->pullData(true)); return Status::PortFull; } From 1c982d00e38cc04d3cf83a612fb7c9bae7a662b3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jun 2020 16:59:12 +0300 Subject: [PATCH 0368/2229] try fix sync --- src/Dictionaries/CassandraHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 70b38acf15c..8a00e372c96 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -5,7 +5,7 @@ #endif #if USE_CASSANDRA -#include +#include // Y_IGNORE #include #include From 60b86f524fff777d4869777d22519737cd00291e Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Thu, 4 Jun 2020 20:10:52 +0300 Subject: [PATCH 0369/2229] add perf-test and fix --- src/Processors/Transforms/PartialSortingTransform.cpp | 2 +- tests/performance/string_sort.xml | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index d461f968f2e..1cfd70546ae 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -65,7 +65,7 @@ size_t getFilterMask(const ColumnRawPtrs & lhs, const ColumnRawPtrs & rhs, size_ } for (size_t i = 0; i != rows_num; ++i) - filtered_count -= (filter[i] = (compare_results[i] >= 0)); + filtered_count -= (filter[i] = (compare_results[i] <= 0)); return filtered_count; } diff --git a/tests/performance/string_sort.xml b/tests/performance/string_sort.xml index 71b56bdb9d6..1d528f39428 100644 --- a/tests/performance/string_sort.xml +++ b/tests/performance/string_sort.xml @@ -32,6 +32,11 @@ + + + + + From 31ad5d7e5d224ba1df8e33f6a14a93e1100e70b8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 23:42:03 +0300 Subject: [PATCH 0370/2229] Remove 
debug output. --- src/Processors/Sources/RemoteSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 29946e7322c..c6fc45d2296 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -100,7 +100,6 @@ Chunk RemoteExtremesSource::generate() if (auto block = query_executor->getExtremes()) { UInt64 num_rows = block.rows(); -std::cerr << "Got extrees " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } From d87b4746656bec285ac098e40457b690cdf5851e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 23:42:59 +0300 Subject: [PATCH 0371/2229] Remove debug output. --- src/Processors/Sources/RemoteSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index c6fc45d2296..2f76e0c87d4 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -79,7 +79,6 @@ Chunk RemoteTotalsSource::generate() if (auto block = query_executor->getTotals()) { UInt64 num_rows = block.rows(); -std::cerr << "Got toals " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } From 61cf6fe711829962ece84af70c2d2caa29635330 Mon Sep 17 00:00:00 2001 From: Albert Kidrachev Date: Fri, 5 Jun 2020 06:08:37 +0300 Subject: [PATCH 0372/2229] add perftest --- tests/performance/order_by_decimals.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/performance/order_by_decimals.xml b/tests/performance/order_by_decimals.xml index 4889137865d..fda201f9ed1 100644 --- a/tests/performance/order_by_decimals.xml +++ b/tests/performance/order_by_decimals.xml @@ -7,9 +7,15 @@ SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC FORMAT Null + SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 300 + SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 1500 + SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 2000 + SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 5000 + SELECT toInt32(number) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 10000 SELECT toDecimal32(number, 0) AS n FROM numbers(1000000) ORDER BY n FORMAT Null SELECT toDecimal32(number, 0) AS n FROM numbers(1000000) ORDER BY n DESC FORMAT Null + SELECT toDecimal64(number, 8) AS n FROM numbers(1000000) ORDER BY n DESC LIMIT 5000 SELECT toDecimal64(number, 8) AS n FROM numbers(1000000) ORDER BY n DESC FORMAT Null SELECT toDecimal128(number, 10) AS n FROM numbers(1000000) ORDER BY n DESC FORMAT Null From d2052dd1d4b1f6192d9b7283dec50c3ec3736ee7 Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Fri, 5 Jun 2020 17:13:13 +0800 Subject: [PATCH 0373/2229] show clusters --- .../InterpreterShowTablesQuery.cpp | 27 +++++++++++++++ src/Parsers/ASTShowTablesQuery.cpp | 19 ++++++++++- src/Parsers/ASTShowTablesQuery.h | 5 ++- src/Parsers/ParserShowTablesQuery.cpp | 34 +++++++++++++++++++ src/Parsers/ParserShowTablesQuery.h | 2 +- .../0_stateless/01293_show_create_cluster.sql | 3 ++ 6 files changed, 87 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01293_show_create_cluster.sql diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 3660a41c474..a925466f72b 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ 
b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -33,6 +34,32 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.databases) return "SELECT name FROM system.databases"; + /// SHOW CLUSTER/CLUSTERS + if (query.clusters) + { + std::stringstream rewritten_query; + rewritten_query << "SELECT cluster FROM system.clusters"; + + if (!query.like.empty()) + { + rewritten_query << " WHERE cluster " << (query.not_like ? "NOT " : "") << "LIKE " << std::quoted(query.like, '\''); + } + + if (query.limit_length) + rewritten_query << " LIMIT " << query.limit_length; + + return rewritten_query.str(); + } + else if (query.cluster) + { + std::stringstream rewritten_query; + rewritten_query << "SELECT * FROM system.clusters"; + + rewritten_query << " WHERE cluster = " << std::quoted(query.cluster_str, '\''); + + return rewritten_query.str(); + } + if (query.temporary && !query.from.empty()) throw Exception("The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`", ErrorCodes::SYNTAX_ERROR); diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 82b773ea70a..39904061200 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -2,7 +2,6 @@ #include #include - namespace DB { @@ -20,6 +19,24 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); } + else if (clusters) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTERS" << (settings.hilite ? hilite_none : ""); + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << (not_like ? " NOT" : "") << " LIKE " << (settings.hilite ? hilite_none : "") + << std::quoted(like, '\''); + + if (limit_length) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIMIT " << (settings.hilite ? hilite_none : ""); + limit_length->formatImpl(settings, state, frame); + } + } + else if (cluster) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? hilite_none : ""); + settings.ostr << " " << backQuoteIfNeed(cluster_str); + } else { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << (temporary ? 
"TEMPORARY " : "") << diff --git a/src/Parsers/ASTShowTablesQuery.h b/src/Parsers/ASTShowTablesQuery.h index be0d73a3ac7..f14d6e7bd33 100644 --- a/src/Parsers/ASTShowTablesQuery.h +++ b/src/Parsers/ASTShowTablesQuery.h @@ -9,14 +9,17 @@ namespace DB { -/** Query SHOW TABLES or SHOW DATABASES +/** Query SHOW TABLES or SHOW DATABASES or SHOW CLUSTERS */ class ASTShowTablesQuery : public ASTQueryWithOutput { public: bool databases{false}; + bool clusters{false}; + bool cluster{false}; bool dictionaries{false}; bool temporary{false}; + String cluster_str; String from; String like; bool not_like{false}; diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 36caf24e623..fb29b6d99cd 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include +#include namespace DB @@ -20,6 +22,8 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserKeyword s_temporary("TEMPORARY"); ParserKeyword s_tables("TABLES"); ParserKeyword s_databases("DATABASES"); + ParserKeyword s_clusters("CLUSTERS"); + ParserKeyword s_cluster("CLUSTER"); ParserKeyword s_dictionaries("DICTIONARIES"); ParserKeyword s_from("FROM"); ParserKeyword s_in("IN"); @@ -43,6 +47,36 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { query->databases = true; } + else if (s_clusters.ignore(pos)) + { + query->clusters = true; + + if (s_not.ignore(pos, expected)) + query->not_like = true; + + if (s_like.ignore(pos, expected)) + { + if (!like_p.parse(pos, like, expected)) + return false; + } + else if (query->not_like) + return false; + if (s_limit.ignore(pos, expected)) + { + if (!exp_elem.parse(pos, query->limit_length, expected)) + return false; + } + } + else if (s_cluster.ignore(pos)) + { + query->cluster = true; + + String cluster_str; + if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) + return false; + + query->cluster_str = std::move(cluster_str); + } else { if (s_temporary.ignore(pos)) diff --git a/src/Parsers/ParserShowTablesQuery.h b/src/Parsers/ParserShowTablesQuery.h index 1bbd3cb4ef6..4fd11d8e2a0 100644 --- a/src/Parsers/ParserShowTablesQuery.h +++ b/src/Parsers/ParserShowTablesQuery.h @@ -14,7 +14,7 @@ namespace DB class ParserShowTablesQuery : public IParserBase { protected: - const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES [[NOT] LIKE 'str'] [LIMIT expr]"; } + const char * getName() const override { return "SHOW [TEMPORARY] TABLES|DATABASES|CLUSTERS|CLUSTER 'name' [[NOT] LIKE 'str'] [LIMIT expr]"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/tests/queries/0_stateless/01293_show_create_cluster.sql b/tests/queries/0_stateless/01293_show_create_cluster.sql new file mode 100644 index 00000000000..af450680dac --- /dev/null +++ b/tests/queries/0_stateless/01293_show_create_cluster.sql @@ -0,0 +1,3 @@ +show clusters; +show clusters like 'test%' limit 1; +show cluster 'test_shard_localhost'; From 5bc7f67e616cdc81bc2827750d981995f6cdeb32 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 5 Jun 2020 14:54:54 +0300 Subject: [PATCH 0374/2229] Better metadata for select query and renames --- .../PushingToViewsBlockOutputStream.cpp | 2 +- src/Storages/IStorage.cpp | 41 ++++++-- src/Storages/IStorage.h | 38 ++++--- src/Storages/LiveView/StorageLiveView.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 23 ++--- src/Storages/MergeTree/MergeTreeData.h | 
2 - src/Storages/StorageInMemoryMetadata.cpp | 34 ------- src/Storages/StorageInMemoryMetadata.h | 30 ------ src/Storages/StorageMaterializedView.cpp | 98 ++++--------------- src/Storages/StorageMaterializedView.h | 7 -- src/Storages/StorageView.cpp | 10 +- src/Storages/StorageView.h | 3 - src/Storages/TTLDescription.cpp | 2 +- src/Storages/TTLDescription.h | 3 +- src/Storages/ya.make | 2 + 15 files changed, 103 insertions(+), 193 deletions(-) diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 2c4792e184e..2c2e6972158 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -72,7 +72,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); - query = materialized_view->getInnerQuery(); + query = materialized_view->getSelectQuery().inner_query; std::unique_ptr insert = std::make_unique(); insert->table_id = inner_table_id; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 496aa55d071..33daf0c298c 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -421,12 +421,12 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const StorageMetadataKeyField & IStorage::getPartitionKey() const +const KeyDescription & IStorage::getPartitionKey() const { return partition_key; } -void IStorage::setPartitionKey(const StorageMetadataKeyField & partition_key_) +void IStorage::setPartitionKey(const KeyDescription & partition_key_) { partition_key = partition_key_; } @@ -448,12 +448,12 @@ Names IStorage::getColumnsRequiredForPartitionKey() const return {}; } -const StorageMetadataKeyField & IStorage::getSortingKey() const +const KeyDescription & IStorage::getSortingKey() const { return sorting_key; } -void IStorage::setSortingKey(const StorageMetadataKeyField & sorting_key_) +void IStorage::setSortingKey(const KeyDescription & sorting_key_) { sorting_key = sorting_key_; } @@ -482,12 +482,12 @@ Names IStorage::getSortingKeyColumns() const return {}; } -const StorageMetadataKeyField & IStorage::getPrimaryKey() const +const KeyDescription & IStorage::getPrimaryKey() const { return primary_key; } -void IStorage::setPrimaryKey(const StorageMetadataKeyField & primary_key_) +void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { primary_key = primary_key_; } @@ -516,12 +516,12 @@ Names IStorage::getPrimaryKeyColumns() const return {}; } -const StorageMetadataKeyField & IStorage::getSamplingKey() const +const KeyDescription & IStorage::getSamplingKey() const { return sampling_key; } -void IStorage::setSamplingKey(const StorageMetadataKeyField & sampling_key_) +void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { sampling_key = sampling_key_; } @@ -654,4 +654,29 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum } +ASTPtr IStorage::getSettingsChanges() const +{ + return settings_changes->clone(); +} + +void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) +{ + settings_changes = settings_changes_->clone(); +} + +const SelectQueryDescription & IStorage::getSelectQuery() const +{ + return select; +} + +void IStorage::setSelectQuery(const SelectQueryDescription & select_) +{ + select = select_; +} + +bool IStorage::hasSelectQuery() const +{ + return select.select_query != nullptr; +} + } diff --git a/src/Storages/IStorage.h 
b/src/Storages/IStorage.h index a637c9c6881..cd7b2ad8a0e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -15,7 +15,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -152,6 +154,15 @@ public: /// thread-unsafe part. lockStructure must be acquired const ConstraintsDescription & getConstraints() const; void setConstraints(ConstraintsDescription constraints_); + /// Storage settings + ASTPtr getSettingsChanges() const; + void setSettingsChanges(const ASTPtr & settings_changes_); + bool hasSettingsChanges() const { return settings_changes != nullptr; } + + const SelectQueryDescription & getSelectQuery() const; + void setSelectQuery(const SelectQueryDescription & select_); + bool hasSelectQuery() const; + /// Returns storage metadata copy. Direct modification of /// result structure doesn't affect storage. virtual StorageInMemoryMetadata getInMemoryMetadata() const; @@ -203,14 +214,17 @@ private: IndicesDescription secondary_indices; ConstraintsDescription constraints; - StorageMetadataKeyField partition_key; - StorageMetadataKeyField primary_key; - StorageMetadataKeyField sorting_key; - StorageMetadataKeyField sampling_key; + KeyDescription partition_key; + KeyDescription primary_key; + KeyDescription sorting_key; + KeyDescription sampling_key; TTLColumnsDescription column_ttls_by_name; TTLTableDescription table_ttl; + ASTPtr settings_changes; + SelectQueryDescription select; + private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; @@ -443,10 +457,10 @@ public: virtual Strings getDataPaths() const { return {}; } /// Returns structure with partition key. - const StorageMetadataKeyField & getPartitionKey() const; + const KeyDescription & getPartitionKey() const; /// Set partition key for storage (methods bellow, are just wrappers for this /// struct). - void setPartitionKey(const StorageMetadataKeyField & partition_key_); + void setPartitionKey(const KeyDescription & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. ASTPtr getPartitionKeyAST() const { return partition_key.definition_ast; } /// Storage has user-defined (in CREATE query) partition key. @@ -458,10 +472,10 @@ public: /// Returns structure with sorting key. - const StorageMetadataKeyField & getSortingKey() const; + const KeyDescription & getSortingKey() const; /// Set sorting key for storage (methods bellow, are just wrappers for this /// struct). - void setSortingKey(const StorageMetadataKeyField & sorting_key_); + void setSortingKey(const KeyDescription & sorting_key_); /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. ASTPtr getSortingKeyAST() const { return sorting_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. @@ -475,10 +489,10 @@ public: Names getSortingKeyColumns() const; /// Returns structure with primary key. - const StorageMetadataKeyField & getPrimaryKey() const; + const KeyDescription & getPrimaryKey() const; /// Set primary key for storage (methods bellow, are just wrappers for this /// struct). - void setPrimaryKey(const StorageMetadataKeyField & primary_key_); + void setPrimaryKey(const KeyDescription & primary_key_); /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. 
ASTPtr getPrimaryKeyAST() const { return primary_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. @@ -493,10 +507,10 @@ public: Names getPrimaryKeyColumns() const; /// Returns structure with sampling key. - const StorageMetadataKeyField & getSamplingKey() const; + const KeyDescription & getSamplingKey() const; /// Set sampling key for storage (methods bellow, are just wrappers for this /// struct). - void setSamplingKey(const StorageMetadataKeyField & sampling_key_); + void setSamplingKey(const KeyDescription & sampling_key_); /// Returns sampling expression AST for storage or nullptr if there is none. ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; } /// Storage has user-defined (in CREATE query) sampling key. diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fe62de224da..458e74eb506 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -165,6 +165,7 @@ public: const Context & context); private: + /// TODO move to common struct SelectQueryDescription StorageID select_table_id = StorageID::createEmpty(); /// Will be initialized in constructor ASTPtr inner_query; /// stored query : SELECT * FROM ( SELECT a FROM A) ASTPtr inner_subquery; /// stored query's innermost subquery if any diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 63d163a593e..8b046673556 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -131,7 +131,6 @@ MergeTreeData::MergeTreeData( : IStorage(table_id_) , global_context(context_) , merging_params(merging_params_) - , settings_ast(metadata.settings_ast) , require_part_metadata(require_part_metadata_) , relative_data_path(relative_data_path_) , broken_part_callback(broken_part_callback_) @@ -145,6 +144,7 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + setSettingsChanges(metadata.settings_ast); const auto settings = getSettings(); setProperties(metadata, /*only_check*/ false, attach); @@ -153,7 +153,7 @@ MergeTreeData::MergeTreeData( if (metadata.sample_by_ast != nullptr) { - StorageMetadataKeyField candidate_sampling_key = StorageMetadataKeyField::getKeyFromAST(metadata.sample_by_ast, getColumns(), global_context); + KeyDescription candidate_sampling_key = KeyDescription::getKeyFromAST(metadata.sample_by_ast, getColumns(), global_context); const auto & pk_sample_block = getPrimaryKey().sample_block; if (!pk_sample_block.has(candidate_sampling_key.column_names[0]) && !attach @@ -265,8 +265,8 @@ StorageInMemoryMetadata MergeTreeData::getInMemoryMetadata() const if (isSamplingKeyDefined()) metadata.sample_by_ast = getSamplingKeyAST()->clone(); - if (settings_ast) - metadata.settings_ast = settings_ast->clone(); + if (hasSettingsChanges()) + metadata.settings_ast = getSettingsChanges(); return metadata; } @@ -444,7 +444,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool { setColumns(std::move(metadata.columns)); - StorageMetadataKeyField new_sorting_key; + KeyDescription new_sorting_key; new_sorting_key.definition_ast = metadata.order_by_ast; new_sorting_key.column_names = std::move(new_sorting_key_columns); new_sorting_key.expression_list_ast = std::move(new_sorting_key_expr_list); @@ -453,7 +453,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, 
bool new_sorting_key.data_types = std::move(new_sorting_key_data_types); setSortingKey(new_sorting_key); - StorageMetadataKeyField new_primary_key; + KeyDescription new_primary_key; new_primary_key.definition_ast = metadata.primary_key_ast; new_primary_key.column_names = std::move(new_primary_key_columns); new_primary_key.expression_list_ast = std::move(new_primary_key_expr_list); @@ -472,7 +472,7 @@ namespace { ExpressionActionsPtr getCombinedIndicesExpression( - const StorageMetadataKeyField & key, + const KeyDescription & key, const IndicesDescription & indices, const ColumnsDescription & columns, const Context & context) @@ -523,7 +523,7 @@ ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node) void MergeTreeData::initPartitionKey(ASTPtr partition_by_ast) { - StorageMetadataKeyField new_partition_key = StorageMetadataKeyField::getKeyFromAST(partition_by_ast, getColumns(), global_context); + KeyDescription new_partition_key = KeyDescription::getKeyFromAST(partition_by_ast, getColumns(), global_context); if (new_partition_key.expression_list_ast->children.empty()) return; @@ -1460,9 +1460,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S setTTLExpressions(metadata.columns, metadata.ttl_for_table_ast, /* only_check = */ true); - if (settings_ast) + if (hasSettingsChanges()) { - const auto & current_changes = settings_ast->as().changes; + + const auto & current_changes = getSettingsChanges()->as().changes; const auto & new_changes = metadata.settings_ast->as().changes; for (const auto & changed_setting : new_changes) { @@ -1601,7 +1602,7 @@ void MergeTreeData::changeSettings( MergeTreeSettings copy = *getSettings(); copy.applyChanges(new_changes); storage_settings.set(std::make_unique(copy)); - settings_ast = new_settings; + setSettingsChanges(new_settings); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 6df181e3f98..dcc6174ef5a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -677,8 +677,6 @@ protected: friend struct ReplicatedMergeTreeTableMetadata; friend class StorageReplicatedMergeTree; - ASTPtr settings_ast; - bool require_part_metadata; String relative_data_path; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index ee38637e118..1b7ec39b9e3 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,13 +1,5 @@ #include -#include -#include -#include -#include -#include -#include -#include - namespace DB { @@ -89,30 +81,4 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo return *this; } -StorageMetadataKeyField StorageMetadataKeyField::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context) -{ - StorageMetadataKeyField result; - result.definition_ast = definition_ast; - result.expression_list_ast = extractKeyExpressionList(definition_ast); - - if (result.expression_list_ast->children.empty()) - return result; - - const auto & children = result.expression_list_ast->children; - for (const auto & child : children) - result.column_names.emplace_back(child->getColumnName()); - - { - auto expr = result.expression_list_ast->clone(); - auto syntax_result = SyntaxAnalyzer(context).analyze(expr, columns.getAllPhysical()); - result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(true); - result.sample_block = 
result.expression->getSampleBlock(); - } - - for (size_t i = 0; i < result.sample_block.columns(); ++i) - result.data_types.emplace_back(result.sample_block.getByPosition(i).type); - - return result; -} - } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 39bc8fd2b31..39374166d5e 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -3,7 +3,6 @@ #include #include #include -#include #include namespace DB @@ -44,33 +43,4 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); }; -/// Common structure for primary, partition and other storage keys -struct StorageMetadataKeyField -{ - /// User defined AST in CREATE/ALTER query. This field may be empty, but key - /// can exists because some of them maybe set implicitly (for example, - /// primary key in merge tree can be part of sorting key) - ASTPtr definition_ast; - - /// ASTExpressionList with key fields, example: (x, toStartOfMonth(date))). - ASTPtr expression_list_ast; - - /// Expression from expression_list_ast created by ExpressionAnalyzer. Useful, - /// when you need to get required columns for key, example: a, date, b. - ExpressionActionsPtr expression; - - /// Sample block with key columns (names, types, empty column) - Block sample_block; - - /// Column names in key definition, example: x, toStartOfMonth(date), a * b. - Names column_names; - - /// Types from sample block ordered in columns order. - DataTypes data_types; - - /// Parse key structure from key definition. Requires all columns, available - /// in storage. - static StorageMetadataKeyField getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context); -}; - } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 1b8c6acb49f..313e75a169e 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -41,58 +42,6 @@ static inline String generateInnerTableName(const StorageID & view_id) return ".inner." + view_id.getTableName(); } -static StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, const Context & context, bool add_default_db = true) -{ - if (add_default_db) - { - AddDefaultDatabaseVisitor visitor(context.getCurrentDatabase(), nullptr); - visitor.visit(query); - } - - if (auto db_and_table = getDatabaseAndTable(query, 0)) - { - return StorageID(db_and_table->database, db_and_table->table/*, db_and_table->uuid*/); - } - else if (auto subquery = extractTableExpression(query, 0)) - { - auto * ast_select = subquery->as(); - if (!ast_select) - throw Exception("Logical error while creating StorageMaterializedView. 
" - "Could not retrieve table name from select query.", - DB::ErrorCodes::LOGICAL_ERROR); - if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", - ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); - - auto & inner_query = ast_select->list_of_selects->children.at(0); - - return extractDependentTableFromSelectQuery(inner_query->as(), context, false); - } - else - return StorageID::createEmpty(); -} - - -static void checkAllowedQueries(const ASTSelectQuery & query) -{ - if (query.prewhere() || query.final() || query.sampleSize()) - throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); - - ASTPtr subquery = extractTableExpression(query, 0); - if (!subquery) - return; - - if (const auto * ast_select = subquery->as()) - { - if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); - - const auto & inner_query = ast_select->list_of_selects->children.at(0); - - checkAllowedQueries(inner_query->as()); - } -} - StorageMaterializedView::StorageMaterializedView( const StorageID & table_id_, @@ -117,13 +66,8 @@ StorageMaterializedView::StorageMaterializedView( if (query.select->list_of_selects->children.size() != 1) throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); - select = query.select->clone(); - inner_query = query.select->list_of_selects->children.at(0); - - auto & select_query = inner_query->as(); - checkAllowedQueries(select_query); - - select_table_id = extractDependentTableFromSelectQuery(select_query, local_context); + auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); + setSelectQuery(select); if (!has_inner_table) target_table_id = query.to_table_id; @@ -152,14 +96,14 @@ StorageMaterializedView::StorageMaterializedView( target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->database, manual_create_query->table}, global_context)->getStorageID(); } - if (!select_table_id.empty()) - DatabaseCatalog::instance().addDependency(select_table_id, getStorageID()); + if (!select.select_table_id.empty()) + DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } StorageInMemoryMetadata StorageMaterializedView::getInMemoryMetadata() const { StorageInMemoryMetadata result(getColumns(), getSecondaryIndices(), getConstraints()); - result.select = getSelectQuery(); + result.select = getSelectQuery().select_query; return result; } @@ -222,8 +166,9 @@ static void executeDropQuery(ASTDropQuery::Kind kind, Context & global_context, void StorageMaterializedView::drop() { auto table_id = getStorageID(); - if (!select_table_id.empty()) - DatabaseCatalog::instance().removeDependency(select_table_id, table_id); + const auto & select_query = getSelectQuery(); + if (!select_query.select_table_id.empty()) + DatabaseCatalog::instance().removeDependency(select_query.select_table_id, table_id); if (has_inner_table && tryGetTargetTable()) executeDropQuery(ASTDropQuery::Kind::Drop, global_context, target_table_id); @@ -262,21 +207,12 @@ void StorageMaterializedView::alter( /// start modify query if (context.getSettingsRef().allow_experimental_alter_materialized_view_structure) { - auto & new_select = metadata.select->as(); + auto new_select = 
SelectQueryDescription::getSelectQueryFromASTForMatView(metadata.select, context); + const auto & old_select = getSelectQuery(); - if (new_select.list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); - auto & new_inner_query = new_select.list_of_selects->children.at(0); - auto & select_query = new_inner_query->as(); - checkAllowedQueries(select_query); - - auto new_select_table_id = extractDependentTableFromSelectQuery(select_query, context); - DatabaseCatalog::instance().updateDependency(select_table_id, table_id, new_select_table_id, table_id); - - select_table_id = new_select_table_id; - select = metadata.select; - inner_query = new_inner_query; + setSelectQuery(new_select); } /// end modify query @@ -349,15 +285,17 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) } IStorage::renameInMemory(new_table_id); + const auto & select_query = getSelectQuery(); // TODO Actually we don't need to update dependency if MV has UUID, but then db and table name will be outdated - DatabaseCatalog::instance().updateDependency(select_table_id, old_table_id, select_table_id, getStorageID()); + DatabaseCatalog::instance().updateDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); } void StorageMaterializedView::shutdown() { + const auto & select_query = getSelectQuery(); /// Make sure the dependency is removed after DETACH TABLE - if (!select_table_id.empty()) - DatabaseCatalog::instance().removeDependency(select_table_id, getStorageID()); + if (!select_query.select_table_id.empty()) + DatabaseCatalog::instance().removeDependency(select_query.select_table_id, getStorageID()); } StoragePtr StorageMaterializedView::getTargetTable() const diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index fd8639abb6a..d5f81e2248e 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -19,8 +19,6 @@ public: std::string getName() const override { return "MaterializedView"; } bool isView() const override { return true; } - ASTPtr getSelectQuery() const { return select->clone(); } - ASTPtr getInnerQuery() const { return inner_query->clone(); } bool hasInnerTable() const { return has_inner_table; } StorageInMemoryMetadata getInMemoryMetadata() const override; @@ -76,14 +74,9 @@ public: Strings getDataPaths() const override; private: - /// Can be empty if SELECT query doesn't contain table - StorageID select_table_id = StorageID::createEmpty(); /// Will be initialized in constructor StorageID target_table_id = StorageID::createEmpty(); - ASTPtr select; - ASTPtr inner_query; - Context & global_context; bool has_inner_table = false; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index c6b37a50aa9..2525ac48732 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -12,6 +12,7 @@ #include #include +#include #include @@ -45,7 +46,10 @@ StorageView::StorageView( if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); - inner_query = query.select->ptr(); + SelectQueryDescription description; + + description.inner_query = query.select->ptr(); + setSelectQuery(description); } @@ -59,7 +63,7 @@ Pipes StorageView::read( { Pipes pipes; - 
ASTPtr current_inner_query = inner_query; + ASTPtr current_inner_query = getSelectQuery().inner_query; if (context.getSettings().enable_optimize_predicate_expression) current_inner_query = getRuntimeViewQuery(*query_info.query->as(), context); @@ -119,7 +123,7 @@ static void replaceTableNameWithSubquery(ASTSelectQuery * select_query, ASTPtr & ASTPtr StorageView::getRuntimeViewQuery(ASTSelectQuery * outer_query, const Context & context, bool normalize) { - auto runtime_view_query = inner_query->clone(); + auto runtime_view_query = getSelectQuery().inner_query->clone(); /// TODO: remove getTableExpressions and getTablesWithColumns { diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 86550db83ce..179da5a07b9 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -33,9 +33,6 @@ public: ASTPtr getRuntimeViewQuery(ASTSelectQuery * outer_query, const Context & context, bool normalize); -private: - ASTPtr inner_query; - protected: StorageView( const StorageID & table_id_, diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index da9691aab4a..3bef8894971 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -59,7 +59,7 @@ TTLDescription TTLDescription::getTTLFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, - const StorageMetadataKeyField & primary_key) + const KeyDescription & primary_key) { TTLDescription result; const auto * ttl_element = definition_ast->as(); diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 99a145b8acc..1ad6960ee3b 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +74,7 @@ struct TTLDescription /// Parse TTL structure from definition. Able to parse both column and table /// TTLs. 
- static TTLDescription getTTLFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const StorageMetadataKeyField & primary_key); + static TTLDescription getTTLFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const KeyDescription & primary_key); }; /// Mapping from column name to column TTL diff --git a/src/Storages/ya.make b/src/Storages/ya.make index a28dd393929..b10a6194b7c 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -176,6 +176,8 @@ SRCS( VirtualColumnUtils.cpp extractKeyExpressionList.cpp TTLDescription.cpp + KeyDescription.cpp + SelectQueryDescription.cpp ) END() From 2b23d1aa3349b487d953f3d48e63fa552ba7f0c4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 5 Jun 2020 15:13:24 +0300 Subject: [PATCH 0375/2229] Fix reference --- src/Storages/IStorage.cpp | 9 ++++++--- src/Storages/IStorage.h | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 33daf0c298c..8f7c6869892 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -654,14 +654,17 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum } -ASTPtr IStorage::getSettingsChanges() const +const ASTPtr & IStorage::getSettingsChanges() const { - return settings_changes->clone(); + return settings_changes; } void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { - settings_changes = settings_changes_->clone(); + if (settings_changes_) + settings_changes = settings_changes_->clone(); + else + settings_changes = nullptr; } const SelectQueryDescription & IStorage::getSelectQuery() const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cd7b2ad8a0e..4f6787e0c61 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -155,7 +155,7 @@ public: /// thread-unsafe part. 
lockStructure must be acquired
     void setConstraints(ConstraintsDescription constraints_);
 
     /// Storage settings
-    ASTPtr getSettingsChanges() const;
+    const ASTPtr & getSettingsChanges() const;
     void setSettingsChanges(const ASTPtr & settings_changes_);
     bool hasSettingsChanges() const { return settings_changes != nullptr; }

From 68b94c5c20fc1ed1d222ee14d096991f2fdca705 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 5 Jun 2020 13:42:11 +0000
Subject: [PATCH 0376/2229] Fixes

---
 programs/server/config.xml                          |  3 ++-
 src/Storages/RabbitMQ/RabbitMQHandler.cpp           | 14 +-------------
 .../RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp     |  7 +++++--
 .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h       |  1 +
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp           | 12 ++++++++----
 src/Storages/RabbitMQ/StorageRabbitMQ.h             |  3 ++-
 .../RabbitMQ/WriteBufferToRabbitMQProducer.cpp      |  7 +++++--
 .../RabbitMQ/WriteBufferToRabbitMQProducer.h        |  5 ++++-
 8 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/programs/server/config.xml b/programs/server/config.xml
index 21605edeb36..b39ee180466 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -51,7 +51,8 @@
     8443
     9440
     -->
-
+    <rabbitmq_username>root</rabbitmq_username>
+    <rabbitmq_password>clickhouse</rabbitmq_password>
diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp
index 1a3ede79420..34a77489faa 100644
--- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp
+++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp
@@ -6,8 +6,7 @@ namespace DB
 
 enum
 {
-    Lock_timeout = 50,
-    Max_threads_to_pass = 10
+    Lock_timeout = 50
 };
 
@@ -50,17 +49,6 @@ void RabbitMQHandler::start(std::atomic<bool> & check_param)
         mutex_before_event_loop.unlock();
     }
-    else
-    {
-        if (++count_passed == Max_threads_to_pass)
-        {
-            /* Event loop is blocking to the thread that started it and it is not good to block one single thread as it loops
-             * untill there are no active events, but there can be too many of them for one thread to be blocked for so long.
-             */
-            stop();
-            count_passed = 0;
-        }
-    }
 }
 
 void RabbitMQHandler::stop()
diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
index f8259ce8c4c..1bd2c7831ff 100644
--- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
@@ -44,6 +44,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
     , stopped(stopped_)
     , exchange_declared(false)
     , false_param(false)
+    , loop_attempt(false)
{
     messages.clear();
     current = messages.begin();
@@ -225,7 +226,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)
             message_received += row_delimiter;
 
             //LOG_TRACE(log, "Consumer {} received a message", channel_id);
-
+            bool stop_loop = false;
 
             /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below).
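For context on this patch: once the Max_threads_to_pass workaround is deleted, RabbitMQHandler relies purely on a try-lock so that exactly one thread drives the libevent loop while all other callers return immediately instead of blocking. A condensed standalone sketch of that pattern follows (names are hypothetical; the timed mutex and the try_lock_for call are assumptions, since only the unlock() and the EVLOOP_NONBLOCK call are visible in the patch itself):

#include <atomic>
#include <chrono>
#include <mutex>
#include <event2/event.h>

struct LoopRunner
{
    event_base * evbase;
    std::timed_mutex mutex_before_event_loop;
    static constexpr std::chrono::milliseconds lock_timeout{50};

    /// Whichever thread wins the try-lock runs one non-blocking iteration of the
    /// libevent loop; losers return at once instead of queueing behind the winner.
    void start(std::atomic<bool> & stop_requested)
    {
        if (mutex_before_event_loop.try_lock_for(lock_timeout))
        {
            if (!stop_requested)
                event_base_loop(evbase, EVLOOP_NONBLOCK); /// handle pending events, never block
            mutex_before_event_loop.unlock();
        }
    }
};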
@@ -236,7 +237,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) /* As event loop is blocking to the thread that started it and a single thread should not be blocked while * executing all callbacks on the connection (not only its own), then there should be some point to unblock */ - if (received.size() >= Received_max_to_stop_loop) + if (!loop_attempt && received.size() % Received_max_to_stop_loop == 0) { stop_loop = true; } @@ -284,7 +285,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (received.empty()) { /// Run the onReceived callbacks to save the messages that have been received by now + loop_attempt = true; startEventLoop(false_param); + loop_attempt = false; } if (received.empty()) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 55adb39bdce..97eca73cece 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -66,6 +66,7 @@ private: String current_exchange_name; size_t count_subscribed = 0; size_t count_bound_queues = 0; + std::atomic loop_attempt; Messages received; Messages messages; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 7cbfb164a2d..481314a38c2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -74,10 +74,14 @@ StorageRabbitMQ::StorageRabbitMQ( , hash_exchange(hash_exchange_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) + , login_password(std::make_pair( + rabbitmq_context.getConfigRef().getString("rabbitmq_username", "root"), + rabbitmq_context.getConfigRef().getString("rabbitmq_password", "clickhouse"))) , parsed_address(parseAddress(global_context.getMacros()->expand(host_port_), 5672)) , evbase(event_base_new()) , eventHandler(evbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), "/")) { size_t cnt_retries = 0; while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) @@ -208,14 +212,14 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - return std::make_shared(consumer_channel, eventHandler, exchange_name, - routing_key, next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, + next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - return std::make_shared(parsed_address, routing_key, exchange_name, + return std::make_shared(parsed_address, login_password, routing_key, exchange_name, log, num_consumers * num_queues, bind_by_id, hash_exchange, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 635d53e6cf0..563f37ae6f1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -86,7 +86,8 @@ private: const bool hash_exchange; Poco::Logger * log; - std::pair parsed_address; + std::pair parsed_address; + std::pair login_password; event_base * evbase; RabbitMQHandler eventHandler; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index e61a8e1ccd8..7c0764853c7 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -20,7 +20,8 @@ enum }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( - std::pair & parsed_address, + std::pair & parsed_address, + std::pair & login_password_, const String & routing_key_, const String & exchange_, Poco::Logger * log_, @@ -31,6 +32,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( size_t rows_per_message, size_t chunk_size_) : WriteBuffer(nullptr, 0) + , login_password(login_password_) , routing_key(routing_key_) , exchange_name(exchange_) , log(log_) @@ -42,7 +44,8 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , chunk_size(chunk_size_) , producerEvbase(event_base_new()) , eventHandler(producerEvbase, log) - , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, AMQP::Login("root", "clickhouse"), "/")) + , connection(&eventHandler, AMQP::Address(parsed_address.first, parsed_address.second, + AMQP::Login(login_password.first, login_password.second), "/")) { /* The reason behind making a separate connection for each concurrent producer is explained here: * https://github.com/CopernicaMarketingSoftware/AMQP-CPP/issues/128#issuecomment-300780086 - publishing from diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index c61a76a3e74..e0c48556239 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { @@ -18,7 +19,8 @@ class WriteBufferToRabbitMQProducer : public WriteBuffer { public: WriteBufferToRabbitMQProducer( - std::pair & parsed_address, + std::pair & parsed_address, + std::pair & login_password_, const String & routing_key_, const String & exchange_, Poco::Logger * log_, @@ -40,6 +42,7 @@ private: void checkExchange(); void startEventLoop(std::atomic & check_param); + std::pair & login_password; const String routing_key; const String exchange_name; const bool bind_by_id; From cb618a32b80df2cbaec35264df01754d35e30d6b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Jun 2020 14:27:56 +0000 Subject: [PATCH 0377/2229] Fix style --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 4 ++-- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 34a77489faa..95d7e22d434 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -18,7 +18,7 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : } -void RabbitMQHandler::onError(AMQP::TcpConnection * connection, 
const char * message)
 {
     LOG_ERROR(log, "Library error report: {}", message);
@@ -44,7 +44,7 @@ void RabbitMQHandler::start(std::atomic<bool> & check_param)
      */
     if (!check_param)
     {
-        event_base_loop(evbase, EVLOOP_NONBLOCK);
+        event_base_loop(evbase, EVLOOP_NONBLOCK);
     }
     mutex_before_event_loop.unlock();
diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h
index 39fccd4dace..d2d70185128 100644
--- a/src/Storages/RabbitMQ/RabbitMQHandler.h
+++ b/src/Storages/RabbitMQ/RabbitMQHandler.h
@@ -19,7 +19,7 @@ public:
     RabbitMQHandler(event_base * evbase_, Poco::Logger * log_);
 
     void onError(AMQP::TcpConnection * connection, const char * message) override;
-    void start(std::atomic<bool> & check_param);
+    void start(std::atomic<bool> & check_param);
     void stop();
 
 private:
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h
index 563f37ae6f1..111e52768d0 100644
--- a/src/Storages/RabbitMQ/StorageRabbitMQ.h
+++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h
@@ -63,11 +63,11 @@ protected:
         const ColumnsDescription & columns_,
         const String & host_port_,
         const String & routing_key_,
-        const String & exchange_name_,
+        const String & exchange_name_,
         const String & format_name_,
         char row_delimiter_,
-        size_t num_consumers_,
-        size_t num_queues_,
+        size_t num_consumers_,
+        size_t num_queues_,
         bool hash_exchange);
 
 private:

From 108575c8adf6f2cb1d1c56c0db172b69ca2c1630 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 5 Jun 2020 18:38:03 +0300
Subject: [PATCH 0378/2229] Added IQueryPlanStep.

---
 src/CMakeLists.txt                          |  1 +
 src/Processors/QueryPlan/IQueryPlanStep.cpp | 19 ++++++++
 src/Processors/QueryPlan/IQueryPlanStep.h   | 54 +++++++++++++++++++++
 src/Processors/ya.make                      |  1 +
 4 files changed, 75 insertions(+)
 create mode 100644 src/Processors/QueryPlan/IQueryPlanStep.cpp
 create mode 100644 src/Processors/QueryPlan/IQueryPlanStep.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 13e8aac6906..1dfaf68e1e1 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -157,6 +157,7 @@ add_object_library(clickhouse_processors_transforms Processors/Transforms)
 add_object_library(clickhouse_processors_sources Processors/Sources)
 add_object_library(clickhouse_processors_merges Processors/Merges)
 add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms)
+add_object_library(clickhouse_processors_queryplan Processors/QueryPlan)
 
 if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES)
diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp
new file mode 100644
index 00000000000..f25d17188ea
--- /dev/null
+++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp
@@ -0,0 +1,19 @@
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+const DataStream & IQueryPlanStep::getOutputStream() const
+{
+    if (!hasOutputStream())
+        throw Exception("QueryPlanStep " + getName() + " does not have output stream.", ErrorCodes::LOGICAL_ERROR);
+
+    return *output_stream;
+}
+
+}
diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h
new file mode 100644
index 00000000000..fe84e49672a
--- /dev/null
+++ b/src/Processors/QueryPlan/IQueryPlanStep.h
@@ -0,0 +1,54 @@
+#pragma once
+#include
+
+namespace DB
+{
+
+class QueryPipeline;
+using QueryPipelinePtr = std::unique_ptr<QueryPipeline>;
+using QueryPipelines = std::vector<QueryPipelinePtr>;
+
+/// Description of data stream.
+class DataStream
+{
+public:
+    Block header;
+
+    /// Only header for now.
+    /// Things which may be added:
+    /// * sort description
+    /// * distinct columns
+    /// * limit
+    /// * estimated rows number
+    /// * memory allocation context
+};
+
+using DataStreams = std::vector<DataStream>;
+
+/// Single step of query plan.
+class IQueryPlanStep
+{
+public:
+    virtual ~IQueryPlanStep() = default;
+
+    virtual String getName() const = 0;
+
+    /// Add processors from current step to QueryPipeline.
+    /// Calling this method, we assume and don't check that:
+    ///   * pipelines.size() == getInputStreams().size()
+    ///   * header from each pipeline is the same as header from corresponding input_streams
+    /// Result pipeline must contain any number of streams with compatible output header if hasOutputStream(),
+    /// or pipeline should be completed otherwise.
+    virtual QueryPipelinePtr updatePipeline(QueryPipelines pipelines) = 0;
+
+    const DataStreams & getInputStreams() const { return input_streams; }
+
+    bool hasOutputStream() const { return output_stream.has_value(); }
+    const DataStream & getOutputStream() const;
+
+protected:
+    DataStreams input_streams;
+    std::optional<DataStream> output_stream;
+};
+
+}
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index 62320f1c147..fee4847fb56 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -134,6 +134,7 @@ SRCS(
     Transforms/RollupTransform.cpp
     Transforms/SortingTransform.cpp
     Transforms/TotalsHavingTransform.cpp
+    QueryPlan/IQueryPlanStep.cpp
 )
 
 END()

From abaf47f0cda4e9d6436e8aaece11e0e69e904ddc Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 5 Jun 2020 20:29:40 +0300
Subject: [PATCH 0379/2229] Make metadata single structure

---
 src/Databases/DatabaseOrdinary.cpp            |  20 +-
 src/Storages/AlterCommands.cpp                |  21 +-
 src/Storages/ConstraintsDescription.cpp       |  15 ++
 src/Storages/ConstraintsDescription.h         |   3 +
 src/Storages/IStorage.cpp                     |  97 +++++----
 src/Storages/IStorage.h                       |  34 +--
 src/Storages/IndicesDescription.cpp           |  37 ++++
 src/Storages/IndicesDescription.h             |   7 +
 src/Storages/KeyDescription.cpp               |  69 +++++++
 src/Storages/KeyDescription.h                 |  45 ++++
 src/Storages/MergeTree/MergeTreeData.cpp      | 193 +++++-------------
 src/Storages/MergeTree/MergeTreeData.h        |   7 +-
 .../MergeTree/StorageFromMergeTreeDataPart.h  |   5 -
 .../MergeTree/registerStorageMergeTree.cpp    |  45 ++--
 src/Storages/SelectQueryDescription.cpp       | 117 +++++++++++
 src/Storages/SelectQueryDescription.h         |  27 +++
 src/Storages/StorageInMemoryMetadata.cpp      |  69 -------
 src/Storages/StorageInMemoryMetadata.h        |  35 ++--
 src/Storages/StorageMaterializedView.cpp      |   9 +-
 src/Storages/StorageMaterializedView.h        |   2 -
 src/Storages/StorageMergeTree.cpp             |   6 +-
 src/Storages/StorageReplicatedMergeTree.cpp   |  29 +-
 src/Storages/TTLDescription.cpp               |  65 ++++++
 src/Storages/TTLDescription.h                 |   8 +-
 24 files changed, 576 insertions(+), 389 deletions(-)
 create mode 100644 src/Storages/KeyDescription.cpp
 create mode 100644 src/Storages/KeyDescription.h
 create mode 100644 src/Storages/SelectQueryDescription.cpp
 create mode 100644 src/Storages/SelectQueryDescription.h

diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp
index b31752ad1b3..eec58ed9b33 100644
--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@@ -253,9 +253,9 @@ void DatabaseOrdinary::alterTable(
     ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->indices, new_indices);
     ast_create_query.columns_list->setOrReplace(ast_create_query.columns_list->constraints, new_constraints);
 
-    if
(metadata.select) + if (metadata.select.select_query) { - ast->replace(ast_create_query.select, metadata.select); + ast->replace(ast_create_query.select, metadata.select.select_query); } /// MaterializedView is one type of CREATE query without storage. @@ -263,17 +263,17 @@ void DatabaseOrdinary::alterTable( { ASTStorage & storage_ast = *ast_create_query.storage; /// ORDER BY may change, but cannot appear, it's required construction - if (metadata.order_by_ast && storage_ast.order_by) - storage_ast.set(storage_ast.order_by, metadata.order_by_ast); + if (metadata.sorting_key.definition_ast && storage_ast.order_by) + storage_ast.set(storage_ast.order_by, metadata.sorting_key.definition_ast); - if (metadata.primary_key_ast) - storage_ast.set(storage_ast.primary_key, metadata.primary_key_ast); + if (metadata.primary_key.definition_ast) + storage_ast.set(storage_ast.primary_key, metadata.primary_key.definition_ast); - if (metadata.ttl_for_table_ast) - storage_ast.set(storage_ast.ttl_table, metadata.ttl_for_table_ast); + if (metadata.table_ttl.definition_ast) + storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast); - if (metadata.settings_ast) - storage_ast.set(storage_ast.settings, metadata.settings_ast); + if (metadata.settings_changes) + storage_ast.set(storage_ast.settings, metadata.settings_changes); } statement = getObjectDefinitionFromCreateQuery(ast); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index ce70af2bb6a..13da6db8ed3 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -316,14 +316,14 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == MODIFY_ORDER_BY) { - if (!metadata.primary_key_ast && metadata.order_by_ast) + if (metadata.primary_key.definition_ast == nullptr && metadata.sorting_key.definition_ast != nullptr) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. 
- metadata.primary_key_ast = metadata.order_by_ast->clone(); + metadata.primary_key = metadata.sorting_key; } - metadata.order_by_ast = order_by; + metadata.sorting_key = KeyDescription::getKeyFromAST(order_by, metadata.columns, context); } else if (type == COMMENT_COLUMN) { @@ -430,15 +430,15 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == MODIFY_TTL) { - metadata.ttl_for_table_ast = ttl; + metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl, metadata.columns, context, metadata.primary_key); } else if (type == MODIFY_QUERY) { - metadata.select = select; + metadata.select = SelectQueryDescription::getSelectQueryFromASTForMatView(select, context); } else if (type == MODIFY_SETTING) { - auto & settings_from_storage = metadata.settings_ast->as().changes; + auto & settings_from_storage = metadata.settings_changes->as().changes; for (const auto & change : settings_changes) { auto finder = [&change](const SettingChange & c) { return c.name == change.name; }; @@ -465,8 +465,11 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con rename_visitor.visit(column_to_modify.ttl); }); } - if (metadata.ttl_for_table_ast) - rename_visitor.visit(metadata.ttl_for_table_ast); + if (metadata.table_ttl.definition_ast) + rename_visitor.visit(metadata.table_ttl.definition_ast); + + metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + metadata.table_ttl.definition_ast, metadata.columns, context, metadata.primary_key); for (auto & constraint : metadata.constraints.constraints) rename_visitor.visit(constraint); @@ -832,7 +835,7 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, const Con } else if (command.type == AlterCommand::MODIFY_SETTING) { - if (metadata.settings_ast == nullptr) + if (metadata.settings_changes == nullptr) throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS}; } else if (command.type == AlterCommand::RENAME_COLUMN) diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index d86796908a7..3d4f528302a 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -54,4 +54,19 @@ ConstraintsExpressions ConstraintsDescription::getExpressions(const DB::Context return res; } +ConstraintsDescription::ConstraintsDescription(const ConstraintsDescription & other) +{ + constraints.reserve(other.constraints.size()); + for (const auto & constraint : other.constraints) + constraints.emplace_back(constraint->clone()); +} + +ConstraintsDescription & ConstraintsDescription::operator=(const ConstraintsDescription & other) +{ + constraints.resize(other.constraints.size()); + for (size_t i = 0; i < constraints.size(); ++i) + constraints[i] = other.constraints[i]->clone(); + return *this; +} + } diff --git a/src/Storages/ConstraintsDescription.h b/src/Storages/ConstraintsDescription.h index f4da4376041..ddc9597f813 100644 --- a/src/Storages/ConstraintsDescription.h +++ b/src/Storages/ConstraintsDescription.h @@ -20,6 +20,9 @@ struct ConstraintsDescription static ConstraintsDescription parse(const String & str); ConstraintsExpressions getExpressions(const Context & context, const NamesAndTypesList & source_columns_) const; + + ConstraintsDescription(const ConstraintsDescription & other); + ConstraintsDescription & operator=(const ConstraintsDescription & other); }; } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp 
index 8f7c6869892..2ad89f559b5 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -34,23 +34,23 @@ namespace ErrorCodes const ColumnsDescription & IStorage::getColumns() const { - return columns; + return metadata.columns; } const IndicesDescription & IStorage::getSecondaryIndices() const { - return secondary_indices; + return metadata.secondary_indices; } bool IStorage::hasSecondaryIndices() const { - return !secondary_indices.empty(); + return !metadata.secondary_indices.empty(); } const ConstraintsDescription & IStorage::getConstraints() const { - return constraints; + return metadata.constraints; } Block IStorage::getSampleBlock() const @@ -292,17 +292,17 @@ void IStorage::setColumns(ColumnsDescription columns_) { if (columns_.getOrdinary().empty()) throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - columns = std::move(columns_); + metadata.columns = std::move(columns_); } void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_) { - secondary_indices = std::move(secondary_indices_); + metadata.secondary_indices = std::move(secondary_indices_); } void IStorage::setConstraints(ConstraintsDescription constraints_) { - constraints = std::move(constraints_); + metadata.constraints = std::move(constraints_); } bool IStorage::isVirtualColumn(const String & column_name) const @@ -373,11 +373,6 @@ TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id, return result; } -StorageInMemoryMetadata IStorage::getInMemoryMetadata() const -{ - return StorageInMemoryMetadata(getColumns(), getSecondaryIndices(), getConstraints()); -} - void IStorage::alter( const AlterCommands & params, const Context & context, @@ -385,8 +380,8 @@ void IStorage::alter( { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, context); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); + params.apply(old_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); setColumns(std::move(metadata.columns)); } @@ -423,135 +418,135 @@ NamesAndTypesList IStorage::getVirtuals() const const KeyDescription & IStorage::getPartitionKey() const { - return partition_key; + return metadata.partition_key; } void IStorage::setPartitionKey(const KeyDescription & partition_key_) { - partition_key = partition_key_; + metadata.partition_key = partition_key_; } bool IStorage::isPartitionKeyDefined() const { - return partition_key.definition_ast != nullptr; + return metadata.partition_key.definition_ast != nullptr; } bool IStorage::hasPartitionKey() const { - return !partition_key.column_names.empty(); + return !metadata.partition_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPartitionKey() const { if (hasPartitionKey()) - return partition_key.expression->getRequiredColumns(); + return metadata.partition_key.expression->getRequiredColumns(); return {}; } const KeyDescription & IStorage::getSortingKey() const { - return sorting_key; + return metadata.sorting_key; } void IStorage::setSortingKey(const KeyDescription & sorting_key_) { - sorting_key = sorting_key_; + metadata.sorting_key = sorting_key_; } bool IStorage::isSortingKeyDefined() const { - return sorting_key.definition_ast != nullptr; + return metadata.sorting_key.definition_ast != nullptr; } bool 
IStorage::hasSortingKey() const { - return !sorting_key.column_names.empty(); + return !metadata.sorting_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSortingKey() const { if (hasSortingKey()) - return sorting_key.expression->getRequiredColumns(); + return metadata.sorting_key.expression->getRequiredColumns(); return {}; } Names IStorage::getSortingKeyColumns() const { if (hasSortingKey()) - return sorting_key.column_names; + return metadata.sorting_key.column_names; return {}; } const KeyDescription & IStorage::getPrimaryKey() const { - return primary_key; + return metadata.primary_key; } void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { - primary_key = primary_key_; + metadata.primary_key = primary_key_; } bool IStorage::isPrimaryKeyDefined() const { - return primary_key.definition_ast != nullptr; + return metadata.primary_key.definition_ast != nullptr; } bool IStorage::hasPrimaryKey() const { - return !primary_key.column_names.empty(); + return !metadata.primary_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPrimaryKey() const { if (hasPrimaryKey()) - return primary_key.expression->getRequiredColumns(); + return metadata.primary_key.expression->getRequiredColumns(); return {}; } Names IStorage::getPrimaryKeyColumns() const { if (hasSortingKey()) - return primary_key.column_names; + return metadata.primary_key.column_names; return {}; } const KeyDescription & IStorage::getSamplingKey() const { - return sampling_key; + return metadata.sampling_key; } void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { - sampling_key = sampling_key_; + metadata.sampling_key = sampling_key_; } bool IStorage::isSamplingKeyDefined() const { - return sampling_key.definition_ast != nullptr; + return metadata.sampling_key.definition_ast != nullptr; } bool IStorage::hasSamplingKey() const { - return !sampling_key.column_names.empty(); + return !metadata.sampling_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSampling() const { if (hasSamplingKey()) - return sampling_key.expression->getRequiredColumns(); + return metadata.sampling_key.expression->getRequiredColumns(); return {}; } const TTLTableDescription & IStorage::getTableTTLs() const { - return table_ttl; + return metadata.table_ttl; } void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) { - table_ttl = table_ttl_; + metadata.table_ttl = table_ttl_; } bool IStorage::hasAnyTableTTL() const @@ -561,37 +556,37 @@ bool IStorage::hasAnyTableTTL() const const TTLColumnsDescription & IStorage::getColumnTTLs() const { - return column_ttls_by_name; + return metadata.column_ttls_by_name; } void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) { - column_ttls_by_name = column_ttls_by_name_; + metadata.column_ttls_by_name = column_ttls_by_name_; } bool IStorage::hasAnyColumnTTL() const { - return !column_ttls_by_name.empty(); + return !metadata.column_ttls_by_name.empty(); } const TTLDescription & IStorage::getRowsTTL() const { - return table_ttl.rows_ttl; + return metadata.table_ttl.rows_ttl; } bool IStorage::hasRowsTTL() const { - return table_ttl.rows_ttl.expression != nullptr; + return metadata.table_ttl.rows_ttl.expression != nullptr; } const TTLDescriptions & IStorage::getMoveTTLs() const { - return table_ttl.move_ttl; + return metadata.table_ttl.move_ttl; } bool IStorage::hasAnyMoveTTL() const { - return !table_ttl.move_ttl.empty(); + return !metadata.table_ttl.move_ttl.empty(); } @@ -656,30 +651,30 @@ ColumnDependencies 
IStorage::getColumnDependencies(const NameSet & updated_colum const ASTPtr & IStorage::getSettingsChanges() const { - return settings_changes; + return metadata.settings_changes; } void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { if (settings_changes_) - settings_changes = settings_changes_->clone(); + metadata.settings_changes = settings_changes_->clone(); else - settings_changes = nullptr; + metadata.settings_changes = nullptr; } const SelectQueryDescription & IStorage::getSelectQuery() const { - return select; + return metadata.select; } void IStorage::setSelectQuery(const SelectQueryDescription & select_) { - select = select_; + metadata.select = select_; } bool IStorage::hasSelectQuery() const { - return select.select_query != nullptr; + return metadata.select.select_query != nullptr; } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4f6787e0c61..82012eacc03 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -10,12 +10,7 @@ #include #include #include -#include -#include -#include #include -#include -#include #include #include #include @@ -157,15 +152,13 @@ public: /// thread-unsafe part. lockStructure must be acquired /// Storage settings const ASTPtr & getSettingsChanges() const; void setSettingsChanges(const ASTPtr & settings_changes_); - bool hasSettingsChanges() const { return settings_changes != nullptr; } + bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } const SelectQueryDescription & getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; - /// Returns storage metadata copy. Direct modification of - /// result structure doesn't affect storage. - virtual StorageInMemoryMetadata getInMemoryMetadata() const; + const StorageInMemoryMetadata & getInMemoryMetadata() const { return metadata; } Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. @@ -210,21 +203,8 @@ private: StorageID storage_id; mutable std::mutex id_mutex; - ColumnsDescription columns; - IndicesDescription secondary_indices; - ConstraintsDescription constraints; - - KeyDescription partition_key; - KeyDescription primary_key; - KeyDescription sorting_key; - KeyDescription sampling_key; - - TTLColumnsDescription column_ttls_by_name; - TTLTableDescription table_ttl; - - ASTPtr settings_changes; - SelectQueryDescription select; + StorageInMemoryMetadata metadata; private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; @@ -462,7 +442,7 @@ public: /// struct). void setPartitionKey(const KeyDescription & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. - ASTPtr getPartitionKeyAST() const { return partition_key.definition_ast; } + ASTPtr getPartitionKeyAST() const { return metadata.partition_key.definition_ast; } /// Storage has user-defined (in CREATE query) partition key. bool isPartitionKeyDefined() const; /// Storage has partition key. @@ -477,7 +457,7 @@ public: /// struct). void setSortingKey(const KeyDescription & sorting_key_); /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. 
-    ASTPtr getSortingKeyAST() const { return sorting_key.definition_ast; }
+    ASTPtr getSortingKeyAST() const { return metadata.sorting_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sorting key.
     bool isSortingKeyDefined() const;
     /// Storage has sorting key. It means, that it contains at least one column.
@@ -494,7 +474,7 @@ public:
     /// struct).
     void setPrimaryKey(const KeyDescription & primary_key_);
     /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none.
-    ASTPtr getPrimaryKeyAST() const { return primary_key.definition_ast; }
+    ASTPtr getPrimaryKeyAST() const { return metadata.primary_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sorting key.
     bool isPrimaryKeyDefined() const;
     /// Storage has primary key (maybe part of some other key). It means, that
@@ -512,7 +492,7 @@ public:
     /// struct).
     void setSamplingKey(const KeyDescription & sampling_key_);
     /// Returns sampling expression AST for storage or nullptr if there is none.
-    ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; }
+    ASTPtr getSamplingKeyAST() const { return metadata.sampling_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sampling key.
     bool isSamplingKeyDefined() const;
     /// Storage has sampling key.
diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp
index f3afe8c03a2..6f8406d9a3f 100644
--- a/src/Storages/IndicesDescription.cpp
+++ b/src/Storages/IndicesDescription.cpp
@@ -19,6 +19,43 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
 };
 
+IndexDescription::IndexDescription(const IndexDescription & other)
+    : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
+    , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr)
+    , name(other.name)
+    , type(other.type)
+    , expression(other.expression) /// actions never changed
+    , arguments(other.arguments)
+    , column_names(other.column_names)
+    , data_types(other.data_types)
+    , sample_block(other.sample_block)
+    , granularity(other.granularity)
+{
+}
+
+
+IndexDescription & IndexDescription::operator=(const IndexDescription & other)
+{
+    if (other.definition_ast)
+        definition_ast = other.definition_ast->clone();
+    else
+        definition_ast.reset();
+
+    if (other.expression_list_ast)
+        expression_list_ast = other.expression_list_ast->clone();
+    else
+        expression_list_ast.reset();
+
+    name = other.name;
+    type = other.type;
+    expression = other.expression;
+    arguments = other.arguments;
+    column_names = other.column_names;
+    data_types = other.data_types;
+    sample_block = other.sample_block;
+    granularity = other.granularity;
+    return *this;
+}
 
 IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context)
 {
diff --git a/src/Storages/IndicesDescription.h b/src/Storages/IndicesDescription.h
index 16932dda6a2..b4d225c6511 100644
--- a/src/Storages/IndicesDescription.h
+++ b/src/Storages/IndicesDescription.h
@@ -48,6 +48,13 @@ struct IndexDescription
 
     /// Parse index from definition AST
     static IndexDescription getIndexFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context);
+
+    IndexDescription() = default;
+
+    /// We need custom copy constructors because we don't want
+    /// unintentionally share AST variables and modify them.
+    IndexDescription(const IndexDescription & other);
+    IndexDescription & operator=(const IndexDescription & other);
 };
 
 /// All secondary indices in storage
diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp
new file mode 100644
index 00000000000..78c7990b614
--- /dev/null
+++ b/src/Storages/KeyDescription.cpp
@@ -0,0 +1,69 @@
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+KeyDescription::KeyDescription(const KeyDescription & other)
+    : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)
+    , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr)
+    , expression(other.expression)
+    , sample_block(other.sample_block)
+    , column_names(other.column_names)
+    , data_types(other.data_types)
+{
+}
+
+KeyDescription & KeyDescription::operator=(const KeyDescription & other)
+{
+    if (other.definition_ast)
+        definition_ast = other.definition_ast->clone();
+    else
+        definition_ast.reset();
+
+    if (other.expression_list_ast)
+        expression_list_ast = other.expression_list_ast->clone();
+    expression = other.expression;
+    sample_block = other.sample_block;
+    column_names = other.column_names;
+    data_types = other.data_types;
+    return *this;
+}
+
+
+KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, ASTPtr additional_key_expression)
+{
+    KeyDescription result;
+    result.definition_ast = definition_ast;
+    result.expression_list_ast = extractKeyExpressionList(definition_ast);
+
+    if (additional_key_expression)
+        result.expression_list_ast->children.push_back(additional_key_expression);
+
+    if (result.expression_list_ast->children.empty())
+        return result;
+
+    const auto & children = result.expression_list_ast->children;
+    for (const auto & child : children)
+        result.column_names.emplace_back(child->getColumnName());
+
+    {
+        auto expr = result.expression_list_ast->clone();
+        auto syntax_result = SyntaxAnalyzer(context).analyze(expr, columns.getAllPhysical());
+        result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(true);
+        result.sample_block = result.expression->getSampleBlock();
+    }
+
+    for (size_t i = 0; i < result.sample_block.columns(); ++i)
+        result.data_types.emplace_back(result.sample_block.getByPosition(i).type);
+
+    return result;
+}
+
+}
diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h
new file mode 100644
index 00000000000..980bd3baf00
--- /dev/null
+++ b/src/Storages/KeyDescription.h
@@ -0,0 +1,45 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+/// Common structure for primary, partition and other storage keys
+struct KeyDescription
+{
+    /// User defined AST in CREATE/ALTER query. This field may be empty, but key
+    /// can exist because some of them may be set implicitly (for example,
+    /// primary key in merge tree can be part of sorting key)
+    ASTPtr definition_ast;
+
+    /// ASTExpressionList with key fields, example: (x, toStartOfMonth(date)).
+    ASTPtr expression_list_ast;
+
+    /// Expression from expression_list_ast created by ExpressionAnalyzer. Useful,
+    /// when you need to get required columns for key, example: a, date, b.
+    ExpressionActionsPtr expression;
+
+    /// Sample block with key columns (names, types, empty column)
+    Block sample_block;
+
+    /// Column names in key definition, example: x, toStartOfMonth(date), a * b.
+    Names column_names;
+
+    /// Types from sample block ordered in columns order.
+    DataTypes data_types;
+
+    /// Parse key structure from key definition. Requires all columns, available
+    /// in storage.
+    static KeyDescription getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, ASTPtr additional_key_expression = nullptr);
+
+    KeyDescription() = default;
+
+    /// We need custom copy constructors because we don't want
+    /// unintentionally share AST variables and modify them.
+    KeyDescription(const KeyDescription & other);
+    KeyDescription & operator=(const KeyDescription & other);
+};
+
+}
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 8b046673556..a4d709a4681 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -144,23 +144,21 @@ MergeTreeData::MergeTreeData(
     if (relative_data_path.empty())
         throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME);
 
-    setSettingsChanges(metadata.settings_ast);
+    setSettingsChanges(metadata.settings_changes);
     const auto settings = getSettings();
     setProperties(metadata, /*only_check*/ false, attach);
 
     /// NOTE: using the same columns list as is read when performing actual merges.
     merging_params.check(getColumns().getAllPhysical());
 
-    if (metadata.sample_by_ast != nullptr)
+    if (metadata.sampling_key.definition_ast != nullptr)
     {
-        KeyDescription candidate_sampling_key = KeyDescription::getKeyFromAST(metadata.sample_by_ast, getColumns(), global_context);
-
         const auto & pk_sample_block = getPrimaryKey().sample_block;
-        if (!pk_sample_block.has(candidate_sampling_key.column_names[0]) && !attach
+        if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !attach
             && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility.
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - setSamplingKey(candidate_sampling_key); + setSamplingKey(metadata.sampling_key); } MergeTreeDataFormatVersion min_format_version(0); @@ -169,7 +167,8 @@ MergeTreeData::MergeTreeData( try { auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); - initPartitionKey(partition_by_ast); + auto partition_key = KeyDescription::getKeyFromAST(partition_by_ast, getColumns(), global_context); + initPartitionKey(partition_key); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -184,11 +183,11 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - initPartitionKey(metadata.partition_by_ast); + initPartitionKey(metadata.partition_key); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } - setTTLExpressions(metadata.columns, metadata.ttl_for_table_ast); + setTTLExpressions(metadata.columns, metadata.table_ttl); /// format_file always contained on any data path PathWithDisk version_file; @@ -245,32 +244,6 @@ MergeTreeData::MergeTreeData( LOG_WARNING(log, "{} Settings 'min_bytes_for_wide_part' and 'min_bytes_for_wide_part' will be ignored.", reason); } - -StorageInMemoryMetadata MergeTreeData::getInMemoryMetadata() const -{ - StorageInMemoryMetadata metadata(getColumns(), getSecondaryIndices(), getConstraints()); - - if (isPartitionKeyDefined()) - metadata.partition_by_ast = getPartitionKeyAST()->clone(); - - if (isSortingKeyDefined()) - metadata.order_by_ast = getSortingKeyAST()->clone(); - - if (isPrimaryKeyDefined()) - metadata.primary_key_ast = getPrimaryKeyAST()->clone(); - - if (hasAnyTableTTL()) - metadata.ttl_for_table_ast = getTableTTLs().definition_ast->clone(); - - if (isSamplingKeyDefined()) - metadata.sample_by_ast = getSamplingKeyAST()->clone(); - - if (hasSettingsChanges()) - metadata.settings_ast = getSettingsChanges(); - - return metadata; -} - StoragePolicyPtr MergeTreeData::getStoragePolicy() const { return global_context.getStoragePolicy(getSettings()->storage_policy); @@ -306,35 +279,43 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool only_check, bool attach) { - if (!metadata.order_by_ast) + KeyDescription new_sorting_key = metadata.sorting_key; + KeyDescription new_primary_key = metadata.primary_key; + + if (!new_sorting_key.definition_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); - ASTPtr new_sorting_key_expr_list = extractKeyExpressionList(metadata.order_by_ast); - ASTPtr new_primary_key_expr_list = metadata.primary_key_ast - ? 
extractKeyExpressionList(metadata.primary_key_ast) : new_sorting_key_expr_list->clone(); - if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) - new_sorting_key_expr_list->children.push_back(std::make_shared(merging_params.version_column)); + new_sorting_key = KeyDescription::getKeyFromAST( + metadata.sorting_key.definition_ast, + metadata.columns, + global_context, + std::make_shared(merging_params.version_column)); - size_t primary_key_size = new_primary_key_expr_list->children.size(); - size_t sorting_key_size = new_sorting_key_expr_list->children.size(); + /// Primary key not defined at all + if (new_primary_key.definition_ast == nullptr) + { + /// We copy sorting key, and restore definition_ast to empty value + new_primary_key = metadata.sorting_key; + new_primary_key.definition_ast = nullptr; + } + + size_t sorting_key_size = new_sorting_key.column_names.size(); + size_t primary_key_size = new_primary_key.column_names.size(); if (primary_key_size > sorting_key_size) throw Exception("Primary key must be a prefix of the sorting key, but its length: " + toString(primary_key_size) + " is greater than the sorting key length: " + toString(sorting_key_size), ErrorCodes::BAD_ARGUMENTS); - Names new_primary_key_columns; - Names new_sorting_key_columns; NameSet primary_key_columns_set; for (size_t i = 0; i < sorting_key_size; ++i) { - String sorting_key_column = new_sorting_key_expr_list->children[i]->getColumnName(); - new_sorting_key_columns.push_back(sorting_key_column); + const String & sorting_key_column = new_sorting_key.column_names[i]; if (i < primary_key_size) { - String pk_column = new_primary_key_expr_list->children[i]->getColumnName(); + const String & pk_column = new_primary_key.column_names[i]; if (pk_column != sorting_key_column) throw Exception("Primary key must be a prefix of the sorting key, but in position " + toString(i) + " its column is " + pk_column + ", not " + sorting_key_column, @@ -343,7 +324,6 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool if (!primary_key_columns_set.emplace(pk_column).second) throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); - new_primary_key_columns.push_back(pk_column); } } @@ -355,6 +335,9 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key). 
+ Names new_primary_key_columns = new_primary_key.column_names; + Names new_sorting_key_columns = new_sorting_key.column_names; + ASTPtr added_key_column_expr_list = std::make_shared(); const auto & old_sorting_key_columns = getSortingKeyColumns(); for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i) @@ -362,12 +345,12 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool if (old_i < old_sorting_key_columns.size()) { if (new_sorting_key_columns[new_i] != old_sorting_key_columns[old_i]) - added_key_column_expr_list->children.push_back(new_sorting_key_expr_list->children[new_i]); + added_key_column_expr_list->children.push_back(new_sorting_key.expression_list_ast->children[new_i]); else ++old_i; } else - added_key_column_expr_list->children.push_back(new_sorting_key_expr_list->children[new_i]); + added_key_column_expr_list->children.push_back(new_sorting_key.expression_list_ast->children[new_i]); } if (!added_key_column_expr_list->children.empty()) @@ -394,37 +377,9 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool } } - auto new_sorting_key_syntax = SyntaxAnalyzer(global_context).analyze(new_sorting_key_expr_list, all_columns); - auto new_sorting_key_expr = ExpressionAnalyzer(new_sorting_key_expr_list, new_sorting_key_syntax, global_context) - .getActions(false); - auto new_sorting_key_sample = - ExpressionAnalyzer(new_sorting_key_expr_list, new_sorting_key_syntax, global_context) - .getActions(true)->getSampleBlock(); - - checkKeyExpression(*new_sorting_key_expr, new_sorting_key_sample, "Sorting"); - - auto new_primary_key_syntax = SyntaxAnalyzer(global_context).analyze(new_primary_key_expr_list, all_columns); - auto new_primary_key_expr = ExpressionAnalyzer(new_primary_key_expr_list, new_primary_key_syntax, global_context) - .getActions(false); - - Block new_primary_key_sample; - DataTypes new_primary_key_data_types; - for (size_t i = 0; i < primary_key_size; ++i) - { - const auto & elem = new_sorting_key_sample.getByPosition(i); - new_primary_key_sample.insert(elem); - new_primary_key_data_types.push_back(elem.type); - } - - DataTypes new_sorting_key_data_types; - for (size_t i = 0; i < sorting_key_size; ++i) - { - new_sorting_key_data_types.push_back(new_sorting_key_sample.getByPosition(i).type); - } - if (!metadata.secondary_indices.empty()) { - std::set indices_names; + std::unordered_set indices_names; for (const auto & index : metadata.secondary_indices) { @@ -444,22 +399,8 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool { setColumns(std::move(metadata.columns)); - KeyDescription new_sorting_key; - new_sorting_key.definition_ast = metadata.order_by_ast; - new_sorting_key.column_names = std::move(new_sorting_key_columns); - new_sorting_key.expression_list_ast = std::move(new_sorting_key_expr_list); - new_sorting_key.expression = std::move(new_sorting_key_expr); - new_sorting_key.sample_block = std::move(new_sorting_key_sample); - new_sorting_key.data_types = std::move(new_sorting_key_data_types); setSortingKey(new_sorting_key); - KeyDescription new_primary_key; - new_primary_key.definition_ast = metadata.primary_key_ast; - new_primary_key.column_names = std::move(new_primary_key_columns); - new_primary_key.expression_list_ast = std::move(new_primary_key_expr_list); - new_primary_key.expression = std::move(new_primary_key_expr); - new_primary_key.sample_block = std::move(new_primary_key_sample); - new_primary_key.data_types = std::move(new_primary_key_data_types); 
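The field-by-field assembly deleted above is exactly what KeyDescription::getKeyFromAST now performs in one place. A minimal sketch of such a helper, reconstructed from the deleted analyzer calls (illustrative only; the optional additional_key_expression parameter from the declaration is omitted for brevity):

    KeyDescription KeyDescription::getKeyFromAST(
        const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context)
    {
        KeyDescription result;
        result.definition_ast = definition_ast;
        result.expression_list_ast = extractKeyExpressionList(definition_ast);

        /// A single analysis pass yields the actions, the sample block and the types,
        /// so every caller receives a fully initialized key instead of assembling it by hand.
        auto syntax = SyntaxAnalyzer(context).analyze(result.expression_list_ast, columns.getAllPhysical());
        result.expression = ExpressionAnalyzer(result.expression_list_ast, syntax, context).getActions(false);
        result.sample_block = ExpressionAnalyzer(result.expression_list_ast, syntax, context)
            .getActions(true)->getSampleBlock();

        for (size_t i = 0; i < result.sample_block.columns(); ++i)
        {
            result.column_names.emplace_back(result.sample_block.getByPosition(i).name);
            result.data_types.emplace_back(result.sample_block.getByPosition(i).type);
        }
        return result;
    }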
        setPrimaryKey(new_primary_key);

        setSecondaryIndices(metadata.secondary_indices);
@@ -521,10 +462,8 @@ ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node)
 }

-void MergeTreeData::initPartitionKey(ASTPtr partition_by_ast)
+void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key)
 {
-    KeyDescription new_partition_key = KeyDescription::getKeyFromAST(partition_by_ast, getColumns(), global_context);
-
     if (new_partition_key.expression_list_ast->children.empty())
         return;

@@ -580,10 +519,10 @@ void MergeTreeData::initPartitionKey(ASTPtr partition_by_ast)
 }

+/// TODO: pass per-column TTL descriptions here instead of the whole ColumnsDescription
 void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns,
-        const ASTPtr & new_ttl_table_ast, bool only_check)
+        const TTLTableDescription & new_table_ttl, bool only_check)
 {
-    auto new_column_ttls_asts = new_columns.getColumnTTLs();

     TTLColumnsDescription new_column_ttl_by_name = getColumnTTLs();

@@ -614,56 +553,24 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns,
             setColumnTTLs(new_column_ttl_by_name);
     }

-    if (new_ttl_table_ast)
+    if (new_table_ttl.definition_ast)
     {
-        TTLDescriptions update_move_ttl_entries;
-        TTLDescription update_rows_ttl_entry;
-
-        bool seen_delete_ttl = false;
-        for (const auto & ttl_element_ptr : new_ttl_table_ast->children)
+        for (const auto & move_ttl : new_table_ttl.move_ttl)
         {
-            const auto * ttl_element = ttl_element_ptr->as<ASTTTLElement>();
-            if (!ttl_element)
-                throw Exception("Unexpected AST element in TTL expression", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
-
-            if (ttl_element->destination_type == DataDestinationType::DELETE)
+            if (!getDestinationForTTL(move_ttl))
             {
-                if (seen_delete_ttl)
-                {
-                    throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION);
-                }
-
-                update_rows_ttl_entry = TTLDescription::getTTLFromAST(ttl_element_ptr, new_columns, global_context, getPrimaryKey());
-
-                seen_delete_ttl = true;
-            }
-            else
-            {
-                auto new_ttl_entry = TTLDescription::getTTLFromAST(ttl_element_ptr, new_columns, global_context, getPrimaryKey());
-
-                if (!getDestinationForTTL(new_ttl_entry))
-                {
-                    String message;
-                    if (new_ttl_entry.destination_type == DataDestinationType::DISK)
-                        message = "No such disk " + backQuote(new_ttl_entry.destination_name) + " for given storage policy.";
-                    else
-                        message = "No such volume " + backQuote(new_ttl_entry.destination_name) + " for given storage policy.";
-                    throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION);
-                }
-
-                update_move_ttl_entries.emplace_back(std::move(new_ttl_entry));
+                String message;
+                if (move_ttl.destination_type == DataDestinationType::DISK)
+                    message = "No such disk " + backQuote(move_ttl.destination_name) + " for given storage policy.";
+                else
+                    message = "No such volume " + backQuote(move_ttl.destination_name) + " for given storage policy.";
+                throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION);
             }
         }

+        if (!only_check)
         {
-            TTLTableDescription new_table_ttl
-            {
-                .definition_ast = new_ttl_table_ast,
-                .rows_ttl = update_rows_ttl_entry,
-                .move_ttl = update_move_ttl_entries,
-            };
-
             auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex);
             setTableTTLs(new_table_ttl);
         }
@@ -1458,13 +1365,13 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S

     setProperties(metadata, /* only_check = */ true);

-    setTTLExpressions(metadata.columns, metadata.ttl_for_table_ast, /* only_check = */ true);
+    setTTLExpressions(metadata.columns, metadata.table_ttl, /* only_check = */ true);

    if
(hasSettingsChanges()) { const auto & current_changes = getSettingsChanges()->as().changes; - const auto & new_changes = metadata.settings_ast->as().changes; + const auto & new_changes = metadata.settings_changes->as().changes; for (const auto & changed_setting : new_changes) { if (MergeTreeSettings::findIndex(changed_setting.name) == MergeTreeSettings::npos) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index dcc6174ef5a..8b24bea7830 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -333,9 +333,6 @@ public: BrokenPartCallback broken_part_callback_ = [](const String &){}); - /// See comments about methods below in IStorage interface - StorageInMemoryMetadata getInMemoryMetadata() const override; - StoragePolicyPtr getStoragePolicy() const override; bool supportsPrewhere() const override { return true; } @@ -785,10 +782,10 @@ protected: void setProperties(const StorageInMemoryMetadata & metadata, bool only_check = false, bool attach = false); - void initPartitionKey(ASTPtr partition_by_ast); + void initPartitionKey(const KeyDescription & new_partition_key); void setTTLExpressions(const ColumnsDescription & columns, - const ASTPtr & new_ttl_table_ast, bool only_check = false); + const TTLTableDescription & new_table_ttl, bool only_check = false); void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 4cecba1faff..3031402715a 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -39,11 +39,6 @@ public: return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context); } - StorageInMemoryMetadata getInMemoryMetadata() const override - { - return part->storage.getInMemoryMetadata(); - } - NamesAndTypesList getVirtuals() const override { return part->storage.getVirtuals(); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 26a02f8904c..3d7c215b921 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -572,47 +572,46 @@ static StoragePtr create(const StorageFactory::Arguments & args) } String date_column_name; - ASTPtr partition_by_ast; - ASTPtr order_by_ast; - ASTPtr primary_key_ast; - ASTPtr sample_by_ast; - ASTPtr ttl_table_ast; - ASTPtr settings_ast; - IndicesDescription indices_description; - ConstraintsDescription constraints_description; + + StorageInMemoryMetadata metadata; + metadata.columns = args.columns; std::unique_ptr storage_settings = std::make_unique(args.context.getMergeTreeSettings()); if (is_extended_storage_def) { if (args.storage_def->partition_by) - partition_by_ast = args.storage_def->partition_by->ptr(); + metadata.partition_key = KeyDescription::getKeyFromAST( + args.storage_def->partition_by->ptr(), metadata.columns, args.context); if (!args.storage_def->order_by) throw Exception("You must provide an ORDER BY expression in the table definition. 
" "If you don't want this table to be sorted, use ORDER BY tuple()", ErrorCodes::BAD_ARGUMENTS); - order_by_ast = args.storage_def->order_by->ptr(); + metadata.sorting_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context); if (args.storage_def->primary_key) - primary_key_ast = args.storage_def->primary_key->ptr(); + metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); if (args.storage_def->sample_by) - sample_by_ast = args.storage_def->sample_by->ptr(); + metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, args.context); if (args.storage_def->ttl_table) - ttl_table_ast = args.storage_def->ttl_table->ptr(); - + metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + args.storage_def->ttl_table->ptr(), + metadata.columns, + args.context, + metadata.primary_key); if (args.query.columns_list && args.query.columns_list->indices) for (auto & index : args.query.columns_list->indices->children) - indices_description.push_back(IndexDescription::getIndexFromAST(index, args.columns, args.context)); + metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, args.columns, args.context)); storage_settings->loadFromQuery(*args.storage_def); if (args.storage_def->settings) - settings_ast = args.storage_def->settings->ptr(); + metadata.settings_changes = args.storage_def->settings->ptr(); } else { @@ -627,12 +626,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// If there is an expression for sampling if (arg_cnt - arg_num == 3) { - sample_by_ast = engine_args[arg_num]; + metadata.sampling_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context); ++arg_num; } /// Now only two parameters remain - primary_key, index_granularity. 
- order_by_ast = engine_args[arg_num]; + metadata.sorting_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context); ++arg_num; const auto * ast = engine_args[arg_num]->as(); @@ -648,18 +647,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (arg_num != arg_cnt) throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS); - if (!args.attach && !indices_description.empty() && !args.local_context.getSettingsRef().allow_experimental_data_skipping_indices) + if (!args.attach && !metadata.secondary_indices.empty() && !args.local_context.getSettingsRef().allow_experimental_data_skipping_indices) throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); - StorageInMemoryMetadata metadata(args.columns, indices_description, args.constraints); - metadata.partition_by_ast = partition_by_ast; - metadata.order_by_ast = order_by_ast; - metadata.primary_key_ast = primary_key_ast; - metadata.ttl_for_table_ast = ttl_table_ast; - metadata.sample_by_ast = sample_by_ast; - metadata.settings_ast = settings_ast; - if (replicated) return StorageReplicatedMergeTree::create( zookeeper_path, replica_name, args.attach, args.table_id, args.relative_data_path, diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp new file mode 100644 index 00000000000..3d2ba27a62c --- /dev/null +++ b/src/Storages/SelectQueryDescription.cpp @@ -0,0 +1,117 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; +extern const int LOGICAL_ERROR; +} + +SelectQueryDescription::SelectQueryDescription(const SelectQueryDescription & other) + : select_table_id(other.select_table_id) + , select_query(other.select_query ? other.select_query->clone() : nullptr) + , inner_query(other.inner_query ? other.inner_query->clone() : nullptr) +{ +} + +SelectQueryDescription & SelectQueryDescription::SelectQueryDescription::operator=(const SelectQueryDescription & other) +{ + select_table_id = other.select_table_id; + if (other.select_query) + select_query = other.select_query->clone(); + else + select_query.reset(); + + if (other.inner_query) + inner_query = other.inner_query->clone(); + else + inner_query.reset(); + return *this; +} + + +namespace +{ + +StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, const Context & context, bool add_default_db = true) +{ + if (add_default_db) + { + AddDefaultDatabaseVisitor visitor(context.getCurrentDatabase(), nullptr); + visitor.visit(query); + } + + if (auto db_and_table = getDatabaseAndTable(query, 0)) + { + return StorageID(db_and_table->database, db_and_table->table/*, db_and_table->uuid*/); + } + else if (auto subquery = extractTableExpression(query, 0)) + { + auto * ast_select = subquery->as(); + if (!ast_select) + throw Exception("Logical error while creating StorageMaterializedView. 
" + "Could not retrieve table name from select query.", + DB::ErrorCodes::LOGICAL_ERROR); + if (ast_select->list_of_selects->children.size() != 1) + throw Exception("UNION is not supported for MATERIALIZED VIEW", + ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + + auto & inner_query = ast_select->list_of_selects->children.at(0); + + return extractDependentTableFromSelectQuery(inner_query->as(), context, false); + } + else + return StorageID::createEmpty(); +} + + +void checkAllowedQueries(const ASTSelectQuery & query) +{ + if (query.prewhere() || query.final() || query.sampleSize()) + throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + + ASTPtr subquery = extractTableExpression(query, 0); + if (!subquery) + return; + + if (const auto * ast_select = subquery->as()) + { + if (ast_select->list_of_selects->children.size() != 1) + throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + + const auto & inner_query = ast_select->list_of_selects->children.at(0); + + checkAllowedQueries(inner_query->as()); + } +} + +} + +SelectQueryDescription SelectQueryDescription::getSelectQueryFromASTForMatView(const ASTPtr & select, const Context & context) +{ + auto & new_select = select->as(); + + if (new_select.list_of_selects->children.size() != 1) + throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + + SelectQueryDescription result; + + result.inner_query = new_select.list_of_selects->children.at(0)->clone(); + + auto & select_query = result.inner_query->as(); + checkAllowedQueries(select_query); + result.select_table_id = extractDependentTableFromSelectQuery(select_query, context); + result.select_query = select->clone(); + + return result; +} + +} diff --git a/src/Storages/SelectQueryDescription.h b/src/Storages/SelectQueryDescription.h new file mode 100644 index 00000000000..ce3ca44c147 --- /dev/null +++ b/src/Storages/SelectQueryDescription.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +/// Select query for different view in storages +struct SelectQueryDescription +{ + /// Table id for select query + StorageID select_table_id = StorageID::createEmpty(); + /// Select query itself (ASTSelectWithUnionQuery) + ASTPtr select_query; + /// First query from select_query list + ASTPtr inner_query; + + /// Parse description from select query for materialized view. Also + /// validates query. 
+    static SelectQueryDescription getSelectQueryFromASTForMatView(const ASTPtr & select, const Context & context);
+
+    SelectQueryDescription() = default;
+    SelectQueryDescription(const SelectQueryDescription & other);
+    SelectQueryDescription & operator=(const SelectQueryDescription & other);
+};
+
+}
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index 1b7ec39b9e3..67e0c7f6028 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -12,73 +12,4 @@ StorageInMemoryMetadata::StorageInMemoryMetadata(
     , constraints(constraints_)
 {
 }
-
-StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other)
-    : columns(other.columns)
-    , secondary_indices(other.secondary_indices)
-    , constraints(other.constraints)
-{
-    if (other.partition_by_ast)
-        partition_by_ast = other.partition_by_ast->clone();
-    if (other.order_by_ast)
-        order_by_ast = other.order_by_ast->clone();
-    if (other.primary_key_ast)
-        primary_key_ast = other.primary_key_ast->clone();
-    if (other.ttl_for_table_ast)
-        ttl_for_table_ast = other.ttl_for_table_ast->clone();
-    if (other.sample_by_ast)
-        sample_by_ast = other.sample_by_ast->clone();
-    if (other.settings_ast)
-        settings_ast = other.settings_ast->clone();
-    if (other.select)
-        select = other.select->clone();
-}
-
-StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other)
-{
-    if (this == &other)
-        return *this;
-
-    columns = other.columns;
-    secondary_indices = other.secondary_indices;
-    constraints = other.constraints;
-
-    if (other.partition_by_ast)
-        partition_by_ast = other.partition_by_ast->clone();
-    else
-        partition_by_ast.reset();
-
-    if (other.order_by_ast)
-        order_by_ast = other.order_by_ast->clone();
-    else
-        order_by_ast.reset();
-
-    if (other.primary_key_ast)
-        primary_key_ast = other.primary_key_ast->clone();
-    else
-        primary_key_ast.reset();
-
-    if (other.ttl_for_table_ast)
-        ttl_for_table_ast = other.ttl_for_table_ast->clone();
-    else
-        ttl_for_table_ast.reset();
-
-    if (other.sample_by_ast)
-        sample_by_ast = other.sample_by_ast->clone();
-    else
-        sample_by_ast.reset();
-
-    if (other.settings_ast)
-        settings_ast = other.settings_ast->clone();
-    else
-        settings_ast.reset();
-
-    if (other.select)
-        select = other.select->clone();
-    else
-        select.reset();
-
-    return *this;
-}
-
 }
diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index 39374166d5e..ba49cfa210d 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -1,9 +1,13 @@
 #pragma once

-#include
-#include
-#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+
 namespace DB
 {
@@ -21,26 +25,25 @@ struct StorageInMemoryMetadata
     /// Table constraints. Currently supported for MergeTree only.
     ConstraintsDescription constraints;
     /// PARTITION BY expression. Currently supported for MergeTree only.
-    ASTPtr partition_by_ast = nullptr;
+    KeyDescription partition_key;
+    /// PRIMARY KEY expression. If absent, then equal to the sorting key.
+    KeyDescription primary_key;
     /// ORDER BY expression. Required field for all MergeTree tables
     /// even in old syntax MergeTree(partition_key, order_by, ...)
-    ASTPtr order_by_ast = nullptr;
-    /// PRIMARY KEY expression. If absent, than equal to order_by_ast.
-    ASTPtr primary_key_ast = nullptr;
-    /// TTL expression for whole table. Supported for MergeTree only.
- ASTPtr ttl_for_table_ast = nullptr; + KeyDescription sorting_key; /// SAMPLE BY expression. Supported for MergeTree only. - ASTPtr sample_by_ast = nullptr; + KeyDescription sampling_key; + /// Separate ttl expressions for columns + TTLColumnsDescription column_ttls_by_name; + /// TTL expressions for table (Move and Rows) + TTLTableDescription table_ttl; /// SETTINGS expression. Supported for MergeTree, Buffer and Kafka. - ASTPtr settings_ast = nullptr; - /// SELECT QUERY. Supported for MaterializedView only. - ASTPtr select = nullptr; + ASTPtr settings_changes; + /// SELECT QUERY. Supported for MaterializedView and View (have to support LiveView). + SelectQueryDescription select; - StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata() = default; StorageInMemoryMetadata(const ColumnsDescription & columns_, const IndicesDescription & secondary_indices_, const ConstraintsDescription & constraints_); - - StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); }; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 313e75a169e..6efba2b56a1 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -100,13 +100,6 @@ StorageMaterializedView::StorageMaterializedView( DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } -StorageInMemoryMetadata StorageMaterializedView::getInMemoryMetadata() const -{ - StorageInMemoryMetadata result(getColumns(), getSecondaryIndices(), getConstraints()); - result.select = getSelectQuery().select_query; - return result; -} - QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const { return getTargetTable()->getQueryProcessingStage(context, to_stage, query_ptr); @@ -207,7 +200,7 @@ void StorageMaterializedView::alter( /// start modify query if (context.getSettingsRef().allow_experimental_alter_materialized_view_structure) { - auto new_select = SelectQueryDescription::getSelectQueryFromASTForMatView(metadata.select, context); + const auto & new_select = metadata.select; const auto & old_select = getSelectQuery(); DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index d5f81e2248e..06bf659e05f 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -21,8 +21,6 @@ public: bool hasInnerTable() const { return has_inner_table; } - StorageInMemoryMetadata getInMemoryMetadata() const override; - bool supportsSampling() const override { return getTargetTable()->supportsSampling(); } bool supportsPrewhere() const override { return getTargetTable()->supportsPrewhere(); } bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4650485847c..65428a15876 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -260,7 +260,7 @@ void StorageMergeTree::alter( { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(metadata.settings_ast, table_lock_holder); + changeSettings(metadata.settings_changes, table_lock_holder); 
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); } @@ -271,11 +271,11 @@ void StorageMergeTree::alter( auto merges_block = getActionLock(ActionLocks::PartsMerge); lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(metadata.settings_ast, table_lock_holder); + changeSettings(metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. setProperties(metadata); - setTTLExpressions(metadata.columns, metadata.ttl_for_table_ast); + setTTLExpressions(metadata.columns, metadata.table_ttl); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index faa44ff7db1..f57dd0b5f32 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -483,20 +483,22 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column ParserNotEmptyExpressionList parser(false); auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr order_by_ast; if (new_sorting_key_expr_list->children.size() == 1) - metadata.order_by_ast = new_sorting_key_expr_list->children[0]; + order_by_ast = new_sorting_key_expr_list->children[0]; else { auto tuple = makeASTFunction("tuple"); tuple->arguments->children = new_sorting_key_expr_list->children; - metadata.order_by_ast = tuple; + order_by_ast = tuple; } + metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, metadata.columns, global_context); if (!isPrimaryKeyDefined()) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. - metadata.primary_key_ast = getSortingKeyAST()->clone(); + metadata.primary_key = getSortingKey(); } } @@ -509,7 +511,8 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column if (metadata_diff.ttl_table_changed) { ParserTTLExpressionList parser; - metadata.ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl_for_table_ast, metadata.columns, global_context, metadata.primary_key); } } @@ -519,7 +522,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
setProperties(metadata); - setTTLExpressions(new_columns, metadata.ttl_for_table_ast); + setTTLExpressions(new_columns, metadata.table_ttl); } @@ -3295,7 +3298,7 @@ void StorageReplicatedMergeTree::alter( params.apply(metadata, query_context); - changeSettings(metadata.settings_ast, table_lock_holder); + changeSettings(metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata); return; @@ -3329,11 +3332,11 @@ void StorageReplicatedMergeTree::alter( params.apply(future_metadata, query_context); ReplicatedMergeTreeTableMetadata future_metadata_in_zk(*this); - if (ast_to_str(future_metadata.order_by_ast) != ast_to_str(current_metadata.order_by_ast)) - future_metadata_in_zk.sorting_key = serializeAST(*extractKeyExpressionList(future_metadata.order_by_ast)); + if (ast_to_str(future_metadata.sorting_key.definition_ast) != ast_to_str(current_metadata.sorting_key.definition_ast)) + future_metadata_in_zk.sorting_key = serializeAST(*future_metadata.sorting_key.expression_list_ast); - if (ast_to_str(future_metadata.ttl_for_table_ast) != ast_to_str(current_metadata.ttl_for_table_ast)) - future_metadata_in_zk.ttl_table = serializeAST(*future_metadata.ttl_for_table_ast); + if (ast_to_str(future_metadata.table_ttl.definition_ast) != ast_to_str(current_metadata.table_ttl.definition_ast)) + future_metadata_in_zk.ttl_table = serializeAST(*future_metadata.table_ttl.definition_ast); String new_indices_str = future_metadata.secondary_indices.toString(); if (new_indices_str != current_metadata.secondary_indices.toString()) @@ -3352,13 +3355,13 @@ void StorageReplicatedMergeTree::alter( ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/columns", new_columns_str, -1)); - if (ast_to_str(current_metadata.settings_ast) != ast_to_str(future_metadata.settings_ast)) + if (ast_to_str(current_metadata.settings_changes) != ast_to_str(future_metadata.settings_changes)) { lockStructureExclusively( table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// Just change settings - current_metadata.settings_ast = future_metadata.settings_ast; - changeSettings(current_metadata.settings_ast, table_lock_holder); + current_metadata.settings_changes = future_metadata.settings_changes; + changeSettings(current_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, current_metadata); } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 3bef8894971..ac77bd51b69 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -55,6 +55,41 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin } +TTLDescription::TTLDescription(const TTLDescription & other) + : mode(other.mode) + , expression_ast(other.expression_ast ? 
other.expression_ast->clone() : nullptr) + , expression(other.expression) + , result_column(other.result_column) + , where_expression(other.where_expression) + , where_result_column(other.where_result_column) + , group_by_keys(other.group_by_keys) + , set_parts(other.set_parts) + , aggregate_descriptions(other.aggregate_descriptions) + , destination_type(other.destination_type) + , destination_name(other.destination_name) +{ +} + +TTLDescription & TTLDescription::operator=(const TTLDescription & other) +{ + mode = other.mode; + if (other.expression_ast) + expression_ast = other.expression_ast->clone(); + else + expression_ast.reset(); + + expression = other.expression; + result_column = other.result_column; + where_expression = other.where_expression; + where_result_column = other.where_result_column; + group_by_keys = other.group_by_keys; + set_parts = other.set_parts; + aggregate_descriptions = other.aggregate_descriptions; + destination_type = other.destination_type; + destination_name = other.destination_name; + return * this; +} + TTLDescription TTLDescription::getTTLFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, @@ -195,4 +230,34 @@ TTLDescription TTLDescription::getTTLFromAST( return result; } + +TTLTableDescription TTLTableDescription::getTTLForTableFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context, + const KeyDescription & primary_key) +{ + TTLTableDescription result; + if (!definition_ast) + return result; + + result.definition_ast = definition_ast->clone(); + + bool seen_delete_ttl = false; + for (const auto & ttl_element_ptr : definition_ast->children) + { + auto ttl = TTLDescription::getTTLFromAST(ttl_element_ptr, columns, context, primary_key); + if (ttl.destination_type == DataDestinationType::DELETE) + { + if (seen_delete_ttl) + throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); + result.rows_ttl = ttl; + seen_delete_ttl = true; + } + else + result.move_ttl.emplace_back(std::move(ttl)); + } + return result; +} + } diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 1ad6960ee3b..86e82e14c73 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -5,7 +5,6 @@ #include #include #include -#include #include namespace DB @@ -75,6 +74,10 @@ struct TTLDescription /// Parse TTL structure from definition. Able to parse both column and table /// TTLs. 
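KeyDescription, SelectQueryDescription and TTLDescription all follow the copy discipline seen above: every ASTPtr member is clone()d, never copied. ASTPtr is a shared pointer, so a defaulted copy would alias one AST node between two metadata snapshots, and an in-place mutation through one snapshot would silently change the other. A minimal illustration of the rule (hypothetical struct, for demonstration only):

    struct Desc
    {
        ASTPtr ast;

        Desc() = default;

        /// Deep copy: each instance owns an independent AST tree.
        Desc(const Desc & other)
            : ast(other.ast ? other.ast->clone() : nullptr)
        {
        }
    };

With `Desc b = a;` a defaulted copy would leave b.ast pointing at the very node a.ast owns; the clone() above is what keeps later ALTER-style rewrites of one description from leaking into the other.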
static TTLDescription getTTLFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const KeyDescription & primary_key); + + TTLDescription() = default; + TTLDescription(const TTLDescription & other); + TTLDescription & operator=(const TTLDescription & other); }; /// Mapping from column name to column TTL @@ -94,6 +97,9 @@ struct TTLTableDescription /// Moving data TTL (to other disks or volumes) TTLDescriptions move_ttl; + + static TTLTableDescription getTTLForTableFromAST( + const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const KeyDescription & primary_key); }; } From 66e31d4311507350f8a08c30f34980e016cf7d2d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 5 Jun 2020 23:47:46 +0300 Subject: [PATCH 0380/2229] in-memory parts: several fixes --- src/Storages/MergeTree/DataPartsExchange.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 3 --- src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 7 ++++++- src/Storages/MergeTree/MergeTreeDataPartInMemory.h | 5 +++-- .../MergeTree/MergeTreeDataPartWriterInMemory.cpp | 5 +++++ src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 8 ++++---- src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++---- tests/queries/0_stateless/01130_in_memory_parts.reference | 1 + tests/queries/0_stateless/01130_in_memory_parts.sql | 4 ++++ 13 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 61561a8e3cf..7039951b256 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -135,7 +135,7 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo void Service::sendPartFromMemory(const MergeTreeData::DataPartPtr & part, WriteBuffer & out) { - const auto * part_in_memory = dynamic_cast(part.get()); + auto part_in_memory = asInMemoryPart(part); if (!part_in_memory) throw Exception("Part " + part->name + " is not stored in memory", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f091d8ec519..8b65ec29b97 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -95,9 +95,6 @@ public: virtual bool supportsVerticalMerge() const { return false; } - virtual bool waitUntilMerged(size_t /* timeout */) const { return true; } - virtual void notifyMerged() const {} - /// NOTE: Returns zeros if column files are not found in checksums. /// Otherwise return information about column size on disk. 
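The waitUntilMerged/notifyMerged virtuals removed from the base interface above are not lost: this commit moves them onto the concrete MergeTreeDataPartInMemory and reaches them through the asInMemoryPart helper defined further down in MergeTreeDataPartInMemory.cpp. The pattern in short, assuming the DataPartInMemoryPtr alias is const-qualified like MergeTreeDataPartPtr:

    /// std::dynamic_pointer_cast keeps shared ownership alive for the duration of use,
    /// unlike the previous dynamic_cast on the raw part.get() pointer.
    DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part)
    {
        return std::dynamic_pointer_cast<const MergeTreeDataPartInMemory>(part);
    }

    /// Call sites collapse the type test and the call into one if-init:
    if (auto part_in_memory = asInMemoryPart(part))
        part_in_memory->notifyMerged();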
ColumnSize getColumnSize(const String & column_name, const IDataType & /* type */) const; diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 1b3f80b4e09..e5ee8b2be5e 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -32,14 +32,14 @@ void MergeTreeBlockOutputStream::write(const Block & block) PartLog::addNewPart(storage.global_context, part, watch.elapsed()); - if (auto * part_in_memory = dynamic_cast(part.get())) + if (auto part_in_memory = asInMemoryPart(part)) { storage.in_memory_merges_throttler.add(part_in_memory->block.bytes(), part_in_memory->rows_count); auto settings = storage.getSettings(); if (settings->in_memory_parts_insert_sync) { - if (!part->waitUntilMerged(in_memory_parts_timeout)) + if (!part_in_memory->waitUntilMerged(in_memory_parts_timeout)) throw Exception("Timeout exceeded while waiting to write part " + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ef526552e12..098416e87ed 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1941,7 +1941,7 @@ void MergeTreeData::renameTempPartAndReplace( addPartContributionToColumnSizes(part); } - auto * part_in_memory = dynamic_cast(part.get()); + auto part_in_memory = asInMemoryPart(part); if (part_in_memory && getSettings()->in_memory_parts_enable_wal) { auto wal = getWriteAheadLog(); @@ -3271,7 +3271,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk( throw Exception("Part in " + fullPath(disk, dst_part_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); /// If source part is in memory, flush it to disk and clone it already in on-disk format - if (const auto * src_part_in_memory = dynamic_cast(src_part.get())) + if (auto src_part_in_memory = asInMemoryPart(src_part)) { const auto & src_relative_data_path = src_part_in_memory->storage.relative_data_path; auto flushed_part_path = src_part_in_memory->getRelativePathForPrefix(tmp_part_prefix); @@ -3367,7 +3367,7 @@ void MergeTreeData::freezePartitionsByMatcher(MatcherFn matcher, const String & LOG_DEBUG(log, "Freezing part {} snapshot will be placed at {}", part->name, backup_path); String backup_part_path = backup_path + relative_data_path + part->relative_path; - if (const auto * part_in_memory = dynamic_cast(part.get())) + if (auto part_in_memory = asInMemoryPart(part)) part_in_memory->flushToDisk(backup_path + relative_data_path, part->relative_path); else localBackup(part->volume->getDisk(), part->getFullRelativePath(), backup_part_path); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 4f6a5e38384..7c6204a5a32 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1455,7 +1455,7 @@ NamesAndTypesList MergeTreeDataMergerMutator::getColumnsForNewDataPart( { /// In compact parts we read all columns, because they all stored in a /// single file - if (isCompactPart(source_part)) + if (!isWidePart(source_part)) return updated_header.getNamesAndTypesList(); NameSet removed_columns; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 0d930eba4e8..bec9d16209d 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -13,7 +13,6 @@ namespace DB namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int DIRECTORY_ALREADY_EXISTS; } @@ -124,4 +123,10 @@ void MergeTreeDataPartInMemory::calculateEachColumnSizesOnDisk(ColumnSizeByName each_columns_size[column.name].data_uncompressed += block.getByName(column.name).column->byteSize(); } +DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part) +{ + return std::dynamic_pointer_cast(part); +} + + } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 3e2ec82b038..e48d9b8e201 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -45,8 +45,8 @@ public: void flushToDisk(const String & base_path, const String & new_relative_path) const; - bool waitUntilMerged(size_t timeout) const override; - void notifyMerged() const override; + bool waitUntilMerged(size_t timeout) const; + void notifyMerged() const; mutable Block block; @@ -58,5 +58,6 @@ private: }; using DataPartInMemoryPtr = std::shared_ptr; +DataPartInMemoryPtr asInMemoryPart(const MergeTreeDataPartPtr & part); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 917f2b862a9..2c50d5baee0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -79,6 +79,11 @@ static MergeTreeDataPartChecksum createUncompressedChecksum(size_t size, SipHash void MergeTreeDataPartWriterInMemory::finishDataSerialization(IMergeTreeDataPart::Checksums & checksums) { + /// If part is empty we still need to initialize block by empty columns. 
+ if (!part_in_memory->block) + for (const auto & column : columns_list) + part_in_memory->block.insert(ColumnWithTypeAndName{column.type, column.name}); + SipHash hash; for (const auto & column : part_in_memory->block) column.column->updateHashFast(hash); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 11c12d47823..b367cf73a08 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1007,7 +1007,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}); if (part) { - if (const auto * part_in_memory = dynamic_cast(part.get())) + if (auto part_in_memory = asInMemoryPart(part)) sum_parts_size_in_bytes += part_in_memory->block.bytes(); else sum_parts_size_in_bytes += part->getBytesOnDisk(); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 44a942551fb..b5cf716b079 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -681,11 +681,11 @@ bool StorageMergeTree::merge( auto lock = lockParts(); for (const auto & part : future_part.parts) { - part->notifyMerged(); - if (isInMemoryPart(part)) + if (auto part_in_memory = asInMemoryPart(part)) { - modifyPartState(part, DataPartState::Deleting); - parts_to_remove_immediately.push_back(part); + part_in_memory->notifyMerged(); + modifyPartState(part_in_memory, DataPartState::Deleting); + parts_to_remove_immediately.push_back(part_in_memory); } } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9babf9476d1..882b5593c76 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1089,11 +1089,11 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) DataPartsVector parts_to_remove_immediatly; for (const auto & part_ptr : parts) { - part_ptr->notifyMerged(); - if (isInMemoryPart(part_ptr)) + if (auto part_in_memory = asInMemoryPart(part_ptr)) { - modifyPartState(part_ptr, DataPartState::Deleting); - parts_to_remove_immediatly.push_back(part_ptr); + part_in_memory->notifyMerged(); + modifyPartState(part_in_memory, DataPartState::Deleting); + parts_to_remove_immediatly.push_back(part_in_memory); } } diff --git a/tests/queries/0_stateless/01130_in_memory_parts.reference b/tests/queries/0_stateless/01130_in_memory_parts.reference index 4a22f17c644..ad5435abb59 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.reference +++ b/tests/queries/0_stateless/01130_in_memory_parts.reference @@ -36,3 +36,4 @@ Mutations and Alters 4 [4,16] 5 [] 7 [7,49] +0 diff --git a/tests/queries/0_stateless/01130_in_memory_parts.sql b/tests/queries/0_stateless/01130_in_memory_parts.sql index 21665faefd6..dca12a85841 100644 --- a/tests/queries/0_stateless/01130_in_memory_parts.sql +++ b/tests/queries/0_stateless/01130_in_memory_parts.sql @@ -39,4 +39,8 @@ ALTER TABLE in_memory DROP COLUMN str; SELECT * FROM in_memory ORDER BY a LIMIT 5; +-- in-memory parts works if they're empty. 
+ALTER TABLE in_memory DELETE WHERE 1; +SELECT count() FROM in_memory; + DROP TABLE in_memory; From ce448d92916b180b7891ee5e14a8a9e52c703eeb Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 7 Jun 2020 11:14:05 +0000 Subject: [PATCH 0381/2229] Better event handler --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 25 ++++++- src/Storages/RabbitMQ/RabbitMQHandler.h | 5 +- .../ReadBufferFromRabbitMQConsumer.cpp | 70 +++++++++---------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 11 ++- .../WriteBufferToRabbitMQProducer.cpp | 7 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 2 +- 6 files changed, 70 insertions(+), 50 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 95d7e22d434..d9dc19afa28 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -6,7 +6,8 @@ namespace DB enum { - Lock_timeout = 50 + Lock_timeout = 50, + Loop_stop_timeout = 200 }; @@ -15,6 +16,8 @@ RabbitMQHandler::RabbitMQHandler(event_base * evbase_, Poco::Logger * log_) : evbase(evbase_), log(log_) { + tv.tv_sec = 0; + tv.tv_usec = Loop_stop_timeout; } @@ -31,7 +34,7 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes } -void RabbitMQHandler::start(std::atomic & check_param) +void RabbitMQHandler::startConsumerLoop(std::atomic & check_param, std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop). But the loop should not be attempted to start if it is already running. @@ -44,6 +47,7 @@ void RabbitMQHandler::start(std::atomic & check_param) */ if (!check_param) { + loop_started = true; event_base_loop(evbase, EVLOOP_NONBLOCK); } @@ -51,6 +55,13 @@ void RabbitMQHandler::start(std::atomic & check_param) } } + +void RabbitMQHandler::startProducerLoop() +{ + event_base_loop(evbase, EVLOOP_NONBLOCK); +} + + void RabbitMQHandler::stop() { if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) @@ -60,4 +71,14 @@ void RabbitMQHandler::stop() } } + +void RabbitMQHandler::stopWithTimeout() +{ + if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + { + event_base_loopexit(evbase, &tv); + mutex_before_loop_stop.unlock(); + } +} + } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index d2d70185128..a8692a845f1 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,13 +19,16 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void start(std::atomic & check_param); + void startConsumerLoop(std::atomic & check_param, std::atomic & loop_started); + void startProducerLoop(); + void stopWithTimeout(); void stop(); private: event_base * evbase; Poco::Logger * log; + timeval tv; size_t count_passed = 0; std::timed_mutex mutex_before_event_loop; std::timed_mutex mutex_before_loop_stop; diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 1bd2c7831ff..b650988dd61 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -7,17 +7,13 @@ #include #include #include +#include "Poco/Timer.h" #include namespace DB { -enum -{ - Received_max_to_stop_loop = 10000 // Explained below -}; - 
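The handler now has two loop-control paths: stop() ends the event loop as soon as possible, while stopWithTimeout() schedules the exit Loop_stop_timeout microseconds into the loop run, giving callbacks already queued on the connection a chance to fire. A self-contained sketch of the underlying libevent pattern, independent of the RabbitMQ classes:

    #include <event2/event.h>

    int main()
    {
        event_base * evbase = event_base_new();

        /// Non-blocking pass: run the callbacks that are ready, then return at once.
        event_base_loop(evbase, EVLOOP_NONBLOCK);

        /// Deferred exit: event_base_loopexit arms a one-shot internal timer,
        /// so the next loop run returns once the ~200us timer fires.
        timeval tv;
        tv.tv_sec = 0;
        tv.tv_usec = 200;
        event_base_loopexit(evbase, &tv);
        event_base_loop(evbase, 0);

        event_base_free(evbase);
        return 0;
    }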
ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
         ChannelPtr consumer_channel_,
         RabbitMQHandler & eventHandler_,
@@ -44,7 +40,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
         , stopped(stopped_)
         , exchange_declared(false)
         , false_param(false)
-        , loop_attempt(false)
 {
     messages.clear();
     current = messages.begin();
@@ -112,7 +107,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
 void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
 {
-    /* This varibale can be updated from a different thread in case of some error so its better to always check
+    /* This variable can be updated from a different thread in case of some error, so it's better to check
      * whether exchange is in a working state and if not - declare it once again.
      */
     if (!exchange_declared)
@@ -123,7 +118,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)

     std::atomic<bool> bindings_created = false, bindings_error = false;

-    consumer_channel->declareQueue(AMQP::durable)
+    consumer_channel->declareQueue(AMQP::exclusive)
     .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */)
     {
         queues.emplace_back(queue_name_);
@@ -151,12 +146,6 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
         .onSuccess([&]
         {
             bindings_created = true;
-
-            /// Unblock current thread so that it does not continue to execute all callbacks on the connection
-            if (++count_bound_queues == num_queues)
-            {
-                stopEventLoop();
-            }
         })
         .onError([&](const char * message)
         {
@@ -176,8 +165,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
      */
     while (!bindings_created && !bindings_error)
     {
-        /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events
-        startEventLoop(bindings_created);
+        startEventLoop(bindings_created, loop_started);
     }
 }

@@ -187,7 +175,7 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer()
     if (subscribed)
         return;

-    LOG_TRACE(log, "Subscribing to " + std::to_string(queues.size()) + " queues");
+    LOG_TRACE(log, "Subscribing {} to {} queues", channel_id, queues.size());

     for (auto & queue : queues)
     {
@@ -200,17 +188,19 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer()

 void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)
 {
-    std::atomic<bool> consumer_created = false, consumer_error = false;
+    std::atomic<bool> consumer_created = false, consumer_failed = false;

     consumer_channel->consume(queue_name, AMQP::noack)
     .onSuccess([&](const std::string & /* consumer */)
     {
         consumer_created = true;
+        LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name);

-        LOG_TRACE(log, "Consumer " + std::to_string(channel_id) + " is subscribed to queue " + queue_name);
-
-        /// Unblock current thread so that it does not continue to execute all callbacks on the connection
-        if (++count_subscribed == queues.size())
+        /* Unblock the current thread if it is running the loop (any consumer can start the loop, but only one at a time)
+         * so that it does not continue to execute all active callbacks on the connection (=> the looping consumer will
+         * not be blocked for too long and events will be distributed between consumers).
+         */
+        if (loop_started && ++count_subscribed == queues.size())
         {
             stopEventLoop();
         }
@@ -223,9 +213,9 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)
         String message_received = std::string(message.body(), message.body() + message_size);

         if (row_delimiter != '\0')
+        {
             message_received += row_delimiter;
-
-        //LOG_TRACE(log, "Consumer {} received a message", channel_id);
+        }

         bool stop_loop = false;

@@ -235,9 +225,10 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)
             received.push_back(message_received);

             /* As event loop is blocking to the thread that started it and a single thread should not be blocked while
-             * executing all callbacks on the connection (not only its own), then there should be some point to unblock
+             * executing all callbacks on the connection (not only its own), then there should be some point to unblock.
+             * loop_started == 1 if the current consumer started the loop, not another one.
              */
-            if (!loop_attempt && received.size() % Received_max_to_stop_loop == 0)
+            if (!loop_started)
             {
                 stop_loop = true;
             }
@@ -245,20 +236,20 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name)

         if (stop_loop)
         {
-            stopEventLoop();
+            stopEventLoopWithTimeout();
         }
     }
     })
     .onError([&](const char * message)
     {
-        consumer_error = true;
+        consumer_failed = true;
         LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message);
     });

-    while (!consumer_created && !consumer_error)
+    /// These variables are updated in a separate thread.
+    while (!consumer_created && !consumer_failed)
     {
-        /// No need for timeouts as this event loop is blocking for the current thread and quits in case there are no active events
-        startEventLoop(consumer_created);
+        startEventLoop(consumer_created, loop_started);
     }
 }

@@ -269,9 +260,15 @@ void ReadBufferFromRabbitMQConsumer::stopEventLoop()
 }

-void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic<bool> & check_param)
+void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout()
 {
-    eventHandler.start(check_param);
+    eventHandler.stopWithTimeout();
+}
+
+
+void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic<bool> & check_param, std::atomic<bool> & loop_started)
+{
+    eventHandler.startConsumerLoop(check_param, loop_started);
 }

@@ -284,10 +281,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl()
 {
     if (received.empty())
     {
-        /// Run the onReceived callbacks to save the messages that have been received by now
-        loop_attempt = true;
-        startEventLoop(false_param);
-        loop_attempt = false;
+        /// Run the onReceived callbacks to save the messages that have been received by now; this blocks the current thread
+        startEventLoop(false_param, loop_started);
+        loop_started = false;
     }

     if (received.empty())
diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
index 97eca73cece..2341c94443f 100644
--- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
@@ -41,7 +41,6 @@ public:

 private:
     using Messages = std::vector<String>;
-    using Queues = std::vector<String>;

     ChannelPtr consumer_channel;
     RabbitMQHandler & eventHandler;
@@ -51,6 +50,7 @@ private:
     const size_t channel_id;
     const bool bind_by_id;
     const bool hash_exchange;
+    const size_t num_queues;

     Poco::Logger * log;
     char row_delimiter;
@@ -60,14 +60,12 @@ private:
     std::atomic<bool> exchange_declared;
     std::atomic<bool> false_param;

-    const size_t num_queues;
-    Queues queues;
     bool subscribed = false;
     String current_exchange_name;
     size_t count_subscribed = 0;
-    size_t count_bound_queues = 0;
-    std::atomic<bool> loop_attempt;
+    std::atomic<bool> loop_started;

+    std::vector<String> queues;
     Messages received;
     Messages messages;
     Messages::iterator current;
@@ -79,7 +77,8 @@ private:
     void initExchange();
     void initQueueBindings(const size_t queue_id);
     void subscribe(const String & queue_name);
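The signature change below completes the protocol used in the .cpp above: callers spin on their own completion flag while the handler records which consumer currently owns the loop. A minimal sketch of the waiting pattern, assuming the RabbitMQHandler shown earlier in this commit:

    std::atomic<bool> done = false;          /// set from an onSuccess/onError callback
    std::atomic<bool> loop_started = false;  /// set by the handler when this caller takes the loop
    while (!done)
        eventHandler.startConsumerLoop(done, loop_started);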
- void startEventLoop(std::atomic & check_param); + void startEventLoop(std::atomic & check_param, std::atomic & loop_started); + void stopEventLoopWithTimeout(); void stopEventLoop(); }; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 7c0764853c7..31c3dea97aa 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -153,9 +153,10 @@ void WriteBufferToRabbitMQProducer::flush() LOG_ERROR(log, "Exchange was not declared: {}", message); }); + /// These variables are updated in a separate thread and starting the loop blocks current thread while (!exchange_declared && !exchange_error) { - startEventLoop(exchange_declared); + startEventLoop(); } } @@ -168,9 +169,9 @@ void WriteBufferToRabbitMQProducer::nextImpl() } -void WriteBufferToRabbitMQProducer::startEventLoop(std::atomic & check_param) +void WriteBufferToRabbitMQProducer::startEventLoop() { - eventHandler.start(check_param); + eventHandler.startProducerLoop(); } } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index e0c48556239..9ae3893d6ae 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -40,7 +40,7 @@ public: private: void nextImpl() override; void checkExchange(); - void startEventLoop(std::atomic & check_param); + void startEventLoop(); std::pair & login_password; const String routing_key; From 80496628e33a2af30a2635af0f43b527b7dd9fad Mon Sep 17 00:00:00 2001 From: potya Date: Sun, 7 Jun 2020 23:02:57 +0300 Subject: [PATCH 0382/2229] Add better code --- src/DataTypes/DataTypeString.cpp | 3 ++ src/DataTypes/DataTypesNumber.cpp | 1 + src/Parsers/ParserCreateQuery.h | 62 +++++++++++++++++-------------- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 5762d5d7055..d02f11d3602 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -376,7 +376,10 @@ void registerDataTypeString(DataTypeFactory & factory) /// These synonyms are added for compatibility. 
factory.registerAlias("CHAR", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("CHAR VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("VARYING CHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("CHARACTER", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("NATIVE CHARACTER", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("VARCHAR", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TEXT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("TINYTEXT", "String", DataTypeFactory::CaseInsensitive); diff --git a/src/DataTypes/DataTypesNumber.cpp b/src/DataTypes/DataTypesNumber.cpp index 5739a64d815..801213236fa 100644 --- a/src/DataTypes/DataTypesNumber.cpp +++ b/src/DataTypes/DataTypesNumber.cpp @@ -29,6 +29,7 @@ void registerDataTypeNumbers(DataTypeFactory & factory) factory.registerAlias("BIGINT", "Int64", DataTypeFactory::CaseInsensitive); factory.registerAlias("FLOAT", "Float32", DataTypeFactory::CaseInsensitive); factory.registerAlias("DOUBLE", "Float64", DataTypeFactory::CaseInsensitive); + factory.registerAlias("DOUBLE PRECISION", "Float64", DataTypeFactory::CaseInsensitive); } } diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 930dc0fec35..b661dd51d16 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -150,6 +151,39 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E { if (!type_parser.parse(pos, type, expected)) return false; + + ASTFunction * type_func = type->as(); + if (!type_func->arguments) + { + if (boost::algorithm::iequals(type_func->name, "DOUBLE")) + { + if (ParserKeyword{"PRESICION"}.ignore(pos)) + { + type_func->name += " PRESICION"; + } + } + else if (boost::algorithm::iequals(type_func->name, "CHAR")) + { + if (ParserKeyword{"VARYING"}.ignore(pos)) + { + type_func->name += " VARYING"; + } + } + else if (boost::algorithm::iequals(type_func->name, "NATIVE")) + { + if (ParserKeyword{"CHARACTER"}.ignore(pos)) + { + type_func->name += " CHARACTER"; + } + } + else if (boost::algorithm::iequals(type_func->name, "VARYING")) + { + if (ParserKeyword{"CHAR"}.ignore(pos)) + { + type_func->name += " CHAR"; + } + } + } } Pos pos_before_specifier = pos; @@ -165,35 +199,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - auto first_word = type->getID(); - if (boost::algorithm::to_lower_copy(first_word) == "function_double") { - ParserKeyword s_presicion{"PRESICION"}; - s_presicion.ignore(pos); - } else if (boost::algorithm::to_lower_copy(first_word) == "function_char") { - ParserKeyword s_varying{"VARYING"}; - s_varying.ignore(pos); - } else if (boost::algorithm::to_lower_copy(first_word) == "function_native") { - ParserIdentifierWithOptionalParameters tmp; - ASTPtr second_word; - if (!tmp.parse(pos, second_word, expected)) { - return false; - } - if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_character") { - return false; - } - - type = second_word; - } else if (boost::algorithm::to_lower_copy(first_word) == "function_varying") - { - ParserIdentifierWithOptionalParameters tmp; - ASTPtr second_word; - if (!tmp.parse(pos, second_word, expected)) - return false; - if (boost::algorithm::to_lower_copy(second_word->getID()) != "function_char") 
- return false; - type = second_word; - } if (s_comment.ignore(pos, expected)) From 75c97ca82aa6de897d41e14267425b67bb0fed1c Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Jun 2020 00:19:23 +0200 Subject: [PATCH 0383/2229] Adding support for PREWHERE in live view tables. --- src/Storages/LiveView/StorageBlocks.h | 5 +++++ src/Storages/LiveView/StorageLiveView.h | 1 + .../00973_live_view_select_prewhere.reference | 4 ++++ .../00973_live_view_select_prewhere.sql | 20 +++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.reference create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.sql diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index a21a9374137..2a9d7766fd7 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -26,6 +26,11 @@ public: return std::make_shared(table_id, columns, std::move(pipes), to_stage); } std::string getName() const override { return "Blocks"; } + /// It is passed inside the query and solved at its level. + bool supportsPrewhere() const override { return true; } + bool supportsSampling() const override { return true; } + bool supportsFinal() const override { return true; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override { return to_stage; } Pipes read( diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fe62de224da..85e3d0cee62 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -65,6 +65,7 @@ public: ASTPtr getInnerBlocksQuery(); /// It is passed inside the query and solved at its level. 
+ bool supportsPrewhere() const override { return true; }
 bool supportsSampling() const override { return true; }
 bool supportsFinal() const override { return true; }

diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference
new file mode 100644
index 00000000000..a2a88e78c97
--- /dev/null
+++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference
@@ -0,0 +1,4 @@
+5 1
+5 1
+10 2
+10 2
diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql
new file mode 100644
index 00000000000..e0e2d342f9e
--- /dev/null
+++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql
@@ -0,0 +1,20 @@
+SET allow_experimental_live_view = 1;
+
+DROP TABLE IF EXISTS lv;
+DROP TABLE IF EXISTS mt;
+
+CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple();
+CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt PREWHERE a > 1;
+
+INSERT INTO mt VALUES (1),(2),(3);
+
+SELECT *,_version FROM lv;
+SELECT *,_version FROM lv;
+
+INSERT INTO mt VALUES (1),(2),(3);
+
+SELECT *,_version FROM lv;
+SELECT *,_version FROM lv;
+
+DROP TABLE lv;
+DROP TABLE mt;
From 834f8426e0d6fa7c25e745b6ce963436ddcfce30 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 8 Jun 2020 01:46:58 +0300
Subject: [PATCH 0384/2229] Fix memory leak

---
 src/Interpreters/Aggregator.cpp | 74 ++++++++++++++++++++++++++++-----
 1 file changed, 64 insertions(+), 10 deletions(-)

diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index e7f6f16b91d..ec4a2f2ba9d 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -1016,6 +1016,8 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
 aggregate_functions[i]->insertResultInto(
 data.getNullKeyData() + offsets_of_aggregate_states[i],
 *final_aggregate_columns[i]);
+
+ data.getNullKeyData() = nullptr;
 }
 }

@@ -1023,13 +1025,65 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
 {
 method.insertKeyIntoColumns(key, key_columns, key_sizes);

- for (size_t i = 0; i < params.aggregates_size; ++i)
- aggregate_functions[i]->insertResultInto(
- mapped + offsets_of_aggregate_states[i],
- *final_aggregate_columns[i]);
- });
+ /** Final values of aggregate functions are inserted into columns.
+ * Then the states of aggregate functions that are no longer needed are destroyed.
+ *
+ * We mark already destroyed states with "nullptr" in data,
+ * so they will not be destroyed in destructor of Aggregator
+ * (other values will be destroyed in destructor in case of exception).
+ *
+ * But it becomes tricky, because we have multiple aggregate states pointed by a single pointer in data.
+ * So, if an exception is thrown in the middle of moving states for different aggregate functions,
+ * we have to catch exceptions and destroy all the states that are no longer needed,
+ * to keep the data in a consistent state.
+ *
+ * It is also tricky, because there are aggregate functions with "-State" modifier.
+ * When we call "insertResultInto" for them, they insert a pointer to the state into ColumnAggregateFunction
+ * and ColumnAggregateFunction will take ownership of this state.
+ * So, for aggregate functions with "-State" modifier, the state must not be destroyed
+ * after it has been transferred to ColumnAggregateFunction.
+ * But we should mark that the data no longer owns these states.
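+ *
+ * An illustrative sketch (not real code from this file): for an ordinary aggregate
+ * function the per-cell sequence below amounts to
+ *   func->insertResultInto(place, column);  /// copy the final value out of the state
+ *   func->destroy(place);                   /// then free the state itself
+ * while for a "-State" function insertResultInto() hands the place pointer to
+ * ColumnAggregateFunction, so destroy() must be skipped for that state.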
+ */

- destroyImpl(data);
+ size_t insert_i = 0;
+ std::exception_ptr exception;
+
+ try
+ {
+ /// Insert final values of aggregate functions into columns.
+ for (; insert_i < params.aggregates_size; ++insert_i)
+ aggregate_functions[insert_i]->insertResultInto(
+ mapped + offsets_of_aggregate_states[insert_i],
+ *final_aggregate_columns[insert_i]);
+ }
+ catch (...)
+ {
+ exception = std::current_exception();
+ }
+
+ /** Destroy states that are no longer needed. This loop does not throw.
+ *
+ * Don't destroy states for "-State" aggregate functions,
+ * because the ownership of this state is transferred to ColumnAggregateFunction
+ * and ColumnAggregateFunction will take care of it.
+ *
+ * But this is only for states that have been transferred to ColumnAggregateFunction
+ * before the exception was thrown.
+ */
+ for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i)
+ {
+ /// If ownership was not transferred to ColumnAggregateFunction.
+ if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState()))
+ aggregate_functions[destroy_i]->destroy(
+ mapped + offsets_of_aggregate_states[destroy_i]);
+ }
+
+ /// Mark the cell as destroyed so it will not be destroyed in destructor.
+ mapped = nullptr;
+
+ if (exception)
+ std::rethrow_exception(exception);
+ });
}

template
@@ -1047,6 +1101,8 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(

 for (size_t i = 0; i < params.aggregates_size; ++i)
 aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]);
+
+ data.getNullKeyData() = nullptr;
 }
 }

@@ -2387,8 +2443,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
 return;

 for (size_t i = 0; i < params.aggregates_size; ++i)
- if (!aggregate_functions[i]->isState())
- aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]);
+ aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]);

 data = nullptr;
 });
@@ -2402,8 +2457,7 @@ void Aggregator::destroyWithoutKey(AggregatedDataVariants & result) const
 if (nullptr != res_data)
 {
 for (size_t i = 0; i < params.aggregates_size; ++i)
- if (!aggregate_functions[i]->isState())
- aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);
+ aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);

 res_data = nullptr;
 }
From d5443293a3cfa26793429b63ab58ca21e195d4b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 8 Jun 2020 01:49:26 +0300
Subject: [PATCH 0385/2229] Added a test

---
 ...regate_state_exception_memory_leak.reference | 2 ++
 ...301_aggregate_state_exception_memory_leak.sh | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
 create mode 100755 tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh

diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
new file mode 100644
index 00000000000..b20e7415f52
--- /dev/null
+++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
@@ -0,0 +1,2 @@
+Memory limit (for query) exceeded
+Ok
diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
new file mode 100755
index 00000000000..633fa5ce315
--- /dev/null
+++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
@@ -0,0 +1,17 @@
+#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +function test() +{ + for i in {1..1000}; do + $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + done +} + +export -f test; + +# If the memory leak exists, it will lead to OOM fairly quickly. +timeout 30 bash -c test 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq +echo 'Ok' From e507f6b8363071f5c13b0febd9322d9063e2d1cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Jun 2020 02:01:04 +0300 Subject: [PATCH 0386/2229] More crystallized test --- ...gregate_state_exception_memory_leak.reference | 1 + ...1302_aggregate_state_exception_memory_leak.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference create mode 100755 tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh new file mode 100755 index 00000000000..23c1d7c4c57 --- /dev/null +++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +function test() +{ + for i in {1..50}; do + $CLICKHOUSE_CLIENT --query "SELECT groupArrayIfState(('Hello, world' AS s) || s || s || s || s || s || s || s || s || s, NOT throwIf(number > 50000000, 'Ok')) FROM system.numbers_mt GROUP BY number % 10"; + done +} + +export -f test; + +# If the memory leak exists, it will lead to OOM fairly quickly. 
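+# (Explanatory note: throwIf aborts every query with the 'Ok' exception once ~50 million
+# rows have been read, so each iteration allocates large aggregation states that must be
+# freed on the exception path; 'grep | uniq' below collapses the repeated 'Ok' lines.)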
+timeout 10 bash -c test 2>&1 | grep -o -F 'Ok' | uniq From 17e7cc03c0e090bc045a8b0d5ee720e7d8df8ca2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Jun 2020 01:11:48 +0000 Subject: [PATCH 0387/2229] Add consumer failure handler --- .../ReadBufferFromRabbitMQConsumer.cpp | 50 ++++++++++++++++--- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 16 ++++-- .../integration/test_storage_rabbitmq/test.py | 25 +++++++--- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index b650988dd61..5d649ab2084 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -10,10 +10,14 @@ #include "Poco/Timer.h" #include - namespace DB { +enum +{ + Loop_retries_limit = 500 +}; + ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, @@ -38,8 +42,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , hash_exchange(hash_exchange_) , num_queues(num_queues_) , stopped(stopped_) - , exchange_declared(false) - , false_param(false) { messages.clear(); current = messages.begin(); @@ -122,6 +124,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) { queues.emplace_back(queue_name_); + subscribed_queue[queue_name_] = false; + String binding_key = routing_key; /* Every consumer has at least one unique queue. Bind the queues to exchange based on the consumer_channel_id @@ -175,34 +179,43 @@ void ReadBufferFromRabbitMQConsumer::subscribeConsumer() if (subscribed) return; - LOG_TRACE(log, "Subscribing {} to {} queues", channel_id, queues.size()); - for (auto & queue : queues) { subscribe(queue); } - subscribed = true; + LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); + + if (count_subscribed == queues.size()) + { + subscribed = true; + } } void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) { - std::atomic consumer_created = false, consumer_failed = false; + if (subscribed_queue[queue_name]) + return; + + consumer_created = false, consumer_failed = false; consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) { consumer_created = true; + ++count_subscribed; + LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); /* Unblock current thread if it is looping (any consumer could start the loop and only one of them) so that it does not * continue to execute all active callbacks on the connection (=> one looping consumer will not be blocked for too * long and events will be distributed between them) */ - if (loop_started && ++count_subscribed == queues.size()) + if (loop_started && count_subscribed == queues.size()) { stopEventLoop(); + subscribed = true; } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) @@ -246,10 +259,31 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); + size_t cnt_retries = 0; + /// These variables are updated in a separate thread. 
while (!consumer_created && !consumer_failed)
 {
 startEventLoop(consumer_created, loop_started);
+
+ if (!consumer_created && !consumer_failed)
+ {
+ if (cnt_retries >= Loop_retries_limit)
+ {
+ /* For an unknown reason there is a case when the subscription may fail and the onError callback is not activated
+ * for a long time. In this case resubscription is needed.
+ */
+ LOG_ERROR(log, "Consumer {} failed to subscribe to queue {}", channel_id, queue_name);
+ break;
+ }
+
+ ++cnt_retries;
+ }
 }
+
+ if (consumer_created)
+ {
+ subscribed_queue[queue_name] = true;
+ }
}

diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
index 2341c94443f..9e0b29307c4 100644
--- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h
@@ -58,17 +58,23 @@ private:
 bool allowed = true;
 const std::atomic & stopped;

- std::atomic exchange_declared;
- std::atomic false_param;
-
 bool subscribed = false;
 String current_exchange_name;
- size_t count_subscribed = 0;
- std::atomic loop_started;
+
+ /* Note: as all concurrent consumers share the same connection => they also share the same
+ * event loop, which can be started by any consumer and blocks only the thread that
+ * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1,
+ * at most two threads will be present: the main thread and the one that executes callbacks (1 thread if
+ * the main thread is the one that started the loop). Both reference these variables.
+ */
+ std::atomic exchange_declared = false, subscribed = false, loop_started = false, false_param = false;
+ std::atomic consumer_created = false, consumer_failed = false;
+ std::atomic count_subscribed = 0;

 std::vector queues;
 Messages received;
 Messages messages;
 Messages::iterator current;
+ std::unordered_map subscribed_queue;

 std::mutex mutex;

diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py
index 0533dd7e2f4..d7e991fe7ae 100644
--- a/tests/integration/test_storage_rabbitmq/test.py
+++ b/tests/integration/test_storage_rabbitmq/test.py
@@ -145,9 +145,14 @@ def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster):
 for message in messages:
 channel.basic_publish(exchange='clickhouse-exchange', routing_key='new', body=message)

- result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=False)
-
 connection.close()
+
+ result = ''
+ while True:
+ result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
+ if rabbitmq_check_result(result):
+ break
+
 rabbitmq_check_result(result, True)

@@ -171,9 +176,14 @@ def test_rabbitmq_select_from_old_syntax_table(rabbitmq_cluster):
 for message in messages:
 channel.basic_publish(exchange='clickhouse-exchange', routing_key='old', body=message)

- result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
-
 connection.close()
+
+ result = ''
+ while True:
+ result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
+ if rabbitmq_check_result(result):
+ break
+
 rabbitmq_check_result(result, True)

@@ -294,7 +304,11 @@ def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster):
 for message in messages:
 channel.basic_publish(exchange='clickhouse-exchange', routing_key='tsv', body=message)

- result = instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
+ result = ''
+ while True:
+ result += instance.query('SELECT * FROM test.rabbitmq', ignore_error=True)
+ if
rabbitmq_check_result(result): + break connection.close() rabbitmq_check_result(result, True) @@ -997,7 +1011,6 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) - print result if int(result) == messages_num * threads_num: break From 75768413ad9882346524a674a4f2e5623746831a Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Mon, 8 Jun 2020 10:23:57 +0800 Subject: [PATCH 0388/2229] fix errors found by Style Check --- src/Interpreters/InterpreterShowTablesQuery.cpp | 15 +++++++-------- src/Parsers/ASTShowTablesQuery.cpp | 2 +- src/Parsers/ParserShowTablesQuery.cpp | 1 - .../0_stateless/01293_show_clusters.reference | 11 +++++++++++ ...create_cluster.sql => 01293_show_clusters.sql} | 0 5 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/01293_show_clusters.reference rename tests/queries/0_stateless/{01293_show_create_cluster.sql => 01293_show_clusters.sql} (100%) diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index a925466f72b..c647ea410c5 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB @@ -41,23 +40,23 @@ String InterpreterShowTablesQuery::getRewrittenQuery() rewritten_query << "SELECT cluster FROM system.clusters"; if (!query.like.empty()) - { - rewritten_query << " WHERE cluster " << (query.not_like ? "NOT " : "") << "LIKE " << std::quoted(query.like, '\''); - } + { + rewritten_query << " WHERE cluster " << (query.not_like ? "NOT " : "") << "LIKE " << std::quoted(query.like, '\''); + } - if (query.limit_length) + if (query.limit_length) rewritten_query << " LIMIT " << query.limit_length; - return rewritten_query.str(); + return rewritten_query.str(); } else if (query.cluster) { std::stringstream rewritten_query; - rewritten_query << "SELECT * FROM system.clusters"; + rewritten_query << "SELECT * FROM system.clusters"; rewritten_query << " WHERE cluster = " << std::quoted(query.cluster_str, '\''); - return rewritten_query.str(); + return rewritten_query.str(); } if (query.temporary && !query.from.empty()) diff --git a/src/Parsers/ASTShowTablesQuery.cpp b/src/Parsers/ASTShowTablesQuery.cpp index 39904061200..25a638c77d4 100644 --- a/src/Parsers/ASTShowTablesQuery.cpp +++ b/src/Parsers/ASTShowTablesQuery.cpp @@ -35,7 +35,7 @@ void ASTShowTablesQuery::formatQueryImpl(const FormatSettings & settings, Format else if (cluster) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CLUSTER" << (settings.hilite ? 
hilite_none : ""); - settings.ostr << " " << backQuoteIfNeed(cluster_str); + settings.ostr << " " << backQuoteIfNeed(cluster_str); } else { diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index fb29b6d99cd..2a0ec84461b 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -9,7 +9,6 @@ #include #include -#include namespace DB diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference new file mode 100644 index 00000000000..b25a9a4d174 --- /dev/null +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -0,0 +1,11 @@ +test_cluster_two_shards +test_cluster_two_shards +test_cluster_two_shards_localhost +test_cluster_two_shards_localhost +test_shard_localhost +test_shard_localhost[1] +test_shard_localhost_secure +test_unavailable_shard +test_unavailable_shard +test_cluster_two_shards +test_shard_localhost 1 1 1 localhost ::1 9000 1 default 0 0 diff --git a/tests/queries/0_stateless/01293_show_create_cluster.sql b/tests/queries/0_stateless/01293_show_clusters.sql similarity index 100% rename from tests/queries/0_stateless/01293_show_create_cluster.sql rename to tests/queries/0_stateless/01293_show_clusters.sql From 71e43934b7c41ce4e5472d39f9796793e6c1e5a3 Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Mon, 8 Jun 2020 10:29:53 +0800 Subject: [PATCH 0389/2229] Fix rest errors found by style check --- src/Interpreters/InterpreterShowTablesQuery.cpp | 2 +- src/Parsers/ParserShowTablesQuery.cpp | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index c647ea410c5..10447e52464 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -37,7 +37,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.clusters) { std::stringstream rewritten_query; - rewritten_query << "SELECT cluster FROM system.clusters"; + rewritten_query << "SELECT cluster FROM system.clusters"; if (!query.like.empty()) { diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index 2a0ec84461b..c60e442542d 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -70,11 +70,11 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { query->cluster = true; - String cluster_str; - if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) - return false; - - query->cluster_str = std::move(cluster_str); + String cluster_str; + if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) + return false; + + query->cluster_str = std::move(cluster_str); } else { From 58e513f59e9c0c5e3649f7f157500128c7085fd4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Jun 2020 07:23:56 +0300 Subject: [PATCH 0390/2229] Corrected implementation for the case without key --- src/Interpreters/Aggregator.cpp | 151 ++++++++++++++++---------------- src/Interpreters/Aggregator.h | 5 ++ 2 files changed, 82 insertions(+), 74 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index ec4a2f2ba9d..0337016acb9 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -999,6 +999,73 @@ void Aggregator::convertToBlockImpl( data.clearAndShrink(); } + +template +void ALWAYS_INLINE 
Aggregator::insertAggregatesIntoColumns(
+ Mapped & mapped,
+ MutableColumns & final_aggregate_columns) const
+{
+ /** Final values of aggregate functions are inserted into columns.
+ * Then the states of aggregate functions that are no longer needed are destroyed.
+ *
+ * We mark already destroyed states with "nullptr" in data,
+ * so they will not be destroyed in destructor of Aggregator
+ * (other values will be destroyed in destructor in case of exception).
+ *
+ * But it becomes tricky, because we have multiple aggregate states pointed by a single pointer in data.
+ * So, if an exception is thrown in the middle of moving states for different aggregate functions,
+ * we have to catch exceptions and destroy all the states that are no longer needed,
+ * to keep the data in a consistent state.
+ *
+ * It is also tricky, because there are aggregate functions with "-State" modifier.
+ * When we call "insertResultInto" for them, they insert a pointer to the state into ColumnAggregateFunction
+ * and ColumnAggregateFunction will take ownership of this state.
+ * So, for aggregate functions with "-State" modifier, the state must not be destroyed
+ * after it has been transferred to ColumnAggregateFunction.
+ * But we should mark that the data no longer owns these states.
+ */
+
+ size_t insert_i = 0;
+ std::exception_ptr exception;
+
+ try
+ {
+ /// Insert final values of aggregate functions into columns.
+ for (; insert_i < params.aggregates_size; ++insert_i)
+ aggregate_functions[insert_i]->insertResultInto(
+ mapped + offsets_of_aggregate_states[insert_i],
+ *final_aggregate_columns[insert_i]);
+ }
+ catch (...)
+ {
+ exception = std::current_exception();
+ }
+
+ /** Destroy states that are no longer needed. This loop does not throw.
+ *
+ * Don't destroy states for "-State" aggregate functions,
+ * because the ownership of this state is transferred to ColumnAggregateFunction
+ * and ColumnAggregateFunction will take care of it.
+ *
+ * But this is only for states that have been transferred to ColumnAggregateFunction
+ * before the exception was thrown.
+ */
+ for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i)
+ {
+ /// If ownership was not transferred to ColumnAggregateFunction.
+ if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState()))
+ aggregate_functions[destroy_i]->destroy(
+ mapped + offsets_of_aggregate_states[destroy_i]);
+ }
+
+ /// Mark the cell as destroyed so it will not be destroyed in destructor.
+ mapped = nullptr;
+
+ if (exception)
+ std::rethrow_exception(exception);
+}
+
+
 template
 void NO_INLINE Aggregator::convertToBlockImplFinal(
 Method & method,
@@ -1011,78 +1078,14 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
 if (data.hasNullKeyData())
 {
 key_columns[0]->insertDefault();
-
- for (size_t i = 0; i < params.aggregates_size; ++i)
- aggregate_functions[i]->insertResultInto(
- data.getNullKeyData() + offsets_of_aggregate_states[i],
- *final_aggregate_columns[i]);
-
- data.getNullKeyData() = nullptr;
+ insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns);
 }
 }

 data.forEachValue([&](const auto & key, auto & mapped)
 {
 method.insertKeyIntoColumns(key, key_columns, key_sizes);
-
- /** Final values of aggregate functions are inserted into columns.
- * Then the states of aggregate functions that are no longer needed are destroyed.
- *
- * We mark already destroyed states with "nullptr" in data,
- * so they will not be destroyed in destructor of Aggregator
- * (other values will be destroyed in destructor in case of exception).
- *
- * But it becomes tricky, because we have multiple aggregate states pointed by a single pointer in data.
- * So, if an exception is thrown in the middle of moving states for different aggregate functions,
- * we have to catch exceptions and destroy all the states that are no longer needed,
- * to keep the data in a consistent state.
- *
- * It is also tricky, because there are aggregate functions with "-State" modifier.
- * When we call "insertResultInto" for them, they insert a pointer to the state into ColumnAggregateFunction
- * and ColumnAggregateFunction will take ownership of this state.
- * So, for aggregate functions with "-State" modifier, the state must not be destroyed
- * after it has been transferred to ColumnAggregateFunction.
- * But we should mark that the data no longer owns these states.
- */

- size_t insert_i = 0;
- std::exception_ptr exception;
-
- try
- {
- /// Insert final values of aggregate functions into columns.
- for (; insert_i < params.aggregates_size; ++insert_i)
- aggregate_functions[insert_i]->insertResultInto(
- mapped + offsets_of_aggregate_states[insert_i],
- *final_aggregate_columns[insert_i]);
- }
- catch (...)
- {
- exception = std::current_exception();
- }
-
- /** Destroy states that are no longer needed. This loop does not throw.
- *
- * Don't destroy states for "-State" aggregate functions,
- * because the ownership of this state is transferred to ColumnAggregateFunction
- * and ColumnAggregateFunction will take care of it.
- *
- * But this is only for states that have been transferred to ColumnAggregateFunction
- * before the exception was thrown.
- */
- for (size_t destroy_i = 0; destroy_i < params.aggregates_size; ++destroy_i)
- {
- /// If ownership was not transferred to ColumnAggregateFunction.
- if (!(destroy_i < insert_i && aggregate_functions[destroy_i]->isState()))
- aggregate_functions[destroy_i]->destroy(
- mapped + offsets_of_aggregate_states[destroy_i]);
- }
-
- /// Mark the cell as destroyed so it will not be destroyed in destructor.
- mapped = nullptr;
-
- if (exception)
- std::rethrow_exception(exception);
+ insertAggregatesIntoColumns(mapped, final_aggregate_columns);
 });
}

template
@@ -1243,16 +1246,16 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va
{
 AggregatedDataWithoutKey & data = data_variants.without_key;

- for (size_t i = 0; i < params.aggregates_size; ++i)
- {
- if (!final_)
- aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]);
- else
- aggregate_functions[i]->insertResultInto(data + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
- }
-
 if (!final_)
+ {
+ for (size_t i = 0; i < params.aggregates_size; ++i)
+ aggregate_columns[i]->push_back(data + offsets_of_aggregate_states[i]);
 data = nullptr;
+ }
+ else
+ {
+ insertAggregatesIntoColumns(data, final_aggregate_columns);
+ }

 if (params.overflow_row)
 for (size_t i = 0; i < params.keys_size; ++i)
diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h
index a5d79ce46dc..6d0eeee9014 100644
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@@ -1166,6 +1166,11 @@ protected:
 MutableColumns & final_aggregate_columns,
 bool final) const;

+ template
+ void insertAggregatesIntoColumns(
+ Mapped & mapped,
+ MutableColumns & final_aggregate_columns) const;
+
 template
 void convertToBlockImplFinal(
 Method & method,
From d5443293a3cfa26793429b63ab58ca21e195d4b5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 8 Jun 2020 12:14:58 +0300
Subject: [PATCH 0391/2229] Add QueryPlan.
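
QueryPlan is a tree of query plan steps that is later turned into a QueryPipeline.
A rough usage sketch (addStep() and buildQueryPipeline() are the functions added in
this patch; the concrete step types are hypothetical placeholders):

    QueryPlan plan;
    plan.addStep(std::make_unique<SomeSourceStep>(/*...*/));     /// a step with no inputs initializes the plan
    plan.addStep(std::make_unique<SomeTransformStep>(/*...*/));  /// a single-input step is chained onto the root
    QueryPipelinePtr pipeline = plan.buildQueryPipeline();       /// merges child pipelines bottom-up via updatePipeline()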
--- src/Processors/QueryPlan/IQueryPlanStep.h | 1 + src/Processors/QueryPlan/QueryPlan.cpp | 112 ++++++++++++++++++++++ src/Processors/QueryPlan/QueryPlan.h | 45 +++++++++ src/Processors/ya.make | 1 + 4 files changed, 159 insertions(+) create mode 100644 src/Processors/QueryPlan/QueryPlan.cpp create mode 100644 src/Processors/QueryPlan/QueryPlan.h diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h index fe84e49672a..0c3b0727b01 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.h +++ b/src/Processors/QueryPlan/IQueryPlanStep.h @@ -51,4 +51,5 @@ protected: std::optional output_stream; }; +using QueryPlanStepPtr = std::unique_ptr; } diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp new file mode 100644 index 00000000000..82bc4210825 --- /dev/null +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -0,0 +1,112 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void QueryPlan::checkInitialized() const +{ + if (!isInitialized()) + throw Exception("QueryPlan was not initialized", ErrorCodes::LOGICAL_ERROR); +} + +void QueryPlan::checkNotCompleted() const +{ + if (isCompleted()) + throw Exception("QueryPlan was already completed", ErrorCodes::LOGICAL_ERROR); +} + +bool QueryPlan::isCompleted() const +{ + return isInitialized() && !root->step->hasOutputStream(); +} + +const DataStream & QueryPlan::getCurrentDataStream() const +{ + checkInitialized(); + checkNotCompleted(); + return root->step->getOutputStream(); +} + +void QueryPlan::addStep(QueryPlanStepPtr step) +{ + checkNotCompleted(); + + size_t num_input_streams = step->getInputStreams().size(); + + if (num_input_streams == 0) + { + if (isInitialized()) + throw Exception("Cannot add step " + step->getName() + " to QueryPlan because " + "step has no inputs, but QueryPlan is already initialised", ErrorCodes::LOGICAL_ERROR); + + nodes.emplace_back(Node{.step = std::move(step)}); + return; + } + + if (num_input_streams == 1) + { + if (!isInitialized()) + throw Exception("Cannot add step " + step->getName() + " to QueryPlan because " + "step has input, but QueryPlan is not initialised", ErrorCodes::LOGICAL_ERROR); + + const auto & root_header = root->step->getOutputStream().header; + const auto & step_header = step->getInputStreams().front().header; + if (!blocksHaveEqualStructure(root_header, step_header)) + throw Exception("Cannot add step " + step->getName() + " to QueryPlan because " + "it has incompatible header with root step " + root->step->getName() + " " + "root header: " + root_header.dumpStructure() + + "step header: " + step_header.dumpStructure(), ErrorCodes::LOGICAL_ERROR); + + nodes.emplace_back(Node{.step = std::move(step), .children = {root}}); + root = &nodes.back(); + return; + } + + throw Exception("Cannot add step " + step->getName() + " to QueryPlan because it has " + + std::to_string(num_input_streams) + " inputs but " + std::to_string(isInitialized() ? 
1 : 0) +
+ " input expected", ErrorCodes::LOGICAL_ERROR);
+}
+
+QueryPipelinePtr QueryPlan::buildQueryPipeline()
+{
+ checkInitialized();
+
+ struct Frame
+ {
+ Node * node;
+ QueryPipelines pipelines;
+ };
+
+ QueryPipelinePtr last_pipeline;
+
+ std::stack stack;
+ stack.push({.node = root});
+
+ while (!stack.empty())
+ {
+ auto & frame = stack.top();
+
+ if (last_pipeline)
+ frame.pipelines.emplace_back(std::move(last_pipeline));
+
+ size_t next_child = frame.pipelines.size();
+ if (next_child == frame.node->children.size())
+ {
+ last_pipeline = frame.node->step->updatePipeline(std::move(frame.pipelines));
+ stack.pop();
+ }
+ else
+ stack.push({.node = frame.node->children[next_child]});
+ }
+
+ return last_pipeline;
+}
+
+}
diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h
new file mode 100644
index 00000000000..de932524903
--- /dev/null
+++ b/src/Processors/QueryPlan/QueryPlan.h
@@ -0,0 +1,45 @@
+#pragma once
+#include
+#include
+#include
+
+namespace DB
+{
+
+class DataStream;
+
+class IQueryPlanStep;
+using QueryPlanStepPtr = std::unique_ptr;
+
+class QueryPipeline;
+using QueryPipelinePtr = std::unique_ptr;
+
+/// A tree of query steps.
+class QueryPlan
+{
+public:
+ void addStep(QueryPlanStepPtr step);
+
+ bool isInitialized() const { return root != nullptr; } /// Tree is not empty
+ bool isCompleted() const; /// Tree is not empty and root hasOutputStream()
+ const DataStream & getCurrentDataStream() const; /// Checks that (isInitialized() && !isCompleted())
+
+ QueryPipelinePtr buildQueryPipeline();
+
+private:
+ struct Node
+ {
+ QueryPlanStepPtr step;
+ std::vector children;
+ };
+
+ using Nodes = std::list;
+ Nodes nodes;
+
+ Node * root = nullptr;
+
+ void checkInitialized() const;
+ void checkNotCompleted() const;
+};
+
+}
diff --git a/src/Processors/ya.make b/src/Processors/ya.make
index fee4847fb56..5cbc5dfd291 100644
--- a/src/Processors/ya.make
+++ b/src/Processors/ya.make
@@ -135,6 +135,7 @@ SRCS(
 Transforms/SortingTransform.cpp
 Transforms/TotalsHavingTransform.cpp
 QueryPlan/IQueryPlanStep.cpp
+ QueryPlan/QueryPlan.cpp
)

END()
From 5fe67c429264614e1a199aeba162421e05fb345d Mon Sep 17 00:00:00 2001
From: Vitaliy Zakaznikov
Date: Mon, 8 Jun 2020 13:12:40 +0200
Subject: [PATCH 0392/2229] * Removing supportsPrewhere() from StorageLiveView.h
 as it is not valid.
* Updating test to check using PREWHERE in a query against a live view table.
* Updating test to check using PREWHERE in the stored query against a table
 that does not support PREWHERE.

---
 src/Storages/LiveView/StorageBlocks.h | 5 +++++
 src/Storages/LiveView/StorageLiveView.h | 1 +
 .../00973_live_view_select_prewhere.reference | 4 ++++
 .../00973_live_view_select_prewhere.sql | 20 +++++++++++++++++++
 4 files changed, 30 insertions(+)
 create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.reference
 create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.sql

diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h
index 85e3d0cee62..fe62de224da 100644
--- a/src/Storages/LiveView/StorageLiveView.h
+++ b/src/Storages/LiveView/StorageLiveView.h
@@ -65,7 +65,6 @@ public:
 ASTPtr getInnerBlocksQuery();

 /// It is passed inside the query and solved at its level.
- bool supportsPrewhere() const override { return true; } bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference index a2a88e78c97..3a6fe59ae6d 100644 --- a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference @@ -1,4 +1,2 @@ 5 1 -5 1 -10 2 10 2 diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql index e0e2d342f9e..df3b7cb505a 100644 --- a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql @@ -1,20 +1,26 @@ SET allow_experimental_live_view = 1; DROP TABLE IF EXISTS lv; +DROP TABLE IF EXISTS lv2; DROP TABLE IF EXISTS mt; CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt PREWHERE a > 1; +CREATE LIVE VIEW lv AS SELECT sum(a) AS sum_a FROM mt PREWHERE a > 1; +CREATE LIVE VIEW lv2 AS SELECT sum(number) AS sum_number FROM system.numbers PREWHERE number > 1; INSERT INTO mt VALUES (1),(2),(3); SELECT *,_version FROM lv; -SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 5; -- { serverError 182 } INSERT INTO mt VALUES (1),(2),(3); SELECT *,_version FROM lv; -SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 10; -- { serverError 182 } + +SELECT *,_version FROM lv2; -- { serverError 182 } +SELECT *,_version FROM lv2 PREWHERE sum_number > 10; -- { serverError 182 } DROP TABLE lv; +DROP TABLE lv2; DROP TABLE mt; From fc981a28690dc1de53d148e4f3e7ab400479dc85 Mon Sep 17 00:00:00 2001 From: MovElb Date: Mon, 8 Jun 2020 15:05:49 +0300 Subject: [PATCH 0393/2229] fix for review --- src/Server/PostgreSQLHandlerFactory.h | 3 ++- tests/integration/test_postgresql_protocol/configs/config.xml | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index 0546b4ef8c2..4550e9ee8e9 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -12,9 +12,10 @@ namespace DB class PostgreSQLHandlerFactory : public Poco::Net::TCPServerConnectionFactory { private: -#if USE_SSL IServer & server; Poco::Logger * log; + +#if USE_SSL bool ssl_enabled = true; #else bool ssl_enabled = false; diff --git a/tests/integration/test_postgresql_protocol/configs/config.xml b/tests/integration/test_postgresql_protocol/configs/config.xml index 678b48425b1..a833e228222 100644 --- a/tests/integration/test_postgresql_protocol/configs/config.xml +++ b/tests/integration/test_postgresql_protocol/configs/config.xml @@ -24,7 +24,6 @@ - 9000 5433 127.0.0.1 From 138f3253bab1e9d37975f962c4d78e4c3c0ce84e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Jun 2020 15:33:00 +0300 Subject: [PATCH 0394/2229] Fix gcc build --- src/Interpreters/Aggregator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 0337016acb9..538a24fa997 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1001,7 +1001,7 @@ void Aggregator::convertToBlockImpl( template -void ALWAYS_INLINE Aggregator::insertAggregatesIntoColumns( +inline void 
Aggregator::insertAggregatesIntoColumns(
 Mapped & mapped,
 MutableColumns & final_aggregate_columns) const
From 7ba2d7e15f64838d092ae50931be6d362e5d6b8b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 8 Jun 2020 15:35:57 +0300
Subject: [PATCH 0395/2229] Changed timeouts in test

---
 .../01302_aggregate_state_exception_memory_leak.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
index 23c1d7c4c57..cd2fec408ab 100755
--- a/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
+++ b/tests/queries/0_stateless/01302_aggregate_state_exception_memory_leak.sh
@@ -5,12 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

 function test()
{
- for i in {1..50}; do
- $CLICKHOUSE_CLIENT --query "SELECT groupArrayIfState(('Hello, world' AS s) || s || s || s || s || s || s || s || s || s, NOT throwIf(number > 50000000, 'Ok')) FROM system.numbers_mt GROUP BY number % 10";
+ for i in {1..250}; do
+ $CLICKHOUSE_CLIENT --query "SELECT groupArrayIfState(('Hello, world' AS s) || s || s || s || s || s || s || s || s || s, NOT throwIf(number > 10000000, 'Ok')) FROM system.numbers_mt GROUP BY number % 10";
 done
}

 export -f test;

 # If the memory leak exists, it will lead to OOM fairly quickly.
-timeout 10 bash -c test 2>&1 | grep -o -F 'Ok' | uniq
+timeout 30 bash -c test 2>&1 | grep -o -F 'Ok' | uniq
From 2226f79f1662e79a1a4b4a71ffaf93de9f835aea Mon Sep 17 00:00:00 2001
From: alesapin
Date: Mon, 8 Jun 2020 15:57:25 +0300
Subject: [PATCH 0396/2229] Fix some bugs

---
 src/Storages/MergeTree/KeyCondition.cpp | 1 +
 src/Storages/MergeTree/MergeTreeData.cpp | 13 +++++++++++--
 src/Storages/MergeTree/registerStorageMergeTree.cpp | 6 ++++--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index dad73b6a003..9038fd684f1 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -524,6 +524,7 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
 Field & out_value,
 DataTypePtr & out_type)
{
+ LOG_DEBUG(&Poco::Logger::get("DEBUG"), "KEY EXPR EMPTY: {}", (key_expr == nullptr));
 String expr_name = node->getColumnName();
 const auto & sample_block = key_expr->getSampleBlock();
 if (!sample_block.has(expr_name))
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index d6437764653..b855faecf22 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -278,25 +278,31 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam

 void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool only_check, bool attach)
{
- KeyDescription new_sorting_key = metadata.sorting_key;
 KeyDescription new_primary_key = metadata.primary_key;

- if (!new_sorting_key.definition_ast)
+ if (!metadata.sorting_key.definition_ast)
 throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);

+ KeyDescription new_sorting_key;
 if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
 new_sorting_key = KeyDescription::getKeyFromAST(
 metadata.sorting_key.definition_ast,
 metadata.columns,
 global_context,
 std::make_shared(merging_params.version_column));
+ else
+ new_sorting_key = metadata.sorting_key;

 /// Primary key not defined at all
 if
(new_primary_key.definition_ast == nullptr) { + LOG_DEBUG(log, "PRIMARY KEY EMPTY, MAKING COPY"); /// We copy sorting key, and restore definition_ast to empty value new_primary_key = metadata.sorting_key; new_primary_key.definition_ast = nullptr; + LOG_DEBUG(log, "NEW PK DEF NULLPTR: {}", new_primary_key.definition_ast == nullptr); + LOG_DEBUG(log, "NEW PK EXPR NULLPTR: {}", new_primary_key.expression == nullptr); + LOG_DEBUG(log, "NEW PK COLUMN NAMES SIZE: {}", new_primary_key.column_names.size()); } size_t sorting_key_size = new_sorting_key.column_names.size(); @@ -402,9 +408,12 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool setPrimaryKey(new_primary_key); + setSecondaryIndices(metadata.secondary_indices); setConstraints(metadata.constraints); + LOG_DEBUG(log, "HAS PRIMARY KEY {}", hasPrimaryKey()); + LOG_DEBUG(log, "IS PRIMARY KEY {}", isPrimaryKeyDefined()); } } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index a012f9c00a6..d61418edfe7 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -491,9 +491,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (is_extended_storage_def) { + ASTPtr partition_by_key; if (args.storage_def->partition_by) - metadata.partition_key = KeyDescription::getKeyFromAST( - args.storage_def->partition_by->ptr(), metadata.columns, args.context); + partition_by_key = args.storage_def->partition_by->ptr(); + + metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.context); if (!args.storage_def->order_by) throw Exception("You must provide an ORDER BY expression in the table definition. 
" From ef5d03a273b2dffa6f7712aa9e65a2b8c83d6331 Mon Sep 17 00:00:00 2001 From: MovElb Date: Mon, 8 Jun 2020 16:49:10 +0300 Subject: [PATCH 0397/2229] fix build --- src/Server/PostgreSQLHandlerFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/PostgreSQLHandlerFactory.cpp b/src/Server/PostgreSQLHandlerFactory.cpp index ce433188c04..1158cf5835e 100644 --- a/src/Server/PostgreSQLHandlerFactory.cpp +++ b/src/Server/PostgreSQLHandlerFactory.cpp @@ -8,7 +8,7 @@ namespace DB PostgreSQLHandlerFactory::PostgreSQLHandlerFactory(IServer & server_) : server(server_) - , log(&Logger::get("PostgreSQLHandlerFactory")) + , log(&Poco::Logger::get("PostgreSQLHandlerFactory")) { auth_methods = { From e2e5e1d5f857abe5e21a61c1566e404bced4024a Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 8 Jun 2020 16:57:33 +0300 Subject: [PATCH 0398/2229] More stable perf tests --- docker/test/performance-comparison/compare.sh | 8 +- docker/test/performance-comparison/report.py | 9 +- .../performance/agg_functions_min_max_any.xml | 178 +++++++++--------- tests/performance/array_element.xml | 9 +- .../bit_operations_fixed_string.xml | 13 +- .../bit_operations_fixed_string_numbers.xml | 6 +- tests/performance/codecs_int_insert.xml | 2 +- .../performance/number_formatting_formats.xml | 22 ++- tests/performance/point_in_polygon.xml | 2 +- tests/performance/sum_map.xml | 1 - 10 files changed, 125 insertions(+), 125 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index e63ba6122c8..4993e381bd9 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -409,11 +409,11 @@ create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') ; create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as - select left, right, diff, stat_threshold, changed_fail, test, query_display_name + select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name from queries where changed_show order by abs(diff) desc; create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as - select left, right, diff, stat_threshold, unstable_fail, test, query_display_name + select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name from queries where unstable_show order by stat_threshold desc; create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, @@ -464,8 +464,8 @@ create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as select changed_fail, unstable_fail, left, right, diff, floor(left > right ? 
left / right : right / left, 3), - stat_threshold, test, query_display_name - from queries order by test, query_display_name; + stat_threshold, test, query_index, query_display_name + from queries order by test, query_index; -- new report for all queries with all metrics (no page yet) create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 866e78da098..5d4d30e0098 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -207,7 +207,8 @@ if args.report == 'main': 'p < 0.001 threshold', # 3 # Failed # 4 'Test', # 5 - 'Query', # 6 + '#', # 6 + 'Query', # 7 ] print(tableHeader(columns)) @@ -248,7 +249,8 @@ if args.report == 'main': 'p < 0.001 threshold', #3 # Failed #4 'Test', #5 - 'Query' #6 + '#', #6 + 'Query' #7 ] print(tableStart('Unstable queries')) @@ -391,7 +393,8 @@ elif args.report == 'all-queries': 'Times speedup / slowdown', #5 'p < 0.001 threshold', #6 'Test', #7 - 'Query', #8 + '#', #8 + 'Query', #9 ] print(tableStart('All query times')) diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml index 9c16cb88970..d40d19b6e38 100644 --- a/tests/performance/agg_functions_min_max_any.xml +++ b/tests/performance/agg_functions_min_max_any.xml @@ -1,95 +1,95 @@ - test.hits + hits_100m_single -select min(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(Title) from test.hits where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(URL) from test.hits where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(Referer) from test.hits where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(FlashMinor2) from test.hits where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(MobilePhoneModel) from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(MobilePhoneModel) 
from test.hits where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(Params) from test.hits where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(SearchPhrase) from test.hits where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(SocialAction) from test.hits where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(SocialSourcePage) from test.hits where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(ParamOrderID) from test.hits where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by 
intHash32(UserID) % 1000000 FORMAT Null -select any(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(OpenstatServiceName) from test.hits where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(OpenstatCampaignID) from test.hits where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(OpenstatAdID) from test.hits where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(OpenstatSourceID) from test.hits where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(UTMSource) from test.hits where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(UTMMedium) from test.hits where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(UTMCampaign) from test.hits where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(UTMContent) from test.hits where UTMContent != '' group by intHash32(UserID) % 1000000 
FORMAT Null -select min(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(UTMTerm) from test.hits where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(FromTag) from test.hits where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(Title) from hits_100m_single where Title != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(URL) from hits_100m_single where URL != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(Referer) from hits_100m_single where Referer != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(FlashMinor2) from hits_100m_single where FlashMinor2 != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(MobilePhoneModel) from hits_100m_single where MobilePhoneModel != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(Params) from hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(Params) from 
hits_100m_single where Params != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(SearchPhrase) from hits_100m_single where SearchPhrase != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(PageCharset) from hits_100m_single where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(ParamOrderID) from hits_100m_single where ParamOrderID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(OpenstatServiceName) from hits_100m_single where OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(OpenstatServiceName) from hits_100m_single where 
OpenstatServiceName != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(OpenstatCampaignID) from hits_100m_single where OpenstatCampaignID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(OpenstatAdID) from hits_100m_single where OpenstatAdID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(OpenstatSourceID) from hits_100m_single where OpenstatSourceID != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(UTMSource) from hits_100m_single where UTMSource != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(UTMMedium) from hits_100m_single where UTMMedium != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(UTMCampaign) from hits_100m_single where UTMCampaign != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(UTMContent) from hits_100m_single where UTMContent != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(UTMTerm) 
from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(UTMTerm) from hits_100m_single where UTMTerm != '' group by intHash32(UserID) % 1000000 FORMAT Null +select min(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null +select max(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null +select any(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null +select anyHeavy(FromTag) from hits_100m_single where FromTag != '' group by intHash32(UserID) % 1000000 FORMAT Null diff --git a/tests/performance/array_element.xml b/tests/performance/array_element.xml index 456ce55cbe1..1f82b833380 100644 --- a/tests/performance/array_element.xml +++ b/tests/performance/array_element.xml @@ -1,8 +1,5 @@ - - - - SELECT count() FROM numbers(10000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) - SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) - SELECT count() FROM numbers(10000000) WHERE NOT ignore([[], []][number % 2 + 2]) + SELECT count() FROM numbers(100000000) WHERE NOT ignore([[1], [2]][number % 2 + 2]) + SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], [2]][number % 2 + 2]) + SELECT count() FROM numbers(100000000) WHERE NOT ignore([[], []][number % 2 + 2]) diff --git a/tests/performance/bit_operations_fixed_string.xml b/tests/performance/bit_operations_fixed_string.xml index c08761ba8fc..19972535fd9 100644 --- a/tests/performance/bit_operations_fixed_string.xml +++ b/tests/performance/bit_operations_fixed_string.xml @@ -1,13 +1,10 @@ - - - - test.hits + hits_100m_single - SELECT count() FROM test.hits WHERE NOT ignore(bitAnd(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM test.hits WHERE NOT ignore(bitOr(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM test.hits WHERE NOT ignore(bitXor(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM test.hits WHERE NOT ignore(bitNot(toFixedString(ClientIP6, 16))) + SELECT count() FROM hits_100m_single WHERE NOT ignore(bitAnd(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM hits_100m_single WHERE NOT ignore(bitOr(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM hits_100m_single WHERE NOT ignore(bitXor(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM hits_100m_single WHERE NOT ignore(bitNot(toFixedString(ClientIP6, 16))) diff --git a/tests/performance/bit_operations_fixed_string_numbers.xml b/tests/performance/bit_operations_fixed_string_numbers.xml index e10e665ac81..5d3d9aa804d 100644 --- a/tests/performance/bit_operations_fixed_string_numbers.xml +++ b/tests/performance/bit_operations_fixed_string_numbers.xml @@ -1,6 +1,4 @@ - - - SELECT count() FROM numbers(10000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(number + 1))) - SELECT count() FROM numbers(10000000) WHERE NOT 
ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(0xabcd0123cdef4567))) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(number + 1))) + SELECT count() FROM numbers(100000000) WHERE NOT ignore(bitXor(reinterpretAsFixedString(number), reinterpretAsFixedString(0xabcd0123cdef4567))) diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index 0ad04f00f78..662df80ae70 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -33,7 +33,7 @@ num_rows - 10000000 + 20000000 diff --git a/tests/performance/number_formatting_formats.xml b/tests/performance/number_formatting_formats.xml index e6b5a8344dc..a9faf9369ec 100644 --- a/tests/performance/number_formatting_formats.xml +++ b/tests/performance/number_formatting_formats.xml @@ -1,6 +1,4 @@ - CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`) - format @@ -13,22 +11,30 @@ JSONEachRow TSKV RowBinary - Native XML Parquet ODBCDriver2 - Null MySQLWire - - test.hits - + + + format_fast + + Native + Null + + + + + CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`) + CREATE TABLE IF NOT EXISTS table_{format_fast} (x UInt64) ENGINE = File(`{format}`) INSERT INTO table_{format} SELECT number FROM numbers(10000000) + INSERT INTO table_{format_fast} SELECT number FROM numbers(100000000) DROP TABLE IF EXISTS table_{format} - + DROP TABLE IF EXISTS table_{format_fast} diff --git a/tests/performance/point_in_polygon.xml b/tests/performance/point_in_polygon.xml index b15fecbbfb0..c1325720429 100644 --- a/tests/performance/point_in_polygon.xml +++ b/tests/performance/point_in_polygon.xml @@ -4,7 +4,7 @@ INSERT INTO polygons WITH number + 1 AS radius SELECT [arrayMap(x -> (cos(x / 90. * pi()) * radius, sin(x / 90. 
* pi()) * radius), range(180))] - FROM numbers(100000) + FROM numbers(1000000) SELECT pointInPolygon((100, 100), polygon) FROM polygons diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index 9cc03035cce..cb1a4cb5bc6 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -7,7 +7,6 @@ scale - 100000 1000000 From 638cd50ce4e31cffd4cc70f3c417d4f88f35ab2a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 17:18:38 +0300 Subject: [PATCH 0399/2229] Fix several bugs --- src/Storages/KeyDescription.cpp | 9 +++--- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++-- src/Storages/StorageInMemoryMetadata.cpp | 35 ++++++++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 3 ++ 4 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 78c7990b614..e59693f5343 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -46,9 +46,6 @@ KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, cons if (additional_key_expression) result.expression_list_ast->children.push_back(additional_key_expression); - if (result.expression_list_ast->children.empty()) - return result; - const auto & children = result.expression_list_ast->children; for (const auto & child : children) result.column_names.emplace_back(child->getColumnName()); @@ -56,8 +53,10 @@ KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, cons { auto expr = result.expression_list_ast->clone(); auto syntax_result = SyntaxAnalyzer(context).analyze(expr, columns.getAllPhysical()); - result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(true); - result.sample_block = result.expression->getSampleBlock(); + /// In expression we also need to store source columns + result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(false); + /// In sample block we use just key columns + result.sample_block = ExpressionAnalyzer(expr, syntax_result, context).getActions(true)->getSampleBlock(); } for (size_t i = 0; i < result.sample_block.columns(); ++i) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b855faecf22..79382aaa58d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -296,13 +296,9 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool /// Primary key not defined at all if (new_primary_key.definition_ast == nullptr) { - LOG_DEBUG(log, "PRIMARY KEY EMPTY, MAKING COPY"); /// We copy sorting key, and restore definition_ast to empty value new_primary_key = metadata.sorting_key; new_primary_key.definition_ast = nullptr; - LOG_DEBUG(log, "NEW PK DEF NULLPTR: {}", new_primary_key.definition_ast == nullptr); - LOG_DEBUG(log, "NEW PK EXPR NULLPTR: {}", new_primary_key.expression == nullptr); - LOG_DEBUG(log, "NEW PK COLUMN NAMES SIZE: {}", new_primary_key.column_names.size()); } size_t sorting_key_size = new_sorting_key.column_names.size(); @@ -400,6 +396,8 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool } } + checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting"); + if (!only_check) { setColumns(std::move(metadata.columns)); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 67e0c7f6028..0f8b88e691b 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ 
b/src/Storages/StorageInMemoryMetadata.cpp @@ -12,4 +12,39 @@ StorageInMemoryMetadata::StorageInMemoryMetadata( , constraints(constraints_) { } + +StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other) + : columns(other.columns) + , secondary_indices(other.secondary_indices) + , constraints(other.constraints) + , partition_key(other.partition_key) + , primary_key(other.primary_key) + , sorting_key(other.sorting_key) + , sampling_key(other.sampling_key) + , column_ttls_by_name(other.column_ttls_by_name) + , table_ttl(other.table_ttl) + , settings_changes(other.settings_changes ? other.settings_changes->clone() : nullptr) + , select(other.select) +{ +} + +StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other) +{ + columns = other.columns; + secondary_indices = other.secondary_indices; + constraints = other.constraints; + partition_key = other.partition_key; + primary_key = other.primary_key; + sorting_key = other.sorting_key; + sampling_key = other.sampling_key; + column_ttls_by_name = other.column_ttls_by_name; + table_ttl = other.table_ttl; + if (other.settings_changes) + settings_changes = other.settings_changes->clone(); + else + settings_changes.reset(); + select = other.select; + return *this; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index ba49cfa210d..889f8e49f69 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -44,6 +44,9 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata() = default; StorageInMemoryMetadata(const ColumnsDescription & columns_, const IndicesDescription & secondary_indices_, const ConstraintsDescription & constraints_); + + StorageInMemoryMetadata(const StorageInMemoryMetadata & other); + StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); }; } From 23549399946061ab2482a3989f84eeb43e843788 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 17:19:20 +0300 Subject: [PATCH 0400/2229] Remove log debug --- src/Storages/MergeTree/KeyCondition.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 9038fd684f1..dad73b6a003 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -524,7 +524,6 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( Field & out_value, DataTypePtr & out_type) { - LOG_DEBUG(&Poco::Logger::get("DEBUG"), "KEY EXPR EXMPTY: {}", (key_expr == nullptr)); String expr_name = node->getColumnName(); const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) From a30b72a1284331eafdec59db56c16695761839ce Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 17:21:12 +0300 Subject: [PATCH 0401/2229] Less debug --- src/Storages/MergeTree/MergeTreeData.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 79382aaa58d..0d8f02ff0b4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -410,8 +410,6 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool setSecondaryIndices(metadata.secondary_indices); setConstraints(metadata.constraints); - LOG_DEBUG(log, "HAS PRIMARY KEY {}", hasPrimaryKey()); - LOG_DEBUG(log, "IS PRIMARY KEY {}", isPrimaryKeyDefined()); } } From 
37a1d128c9d06594a5f4dd3e3731cdfe4d38a369 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 17:22:11 +0300 Subject: [PATCH 0402/2229] Better --- src/Storages/MergeTree/MergeTreeData.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0d8f02ff0b4..587e3e5622e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -400,16 +400,12 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool if (!only_check) { + /// Other parts of metadata initialized is separate methods setColumns(std::move(metadata.columns)); - - setSortingKey(new_sorting_key); - - setPrimaryKey(new_primary_key); - - setSecondaryIndices(metadata.secondary_indices); - setConstraints(metadata.constraints); + setSortingKey(new_sorting_key); + setPrimaryKey(new_primary_key); } } From d625162fce75fbbcde852a55325e89169de128da Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 17:23:32 +0300 Subject: [PATCH 0403/2229] Better comment --- src/Storages/MergeTree/MergeTreeData.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 587e3e5622e..f2196b91446 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -296,7 +296,10 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool /// Primary key not defined at all if (new_primary_key.definition_ast == nullptr) { - /// We copy sorting key, and restore definition_ast to empty value + /// We copy sorting key, and restore definition_ast to empty value, + /// because in merge tree code we sometimes chech, that our primary key + /// is fake (copied from sorting key, i.e. isPrimaryKeyDefined() == + /// false, but hasSortingKey() == true) new_primary_key = metadata.sorting_key; new_primary_key.definition_ast = nullptr; } From 339703d1b89a790a3be7b61aab000964eef9805a Mon Sep 17 00:00:00 2001 From: "Matwey V. 
Kornilov" Date: Mon, 8 Jun 2020 18:03:54 +0300 Subject: [PATCH 0404/2229] Fix missed #include is required for std::move --- base/common/strong_typedef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/base/common/strong_typedef.h b/base/common/strong_typedef.h index a46eb415e15..0dc29ad9f1b 100644 --- a/base/common/strong_typedef.h +++ b/base/common/strong_typedef.h @@ -1,6 +1,7 @@ #pragma once #include +#include template struct StrongTypedef From 68bd636c9ccaf50cde51e476a1f85f71b0c072dc Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 18:16:01 +0300 Subject: [PATCH 0405/2229] Fix modify test --- .../01079_parallel_alter_modify_zookeeper.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index bacc742d16a..9a6e9c3156c 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -100,8 +100,14 @@ wait echo "Finishing alters" -# This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +# This alter will finish all previous, but replica 1 maybe still not up-to-date. +# If query will throw something, than we will sleep 1 and retry. If timeout +# happened we will silentrly go out of loop and probably fail tests in the +# following for loop. +# +# 120 seconds is more than enough, but in rare cases for slow builds (debug, +# thread) it maybe necessary +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done From 5db83dad07f258c09618800c541d7d938decd1e1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 18:18:33 +0300 Subject: [PATCH 0406/2229] Better comment --- .../0_stateless/01079_parallel_alter_modify_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index 9a6e9c3156c..effc9f540a1 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -106,7 +106,7 @@ echo "Finishing alters" # following for loop. # # 120 seconds is more than enough, but in rare cases for slow builds (debug, -# thread) it maybe necessary +# thread) it maybe necessary. 
while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done From 7b63a88120bb6576b5787caa9609e603655f8ace Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 18:19:41 +0300 Subject: [PATCH 0407/2229] Remove redundant error code --- src/Storages/MergeTree/MergeTreeData.cpp | 1 - src/Storages/StorageMaterializedView.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f2196b91446..593498d5e84 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -106,7 +106,6 @@ namespace ErrorCodes extern const int READONLY_SETTING; extern const int ABORTED; extern const int UNKNOWN_PART_TYPE; - extern const int UNEXPECTED_AST_STRUCTURE; extern const int UNKNOWN_DISK; extern const int NOT_ENOUGH_SPACE; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 74718f02d47..a7a59f0e9b9 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -30,7 +30,6 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; } From 269bf6f5ef131a6a4c0d837f803ad17bbf675e3f Mon Sep 17 00:00:00 2001 From: "Matwey V. Kornilov" Date: Mon, 8 Jun 2020 19:25:21 +0300 Subject: [PATCH 0408/2229] Fix missed #include is required for std::hash --- base/common/strong_typedef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/base/common/strong_typedef.h b/base/common/strong_typedef.h index 0dc29ad9f1b..d9850a25c37 100644 --- a/base/common/strong_typedef.h +++ b/base/common/strong_typedef.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include From 9941fbe32d9085737efdd20c0162ea7e9d6f4241 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 19:34:42 +0300 Subject: [PATCH 0409/2229] Fix benign race condition during shutdown --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fea05c00e4f..b399584f4d9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1865,7 +1865,7 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect part->remove_time.store(remove_time, std::memory_order_relaxed); if (part->state != IMergeTreeDataPart::State::Outdated) - modifyPartState(part,IMergeTreeDataPart::State::Outdated); + modifyPartState(part, IMergeTreeDataPart::State::Outdated); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index faa44ff7db1..f2ac6678764 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2961,7 +2961,6 @@ void StorageReplicatedMergeTree::startup() void StorageReplicatedMergeTree::shutdown() { - clearOldPartsFromFilesystem(true); /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. 
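
The race this commit fixes is the classic conflict between an object that removes its own directory in its destructor (a temporary part produced by a merge or fetch) and a bulk cleaner sweeping the same tree. A self-contained illustration of why the sweep must run last; this is hypothetical code, not ClickHouse's actual part machinery:

#include <filesystem>

namespace fs = std::filesystem;

/// Stand-in for a temporary part: it cleans up after itself on destruction.
struct SelfRemovingDir
{
    fs::path path;
    ~SelfRemovingDir() { fs::remove_all(path); }
};

/// Stand-in for clearOldPartsFromFilesystem(): sweeps everything under root.
/// If SelfRemovingDir destructors can still run concurrently, both sides may
/// remove the same path and the iterator can see entries vanish mid-walk.
void sweepOldEntries(const fs::path & root)
{
    for (const auto & entry : fs::directory_iterator(root))
        fs::remove_all(entry.path());
}

Hence the order the hunk below establishes: cancel every background producer first, remove its task handles, and only then sweep.
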
fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); @@ -2997,6 +2996,12 @@ void StorageReplicatedMergeTree::shutdown() std::unique_lock lock(data_parts_exchange_endpoint->rwlock); } data_parts_exchange_endpoint.reset(); + + /// We clear all parts after stopping all background operations. It's + /// important, because background operations can produce temporary parts + /// which will remove themselfs in their descrutors. If so, we may have race + /// condition between our remove call and background process. + clearOldPartsFromFilesystem(true); } From ce73b30505524d7791ef6fea71b9e26addc0df7a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 21:08:55 +0300 Subject: [PATCH 0410/2229] Review fixes --- src/Storages/StorageMergeTree.cpp | 27 ++++++++++++------- src/Storages/StorageReplicatedMergeTree.cpp | 6 ++--- .../01079_parallel_alter_modify_zookeeper.sh | 2 +- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4650485847c..15e662b27b5 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -141,16 +141,6 @@ void StorageMergeTree::shutdown() mutation_wait_event.notify_all(); } - try - { - clearOldPartsFromFilesystem(true); - } - catch (...) - { - /// Example: the case of readonly filesystem, we have failure removing old parts. - /// Should not prevent table shutdown. - tryLogCurrentException(log); - } merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); @@ -160,6 +150,23 @@ void StorageMergeTree::shutdown() if (moving_task_handle) global_context.getBackgroundMovePool().removeTask(moving_task_handle); + + + try + { + /// We clear all old parts after stopping all background operations. + /// It's important, because background operations can produce temporary + /// parts which will remove themselves in their descrutors. If so, we + /// may have race condition between our remove call and background + /// process. + clearOldPartsFromFilesystem(true); + } + catch (...) + { + /// Example: the case of readonly filesystem, we have failure removing old parts. + /// Should not prevent table shutdown. + tryLogCurrentException(log); + } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f2ac6678764..d109fa464b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2997,10 +2997,10 @@ void StorageReplicatedMergeTree::shutdown() } data_parts_exchange_endpoint.reset(); - /// We clear all parts after stopping all background operations. It's + /// We clear all old parts after stopping all background operations. It's /// important, because background operations can produce temporary parts - /// which will remove themselfs in their descrutors. If so, we may have race - /// condition between our remove call and background process. + /// which will remove themselves in their descrutors. If so, we may have + /// race condition between our remove call and background process. 
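
The StorageMergeTree side of this commit also keeps the sweep wrapped in try/catch so that a cleanup failure (the comment cites a read-only filesystem) cannot abort table shutdown. A minimal sketch of that best-effort shape; the names are illustrative, and std::cerr stands in for the tryLogCurrentException(log) call used in the real code:

#include <iostream>
#include <stdexcept>

/// Stand-in failure: in practice this is clearOldPartsFromFilesystem(true)
/// throwing on an unwritable disk.
void removeOldParts()
{
    throw std::runtime_error("Read-only file system");
}

/// Shutdown paths must complete no matter what, so cleanup errors are
/// logged and swallowed rather than propagated.
void bestEffortCleanup()
{
    try
    {
        removeOldParts();
    }
    catch (...)
    {
        std::cerr << "Failed to remove old parts, continuing shutdown\n";
    }
}
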
clearOldPartsFromFilesystem(true); } diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index effc9f540a1..05ef4a1a675 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -102,7 +102,7 @@ echo "Finishing alters" # This alter will finish all previous, but replica 1 maybe still not up-to-date. # If query will throw something, than we will sleep 1 and retry. If timeout -# happened we will silentrly go out of loop and probably fail tests in the +# happened we will silently go out of loop and probably fail tests in the # following for loop. # # 120 seconds is more than enough, but in rare cases for slow builds (debug, From a5a9048109e15cb3daef1849141d5c9919c8decb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 21:23:26 +0300 Subject: [PATCH 0411/2229] At least fixed gcc-9 build --- src/Storages/StorageReplicatedMergeTree.cpp | 34 ++++++++++----------- src/Storages/StorageReplicatedMergeTree.h | 2 +- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f57dd0b5f32..dc61ab110cd 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -160,7 +160,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool attach, const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, @@ -168,7 +168,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( bool has_force_restore_data_flag) : MergeTreeData(table_id_, relative_data_path_, - metadata, + metadata_, context_, date_column_name, merging_params_, @@ -472,9 +472,9 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff) { - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - if (new_columns != metadata.columns) - metadata.columns = new_columns; + StorageInMemoryMetadata current_metadata = getInMemoryMetadata(); + if (new_columns != current_metadata.columns) + current_metadata.columns = new_columns; if (!metadata_diff.empty()) { @@ -492,37 +492,37 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column tuple->arguments->children = new_sorting_key_expr_list->children; order_by_ast = tuple; } - metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, metadata.columns, global_context); + current_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, current_metadata.columns, global_context); if (!isPrimaryKeyDefined()) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. 
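
The branch below (and the matching change to registerStorageMergeTree.cpp later in this series) relies on the convention spelled out in the "Better comment" commit above: an implicit primary key is a copy of the sorting key with definition_ast reset, so isPrimaryKeyDefined() reports false while the key columns remain usable. A condensed model of that invariant, keeping only the KeyDescription fields visible in these patches:

#include <memory>
#include <string>
#include <vector>

struct ASTStub {};
using ASTPtr = std::shared_ptr<ASTStub>;   /// simplified stand-in for the real ASTPtr

struct KeyDescription
{
    ASTPtr definition_ast;                 /// null unless the user spelled the key out
    std::vector<std::string> column_names;
};

/// No PRIMARY KEY clause: copy the sorting key but clear definition_ast.
KeyDescription makeImplicitPrimaryKey(const KeyDescription & sorting_key)
{
    KeyDescription primary_key = sorting_key;
    primary_key.definition_ast = nullptr;
    return primary_key;
}

/// "Defined" means the user actually wrote a PRIMARY KEY clause.
bool isPrimaryKeyDefined(const KeyDescription & primary_key)
{
    return primary_key.definition_ast != nullptr;
}
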
- metadata.primary_key = getSortingKey(); + current_metadata.primary_key = getSortingKey(); } } if (metadata_diff.skip_indices_changed) - metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, global_context); + current_metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, global_context); if (metadata_diff.constraints_changed) - metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); + current_metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); if (metadata_diff.ttl_table_changed) { ParserTTLExpressionList parser; auto ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl_for_table_ast, metadata.columns, global_context, metadata.primary_key); + current_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl_for_table_ast, current_metadata.columns, global_context, current_metadata.primary_key); } } auto table_id = getStorageID(); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, current_metadata); /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. - setProperties(metadata); - setTTLExpressions(new_columns, metadata.table_ttl); + setProperties(current_metadata); + setTTLExpressions(new_columns, current_metadata.table_ttl); } @@ -3294,13 +3294,13 @@ void StorageReplicatedMergeTree::alter( table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// We don't replicate storage_settings_ptr ALTER. It's local operation. /// Also we don't upgrade alter lock to table structure lock. 
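
The renames in this commit (the constructor parameter becoming metadata_, locals becoming current_metadata and future_metadata) remove reuse of one identifier across parameters, members and locals. The commit message only says this fixed the gcc-9 build, so the exact diagnostic is an assumption, but shadowing rejected under -Wshadow with warnings-as-errors is the usual failure this pattern produces. A hypothetical minimal reproduction, not taken from the codebase:

struct Table
{
    explicit Table(int metadata_) : metadata(metadata_) {}

    void alter(int delta)
    {
        /// A local reusing the member's name: gcc 9 with -Wshadow -Werror
        /// refuses to compile this; renaming the local is the standard fix.
        int metadata = this->metadata + delta;
        (void)metadata;
    }

    int metadata;
};
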
- StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, query_context); + StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); + params.apply(future_metadata, query_context); - changeSettings(metadata.settings_changes, table_lock_holder); + changeSettings(future_metadata.settings_changes, table_lock_holder); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); return; } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index b82b387a623..8e136995917 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -540,7 +540,7 @@ protected: bool attach, const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, From 0d375e45223e41432b5539ce48019ee158843b1e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 21:49:54 +0300 Subject: [PATCH 0412/2229] Fix constraints --- src/Storages/AlterCommands.cpp | 3 ++- src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- src/Storages/MergeTree/registerStorageMergeTree.cpp | 9 +++++++++ .../0_stateless/01277_alter_rename_column_constraint.sql | 6 +++--- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 13da6db8ed3..33a62d9bd1c 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -327,7 +327,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == COMMENT_COLUMN) { - metadata.columns.modify(column_name, [&](ColumnDescription & column) { column.comment = *comment; }); + metadata.columns.modify(column_name, + [&](ColumnDescription & column) { column.comment = *comment; }); } else if (type == ADD_INDEX) { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 593498d5e84..fe13446def7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -404,10 +404,10 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool { /// Other parts of metadata initialized is separate methods setColumns(std::move(metadata.columns)); - setSecondaryIndices(metadata.secondary_indices); - setConstraints(metadata.constraints); - setSortingKey(new_sorting_key); - setPrimaryKey(new_primary_key); + setSecondaryIndices(std::move(metadata.secondary_indices)); + setConstraints(std::move(metadata.constraints)); + setSortingKey(std::move(new_sorting_key)); + setPrimaryKey(std::move(new_primary_key)); } } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index d61418edfe7..50775a04255 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -506,6 +506,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->primary_key) metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); + else + { + metadata.primary_key = metadata.sorting_key; + metadata.primary_key.definition_ast = 
nullptr; + } if (args.storage_def->sample_by) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, args.context); @@ -521,6 +526,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) for (auto & index : args.query.columns_list->indices->children) metadata.secondary_indices.push_back(IndexDescription::getIndexFromAST(index, args.columns, args.context)); + if (args.query.columns_list && args.query.columns_list->constraints) + for (auto & constraint : args.query.columns_list->constraints->children) + metadata.constraints.constraints.push_back(constraint); + storage_settings->loadFromQuery(*args.storage_def); if (args.storage_def->settings) diff --git a/tests/queries/0_stateless/01277_alter_rename_column_constraint.sql b/tests/queries/0_stateless/01277_alter_rename_column_constraint.sql index 72fbb045601..b9d5030239d 100644 --- a/tests/queries/0_stateless/01277_alter_rename_column_constraint.sql +++ b/tests/queries/0_stateless/01277_alter_rename_column_constraint.sql @@ -15,7 +15,7 @@ PARTITION BY date ORDER BY key; INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number + 2) from numbers(9); -INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number) from numbers(9); ; --{serverError 469} +INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number) from numbers(9); --{serverError 469} SELECT * FROM table_for_rename ORDER BY key; @@ -26,7 +26,7 @@ SELECT * FROM table_for_rename ORDER BY key; SELECT '-- insert after rename --'; INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number + 2) from numbers(10, 10); -INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number) from numbers(10, 10); ; --{serverError 469} +INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number) from numbers(10, 10); --{serverError 469} SELECT * FROM table_for_rename ORDER BY key; SELECT '-- rename columns back --'; @@ -37,7 +37,7 @@ SELECT * FROM table_for_rename ORDER BY key; SELECT '-- insert after rename column --'; INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number + 1), toString(number + 2) from numbers(20,10); -INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number), toString(number + 2) from numbers(20, 10); ; --{serverError 469} +INSERT INTO table_for_rename SELECT toDate('2019-10-01') + number % 3, number, toString(number), toString(number), toString(number + 2) from numbers(20, 10); --{serverError 469} SELECT * FROM table_for_rename ORDER BY key; DROP TABLE IF EXISTS table_for_rename; From d2383f0f5deb24808294202233ba169a3476df11 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jun 2020 21:43:00 +0300 Subject: [PATCH 0413/2229] Fix async INSERT into Distributed for prefer_localhost_replica=0 and w/o internal_replication --- src/Storages/Distributed/DistributedBlockOutputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp 
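
The one-line change in this commit gates the direct in-process write on prefer_localhost_replica. A simplified sketch of the resulting control flow; the real function is DistributedBlockOutputStream::writeAsyncImpl with a different signature, and queueForReplicaDirectories is a made-up stand-in for its per-replica directory handling:

#include <cstddef>
#include <iostream>

struct Block {};   /// toy stand-ins for DB::Block, Cluster::ShardInfo, Settings
struct ShardInfo
{
    bool is_local = false;
    size_t local_nodes = 0;
    bool isLocal() const { return is_local; }
};
struct Settings { bool prefer_localhost_replica = true; };

void writeToLocal(const Block &, size_t n) { std::cout << "direct local write x" << n << "\n"; }
void queueForReplicaDirectories(const Block &, const ShardInfo &) { std::cout << "queued to per-replica dirs\n"; }

/// With prefer_localhost_replica = 0 the local replica no longer gets a
/// shortcut: its rows travel through the same on-disk queue as everyone
/// else's, which is what makes the load_balancing-dependent expectations in
/// the accompanying test hold for shards without internal_replication.
void writeShardAsync(const Block & block, const ShardInfo & shard, const Settings & settings)
{
    if (shard.isLocal() && settings.prefer_localhost_replica)
        writeToLocal(block, shard.local_nodes);
    queueForReplicaDirectories(block, shard);
}
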
b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 5516e85b143..4e28923ebfc 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -518,7 +518,7 @@ void DistributedBlockOutputStream::writeAsyncImpl(const Block & block, const siz } else { - if (shard_info.isLocal()) + if (shard_info.isLocal() && settings.prefer_localhost_replica) writeToLocal(block, shard_info.getLocalNodeCount()); std::vector dir_names; From 222bbe95d6169ebdc732f7a4016a149183a87d80 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jun 2020 21:43:01 +0300 Subject: [PATCH 0414/2229] Cover load_balancing for Distributed INSERT w/o internal_replication --- .../configs/remote_servers.xml | 18 ++++++- .../test.py | 50 +++++++++++-------- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml index 61bc5af1f7d..bfcb1c0977b 100644 --- a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml +++ b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml @@ -1,6 +1,6 @@ - + true @@ -12,7 +12,21 @@ 9000 - + + + + + false + + n2 + 9000 + + + n1 + 9000 + + + diff --git a/tests/integration/test_insert_distributed_load_balancing/test.py b/tests/integration/test_insert_distributed_load_balancing/test.py index 49c8a89161f..52ee3ba1c4a 100644 --- a/tests/integration/test_insert_distributed_load_balancing/test.py +++ b/tests/integration/test_insert_distributed_load_balancing/test.py @@ -11,6 +11,11 @@ cluster = ClickHouseCluster(__file__) n1 = cluster.add_instance('n1', main_configs=['configs/remote_servers.xml']) n2 = cluster.add_instance('n2', main_configs=['configs/remote_servers.xml']) +params = pytest.mark.parametrize('cluster,q', [ + ('internal_replication', 0), + ('no_internal_replication', 1), +]) + @pytest.fixture(scope='module', autouse=True) def start_cluster(): try: @@ -19,7 +24,7 @@ def start_cluster(): finally: cluster.shutdown() -def create_tables(): +def create_tables(cluster): n1.query('DROP TABLE IF EXISTS data') n2.query('DROP TABLE IF EXISTS data') n1.query('DROP TABLE IF EXISTS dist') @@ -29,39 +34,44 @@ def create_tables(): n1.query(""" CREATE TABLE dist AS data Engine=Distributed( - integration_test_cluster, + {cluster}, currentDatabase(), data, rand() ) - """) + """.format(cluster=cluster)) -def insert_data(**settings): - create_tables() +def insert_data(cluster, **settings): + create_tables(cluster) n1.query('INSERT INTO dist SELECT * FROM numbers(10)', settings=settings) n1.query('SYSTEM FLUSH DISTRIBUTED dist') -def test_prefer_localhost_replica_1(): - insert_data() +@params +def test_prefer_localhost_replica_1(cluster, q): + insert_data(cluster) assert int(n1.query('SELECT count() FROM data')) == 10 - assert int(n2.query('SELECT count() FROM data')) == 0 + assert int(n2.query('SELECT count() FROM data')) == 10*q -def test_prefer_localhost_replica_1_load_balancing_in_order(): - insert_data(load_balancing='in_order') +@params +def test_prefer_localhost_replica_1_load_balancing_in_order(cluster, q): + insert_data(cluster, load_balancing='in_order') assert int(n1.query('SELECT count() FROM data')) == 10 - assert int(n2.query('SELECT count() FROM data')) == 0 + assert int(n2.query('SELECT count() FROM data')) == 10*q -def 
From 222bbe95d6169ebdc732f7a4016a149183a87d80 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 8 Jun 2020 21:43:01 +0300
Subject: [PATCH 0414/2229] Cover load_balancing for Distributed INSERT w/o
 internal_replication

---
 .../configs/remote_servers.xml | 18 ++++++-
 .../test.py                    | 50 +++++++++++--------
 2 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml
index 61bc5af1f7d..bfcb1c0977b 100644
--- a/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml
+++ b/tests/integration/test_insert_distributed_load_balancing/configs/remote_servers.xml
@@ -1,6 +1,6 @@
 <yandex>
     <remote_servers>
-        <integration_test_cluster>
+        <internal_replication>
             <shard>
                 <internal_replication>true</internal_replication>
                 <replica>
                     <host>n1</host>
                     <port>9000</port>
                 </replica>
                 <replica>
                     <host>n2</host>
                     <port>9000</port>
                 </replica>
             </shard>
-        </integration_test_cluster>
+        </internal_replication>
+
+        <no_internal_replication>
+            <shard>
+                <internal_replication>false</internal_replication>
+                <replica>
+                    <host>n2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n1</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </no_internal_replication>
     </remote_servers>
 </yandex>
diff --git a/tests/integration/test_insert_distributed_load_balancing/test.py b/tests/integration/test_insert_distributed_load_balancing/test.py
index 49c8a89161f..52ee3ba1c4a 100644
--- a/tests/integration/test_insert_distributed_load_balancing/test.py
+++ b/tests/integration/test_insert_distributed_load_balancing/test.py
@@ -11,6 +11,11 @@ cluster = ClickHouseCluster(__file__)
 n1 = cluster.add_instance('n1', main_configs=['configs/remote_servers.xml'])
 n2 = cluster.add_instance('n2', main_configs=['configs/remote_servers.xml'])

+params = pytest.mark.parametrize('cluster,q', [
+    ('internal_replication', 0),
+    ('no_internal_replication', 1),
+])
+
 @pytest.fixture(scope='module', autouse=True)
 def start_cluster():
     try:
@@ -19,7 +24,7 @@ def start_cluster():
     finally:
         cluster.shutdown()

-def create_tables():
+def create_tables(cluster):
     n1.query('DROP TABLE IF EXISTS data')
     n2.query('DROP TABLE IF EXISTS data')
     n1.query('DROP TABLE IF EXISTS dist')
@@ -29,39 +34,44 @@ def create_tables():
     n1.query("""
     CREATE TABLE dist AS data
     Engine=Distributed(
-        integration_test_cluster,
+        {cluster},
         currentDatabase(),
         data,
         rand()
     )
-    """)
+    """.format(cluster=cluster))

-def insert_data(**settings):
-    create_tables()
+def insert_data(cluster, **settings):
+    create_tables(cluster)
     n1.query('INSERT INTO dist SELECT * FROM numbers(10)', settings=settings)
     n1.query('SYSTEM FLUSH DISTRIBUTED dist')

-def test_prefer_localhost_replica_1():
-    insert_data()
+@params
+def test_prefer_localhost_replica_1(cluster, q):
+    insert_data(cluster)
     assert int(n1.query('SELECT count() FROM data')) == 10
-    assert int(n2.query('SELECT count() FROM data')) == 0
+    assert int(n2.query('SELECT count() FROM data')) == 10*q

-def test_prefer_localhost_replica_1_load_balancing_in_order():
-    insert_data(load_balancing='in_order')
+@params
+def test_prefer_localhost_replica_1_load_balancing_in_order(cluster, q):
+    insert_data(cluster, load_balancing='in_order')
     assert int(n1.query('SELECT count() FROM data')) == 10
-    assert int(n2.query('SELECT count() FROM data')) == 0
+    assert int(n2.query('SELECT count() FROM data')) == 10*q

-def test_prefer_localhost_replica_0_load_balancing_nearest_hostname():
-    insert_data(load_balancing='nearest_hostname', prefer_localhost_replica=0)
+@params
+def test_prefer_localhost_replica_0_load_balancing_nearest_hostname(cluster, q):
+    insert_data(cluster, load_balancing='nearest_hostname', prefer_localhost_replica=0)
     assert int(n1.query('SELECT count() FROM data')) == 10
-    assert int(n2.query('SELECT count() FROM data')) == 0
+    assert int(n2.query('SELECT count() FROM data')) == 10*q

-def test_prefer_localhost_replica_0_load_balancing_in_order():
-    insert_data(load_balancing='in_order', prefer_localhost_replica=0)
-    assert int(n1.query('SELECT count() FROM data')) == 0
+@params
+def test_prefer_localhost_replica_0_load_balancing_in_order(cluster, q):
+    insert_data(cluster, load_balancing='in_order', prefer_localhost_replica=0)
+    assert int(n1.query('SELECT count() FROM data')) == 10*q
     assert int(n2.query('SELECT count() FROM data')) == 10

-def test_prefer_localhost_replica_0_load_balancing_in_order_sync():
-    insert_data(load_balancing='in_order', prefer_localhost_replica=0, insert_distributed_sync=1)
-    assert int(n1.query('SELECT count() FROM data')) == 0
+@params
+def test_prefer_localhost_replica_0_load_balancing_in_order_sync(cluster, q):
+    insert_data(cluster, load_balancing='in_order', prefer_localhost_replica=0, insert_distributed_sync=1)
+    assert int(n1.query('SELECT count() FROM data')) == 10*q
     assert int(n2.query('SELECT count() FROM data')) == 10
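The 10*q assertions above encode the semantics of internal_replication: when it is true, the Distributed engine writes each block to a single replica of the shard and relies on the underlying replicated table to propagate it, so n2 stays empty (q = 0); when it is false, the engine itself must deliver the block to every replica, so n2 also receives all 10 rows (q = 1). A toy model of that fan-out rule; Shard and replicasToWrite are invented names for illustration, not ClickHouse API:

#include <cstdio>

struct Shard
{
    unsigned replica_count;
    bool internal_replication;
};

// internal_replication=true: write to one replica, the table replicates itself.
// internal_replication=false: the Distributed engine fans out to all replicas.
unsigned replicasToWrite(const Shard & shard)
{
    return shard.internal_replication ? 1 : shard.replica_count;
}

int main()
{
    std::printf("%u\n", replicasToWrite(Shard{2, true}));  // 1 -> n2 holds 0 rows
    std::printf("%u\n", replicasToWrite(Shard{2, false})); // 2 -> n2 holds 10 rows
}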
"" : "_replica" + std::to_string(replica_index)); + if (shard_index == 0 || replica_index == 0) + // shard_num/replica_num like in system.clusters table + throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR); + + return "shard" + std::to_string(shard_index) + "_replica" + std::to_string(replica_index); } else { @@ -284,7 +287,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting const auto & prefix = config_prefix + key; const auto weight = config.getInt(prefix + ".weight", default_weight); - addresses.emplace_back(config, prefix); + addresses.emplace_back(config, prefix, current_shard_num, 1); const auto & address = addresses.back(); ShardInfo info; From eee2fb867aab5b875202463de0d64ed9c360b734 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Jun 2020 22:15:30 +0300 Subject: [PATCH 0416/2229] Update test_distributed_format to cover remote_servers via --- .../configs/remote_servers.xml | 7 +++++++ .../test_distributed_format/test.py | 21 ++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_distributed_format/configs/remote_servers.xml b/tests/integration/test_distributed_format/configs/remote_servers.xml index 7d8d64bb78b..5c86713bd78 100644 --- a/tests/integration/test_distributed_format/configs/remote_servers.xml +++ b/tests/integration/test_distributed_format/configs/remote_servers.xml @@ -8,5 +8,12 @@ + + + + not_existing + 9000 + + diff --git a/tests/integration/test_distributed_format/test.py b/tests/integration/test_distributed_format/test.py index 211127f624a..291db89ae4c 100644 --- a/tests/integration/test_distributed_format/test.py +++ b/tests/integration/test_distributed_format/test.py @@ -11,6 +11,11 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) node = cluster.add_instance('node', config_dir="configs", main_configs=['configs/remote_servers.xml']) +cluster_param = pytest.mark.parametrize("cluster", [ + ('test_cluster'), + ('test_cluster_2'), +]) + @pytest.fixture(scope="module") def started_cluster(): @@ -22,9 +27,9 @@ def started_cluster(): finally: cluster.shutdown() - -def test_single_file(started_cluster): - node.query("create table test.distr_1 (x UInt64, s String) engine = Distributed('test_cluster', database, table)") +@cluster_param +def test_single_file(started_cluster, cluster): + node.query("create table test.distr_1 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) node.query("insert into test.distr_1 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={"use_compact_format_in_distributed_parts_names": "1"}) query = "select * from file('/var/lib/clickhouse/data/test/distr_1/shard1_replica1/1.bin', 'Distributed')" @@ -41,8 +46,9 @@ def test_single_file(started_cluster): node.query("drop table test.distr_1") -def test_two_files(started_cluster): - node.query("create table test.distr_2 (x UInt64, s String) engine = Distributed('test_cluster', database, table)") +@cluster_param +def test_two_files(started_cluster, cluster): + node.query("create table test.distr_2 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", settings={"use_compact_format_in_distributed_parts_names": "1"}) node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", settings={"use_compact_format_in_distributed_parts_names": "1"}) @@ -60,8 +66,9 @@ def test_two_files(started_cluster): node.query("drop 
table test.distr_2") -def test_single_file_old(started_cluster): - node.query("create table test.distr_3 (x UInt64, s String) engine = Distributed('test_cluster', database, table)") +@cluster_param +def test_single_file_old(started_cluster, cluster): + node.query("create table test.distr_3 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')") query = "select * from file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')" From e6dcece2fddf0069f241d3ff8c5336df74e09238 Mon Sep 17 00:00:00 2001 From: MovElb Date: Mon, 8 Jun 2020 22:39:02 +0300 Subject: [PATCH 0417/2229] return tcp_port --- tests/integration/test_postgresql_protocol/configs/config.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_postgresql_protocol/configs/config.xml b/tests/integration/test_postgresql_protocol/configs/config.xml index a833e228222..678b48425b1 100644 --- a/tests/integration/test_postgresql_protocol/configs/config.xml +++ b/tests/integration/test_postgresql_protocol/configs/config.xml @@ -24,6 +24,7 @@ + 9000 5433 127.0.0.1 From 00e7eb91b19ef8bacfa12aa4aee418ebb65da337 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 8 Jun 2020 23:13:03 +0300 Subject: [PATCH 0418/2229] Revert "Autocompletion support for users in client" --- programs/client/Suggest.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 1229caae245..4ac5e735fd5 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -116,8 +116,6 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo << " UNION ALL " "SELECT DISTINCT name FROM system.dictionaries LIMIT " << limit_str << " UNION ALL " - "SELECT DISTINCT name FROM system.users LIMIT " << limit_str - << " UNION ALL " "SELECT DISTINCT name FROM system.columns LIMIT " << limit_str; } From ef7b054443b5b2b8a43c1ddecbefae0039fb9173 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 00:53:32 +0300 Subject: [PATCH 0419/2229] Speed up merging in AggregatingMergeTree --- src/Columns/ColumnAggregateFunction.cpp | 59 +++++++++++++++++---- src/Columns/ColumnAggregateFunction.h | 21 +++----- src/DataTypes/DataTypeAggregateFunction.cpp | 10 ++-- 3 files changed, 61 insertions(+), 29 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 1568437618d..2e8d2589b78 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,51 @@ namespace ErrorCodes } +static std::string getTypeString(const AggregateFunctionPtr & func) +{ + WriteBufferFromOwnString stream; + stream << "AggregateFunction(" << func->getName(); + const auto & parameters = func->getParameters(); + const auto & argument_types = func->getArgumentTypes(); + + if (!parameters.empty()) + { + stream << '('; + for (size_t i = 0; i < parameters.size(); ++i) + { + if (i) + stream << ','; + stream << applyVisitor(FieldVisitorToString(), parameters[i]); + } + stream << ')'; + } + + for (const auto & argument_type : argument_types) + stream << ',' << argument_type->getName(); + + stream << ')'; + return stream.str(); +} + + +ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_) + : func(func_), type_string(getTypeString(func)) +{ +} + 
From eee2fb867aab5b875202463de0d64ed9c360b734 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 9 Jun 2020 00:53:32 +0300
Subject: [PATCH 0419/2229] Speed up merging in AggregatingMergeTree

---
 src/Columns/ColumnAggregateFunction.cpp     | 59 +++++++++++++++++----
 src/Columns/ColumnAggregateFunction.h       | 21 +++-----
 src/DataTypes/DataTypeAggregateFunction.cpp | 10 ++--
 3 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 1568437618d..2e8d2589b78 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -27,6 +28,51 @@ namespace ErrorCodes
 }

+static std::string getTypeString(const AggregateFunctionPtr & func)
+{
+    WriteBufferFromOwnString stream;
+    stream << "AggregateFunction(" << func->getName();
+    const auto & parameters = func->getParameters();
+    const auto & argument_types = func->getArgumentTypes();
+
+    if (!parameters.empty())
+    {
+        stream << '(';
+        for (size_t i = 0; i < parameters.size(); ++i)
+        {
+            if (i)
+                stream << ',';
+            stream << applyVisitor(FieldVisitorToString(), parameters[i]);
+        }
+        stream << ')';
+    }
+
+    for (const auto & argument_type : argument_types)
+        stream << ',' << argument_type->getName();
+
+    stream << ')';
+    return stream.str();
+}
+
+
+ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_)
+    : func(func_), type_string(getTypeString(func))
+{
+}
+
+ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_, const ConstArenas & arenas_)
+    : foreign_arenas(arenas_), func(func_), type_string(getTypeString(func))
+{
+
+}
+
+void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
+{
+    func = func_;
+    type_string = getTypeString(func);
+}
+
+
 ColumnAggregateFunction::~ColumnAggregateFunction()
 {
     if (!func->hasTrivialDestructor() && !src)
@@ -336,15 +382,10 @@ MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const
     return create(func);
 }

-String ColumnAggregateFunction::getTypeString() const
-{
-    return DataTypeAggregateFunction(func, func->getArgumentTypes(), func->getParameters()).getName();
-}
-
 Field ColumnAggregateFunction::operator[](size_t n) const
 {
     Field field = AggregateFunctionStateData();
-    field.get().name = getTypeString();
+    field.get().name = type_string;
     {
         WriteBufferFromString buffer(field.get().data);
         func->serialize(data[n], buffer);
@@ -355,7 +396,7 @@ Field ColumnAggregateFunction::operator[](size_t n) const
 void ColumnAggregateFunction::get(size_t n, Field & res) const
 {
     res = AggregateFunctionStateData();
-    res.get().name = getTypeString();
+    res.get().name = type_string;
     {
         WriteBufferFromString buffer(res.get().data);
         func->serialize(data[n], buffer);
@@ -425,8 +466,6 @@ static void pushBackAndCreateState(ColumnAggregateFunction::Container & data, Ar
 void ColumnAggregateFunction::insert(const Field & x)
 {
-    String type_string = getTypeString();
-
     if (x.getType() != Field::Types::AggregateFunctionState)
         throw Exception(String("Inserting field of type ") + x.getTypeName() + " into ColumnAggregateFunction. "
                         "Expected " + Field::Types::toString(Field::Types::AggregateFunctionState), ErrorCodes::LOGICAL_ERROR);
@@ -564,7 +603,7 @@ void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const
     AggregateDataPtr place = place_buffer.data();

     AggregateFunctionStateData serialized;
-    serialized.name = getTypeString();
+    serialized.name = type_string;

     func->create(place);
     try
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index 002bc71f561..a9b3c38a2e0 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -74,6 +74,9 @@ private:
     /// Array of pointers to aggregation states, that are placed in arenas.
     Container data;

+    /// Name of the type to distinguish different aggregation states.
+    String type_string;
+
     ColumnAggregateFunction() {}

     /// Create a new column that has another column as a source.
@@ -84,29 +87,17 @@
     /// but ownership of different elements cannot be mixed by different columns.
     void ensureOwnership();

-    ColumnAggregateFunction(const AggregateFunctionPtr & func_)
-        : func(func_)
-    {
-    }
+    ColumnAggregateFunction(const AggregateFunctionPtr & func_);

     ColumnAggregateFunction(const AggregateFunctionPtr & func_,
-        const ConstArenas & arenas_)
-        : foreign_arenas(arenas_), func(func_)
-    {
-    }
-
+        const ConstArenas & arenas_);

     ColumnAggregateFunction(const ColumnAggregateFunction & src_);

-    String getTypeString() const;
-
 public:
     ~ColumnAggregateFunction() override;

-    void set(const AggregateFunctionPtr & func_)
-    {
-        func = func_;
-    }
+    void set(const AggregateFunctionPtr & func_);

     AggregateFunctionPtr getAggregateFunction() { return func; }
     AggregateFunctionPtr getAggregateFunction() const { return func; }
diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp
index e94d761dc87..59811b1cd55 100644
--- a/src/DataTypes/DataTypeAggregateFunction.cpp
+++ b/src/DataTypes/DataTypeAggregateFunction.cpp
@@ -14,6 +14,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include

@@ -36,25 +38,25 @@ namespace ErrorCodes

 std::string DataTypeAggregateFunction::doGetName() const
 {
-    std::stringstream stream;
+    WriteBufferFromOwnString stream;
     stream << "AggregateFunction(" << function->getName();

     if (!parameters.empty())
     {
-        stream << "(";
+        stream << '(';
         for (size_t i = 0; i < parameters.size(); ++i)
         {
             if (i)
                 stream << ", ";
             stream << applyVisitor(DB::FieldVisitorToString(), parameters[i]);
         }
-        stream << ")";
+        stream << ')';
     }

     for (const auto & argument_type : argument_types)
         stream << ", " << argument_type->getName();

-    stream << ")";
+    stream << ')';
     return stream.str();
 }
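The point of the patch above: ColumnAggregateFunction previously rebuilt the full "AggregateFunction(...)" type name through DataTypeAggregateFunction for every serialized state, which is wasteful when a merge touches millions of rows. The name depends only on the aggregate function, so it is now computed once and cached in the new type_string member; the performance test added in the next patch exercises this by merging quantilesTiming states. A minimal illustration of the compute-once pattern; this simplified Column is an invented stand-in, not the real class:

#include <iostream>
#include <string>

struct Column
{
    std::string func_name;
    std::string type_string; // cached, like the new member above

    explicit Column(std::string name)
        : func_name(std::move(name)),
          type_string("AggregateFunction(" + func_name + ")") // built once
    {}

    // O(1) per row: returns the cached name instead of rebuilding it.
    const std::string & typeName() const { return type_string; }
};

int main()
{
    Column c("quantilesTiming");
    for (int row = 0; row < 3; ++row)
        std::cout << c.typeName() << '\n'; // no per-row string construction
}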
From f9ea964c87bb36bf0b7e26a315d20bdb240f0016 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 9 Jun 2020 00:59:58 +0300
Subject: [PATCH 0420/2229] Added performance test

---
 tests/performance/aggregating_merge_tree.xml | 32 ++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 tests/performance/aggregating_merge_tree.xml

diff --git a/tests/performance/aggregating_merge_tree.xml b/tests/performance/aggregating_merge_tree.xml
new file mode 100644
index 00000000000..2116050f7a5
--- /dev/null
+++ b/tests/performance/aggregating_merge_tree.xml
@@ -0,0 +1,32 @@
+<test>
+    <create_query>DROP TABLE IF EXISTS test</create_query>
+
+    <create_query>
+        CREATE TABLE test(
+            t UInt64,
+            q1 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64),
+            q2 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64),
+            q3 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64),
+            q4 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64),
+            q5 AggregateFunction(quantilesTiming(0.50, 0.75, 0.90, 0.99), Float64)
+        ) ENGINE=SummingMergeTree()
+        ORDER BY t
+    </create_query>
+
+    <fill_query>
+        INSERT INTO test
+        SELECT
+            number / 10 as t,
+            quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q1,
+            quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q2,
+            quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q3,
+            quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q4,
+            quantilesTimingState(0.50, 0.75, 0.90, 0.99)(number/1000) as q5
+        FROM numbers(1000 * 1000)
+        GROUP BY t
+    </fill_query>
+
+    <query>OPTIMIZE TABLE test FINAL</query>
+
+    <drop_query>DROP TABLE test</drop_query>
+</test>

From 6a3df071a2204a63d767f3eb1cf9cc39e5d79e2b Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Tue, 9 Jun 2020 01:13:08 +0300
Subject: [PATCH 0421/2229] fixup

---
 docker/test/performance-comparison/compare.sh |   2 +-
 .../performance/agg_functions_min_max_any.xml |   7 +-
 .../arithmetic_operations_in_aggr_func.xml    |  20 +-
tests/performance/base64_hits.xml | 5 +- tests/performance/basename.xml | 12 +- .../bit_operations_fixed_string.xml | 15 +- tests/performance/cpu_synthetic.xml | 5 +- tests/performance/int_parsing.xml | 171 +++++++++--------- .../performance/number_formatting_formats.xml | 4 +- tests/performance/point_in_polygon.xml | 2 +- tests/performance/set_index.xml | 2 +- tests/performance/website.xml | 2 +- 12 files changed, 118 insertions(+), 129 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 4993e381bd9..835e1424fd9 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -582,7 +582,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes, union all select * from unstable_run_traces union all select * from unstable_run_metrics_2) mm group by test, query_index, metric - having d > 0.5 + having d > 0.5 and q[3] > 5 ) metrics left join query_display_names using (test, query_index) order by test, query_index, d desc diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml index d40d19b6e38..f212ab227b8 100644 --- a/tests/performance/agg_functions_min_max_any.xml +++ b/tests/performance/agg_functions_min_max_any.xml @@ -1,4 +1,4 @@ - + hits_100m_single @@ -39,10 +39,7 @@ select max(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null select any(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null select anyHeavy(SocialNetwork) from hits_100m_single where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(SocialAction) from hits_100m_single where SocialAction != '' group by intHash32(UserID) % 1000000 FORMAT Null + select min(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null select max(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null select any(SocialSourcePage) from hits_100m_single where SocialSourcePage != '' group by intHash32(UserID) % 1000000 FORMAT Null diff --git a/tests/performance/arithmetic_operations_in_aggr_func.xml b/tests/performance/arithmetic_operations_in_aggr_func.xml index 28f13823731..c91033bf9e4 100644 --- a/tests/performance/arithmetic_operations_in_aggr_func.xml +++ b/tests/performance/arithmetic_operations_in_aggr_func.xml @@ -1,22 +1,14 @@ + SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000) - - - 10 - - + SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000) + SELECT sum(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(500000000) - SELECT max(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000) + SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000000) - SELECT min(-1 * (((-2 * (number * -3)) * -4) * -5)) FROM numbers(120000000) + SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(500000000) - SELECT sum(-1 * (((-2 * (number * -3)) * -4) * 
-5)) FROM numbers(120000000) - - SELECT min(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(120000000) - - SELECT max(-1 + (((-2 + (number + -3)) + -4) + -5)) FROM numbers(120000000) - - SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(120000000) + SELECT max(((((number) * 10) * -2) * 3) * 2) + min(((((number) * 10) * -2) * 3) * 2) FROM numbers(500000000) diff --git a/tests/performance/base64_hits.xml b/tests/performance/base64_hits.xml index b07212bd598..65828d6ca1f 100644 --- a/tests/performance/base64_hits.xml +++ b/tests/performance/base64_hits.xml @@ -1,10 +1,11 @@ - hits_100m_single - + + 1 + diff --git a/tests/performance/basename.xml b/tests/performance/basename.xml index 7d040da5998..4ccc7ddf3a0 100644 --- a/tests/performance/basename.xml +++ b/tests/performance/basename.xml @@ -1,12 +1,12 @@ - - - - test.hits + hits_100m_single -SELECT count() FROM test.hits WHERE NOT ignore(basename(URL)) -SELECT count() FROM test.hits WHERE NOT ignore(basename(Referer)) + + 1 + + SELECT count() FROM hits_100m_single WHERE NOT ignore(basename(URL)) + SELECT count() FROM hits_100m_single WHERE NOT ignore(basename(Referer)) diff --git a/tests/performance/bit_operations_fixed_string.xml b/tests/performance/bit_operations_fixed_string.xml index 19972535fd9..56b6ff804f7 100644 --- a/tests/performance/bit_operations_fixed_string.xml +++ b/tests/performance/bit_operations_fixed_string.xml @@ -1,10 +1,15 @@ - hits_100m_single + test.hits - SELECT count() FROM hits_100m_single WHERE NOT ignore(bitAnd(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM hits_100m_single WHERE NOT ignore(bitOr(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM hits_100m_single WHERE NOT ignore(bitXor(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) - SELECT count() FROM hits_100m_single WHERE NOT ignore(bitNot(toFixedString(ClientIP6, 16))) + + 1 + + + + SELECT count() FROM test.hits WHERE NOT ignore(bitAnd(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM test.hits WHERE NOT ignore(bitOr(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM test.hits WHERE NOT ignore(bitXor(toFixedString(ClientIP6, 16), IPv6StringToNum('ffff:ffff:ffff:0000:0000:0000:0000:0000'))) + SELECT count() FROM test.hits WHERE NOT ignore(bitNot(toFixedString(ClientIP6, 16))) diff --git a/tests/performance/cpu_synthetic.xml b/tests/performance/cpu_synthetic.xml index 1076e051207..2888f7bbbd6 100644 --- a/tests/performance/cpu_synthetic.xml +++ b/tests/performance/cpu_synthetic.xml @@ -1,7 +1,4 @@ - - - - + hits_100m_single hits_10m_single diff --git a/tests/performance/int_parsing.xml b/tests/performance/int_parsing.xml index a9258875b5e..1a242c28393 100644 --- a/tests/performance/int_parsing.xml +++ b/tests/performance/int_parsing.xml @@ -1,92 +1,89 @@ - - - - test.hits + hits_100m_single - SELECT count() FROM test.hits WHERE NOT ignore(toString(WatchID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(JavaEnable)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(GoodEvent)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT 
ignore(toString(ClientIP)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RegionID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(UserID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(CounterClass)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsRobot)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMajor)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(FlashMinor)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMajor)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(NetMinor)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(UserAgentMajor)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(CookieEnable)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(JavascriptEnable)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsMobile)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(MobilePhone)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IPNetworkID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(TraficSourceID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchEngineID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SearchPhrase)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(AdvEngineID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsArtifical)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientWidth)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowClientHeight)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ClientTimeZone)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion1)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion2)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion3)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SilverlightVersion4)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(CodeVersion)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsLink)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsDownload)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsNotBounce)) 
SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(FUniqID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(HID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsOldCounter)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsEvent)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(IsParameter)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(DontCountHits)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(WithHash)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Age)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Sex)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Income)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Interests)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(Robotness)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RemoteIP)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(WindowName)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(OpenerName)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(HistoryLength)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(HTTPError)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SendTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(DNSTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ConnectTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMInteractiveTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMContentLoadedTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(DOMCompleteTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventStartTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(LoadEventEndTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(NSToDOMContentLoadedTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(FirstPaintTiming)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RedirectCount)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(HasGCLID)) SETTINGS 
max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(YCLID)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestNum)) SETTINGS max_threads = 1 - SELECT count() FROM test.hits WHERE NOT ignore(toString(RequestTry)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WatchID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(JavaEnable)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(GoodEvent)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CounterID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ClientIP)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RegionID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CounterClass)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsRobot)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FlashMajor)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FlashMinor)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NetMajor)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NetMinor)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserAgentMajor)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CookieEnable)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(JavascriptEnable)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsMobile)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(MobilePhone)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IPNetworkID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(TraficSourceID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SearchEngineID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SearchPhrase)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(AdvEngineID)) 
SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsArtifical)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowClientWidth)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowClientHeight)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ClientTimeZone)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion1)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion2)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion3)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SilverlightVersion4)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CodeVersion)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsLink)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsDownload)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsNotBounce)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FUniqID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsOldCounter)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsEvent)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsParameter)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DontCountHits)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WithHash)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Age)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Sex)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Income)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Interests)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Robotness)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RemoteIP)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(WindowName)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(OpenerName)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HistoryLength)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HTTPError)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SendTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DNSTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ConnectTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS 
max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RedirectTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMInteractiveTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMContentLoadedTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMCompleteTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(LoadEventStartTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(LoadEventEndTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NSToDOMContentLoadedTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FirstPaintTiming)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RedirectCount)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(HasGCLID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(YCLID)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RequestNum)) SETTINGS max_threads = 1 + SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RequestTry)) SETTINGS max_threads = 1 diff --git a/tests/performance/number_formatting_formats.xml b/tests/performance/number_formatting_formats.xml index a9faf9369ec..c4a17b1f133 100644 --- a/tests/performance/number_formatting_formats.xml +++ b/tests/performance/number_formatting_formats.xml @@ -32,8 +32,8 @@ CREATE TABLE IF NOT EXISTS table_{format} (x UInt64) ENGINE = File(`{format}`) CREATE TABLE IF NOT EXISTS table_{format_fast} (x UInt64) ENGINE = File(`{format}`) - INSERT INTO table_{format} SELECT number FROM numbers(10000000) - INSERT INTO table_{format_fast} SELECT number FROM numbers(100000000) + INSERT INTO table_{format} SELECT number FROM numbers(10000000) + INSERT INTO table_{format_fast} SELECT number FROM numbers(20000000) DROP TABLE IF EXISTS table_{format} DROP TABLE IF EXISTS table_{format_fast} diff --git a/tests/performance/point_in_polygon.xml b/tests/performance/point_in_polygon.xml index c1325720429..a1ef4891577 100644 --- a/tests/performance/point_in_polygon.xml +++ b/tests/performance/point_in_polygon.xml @@ -7,7 +7,7 @@ FROM numbers(1000000) - SELECT pointInPolygon((100, 100), polygon) FROM polygons + SELECT pointInPolygon((100, 100), polygon) FROM polygons FORMAT Null DROP TABLE IF EXISTS polygons diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml index d9b263159b5..46fe32e5486 100644 --- a/tests/performance/set_index.xml +++ 
b/tests/performance/set_index.xml @@ -15,7 +15,7 @@ SELECT count() FROM test_in WHERE -toInt64(a) NOT IN (SELECT toInt64(rand(1)) FROM numbers(100000)) settings max_rows_to_read=1, read_overflow_mode='break' - SELECT count() FROM numbers(1000) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', 
'82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', 
'99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', + SELECT count() FROM numbers(10000) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', 
'34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', 
'23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', 
'12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', '68446', '58896', '60969', '19849', '6722', '91854', '49519', '13949', '67109', '48824', '31723', '75554', '69575', '94986', '75350', '18628', '15284', '41943', '15433', '52607', '41', '22340', '29528', '24059', '34145', '72517', '46316', '10667', '54510', '19882', '47764', '69124', '41963', '84350', '48420', '4646', '24958', '69020', '97121', '26178', '62664', '50622', '32554', '49655', '45398', '11267', '72222', '73363', '89554', '89046', '57384', '29259', '37888', '24850', '74353', '57343', '34762', '2900', '11393', '42154', '94306', '70552', '75265', '921', '26003', '64352', '89857', '83171', '58249', '48940', '53512', '66335', '44865', '68729', '19299', '58003', '39854', '99122', '3860', '80173', '52242', '90966', '53183', '71982', '82325', '87842', '15000', '55627', '71132', '6354', '42402', '91719', '91644', '94533', '74925', '66278', '66911', '85576', '40495', '70919', '71797', '87835', '29845', '71832', '3390', '7994', '33499', '70715', '54897', '82710', '63077', '78105', '24758', '89585', '84607', '46477', '78618', '10989', '39222', '98749', '51685', '94664', '31008', '32823', '89521', 
'72160', '26952', '4001', '21421', '5737', '74027', '88179', '45553', '83743', '19913', '49435', '65616', '82641', '5149', '76959', '40681', '73023', '2670', '30845', '18863', '35094', '88400', '80963', '9154', '16571', '64192', '59694', '41317', '59942', '58856', '99281', '67260', '66971', '22716', '76089', '58047', '67071', '53707', '462', '52518', '72277', '10681', '69', '98855', '12593', '88842', '67242', '73543', '37439', '18413', '67211', '93495', '45576', '70614', '27988', '53210', '18618', '21318', '68059', '25518', '55917', '56522', '16548', '2404', '93538', '61452', '66358', '3709', '23914', '92426', '81439', '38070', '28988', '29939', '2948', '85720', '45628', '51101', '89431', '86365', '17571', '50987', '83849', '11015', '83812', '66187', '26362', '66786', '22024', '93866', '36161', '90080', '64874', '37294', '83860', '73821', '80279', '36766', '73117', '44620', '84556', '42070', '90383', '27862', '20665', '67576', '34997', '57958', '80638', '84351', '63961', '1362', '14338', '80377', '24192', '41294', '57368', '51189', '27287', '45764', '86289', '65600', '708', '84090', '96005', '55676', '84855', '72385', '70018', '9336', '82701', '3710', '52083', '74045', '96454', '30956', '67369', '78941', '81810', '71906', '23194', '33042', '50794', '61256', '24449', '48639', '22916', '78303', '13666', '40762', '43942', '51075', '89783', '95786', '90462', '6181', '36482', '40675', '4970', '6388', '91849', '72579', '94983', '86084', '20140', '68427', '48123', '43122', '98066', '37560', '6927', '72803', '5546', '62259', '98439', '6457', '98568', '70499', '33022', '28226', '29675', '20917', '75365', '20900', '8190', '56736', '99153', '77779', '49333', '50293', '97650', '4067', '47278', '42761', '71875', '13966', '11223', '46783', '18059', '61355', '29638', '75681', '24466', '89634', '20759', '83252', '37780', '15931', '74893', '6703', '64524', '80656', '85990', '78427', '18411', '20696', '86432', '93176', '69889', '15072', '15180', '9935', '10467', '60248', '42430', '62590', '89596', '27743', '26398', '79912', '60048', '50943', '38870', '69383', '72261', '98059', '55242', '74905', '5667', '54321', '70415', '39903', '49711', '85318', '79979', '59262', '82321', '15263', '17416', '74554', '94733', '72112', '49872', '54849', '73883', '78250', '74935', '68559', '57564', '50541', '45730', '41595', '5588', '83723', '42891', '11898', '14348', '99732', '14481', '85233', '21277', '94508', '52551', '74187', '7634', '42912', '25100', '43536', '35798', '48190', '86477', '22680', '48148', '59501', '56563', '16802', '81496', '97568', '68657', '51462', '67953', '99660', '39002', '54170', '57190', '68086', '52700', '6487', '55709', '70418', '62629', '70420', '35695', '36152', '45360', '53503', '46623', '76000', '50648', '97876', '44815', '29163', '1356', '64123', '71388', '17658', '99084', '58727', '59437', '38773', '71254', '81286', '97545', '18786', '56834', '20346', '36401', '62316', '58082', '67959', '99876', '69895', '80099', '62747', '20517', '99777', '6472', '49189', '31321', '39992', '68073', '13378', '51806', '21776', '52060', '96983', '25754', '93709', '96627', '8644', '93726', '14002', '37716', '87620', '34507', '76339', '24491', '5849', '44110', '522', '66521', '12776', '44887', '80535', '14548', '75248', '671', '73071', '35715', '59474', '7061', '82243', '56170', '20179', '59717', '1725', '24634', '11270', '77023', '63840', '46608', '44667', '22422', '59771', '94768', '73033', '82905', '16463', '40971', '22204', '58366', '28721', '14907', '76468', '81872', '38418', '36989', '61439', '10610', 
'131', '44296', '35453', '10117', '75856', '94603', '99602', '68075', '35949', '13599', '50030', '69633', '55956', '85465', '16429', '86081', '11145', '6195', '82207', '90598', '92814', '23725', '83204', '80346', '71542', '46634', '15820', '54123', '45397', '15322', '61743', '9273', '71347', '6835', '64006', '91718', '43677', '32923', '21486', '17098', '61694', '43347', '40019', '4071', '52443', '42386', '56839', '83514', '27633', '40780', '51749', '92101', '62384', '92206', '56044', '66174', '11137', '73966', '78471', '30468', '31643', '33197', '6888', '8066', '86603', '74383', '6098', '54411', '98819', '89862', '88639', '94422', '89371', '80526', '91747', '91220', '64944', '76658', '42046', '58518', '27249', '6646', '3028', '1346', '33763', '9734', '31737', '65527', '5892', '60813', '3410', '35464', '43009', '98382', '70580', '93898', '56404', '32995', '62771', '71556', '40538', '55612', '45656', '10758', '20268', '33603', '38310', '14242', '74397', '10722', '71575', '22590', '49043', '91439', '9055', '23668', '9101', '5268', '64133', '77501', '64684', '11337', '47575', '50732', '88680', '93730', '46785', '17589', '3520', '57595', '71241', '34994', '8753', '36147', '88844', '41914', '11250', '94632', '71927', '4624', '86279', '7664', '2659', '94853', '65386', '30438', '86005', '92883', '84629', '59910', '44484', '1306', '8404', '56962', '29990', '38445', '96191', '73013', '66590', '40951', '24712', '18825', '37268', '87843', '18972', '12154', '7779', '52149', '76152', '65799', '86011', '35475', '78083', '88232', '91551', '65532', '93516', '73827', '24227', '44687', '55759', '83819', '45088', '10856', '60488', '39051', '14103', '76650', '81181', '46731', '737', '58788', '78945', '42096', '66731', '66740', '72273', '88969', '5655', '86590', '41096', '80038', '32430', '51877', '23970', '91900', '13082', '45880', '94367', '19739', '61998', '71665', '16083', '57035', '26916', '10166', '18834', '46798', '66881', '28444', '68840', '10459', '81087', '4728', '76224', '39257', '23470', '93524', '37345', '30074', '49856', '22022', '55279', '5159', '5193', '58030', '57539', '12514', '49759', '96222', '52597', '67192', '88187', '53614', '16084', '79915', '28212', '79334', '85283', '32306', '31058', '43113', '74707', '74869', '2213', '32134', '6379', '85426', '87098', '35984', '51105', '69287', '16803', '83337', '14913', '62531', '58098', '7914', '20105', '28850', '1384', '43173', '62983', '87113', '76066', '86320', '77684', '45191', '95225', '41503', '36713', '48404', '91228', '53865', '98981', '59161', '61237', '84561', '17455', '14379', '57789', '80895', '99260', '84595', '72942', '53220', '84448', '81332', '49437', '83086', '93414', '54519', '52288', '74772', '22460', '49324', '11168', '96071', '61985', '38284', '6405', '54698', '71727', '60093', '37340', '87884', '83403', '4542', '94949', '19636', '15855', '39105', '10424', '67418', '91022', '69254', '8481', '38411', '3832', '44354', '93548', '57172', '28481', '372', '81497', '52179', '41060', '72141', '41396', '65590', '70432', '82819', '93814', '26118', '84780', '88485', '70821', '8222', '83000', '47067', '38516', '33347', '47681', '48202', '60749', '52112', '7937', '28105', '11394', '45746', '43252', '34494', '2979', '69715', '42486', '82315', '71760', '97413', '66137', '94487', '7429', '74434', '22964', '55251', '3448', '53534', '2574', '9693', '96157', '2955', '4348', '19566', '56930', '83319', '31310', '53905', '1148', '41726', '22233', '76045', '37351', '10545', '17581', '28047', '30199', '4741', '58111', '33497', '67796', '67730', 
'31247', '43772', '29461', '45970', '73353', '22534', '53962', '32147', '71392', '62579', '66345', '58246', '33442', '9581', '29705', '14058', '86471', '76125', '59363', '94982', '74810', '89149', '20066', '3366', '3568', '25752', '80036', '64119', '27270', '40061', '91052', '69022', '9852', '77112', '83075', '43924', '61661', '56133', '96652', '57944', '72576', '82170', '79236', '55745', '15309', '88878', '72761', '37647', '67465', '12777', '97309', '93202', '41470', '8787', '64920', '48514', '18917', '35157', '59151', '4640', '5317', '38134', '76548', '82788', '9214', '58418', '73185', '90554', '10543', '47182', '62936', '91765', '89751', '68931', '48865', '64607', '7150', '77862', '14297', '14828', '33013', '91698', '67593', '98096', '16595', '51639', '86531', '24719', '1703', '78788', '43810', '38918', '95491', '99903', '82671', '8291', '68288', '31224', '39863', '4265', '77798', '7698', '33804', '92286', '4744', '37038', '44203', '98212', '17369', '77442', '62879', '4145', '96881', '15646', '36824', '19959', '45451', '76049', '54272', '97577', '95298', '81115', '30204', '82041', '8037', '10052', '8756', '76833', '82851', '24276', '75574', '36037', '78079', '92807', '29064', '90000', '84150', '17102', '75092', '49424', '35597', '4693', '82853', '42511', '16119', '23478', '65240', '55585', '91762', '71671', '46682', '72479', '97696', '24615', '12579', '30274', '48255', '2336', '90202', '5808', '45426', '76308', '74639', '31245', '99894', '89638', '6233', '33893', '71899', '85273', '89429', '29761', '50231', '57249', '99347', '22642', '66972', '86221', '47514', '88274', '10819', '73150', '53754', '13304', '20478', '38099', '619', '14669', '8011', '97657', '26569', '65430', '13467', '38180', '23675', '72350', '42257', '39875', '23529', '53407', '11833', '29599', '95621', '7727', '59527', '86846', '22860', '5358', '3730', '87555', '362', '95755', '54565', '29935', '68950', '52349', '98344', '86576', '7420', '12236', '15844', '48099', '97535', '97081', '50261', '31187', '60496', '24123', '24042', '6376', '6679', '99806', '20306', '60676', '36881', '77309', '5247', '96569', '53417', '73252', '64179', '35318', '75732', '65119', '32621', '40464', '22887', '96152', '65161', '83381', '8915', '68142', '7328', '85031', '15688', '72519', '93992', '86927', '75538', '38205', '50877', '70039', '97538', '94822', '52131', '49643', '85206', '1347', '14574', '88736', '53442', '49991', '64925', '72283', '82213', '60905', '36118', '62963', '16983', '79185', '15111', '26059', '17792', '98218', '33214', '1094', '41754', '77275', '65173', '13190', '91004', '90422', '44387', '92672', '98641', '54609', '83295', '37395', '70104', '32986', '72524', '82478', '5837', '83916', '52736', '57112', '55985', '42642', '42136', '89642', '35712', '49489', '19726', '65824', '24384', '48112', '15366', '99206', '68384', '51389', '529', '21475', '75749', '95182', '60110', '70571', '74174', '38105', '78107', '4101', '8982', '11215', '23987', '3303', '28706', '54629', '98000', '67510', '30036', '99140', '48896', '40971', '7735', '79984', '50134', '94928', '57023', '52880', '83067', '41940', '62994', '89213', '38593', '19283', '68206', '22234', '19245', '26266', '32403', '65889', '17022', '64280', '42797', '27161', '57675', '42313', '93606', '93082', '20659', '90824', '1226', '66266', '12503', '57104', '15247', '51160', '92398', '71967', '59476', '44465', '35765', '10787', '47737', '45792', '2292', '47599', '89612', '8162', '87622', '69410', '45727', '31158', '99791', '89544', '27214', '99588', '40516', '75616', '36505', '46079', 
'95448', '97999', '47462', '47799', '82729', '34038', '60789', '96938', '22682', '79062', '93307', '36038', '49016', '90983', '48219', '50889', '32517', '72219', '71229', '82643', '1195', '70543', '17', '22178', '23544', '72371', '1163', '28527', '7336', '39846', '31956', '80963', '41804', '59791', '41831', '1940', '52377', '79494', '12531', '81112', '44320', '18746', '5774', '63869', '4085', '59922', '12751', '99443', '13530', '23872', '36026', '83360', '32711', '92980', '11140', '99323', '57263', '98149', '29265', '25548', '65995', '4818', '15593', '8535', '37863', '12217', '14474', '66584', '89272', '86690', '58777', '39666', '44756', '18442', '52586', '98030', '40850', '38708', '49304', '68923', '65008', '84388', '83639', '29866', '63675', '26793', '49227', '82099', '24090', '57535', '24201', '65776', '74054', '89833', '62979', '26613', '5851', '99766', '63484', '66605', '37179', '90760', '59336', '58390', '93239', '84578', '11396', '93994', '73818', '23972', '37720', '72369', '25063', '32952', '71036', '76612', '31285', '34090', '19136', '53783', '66436', '61478', '96749', '43658', '7399', '31574', '67073', '40480', '20727', '70993', '65549', '30800', '21507', '53785', '89574', '86381', '56492', '62603', '44856', '68687', '63794', '70996', '7475', '84238', '71939', '86886', '94792', '15036', '36936', '95722', '17771', '67850', '33371', '49314', '40744', '5432', '81057', '41201', '75986', '22961', '15323', '1570', '18657', '95219', '19130', '53127', '15867', '81135', '73206', '76668', '36386', '48828', '31417', '56916', '70891', '60534', '95777', '10022', '94053', '2928', '56326', '16559', '79656', '6414', '81247', '78270', '55687', '19151', '61597', '99857', '81142', '27725', '53493', '12185', '1455', '48501', '59425', '20591', '24900', '66079', '84889', '32024', '18919', '2043', '7076', '71201', '88258', '86521', '93348', '26395', '39646', '44145', '33911', '46231', '67054', '39979', '11630', '23020', '76278', '88056', '11480', '4723', '78612', '70211', '60622', '84687', '59092', '65675', '38479', '64399', '64699', '95964', '42764', '69060', '28189', '4193', '95805', '75462', '17245', '59640', '94773', '84292', '53092', '98507', '61353', '32483', '53027', '48912', '87221', '47788', '59263', '65196', '35567', '17494', '64253', '50223', '7057', '87467', '62414', '2523', '50910', '72353', '78986', '78104', '47719', '29108', '12957', '5114', '64435', '66707', '37449', '70399', '45334', '71606', '55338', '55072', '58765', '12151', '22012', '16954', '87366', '14240', '98041', '72296', '47408', '56879', '99584', '63172', '92316', '28071', '29880', '19608', '13839', '87484', '56541', '88662', '87098', '72124', '78282', '27653', '38993', '31870', '67239', '99445', '7376', '78487', '98880', '12180', '86773', '67773', '15416', '58172', '13075', '67559', '97510', '29705', '86985', '57024', '11827', '31236', '91920', '26116', '94614', '14486', '46252', '78847', '43786', '70048', '96739', '35240', '39933', '58209', '27852', '65669', '47323', '58150', '84444', '44344', '95882', '41258', '31314', '69060', '19916', '6979', '19436', '45572', '16259', '74566', '6306', '24705', '53422', '593', '97031', '22308', '26875', '23042', '78035', '34229', '61976', '23175', '50072', '90896', '50810', '71730', '86468', '94807', '8218', '36032', '58628', '60560', '51206', '37943', '27987', '15014', '49905', '70018', '66799', '80851', '23594', '29982', '6438', '97381', '47715', '96294', '17985', '48545', '12672', '5250', '9988', '24601', '3736', '97815', '54363', '64703', '44167', '68376', '16595', '38073', '29630', 
'59630', '1858', '71823', '75580', '70083', '14493', '93821', '93394', '85369', '3818', '8435', '59988', '43966', '13961', '15855', '83332', '80312', '27299', '88840', '76964', '56173', '62794', '79389', '82642', '85843', '47116', '43064', '16061', '28905', '54415', '72832', '91252', '93488', '79457', '99336', '70744', '80432', '6487', '880', '87701', '154', '86574', '86677', '17892', '81488', '95260', '12515', '43189', '9211', '55403', '41417', '60046', '54785', '83655', '28274', '65745', '63062', '44549', '36391', '48051', '7328', '3572', '33226', '49177', '25123', '59065', '19691', '15109', '10172', '95578', '29497', '48152', '20276', '36270', '78866', '48309', '53209', '55475', '30073', '19717', '16004', '45692', '83430', '9291', '45935', '57030', '92613', '91656', '67697', '34915', '28156', '56594', '3273', '11194', '98270', '34370', '2621', '66679', '97451', '97717', '87923', '48310', '37725', '69743', '75103', '84956', '75163', '16069', '65304', '19397', '18071', '27273', '49823', '57595', '98324', '82174', '10293', '80943', '64184', '19472', '4198', '9410', '25927', '65961', '33155', '95168', '33692', '61712', '69877', '13308', '17415', '10022', '2491', '67310', '96140', '68050', '76272', '17143', '76805', '57176', '7539', '22690', '95483', '87592', '27221', '90821', '51154', '99828', '68998', '54581', '74222', '10269', '65057', '45467', '96089', '55058', '89779', '60837', '74122', '52886', '58055', '14880', '93208', '66652', '68830', '24121', '62407', '87257', '18802', '14925', '45423', '98624', '55195', '59072', '41414', '77840', '66075', '62705', '26549', '19063', '57552', '2507', '52069', '57620', '66688', '14833', '33700', '90666', '98052', '5367', '2268', '43093', '69063', '22030', '85564', '92258', '1847', '24446', '65835', '38660', '91899', '87732', '52396', '31952', '36000', '86944', '16109', '80729', '53757', '60226', '59103', '84187', '36674', '72823', '29884', '4654', '69139', '20440', '57413', '3651', '39639', '44564', '57492', '84159', '751', '99748', '9659', '72661', '39220', '99742', '74734', '75729', '38071', '69934', '73640', '65294', '54524', '64372', '37927', '17187', '7863', '12732', '40296', '36197', '15821', '76831', '4400', '71933', '4040', '22072', '33064', '25702', '13324', '91275', '27388', '97729', '14620', '45989', '80737', '17934', '4219', '3032', '43457', '31051', '24469', '67041', '29328', '75499', '80951', '88212', '92595', '49969', '24612', '58732', '2718', '3805', '50918', '99426', '8614', '35580', '93273', '989', '24385', '41185', '25687', '47146', '25227', '95839', '56355', '98536', '79824', '31725', '46447', '26690', '68418', '47783', '33725', '21729', '70797', '59038', '60376', '25087', '68332', '67950', '12411', '95918', '64736', '65336', '74947', '64605', '4106', '42712', '96640', '28492', '28648', '42429', '821', '24333', '69677', '38959', '23484', '92005', '29352', '29159', '52873', '99947', '21834', '85347', '93479', '28298', '55608', '3226', '69714', '80283', '6577', '18849', '44605', '75286', '28139', '26541', '12867', '57500', '86617', '33005', '57498', '60223', '74954', '51401', '55246', '5648', '16513', '40930', '43821', '32090', '66002', '65530', '76083', '6047', '6879', '94987', '80787', '11688', '77161', '92670', '6696', '400', '28572', '47234', '51375', '88518', '762', '92617', '54260', '7560', '60180', '43331', '64059', '27616', '75839', '21392', '47756', '46254', '19486', '88533', '30130', '93694', '8557', '66534', '94447', '16910', '6480', '77440', '24366', '6195', '48946', '28597', '44429', '50300', '73556', '40638', '98709', 
'94413', '15987', '43860', '64871', '93953', '34506', '7296', '31753', '30626', '77510', '39829', '25696', '39776', '69185', '36540', '65413', '31528', '43446', '73532', '49776', '30282', '30004', '26725', '15200', '33958', '90320', '71836', '48051', '31970', '5326', '96194', '69695', '60898', '60945', '18271', '50868', '61468', '23593', '68985', '20628', '58044', '8942', '34849', '7384', '50500', '62895', '78780', '48946', '65278', '4067', '973', '34761', '15512', '73739', '23138', '47322', '55568', '32259', '71816', '49277', '75218', '76104', '19579', '68312', '67904', '33886', '53888', '26421', '43859', '40291', '39068', '31711', '36542', '10195', '39781', '72352', '13188', '34113', '9428', '60443', '4987', '13783', '80744', '63483', '18266', '11961', '87167', '46987', '28480', '74214', '39191', '8146', '38090', '75727', '79245', '47720', '52547', '45321', '4972', '49701', '74354', '69672', '63455', '41902', '5667', '54166', '4962', '25873', '44509', '73332', '73383', '29438', '21455', '12320', '11997', '16921', '49379', '63027', '86175', '8110', '76149', '2520', '11256', '25863', '50518', '69001', '79113', '9447', '91840', '5242', '10998', '46496', '2448', '56058', '20970', '10517', '17783', '25723', '97137', '62840', '1264', '78691', '81020', '55335', '48524', '2088', '90413', '76651', '26855', '16177', '14954', '62914', '21344', '5708', '75560', '39311', '95865', '28783', '64902', '95657', '46276', '33426', '4799', '11588', '57513', '73689', '77677', '63011', '97795', '34954', '76866', '32043', '32697', '26643', '36890', '53476', '3011', '13963', '49551', '87671', '67761', '17488', '94770', '50599', '33272', '23091', '38079', '41177', '22395', '91656', '79679', '38687', '57384', '80118', '42507', '4098', '78949', '45669', '48802', '83915', '78292', '4369', '57657', '49146', '45192', '98491', '72457', '46331', '207', '81601', '7409', '70856', '91605', '70295', '9171', '72293', '32997', '78025', '16795', '73534', '68780', '21284', '31767', '94381', '86439', '12420', '53285', '99563', '60502', '67954', '55012', '99809', '5431', '69978', '99712', '14401', '79498', '4495', '3045', '528', '72542', '91604', '72725', '39378', '80378', '41996', '20138', '54545', '59730', '36951', '45157', '37964', '97690', '12184', '4944', '53803', '93605', '60851', '68938', '46285', '89663', '90309', '6907', '87239', '81791', '83292', '90013', '68927', '14725', '81840', '63836', '52068', '43830', '4794', '931', '59255', '8263', '99057', '94401', '69033', '7437', '20364', '92884', '28193', '43932', '37629', '59426', '18891', '8583', '79551', '87242', '1483', '6725', '65786', '16844', '12650', '99305', '42841', '9811', '18800', '39313', '51373', '31874', '84558', '27831', '48614', '48975', '55509', '83363', '31854', '64001', '94028', '76125', '79314', '24893', '81132', '9441', '86015', '28356', '40358', '10160', '23328', '7330', '76538', '37611', '89351', '84132', '97047', '26109', '95222', '35130', '75600', '88602', '15073', '87835', '71649', '28948', '81615', '37498', '28674', '59776', '44095', '65924', '64368', '94536', '12518', '61711', '55619', '82949', '4114', '21540', '70544', '28022', '79983', '28781', '7749', '97873', '4951', '50076', '47611', '99522', '56820', '38653', '49047', '36283', '83908', '72452', '85625', '10811', '36998', '44083', '34864', '44975', '39057', '4551', '68450', '24781', '1503', '9871', '46885', '11424', '21259', '54900', '97669', '85669', '6015', '2521', '37661', '14915', '57423', '91903', '94789', '32059', '64972', '4600', '61465', '27118', '79785', '13547', '49766', '38410', 
'68860', '63756', '23621', '64387', '46255', '63408', '11297', '41081', '56326', '58349', '98703', '72268', '73574', '32098', '42534', '91502', '38083', '11241', '56828', '12098', '25377', '37054', '56328', '30034', '26922', '68401', '93478', '63275', '62650', '81407', '773', '79499', '14970', '47217', '1187', '57428', '69980', '77764', '74791', '22107', '54363', '39247', '56028', '56982', '84244', '21464', '18716', '25533', '94589', '94768', '21537', '18436', '81135', '27654', '79713', '56630', '61571', '58453', '26758', '68450', '68449', '2994', '15347', '83954', '71823', '6428', '44210', '79597', '95144', '32871', '1991', '320', '77157', '63607', '31154', '48846', '71125', '61750', '59608', '33038', '35733', '68915', '94127', '50383', '64242', '49708', '57270', '65019', '8581', '12111', '18487', '50013', '58664', '22214', '19033', '33681', '44754', '28830', '10381', '52318', '34959', '20682', '55453', '53800', '65774', '99164', '72102', '36986', '44157', '56716', '7974', '81475', '25926', '39402', '33688', '99671', '95312', '42268', '26536', '14482', '67377', '57993', '89147', '15834', '64995', '4700', '18714', '30221', '39095', '32749', '69257', '55204', '30497', '31839', '63045', '30009', '62683', '31232', '77680', '93551', '63589', '6989', '77246', '42169', '46117', '73226', '37427', '1858', '83649', '37410', '86369', '4641', '74481', '66168', '48041', '22597', '14670', '27464', '57165', '20939', '36282', '76940', '73358', '50521', '69603', '8895', '81793', '57743', '81903', '64025', '91641', '25276', '34040', '62642', '64015', '57657', '84890', '73832', '782', '60160', '16998', '40023', '24590', '88613', '76640', '53091', '67600', '80183', '45674', '64464', '25163', '42384', '66972', '13953', '41966', '66048', '15135', '73745', '19466', '53657', '34619', '13462', '15905', '48257', '73297', '238', '93525', '80556', '5942', '5411', '66169', '9090', '95130', '74316', '57321', '48083', '62355', '68113', '15239', '36644', '80326', '65817', '54428', '61955', '58849', '77206', '16073', '98261', '92091', '39178', '35464', '85109', '85452', '21128', '25665', '81860', '44664', '24024', '56960', '95124', '39786', '18836', '11121', '44163', '81074', '79064', '46219', '94694', '44233', '81469', '24642', '15030', '21995', '13587', '40755', '6669', '81093', '74305', '1881', '55649', '37273', '80827', '98643', '46694', '59281', '79231', '42813', '84984', '7052', '98113', '17296', '84434', '31205', '46894', '71219', '74530', '44686', '70744', '91388', '20692', '96853', '73803', '15836', '18126', '49686', '4179', '47588', '87892', '65425', '68012', '97468', '92510', '99271', '58694', '11918', '37051', '18644', '57228', '14265', '57572', '57022', '52186', '30193', '93570', '87872', '5257', '26784', '6476', '61746', '68559', '1720', '26202', '16519', '27688', '10645', '87174', '60845', '73385', '82075', '6933', '98828', '56895', '17344', '84253', '36561', '51648', '24939', '63470', '31034', '95052', '51090', '51465', '87979', '68650', '30181', '29598', '19137', '43221', '81353', '90170', '96985', '61115', '17385', '92314', '80650', '55821', '17874', '84333', '93272', '48260', '87272', '22764', '59957', '51870', '85988', '39222', '77241', '62535', '28344', '6011', '80831', '64551', '46299', '75195', '71177', '8660', '58943', '57003', '3306', '74413', '74068', '15073', '89016', '93140', '13911', '57170', '19880', '41870', '9131', '57495', '73032', '86979', '60094', '87026', '30880', '4736', '86301', '92707', '21689', '83565', '71275', '47665', '65687', '71184', '89897', '32490', '97577', '38723', 
'79113', '37531', '97500', '94450', '15699', '58019', '84423', '27057', '56017', '97148', '47365', '30669', '33818', '80406', '99690', '33012', '95178', '46809', '48448', '79350', '9146', '99701', '98976', '71197', '44161', '75069', '36602', '79650', '97301', '12020', '56658', '25701', '46392', '78609', '63073', '69419', '57736', '20102', '42415', '79044', '20277', '56280', '47903', '94311', '25558', '40336', '91305', '90505', '66769', '64562', '83737', '62892', '10375', '71024', '19988', '56946', '76110', '21847', '43162', '50578', '46086', '54167', '61722', '53463', '63134', '69288', '12838', '14116', '71687', '50846', '59810', '24826', '84138', '82885', '91496', '98600', '82769', '40049', '4125', '50694', '1294', '2805', '29691', '82321', '76462', '85945', '115', '29188', '66918', '71340', '31585', '61638', '95472', '52978', '50622', '81990', '60955', '70519', '22270', '35610', '95871', '89222', '41038', '52546', '1163', '67943', '1793', '92010', '35755', '74509', '66665', '95759', '8568', '44299', '67822', '5806', '85839', '13895', '87675', '31357', '88014', '40026', '53050', '28951', '31992', '42495', '82892', '51567', '2869', '45808', '20238', '20781', '56098', '66307', '95701', '614', '60833', '3091', '81339', '24195', '65639', '85976', '28116', '66224', '51502', '73637', '13207', '88302', '36488', '65518', '98187', '26', '74367', '64706', '53943', '86760', '25783', '82112', '34958', '86621', '20848', '63459', '14049', '84943', '91873', '50238', '77773', '64109', '8602', '87934', '47583', '66053', '30287', '5507', '80312', '37464', '57457', '86200', '17806', '16522', '38843', '94334', '59958', '63864', '53427', '74506', '33980', '90449', '30842', '53616', '36738', '52', '13595', '53051', '13174', '60163', '71420', '73835', '67119', '79018', '42782', '45059', '952', '46360', '85879', '71552', '84741', '29746', '32577', '10041', '7208', '97528', '51256', '916', '55973', '17684', '99046', '38782', '58660', '97798', '66032', '48339', '51329', '12532', '97904', '95454', '42737', '62541', '96702', '82953', '94610', '26645', '86813', '25480', '99713', '26078', '23028', '93056', '21445', '73209', '89318', '69987', '34705', '30064', '17094', '51135', '54141', '26625', '1086', '13082', '30843', '98672', '56864', '42605', '5833', '60850', '69366', '27351', '16456', '92609', '48030', '54322', '69891', '46502', '34578', '77918', '63276', '75958', '42519', '60266', '85576', '4855', '14258', '67017', '10545', '35078', '53012', '71922', '85784', '73402', '74363', '58457', '94102', '23510', '51559', '39482', '87057', '9377', '10106', '82985', '33931', '16523', '6484', '97749', '83172', '53753', '27466', '23073', '96083', '67302', '57465', '21877', '18013', '99804', '32873', '43123', '72365', '53197', '80578', '69770', '97471', '86954', '67183', '98497', '78474', '28450', '63183', '98699', '42738', '61433', '3491', '27304', '49311', '94980', '92740', '43272', '86549', '11406', '79636', '85582', '38086', '657', '2354', '26567', '77450', '42086', '21600', '49011', '44059', '47872', '75761', '96577', '11642', '83471', '79616', '23749', '77082', '96876', '65302', '84027', '48955', '59887', '20657', '75090', '9058', '50347', '66088', '70745', '76342', '58026', '95568', '61504', '93473', '84590', '47089', '74717', '93090', '46334', '68273', '59500', '54345', '72608', '54048', '86156', '40296', '74046', '6813', '36369', '74543', '18305', '85236', '31316', '37061', '96893', '23112', '5529', '10166', '19037', '1467', '70810', '30932', '18410', '92837', '81324', '12268', '54705', '25207', '90366', '56528', 
'3392', '88747', '39951', '97957', '99404', '23685', '13533', '15640', '11434', '66516', '71025', '65770', '88000', '52232', '32360', '10787', '37438', '2264', '94460', '80214', '42288', '59062', '29010', '64093', '21225', '22297', '36935', '19202', '5925', '85373', '27414', '28991', '9191', '42273', '56587', '89719', '77191', '64334', '61542', '28763', '28978', '79184', '59815', '95200', '30246', '54022', '287', '91808', '66347', '50833', '15356', '78614', diff --git a/tests/performance/website.xml b/tests/performance/website.xml index 0011d225d6c..66357352f3e 100644 --- a/tests/performance/website.xml +++ b/tests/performance/website.xml @@ -55,7 +55,7 @@ SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10 SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10 SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM hits_100m_single GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10 -SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10 +SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10 SETTINGS max_threads = 1 SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10 SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000 SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? 
Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 34 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000
From 85363ebe568eb61d78b1f90a32004bbfb9acb085 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 9 Jun 2020 01:41:57 +0300 Subject: [PATCH 0422/2229] Update aggregating_merge_tree.xml --- tests/performance/aggregating_merge_tree.xml | 2 -- 1 file changed, 2 deletions(-)
diff --git a/tests/performance/aggregating_merge_tree.xml b/tests/performance/aggregating_merge_tree.xml index 2116050f7a5..d658fd705bb 100644 --- a/tests/performance/aggregating_merge_tree.xml +++ b/tests/performance/aggregating_merge_tree.xml @@ -1,6 +1,4 @@ - DROP TABLE IF EXISTS test - CREATE TABLE test( t UInt64,
From 63f2d92eff60cd139600b92cfd5dd0590ae0db12 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 01:49:19 +0300 Subject: [PATCH 0423/2229] Better exception message when cannot parse columns declaration list #10403 --- .../parseColumnsListForTableFunction.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/src/TableFunctions/parseColumnsListForTableFunction.cpp b/src/TableFunctions/parseColumnsListForTableFunction.cpp index 8eea3edf9bd..5221d96e086 100644 --- a/src/TableFunctions/parseColumnsListForTableFunction.cpp +++ b/src/TableFunctions/parseColumnsListForTableFunction.cpp @@ -1,5 +1,6 @@ #include #include +#include <Parsers/parseQuery.h> #include #include #include @@ -11,27 +12,20 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int SYNTAX_ERROR; } ColumnsDescription parseColumnsListFromString(const std::string & structure, const Context & context) { - Expected expected; - - Tokens tokens(structure.c_str(), structure.c_str() + structure.size()); - IParser::Pos token_iterator(tokens, context.getSettingsRef().max_parser_depth); - ParserColumnDeclarationList parser; - ASTPtr columns_list_raw; + const Settings & settings = context.getSettingsRef(); - if (!parser.parse(token_iterator, columns_list_raw, expected)) - throw Exception("Cannot parse columns declaration list.", ErrorCodes::SYNTAX_ERROR); + ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth); auto * columns_list = dynamic_cast<ASTExpressionList *>(columns_list_raw.get()); if (!columns_list) throw Exception("Could not cast AST to ASTExpressionList", ErrorCodes::LOGICAL_ERROR); - return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, !context.getSettingsRef().allow_suspicious_codecs); + return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, !settings.allow_suspicious_codecs); } }
From 6ee6b751be55154edd8c3132c666876933e9a75e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 03:38:47 +0300 Subject: [PATCH 0424/2229] Added failing test #11539 --- .../0_stateless/01304_direct_io.reference | 1 + tests/queries/0_stateless/01304_direct_io.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/01304_direct_io.reference create mode 100755 tests/queries/0_stateless/01304_direct_io.sh
diff --git a/tests/queries/0_stateless/01304_direct_io.reference b/tests/queries/0_stateless/01304_direct_io.reference new file mode 100644 index 00000000000..ec7a223ddc2 --- /dev/null +++ b/tests/queries/0_stateless/01304_direct_io.reference @@ -0,0 +1 @@ +Loaded 1 queries.
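Background for the failing test that follows: the setting min_bytes_to_use_direct_io = 1 routes every read of at least that size through direct I/O, i.e. O_DIRECT on Linux, which bypasses the page cache and imposes alignment requirements on the buffer, offset and length. A minimal standalone sketch of such a read — plain POSIX, not ClickHouse code; the file name "data.bin" and the 4 KiB alignment are illustrative assumptions only:

#ifndef _GNU_SOURCE
#define _GNU_SOURCE   // O_DIRECT is a GNU/Linux extension
#endif
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <cstdlib>

int main()
{
    constexpr size_t alignment = 4096;   // assumed to cover the device's logical block size
    constexpr size_t size = 4096;

    void * buf = nullptr;
    if (posix_memalign(&buf, alignment, size) != 0)   // O_DIRECT requires an aligned buffer
        return 1;

    int fd = open("data.bin", O_RDONLY | O_DIRECT);   // hypothetical input file
    if (fd < 0)
    {
        perror("open");
        free(buf);
        return 1;
    }

    // With O_DIRECT, an unaligned buffer, offset or length makes pread fail with EINVAL.
    ssize_t n = pread(fd, buf, size, 0);
    if (n < 0)
        perror("pread");
    else
        printf("read %zd bytes bypassing the page cache\n", n);

    close(fd);
    free(buf);
    return 0;
}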
diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh new file mode 100755 index 00000000000..0b6af15aa3b --- /dev/null +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery --query " + DROP TABLE IF EXISTS bug; + CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date; + INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); + OPTIMIZE TABLE bug FINAL;" + +$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" >/dev/null 2>$CLICKHOUSE_TMP/err +cat $CLICKHOUSE_TMP/err + +$CLICKHOUSE_CLIENT --multiquery --query " + DROP TABLE bug;" From 6936da4e5d2fc113d56c9d9a92835b7e802eff11 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 03:42:56 +0300 Subject: [PATCH 0425/2229] Fix bad test --- tests/queries/0_stateless/01087_table_function_generate.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index 96db6803a47..05f03a5a4e6 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -33,11 +33,11 @@ LIMIT 10; SELECT '-'; SELECT toTypeName(i)s -FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))') +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))') LIMIT 1; SELECT i -FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 1, 10, 10) +FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 1, 10, 10) LIMIT 10; SELECT '-'; SELECT From 508e0f44d188e883ca46935f3ee6dcfc9ac17853 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 18:16:01 +0300 Subject: [PATCH 0426/2229] Fix modify test --- .../01079_parallel_alter_modify_zookeeper.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index bacc742d16a..9a6e9c3156c 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -100,8 +100,14 @@ wait echo "Finishing alters" -# This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +# This alter will finish all previous, but replica 1 maybe still not up-to-date. +# If query will throw something, than we will sleep 1 and retry. If timeout +# happened we will silentrly go out of loop and probably fail tests in the +# following for loop. 
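+# To spell out the retry mechanics: `timeout 120` bounds one single ALTER
+# attempt, while the `while [[ $(... 2>&1) ]]` condition keeps looping for
+# as long as the attempt prints anything at all (i.e. fails); a silent run
+# means the ALTER succeeded and the loop exits.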
+# +# 120 seconds is more than enough, but in rare cases for slow builds (debug, +# thread) it maybe necessary +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done From bdeafe830b1b241d39a3ed32f8133ddebd66b5bb Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 18:18:33 +0300 Subject: [PATCH 0427/2229] Better comment --- .../0_stateless/01079_parallel_alter_modify_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index 9a6e9c3156c..effc9f540a1 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -106,7 +106,7 @@ echo "Finishing alters" # following for loop. # # 120 seconds is more than enough, but in rare cases for slow builds (debug, -# thread) it maybe necessary +# thread) it maybe necessary. while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_mt_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done From de59629b386782da4e901845552b7e46a67d4dd9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 19:34:42 +0300 Subject: [PATCH 0428/2229] Fix benign race condition during shutdown --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fea05c00e4f..b399584f4d9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1865,7 +1865,7 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect part->remove_time.store(remove_time, std::memory_order_relaxed); if (part->state != IMergeTreeDataPart::State::Outdated) - modifyPartState(part,IMergeTreeDataPart::State::Outdated); + modifyPartState(part, IMergeTreeDataPart::State::Outdated); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index faa44ff7db1..f2ac6678764 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2961,7 +2961,6 @@ void StorageReplicatedMergeTree::startup() void StorageReplicatedMergeTree::shutdown() { - clearOldPartsFromFilesystem(true); /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); @@ -2997,6 +2996,12 @@ void StorageReplicatedMergeTree::shutdown() std::unique_lock lock(data_parts_exchange_endpoint->rwlock); } data_parts_exchange_endpoint.reset(); + + /// We clear all parts after stopping all background operations. It's + /// important, because background operations can produce temporary parts + /// which will remove themselfs in their descrutors. If so, we may have race + /// condition between our remove call and background process. 
+ clearOldPartsFromFilesystem(true); }
From decac918a27389e593c273ec147c3251ea998bd5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 04:48:11 +0300 Subject: [PATCH 0429/2229] Fix error --- src/Columns/ColumnAggregateFunction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 2e8d2589b78..d4021b45f0e 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -41,14 +41,14 @@ static std::string getTypeString(const AggregateFunctionPtr & func) for (size_t i = 0; i < parameters.size(); ++i) { if (i) - stream << ','; + stream << ", "; stream << applyVisitor(FieldVisitorToString(), parameters[i]); } stream << ')'; } for (const auto & argument_type : argument_types) - stream << ',' << argument_type->getName(); + stream << ", " << argument_type->getName(); stream << ')'; return stream.str();
From 8b8beb26d3db3aba31d072e24cfa9607acf04b7d Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Mon, 8 Jun 2020 19:26:56 +0300 Subject: [PATCH 0430/2229] S3 Poco HTTP Client (do not copy response stream body into memory). --- contrib/aws | 2 +- src/IO/S3/PocoHTTPClient.cpp | 4 ++-- src/IO/S3/PocoHTTPResponseStream.cpp | 12 ++++++++++++ src/IO/S3/PocoHTTPResponseStream.h | 21 +++++++++++++++++++++ src/IO/S3Common.cpp | 2 -- 5 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 src/IO/S3/PocoHTTPResponseStream.cpp create mode 100644 src/IO/S3/PocoHTTPResponseStream.h
diff --git a/contrib/aws b/contrib/aws index f7d9ce39f41..17e10c0fc77 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit f7d9ce39f41323300044567be007c233338bb94a +Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56
diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index b8de483a5a8..f2d44e8d93a 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -149,8 +150,7 @@ void PocoHTTPClient::MakeRequestInternal( response->SetClientErrorMessage(error_message); } else - /// TODO: Do not copy whole stream. - Poco::StreamCopier::copyStream(response_body_stream, response->GetResponseBody()); + response->GetResponseStream().SetUnderlyingStream(std::make_shared<PocoHTTPResponseStream>(session, response_body_stream)); break; }
diff --git a/src/IO/S3/PocoHTTPResponseStream.cpp b/src/IO/S3/PocoHTTPResponseStream.cpp new file mode 100644 index 00000000000..0a198268f2e --- /dev/null +++ b/src/IO/S3/PocoHTTPResponseStream.cpp @@ -0,0 +1,12 @@ +#include "PocoHTTPResponseStream.h" + +#include + +namespace DB::S3 +{ +PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_) + : Aws::IStream(response_stream_.rdbuf()), session(std::move(session_)) +{ +} + +}
diff --git a/src/IO/S3/PocoHTTPResponseStream.h b/src/IO/S3/PocoHTTPResponseStream.h new file mode 100644 index 00000000000..8167ddc4346 --- /dev/null +++ b/src/IO/S3/PocoHTTPResponseStream.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace DB::S3 +{ +/** + * Wrapper of IStream to store response stream and corresponding HTTP session. + */ +class PocoHTTPResponseStream : public Aws::IStream +{ +public: + PocoHTTPResponseStream(std::shared_ptr<Poco::Net::HTTPClientSession> session_, std::istream & response_stream_); + +private: + /// Poco HTTP session is holder of response stream.
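+ /// A note on why this member exists at all: the constructor (defined in
+ /// the .cpp above) borrows only the stream buffer via response_stream_.rdbuf(),
+ /// and that buffer is owned by the session's response stream, so the
+ /// session must stay alive for as long as the AWS SDK keeps reading from
+ /// this wrapper. Destroying the wrapper releases the session with it.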
+ std::shared_ptr<Poco::Net::HTTPClientSession> session; +}; + +}
diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 2c75a137222..2d01416fe57 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -12,9 +12,7 @@ # include # include # include -# include # include -# include # include # include # include
From 74ea867b1dee09001975b41f3b0ceead0a3a1b97 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Mon, 8 Jun 2020 23:17:39 +0300 Subject: [PATCH 0431/2229] Fix includes in S3Common.cpp --- src/IO/S3Common.cpp | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 2d01416fe57..2c75a137222 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -12,7 +12,9 @@ # include # include # include +# include # include +# include # include # include # include
From c3d0b351956f9f1623f3e7eff368caf10f6546f4 Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Tue, 9 Jun 2020 01:02:05 +0300 Subject: [PATCH 0432/2229] Fix includes in S3Common.cpp --- src/IO/S3/PocoHTTPClient.cpp | 1 + 1 file changed, 1 insertion(+)
diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index f2d44e8d93a..0dfa80ca107 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include
From 223f45685f93e50f04c5e306d67339d54e4ea497 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 8 Jun 2020 21:08:55 +0300 Subject: [PATCH 0433/2229] Review fixes --- src/Storages/StorageMergeTree.cpp | 27 ++++++++++++------- src/Storages/StorageReplicatedMergeTree.cpp | 6 ++--- .../01079_parallel_alter_modify_zookeeper.sh | 2 +- 3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4650485847c..15e662b27b5 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -141,16 +141,6 @@ void StorageMergeTree::shutdown() mutation_wait_event.notify_all(); } - try - { - clearOldPartsFromFilesystem(true); - } - catch (...) - { - /// Example: the case of readonly filesystem, we have failure removing old parts. - /// Should not prevent table shutdown. - tryLogCurrentException(log); - } merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); @@ -160,6 +150,23 @@ void StorageMergeTree::shutdown() if (moving_task_handle) global_context.getBackgroundMovePool().removeTask(moving_task_handle); + + + try + { + /// We clear all old parts after stopping all background operations. + /// It's important, because background operations can produce temporary + /// parts which will remove themselves in their descrutors. If so, we + /// may have race condition between our remove call and background + /// process. + clearOldPartsFromFilesystem(true); + } + catch (...) + { + /// Example: the case of readonly filesystem, we have failure removing old parts. + /// Should not prevent table shutdown. + tryLogCurrentException(log); + } }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f2ac6678764..d109fa464b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2997,10 +2997,10 @@ void StorageReplicatedMergeTree::shutdown() } data_parts_exchange_endpoint.reset(); - /// We clear all old parts after stopping all background operations.
It's /// important, because background operations can produce temporary parts - /// which will remove themselfs in their descrutors. If so, we may have race - /// condition between our remove call and background process. + /// which will remove themselves in their descrutors. If so, we may have + /// race condition between our remove call and background process. clearOldPartsFromFilesystem(true); } diff --git a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh index effc9f540a1..05ef4a1a675 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_modify_zookeeper.sh @@ -102,7 +102,7 @@ echo "Finishing alters" # This alter will finish all previous, but replica 1 maybe still not up-to-date. # If query will throw something, than we will sleep 1 and retry. If timeout -# happened we will silentrly go out of loop and probably fail tests in the +# happened we will silently go out of loop and probably fail tests in the # following for loop. # # 120 seconds is more than enough, but in rare cases for slow builds (debug, From 09b9a308cbf5fb20870e94eb72918c4cd873ce8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 02:55:53 +0300 Subject: [PATCH 0434/2229] Fix obvious race condition in test --- .../0_stateless/01268_procfs_metrics.sh | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01268_procfs_metrics.sh b/tests/queries/0_stateless/01268_procfs_metrics.sh index e258f7faafa..1367b68a61c 100755 --- a/tests/queries/0_stateless/01268_procfs_metrics.sh +++ b/tests/queries/0_stateless/01268_procfs_metrics.sh @@ -17,14 +17,16 @@ function read_numbers_func() function show_processes_func() { - sleep 0.1; - - # These two system metrics for the generating query above are guaranteed to be nonzero when ProcFS is mounted at /proc - $CLICKHOUSE_CLIENT -q " - SELECT count() > 0 FROM system.processes\ - WHERE has(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds') AND has(ProfileEvents.Names, 'OSReadChars')\ - SETTINGS max_threads = 1 - "; + while true; do + sleep 0.1; + + # These two system metrics for the generating query above are guaranteed to be nonzero when ProcFS is mounted at /proc + $CLICKHOUSE_CLIENT -q " + SELECT count() > 0 FROM system.processes\ + WHERE has(ProfileEvents.Names, 'OSCPUVirtualTimeMicroseconds') AND has(ProfileEvents.Names, 'OSReadChars')\ + SETTINGS max_threads = 1 + " | grep '1' && break; + done } From 75075a7b2bb493ee9306ffe1e099e182249560a4 Mon Sep 17 00:00:00 2001 From: hexiaoting <“hewenting_ict@163.com”> Date: Tue, 9 Jun 2020 10:39:37 +0800 Subject: [PATCH 0435/2229] Add DISTINCT keyword when show clusters --- src/Interpreters/InterpreterShowTablesQuery.cpp | 2 +- tests/queries/0_stateless/01293_show_clusters.reference | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 10447e52464..4b0d4c21ad1 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -37,7 +37,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.clusters) { std::stringstream rewritten_query; - rewritten_query << "SELECT cluster FROM system.clusters"; + rewritten_query << "SELECT DISTINCT cluster FROM system.clusters"; if (!query.like.empty()) { diff --git 
a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index b25a9a4d174..85a14155529 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,11 +1,8 @@ test_cluster_two_shards -test_cluster_two_shards -test_cluster_two_shards_localhost test_cluster_two_shards_localhost test_shard_localhost test_shard_localhost[1] test_shard_localhost_secure test_unavailable_shard -test_unavailable_shard test_cluster_two_shards test_shard_localhost 1 1 1 localhost ::1 9000 1 default 0 0 From 44b20eee96a9c6adc17baa5894984c2701ec0800 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 9 Jun 2020 10:23:35 +0300 Subject: [PATCH 0436/2229] Revert "S3 HTTP client - Avoid copying response stream into memory" --- contrib/aws | 2 +- src/IO/S3/PocoHTTPClient.cpp | 5 ++--- src/IO/S3/PocoHTTPResponseStream.cpp | 12 ------------ src/IO/S3/PocoHTTPResponseStream.h | 21 --------------------- 4 files changed, 3 insertions(+), 37 deletions(-) delete mode 100644 src/IO/S3/PocoHTTPResponseStream.cpp delete mode 100644 src/IO/S3/PocoHTTPResponseStream.h diff --git a/contrib/aws b/contrib/aws index 17e10c0fc77..f7d9ce39f41 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56 +Subproject commit f7d9ce39f41323300044567be007c233338bb94a diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 0dfa80ca107..b8de483a5a8 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -2,8 +2,6 @@ #include #include -#include -#include #include #include #include @@ -151,7 +149,8 @@ void PocoHTTPClient::MakeRequestInternal( response->SetClientErrorMessage(error_message); } else - response->GetResponseStream().SetUnderlyingStream(std::make_shared(session, response_body_stream)); + /// TODO: Do not copy whole stream. + Poco::StreamCopier::copyStream(response_body_stream, response->GetResponseBody()); break; } diff --git a/src/IO/S3/PocoHTTPResponseStream.cpp b/src/IO/S3/PocoHTTPResponseStream.cpp deleted file mode 100644 index 0a198268f2e..00000000000 --- a/src/IO/S3/PocoHTTPResponseStream.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "PocoHTTPResponseStream.h" - -#include - -namespace DB::S3 -{ -PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr session_, std::istream & response_stream_) - : Aws::IStream(response_stream_.rdbuf()), session(std::move(session_)) -{ -} - -} diff --git a/src/IO/S3/PocoHTTPResponseStream.h b/src/IO/S3/PocoHTTPResponseStream.h deleted file mode 100644 index 8167ddc4346..00000000000 --- a/src/IO/S3/PocoHTTPResponseStream.h +++ /dev/null @@ -1,21 +0,0 @@ -#pragma once - -#include -#include - -namespace DB::S3 -{ -/** - * Wrapper of IStream to store response stream and corresponding HTTP session. - */ -class PocoHTTPResponseStream : public Aws::IStream -{ -public: - PocoHTTPResponseStream(std::shared_ptr session_, std::istream & response_stream_); - -private: - /// Poco HTTP session is holder of response stream. 
- std::shared_ptr session; }; -} From 1a6c1d179655e2cc970cc99313a1bf92e5161098 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 11:14:02 +0300 Subject: [PATCH 0437/2229] Fix 01293_system_distribution_queue test flakiness --- .../queries/0_stateless/01293_system_distribution_queue.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index c0ff6a21e8e..4c9c690af09 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -10,6 +10,11 @@ select * from system.distribution_queue; select 'INSERT'; system stop distributed sends dist_01293; insert into dist_01293 select * from numbers(10); +-- metrics updated only after distributed_directory_monitor_sleep_time_ms +set distributed_directory_monitor_sleep_time_ms=10; +-- 1 second should guarantee metrics update +-- XXX: this is a bit of a quirk; it would be much better to account for these metrics without any delay. +select sleep(1) format Null; select is_blocked, error_count, data_files, data_compressed_bytes>100 from system.distribution_queue; system flush distributed dist_01293; From ba4d96438863ada5e9d9f3c0f1596e16822dcf51 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 11:17:38 +0300 Subject: [PATCH 0438/2229] Add comments for 01281_group_by_limit_memory_tracking test --- .../0_stateless/01281_group_by_limit_memory_tracking.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index 5922b8d74d2..2115530a450 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -20,6 +20,14 @@ function execute_null() function execute_group_by() { + # Peak memory usage for the main query (with GROUP BY) is ~100MiB (with + # max_threads=2 as here). + # So set max_memory_usage_for_user to 150MiB; if the memory tracking + # accounting is incorrect, the second query will fail. + # + # Note that we also need one running query for the user (sleep(3)), since + # max_memory_usage_for_user is reset to 0 once there are no more + # queries for the user. 
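+    #
+    # In shell terms, a rough sketch of the two queries (hypothetical commands;
+    # the real invocations are built from the opts array below):
+    #   $CLICKHOUSE_CLIENT --max_memory_usage_for_user=$((150<<20)) -q 'SELECT sleep(3) FORMAT Null' &  # keeps the user's memory tracker alive
+    #   $CLICKHOUSE_CLIENT --max_memory_usage_for_user=$((150<<20)) --max_threads=2 -q "$group_by_query"  # ~100MiB peak; must stay under the 150MiB limit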
local opts=( --max_memory_usage_for_user=$((150<<20)) --max_threads=2 From 96cdeecd81a1215a222e52e34722997f0ab624c2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 13:03:22 +0300 Subject: [PATCH 0439/2229] Better parallel tests --- .../01079_parallel_alter_add_drop_column_zookeeper.sh | 2 +- .../01079_parallel_alter_detach_table_zookeeper.sh | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh index 37ed463f59b..dbd53d6d0b7 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_add_drop_column_zookeeper.sh @@ -97,7 +97,7 @@ done echo "Equal number of columns" # This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_1 MODIFY COLUMN value0 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_add_drop_1 MODIFY COLUMN value0 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 done diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 114008ded26..1c46dc0cf33 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -96,8 +96,12 @@ done # This alter will finish all previous, but replica 1 maybe still not up-to-date -while [[ $(timeout 30 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do +while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 + # just try to attach table if it failed for some reason in code above + for i in `seq $REPLICAS`; do + $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_detach_$i" 2> /dev/null + done done for i in `seq $REPLICAS`; do From a5268b7c465298e780b8f288be1f55b56be16a5b Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 13:05:01 +0300 Subject: [PATCH 0440/2229] Update 01079_parallel_alter_detach_table_zookeeper.sh --- .../0_stateless/01079_parallel_alter_detach_table_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh index 1c46dc0cf33..90172d38cfb 100755 --- a/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh +++ b/tests/queries/0_stateless/01079_parallel_alter_detach_table_zookeeper.sh @@ -98,7 +98,7 @@ done # This alter will finish all previous, but replica 1 maybe still not up-to-date while [[ $(timeout 120 $CLICKHOUSE_CLIENT --query "ALTER TABLE concurrent_alter_detach_1 MODIFY COLUMN value1 String SETTINGS replication_alter_partitions_sync=2" 2>&1) ]]; do sleep 1 - # just try to attach table if it failed for some reason in code above + # just try to attach table if it failed for some reason in the code above for i in `seq $REPLICAS`; do $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_alter_detach_$i" 
2> /dev/null done From 32772073ea8655298394fef45f2b5d16679c1f88 Mon Sep 17 00:00:00 2001 From: Ivan Starkov Date: Tue, 9 Jun 2020 13:52:21 +0300 Subject: [PATCH 0441/2229] Fix docker-entrypoint-initdb.d wait Fixes 'Address family not supported by protocol' for any docker-entrypoint-initdb.d script. wget uses 'localhost', which resolves to both IPv4 and IPv6 with the current config (/etc/hosts), so wget fails _(Address family not supported by protocol)_ and does not retry. Forcing it to use IPv4 fixes the issue. --- docker/server/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 2af8a377b92..6111b0057ed 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # check if clickhouse is ready to accept connections # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) - if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + if ! wget --spider --quiet -4 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then echo >&2 'ClickHouse init process failed.' exit 1 fi From 47ad338cb29c9bc91aec8db69a4e2cecdc0b0c08 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Tue, 9 Jun 2020 13:54:49 +0300 Subject: [PATCH 0442/2229] Refactor CMake build files (#11390) * Get rid of lib_name.cmake * Refactor Boost and HyperScan libraries * Refactor lz4 * Fix build with xxHash --- CMakeLists.txt | 6 - base/common/CMakeLists.txt | 7 +- base/mysqlxx/CMakeLists.txt | 10 +- cmake/Modules/Findmetrohash.cmake | 44 --- cmake/find/boost.cmake | 52 ---- cmake/find/hyperscan.cmake | 35 --- cmake/find/lz4.cmake | 23 -- cmake/find/parquet.cmake | 2 +- cmake/find/xxhash.cmake | 22 -- cmake/lib_name.cmake | 4 - cmake/print_include_directories.cmake | 5 - contrib/CMakeLists.txt | 29 +- contrib/arrow-cmake/CMakeLists.txt | 27 +- contrib/avro-cmake/CMakeLists.txt | 7 +- contrib/boost-cmake/CMakeLists.txt | 122 ++++++--- contrib/cppkafka-cmake/CMakeLists.txt | 52 ++-- contrib/hyperscan-cmake/CMakeLists.txt | 252 ++++++++++++++++++ contrib/hyperscan-cmake/common/hs_version.h | 40 +++ contrib/hyperscan-cmake/x86_64/config.h | 106 ++++++++ contrib/libdivide/CMakeLists.txt | 2 + contrib/libhdfs3-cmake/CMakeLists.txt | 3 +- contrib/libmetrohash/CMakeLists.txt | 17 +- contrib/librdkafka-cmake/CMakeLists.txt | 2 +- contrib/lz4-cmake/CMakeLists.txt | 37 ++- docker/packager/packager | 2 +- programs/benchmark/CMakeLists.txt | 11 +- programs/client/CMakeLists.txt | 19 +- programs/compressor/CMakeLists.txt | 11 +- programs/extract-from-config/CMakeLists.txt | 12 +- programs/format/CMakeLists.txt | 12 +- programs/local/CMakeLists.txt | 17 +- programs/obfuscator/CMakeLists.txt | 10 +- src/CMakeLists.txt | 34 +-- src/Common/Config/CMakeLists.txt | 3 +- src/Common/StringUtils/CMakeLists.txt | 1 - src/Common/ZooKeeper/CMakeLists.txt | 1 - src/Common/tests/CMakeLists.txt | 1 - src/Functions/CMakeLists.txt | 31 +-- src/Functions/URL/CMakeLists.txt | 5 +- src/Functions/config_functions.h.in | 2 - src/Interpreters/tests/CMakeLists.txt | 6 +- src/Parsers/CMakeLists.txt | 1 - ...StorageSystemBuildOptions.generated.cpp.in | 4 +- src/Storages/tests/CMakeLists.txt | 9 +- utils/check-marks/CMakeLists.txt | 2 +- utils/compressor/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +-
utils/test-data-generator/CMakeLists.txt | 2 +- utils/wikistat-loader/CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- utils/zookeeper-dump-tree/CMakeLists.txt | 2 +- utils/zookeeper-remove-by-list/CMakeLists.txt | 2 +- 53 files changed, 697 insertions(+), 419 deletions(-) delete mode 100644 cmake/Modules/Findmetrohash.cmake delete mode 100644 cmake/find/boost.cmake delete mode 100644 cmake/find/hyperscan.cmake delete mode 100644 cmake/find/lz4.cmake delete mode 100644 cmake/find/xxhash.cmake delete mode 100644 cmake/lib_name.cmake create mode 100644 contrib/hyperscan-cmake/CMakeLists.txt create mode 100644 contrib/hyperscan-cmake/common/hs_version.h create mode 100644 contrib/hyperscan-cmake/x86_64/config.h create mode 100644 contrib/libdivide/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 53dfd1df1cb..7b3d7676d0e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -327,20 +327,16 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include (GNUInstallDirs) include (cmake/contrib_finder.cmake) -include (cmake/lib_name.cmake) find_contrib_lib(double-conversion) # Must be before parquet include (cmake/find/ssl.cmake) include (cmake/find/ldap.cmake) # after ssl include (cmake/find/icu.cmake) -include (cmake/find/boost.cmake) include (cmake/find/zlib.cmake) include (cmake/find/zstd.cmake) include (cmake/find/ltdl.cmake) # for odbc include (cmake/find/termcap.cmake) # openssl, zlib before poco -include (cmake/find/lz4.cmake) -include (cmake/find/xxhash.cmake) include (cmake/find/sparsehash.cmake) include (cmake/find/re2.cmake) include (cmake/find/libgsasl.cmake) @@ -358,7 +354,6 @@ include (cmake/find/hdfs3.cmake) # uses protobuf include (cmake/find/s3.cmake) include (cmake/find/base64.cmake) include (cmake/find/parquet.cmake) -include (cmake/find/hyperscan.cmake) include (cmake/find/simdjson.cmake) include (cmake/find/rapidjson.cmake) include (cmake/find/fastops.cmake) @@ -368,7 +363,6 @@ include (cmake/find/msgpack.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) -find_contrib_lib(metrohash) find_contrib_lib(btrie) if (ENABLE_TESTS) diff --git a/base/common/CMakeLists.txt b/base/common/CMakeLists.txt index d98da8f0450..074f73b158b 100644 --- a/base/common/CMakeLists.txt +++ b/base/common/CMakeLists.txt @@ -44,10 +44,6 @@ endif() target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..) 
-if (NOT USE_INTERNAL_BOOST_LIBRARY) - target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) -endif () - # Allow explicit fallback to readline if (NOT ENABLE_REPLXX AND ENABLE_READLINE) message (STATUS "Attempt to fallback to readline explicitly") @@ -73,7 +69,8 @@ endif () target_link_libraries (common PUBLIC ${CITYHASH_LIBRARIES} - ${Boost_SYSTEM_LIBRARY} + boost::headers_only + boost::system FastMemcpy Poco::Net Poco::Net::SSL diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index 702e0197ffb..7d35c1bd31d 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -32,10 +32,18 @@ else () endif () endif () -target_link_libraries(mysqlxx PUBLIC common PRIVATE ${MYSQLCLIENT_LIBRARIES} PUBLIC ${Boost_SYSTEM_LIBRARY} PRIVATE ${ZLIB_LIBRARIES}) +target_link_libraries (mysqlxx + PUBLIC + common + PRIVATE + ${MYSQLCLIENT_LIBRARIES} + ${ZLIB_LIBRARIES} +) + if(OPENSSL_LIBRARIES) target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES}) endif() + target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES}) if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR) diff --git a/cmake/Modules/Findmetrohash.cmake b/cmake/Modules/Findmetrohash.cmake deleted file mode 100644 index c51665795bd..00000000000 --- a/cmake/Modules/Findmetrohash.cmake +++ /dev/null @@ -1,44 +0,0 @@ -# - Try to find metrohash headers and libraries. -# -# Usage of this module as follows: -# -# find_package(metrohash) -# -# Variables used by this module, they can change the default behaviour and need -# to be set before calling find_package: -# -# METROHASH_ROOT_DIR Set this variable to the root installation of -# metrohash if the module has problems finding -# the proper installation path. -# -# Variables defined by this module: -# -# METROHASH_FOUND System has metrohash libs/headers -# METROHASH_LIBRARIES The metrohash library/libraries -# METROHASH_INCLUDE_DIR The location of metrohash headers - -find_path(METROHASH_ROOT_DIR - NAMES include/metrohash.h -) - -find_library(METROHASH_LIBRARIES - NAMES metrohash - PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS} -) - -find_path(METROHASH_INCLUDE_DIR - NAMES metrohash.h - PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash ${METROHASH_INCLUDE_PATHS} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(metrohash DEFAULT_MSG - METROHASH_LIBRARIES - METROHASH_INCLUDE_DIR -) - -mark_as_advanced( - METROHASH_ROOT_DIR - METROHASH_LIBRARIES - METROHASH_INCLUDE_DIR -) diff --git a/cmake/find/boost.cmake b/cmake/find/boost.cmake deleted file mode 100644 index ec10a34d839..00000000000 --- a/cmake/find/boost.cmake +++ /dev/null @@ -1,52 +0,0 @@ -option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED}) - -# Test random file existing in all package variants -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp") - if(USE_INTERNAL_BOOST_LIBRARY) - message(WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive") - endif() - set (USE_INTERNAL_BOOST_LIBRARY 0) - set (MISSING_INTERNAL_BOOST_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_BOOST_LIBRARY) - set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) - set (BOOST_ROOT "/usr/local") - find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex) - # incomplete, no include search, who use it? - if (NOT Boost_FOUND) - # # Try to find manually. 
- # set (BOOST_PATHS "") - # find_library (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options PATHS ${BOOST_PATHS}) - # find_library (Boost_SYSTEM_LIBRARY boost_system PATHS ${BOOST_PATHS}) - # find_library (Boost_FILESYSTEM_LIBRARY boost_filesystem PATHS ${BOOST_PATHS}) - # maybe found but incorrect version. - set (Boost_INCLUDE_DIRS "") - set (Boost_SYSTEM_LIBRARY "") - endif () -endif () - -if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY) - set (USE_INTERNAL_BOOST_LIBRARY 1) - set (Boost_SYSTEM_LIBRARY boost_system_internal) - set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal) - set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY}) - set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal) - set (Boost_REGEX_LIBRARY boost_regex_internal) - - set (Boost_INCLUDE_DIRS) - - set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost") - - # For boost from github: - file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include") - list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_}) - # numeric has additional level - file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/numeric/*/include") - list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_}) - - # For packaged version: - list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost") -endif () - -message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}") diff --git a/cmake/find/hyperscan.cmake b/cmake/find/hyperscan.cmake deleted file mode 100644 index 3f65d1eb891..00000000000 --- a/cmake/find/hyperscan.cmake +++ /dev/null @@ -1,35 +0,0 @@ -if (HAVE_SSSE3) - option (ENABLE_HYPERSCAN "Enable hyperscan" ${ENABLE_LIBRARIES}) -endif () - -if (ENABLE_HYPERSCAN) - -option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt") - if (USE_INTERNAL_HYPERSCAN_LIBRARY) - message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive") - endif () - set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 0) -endif () - -# We cannot use OS hyperscan library due to different include path. -# -#if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY) -# find_library (HYPERSCAN_LIBRARY hs) -# find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS}) -#endif () - -if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR) - set (USE_HYPERSCAN 1) -elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY) - set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src) - set (HYPERSCAN_LIBRARY hs) - set (USE_HYPERSCAN 1) - set (USE_INTERNAL_HYPERSCAN_LIBRARY 1) -endif() - -message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}") - -endif () diff --git a/cmake/find/lz4.cmake b/cmake/find/lz4.cmake deleted file mode 100644 index 5f5e058b53d..00000000000 --- a/cmake/find/lz4.cmake +++ /dev/null @@ -1,23 +0,0 @@ -option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED}) - -if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h") - if (USE_INTERNAL_LZ4_LIBRARY) - message (WARNING "submodule contrib/lz4 is missing. 
to fix try run: \n git submodule update --init --recursive") - set (USE_INTERNAL_LZ4_LIBRARY 0) - endif () - set (MISSING_INTERNAL_LZ4_LIBRARY 1) -endif () - -if (NOT USE_INTERNAL_LZ4_LIBRARY) - find_library (LZ4_LIBRARY lz4) - find_path (LZ4_INCLUDE_DIR NAMES lz4.h PATHS ${LZ4_INCLUDE_PATHS}) -endif () - -if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR) -elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY) - set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) - set (USE_INTERNAL_LZ4_LIBRARY 1) - set (LZ4_LIBRARY lz4) -endif () - -message (STATUS "Using lz4: ${LZ4_INCLUDE_DIR} : ${LZ4_LIBRARY}") diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index 4c91286dae0..d4f62b87d29 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -63,7 +63,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD) set(ARROW_LIBRARY arrow_shared) set(PARQUET_LIBRARY parquet_shared) if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) - list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY}) + list(APPEND PARQUET_LIBRARY boost::regex) endif() set(THRIFT_LIBRARY thrift) endif() diff --git a/cmake/find/xxhash.cmake b/cmake/find/xxhash.cmake deleted file mode 100644 index 8af871e8fd5..00000000000 --- a/cmake/find/xxhash.cmake +++ /dev/null @@ -1,22 +0,0 @@ -option (USE_INTERNAL_XXHASH_LIBRARY "Set to FALSE to use system xxHash library instead of bundled" ${NOT_UNBUNDLED}) - -if (USE_INTERNAL_XXHASH_LIBRARY AND NOT USE_INTERNAL_LZ4_LIBRARY) - message (WARNING "can not use internal xxhash without internal lz4") - set (USE_INTERNAL_XXHASH_LIBRARY 0) -endif () - -if (USE_INTERNAL_XXHASH_LIBRARY) - set (XXHASH_LIBRARY lz4) - set (XXHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) -else () - find_library (XXHASH_LIBRARY xxhash) - find_path (XXHASH_INCLUDE_DIR NAMES xxhash.h PATHS ${XXHASH_INCLUDE_PATHS}) -endif () - -if (XXHASH_LIBRARY AND XXHASH_INCLUDE_DIR) - set (USE_XXHASH 1) -else () - set (USE_XXHASH 0) -endif () - -message (STATUS "Using xxhash=${USE_XXHASH}: ${XXHASH_INCLUDE_DIR} : ${XXHASH_LIBRARY}") diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake deleted file mode 100644 index f18b2e52576..00000000000 --- a/cmake/lib_name.cmake +++ /dev/null @@ -1,4 +0,0 @@ -set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) -set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) -set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) -set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) diff --git a/cmake/print_include_directories.cmake b/cmake/print_include_directories.cmake index 62ebd434320..cc2098cb397 100644 --- a/cmake/print_include_directories.cmake +++ b/cmake/print_include_directories.cmake @@ -21,11 +21,6 @@ if (TARGET double-conversion) list(APPEND dirs ${dirs1}) endif () -if (TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY}) - get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES) - list(APPEND dirs ${dirs1}) -endif () - list(REMOVE_DUPLICATES dirs) file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "") foreach (dir ${dirs}) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index d122188ad0b..2b0fba43348 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -16,13 +16,18 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) +add_subdirectory (boost-cmake) add_subdirectory (cctz-cmake) add_subdirectory (consistent-hashing-sumbur) add_subdirectory 
(consistent-hashing) add_subdirectory (croaring) add_subdirectory (FastMemcpy) +add_subdirectory (hyperscan-cmake) add_subdirectory (jemalloc-cmake) add_subdirectory (libcpuid-cmake) +add_subdirectory (libdivide) +add_subdirectory (libmetrohash) +add_subdirectory (lz4-cmake) add_subdirectory (murmurhash) add_subdirectory (replxx-cmake) add_subdirectory (ryu-cmake) @@ -33,14 +38,6 @@ add_subdirectory (poco-cmake) # TODO: refactor the contrib libraries below this comment. -if (USE_INTERNAL_BOOST_LIBRARY) - add_subdirectory (boost-cmake) -endif () - -if (USE_INTERNAL_LZ4_LIBRARY) - add_subdirectory (lz4-cmake) -endif () - if (USE_INTERNAL_ZSTD_LIBRARY) add_subdirectory (zstd-cmake) endif () @@ -63,10 +60,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY) add_subdirectory (libfarmhash) endif () -if (USE_INTERNAL_METROHASH_LIBRARY) - add_subdirectory (libmetrohash) -endif () - if (USE_INTERNAL_BTRIE_LIBRARY) add_subdirectory (libbtrie) endif () @@ -294,18 +287,6 @@ if (USE_BASE64) add_subdirectory (base64-cmake) endif() -if (USE_INTERNAL_HYPERSCAN_LIBRARY) - # The library is large - avoid bloat. - if (USE_STATIC_LIBRARIES) - add_subdirectory (hyperscan) - target_compile_options (hs PRIVATE -g0) - else () - set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "") - add_subdirectory (hyperscan) - target_compile_options (hs_shared PRIVATE -g0) - endif () -endif() - if (USE_SIMDJSON) add_subdirectory (simdjson-cmake) endif() diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46c6b0e3918..afcdae68e77 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -47,7 +47,8 @@ set(thriftcpp_threads_SOURCES ) add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES}) set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641 -target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src) +target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only) # === orc @@ -146,7 +147,7 @@ add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES}) add_dependencies(metadata_fbs flatc) # arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features. -# Apple Clang compiler failed to compile this code without specifying c++11 standard. +# Apple Clang compiler failed to compile this code without specifying c++11 standard. # As result these compiler features detected as absent. In result it failed to compile orc itself. # In orc makefile there is code that sets flags, but arrow-cmake ignores these flags. 
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang") @@ -286,10 +287,6 @@ set(ARROW_SRCS ${ARROW_SRCS} ${LIBRARY_DIR}/compute/kernels/util_internal.cc ) -if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY) - set(ARROW_WITH_LZ4 1) -endif () - if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) set(ARROW_WITH_SNAPPY 1) endif () @@ -302,10 +299,8 @@ if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY) set(ARROW_WITH_ZSTD 1) endif () -if (ARROW_WITH_LZ4) - add_definitions(-DARROW_WITH_LZ4) - SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) -endif () +add_definitions(-DARROW_WITH_LZ4) +SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS}) if (ARROW_WITH_SNAPPY) add_definitions(-DARROW_WITH_SNAPPY) @@ -328,18 +323,15 @@ add_library(${ARROW_LIBRARY} ${ARROW_SRCS}) # Arrow dependencies add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs) -target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal) -target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY}) +target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem) if (USE_INTERNAL_PROTOBUF_LIBRARY) add_dependencies(${ARROW_LIBRARY} protoc) endif () -target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS}) +target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY}) -if (ARROW_WITH_LZ4) - target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY}) -endif () +target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4) if (ARROW_WITH_SNAPPY) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY}) endif () @@ -396,8 +388,7 @@ list(APPEND PARQUET_SRCS add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS}) target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h -target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY}) -target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS}) +target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex) if (SANITIZE STREQUAL "undefined") target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index f544b3c50cd..052a19ee804 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -45,13 +45,12 @@ set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_V target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR}) -target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS}) -target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY}) +target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams) if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) - target_link_libraries (avrocpp ${SNAPPY_LIBRARY}) + target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY}) endif () if 
(COMPILER_GCC) @@ -67,4 +66,4 @@ ADD_CUSTOM_TARGET(avro_symlink_headers ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro ) -add_dependencies(avrocpp avro_symlink_headers) \ No newline at end of file +add_dependencies(avrocpp avro_symlink_headers) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 582cc84a552..fb7b236d30d 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -1,45 +1,101 @@ -# Supported contrib/boost source variants: -# 1. Default - Minimized vrsion from release archive : https://github.com/ClickHouse-Extras/boost -# 2. Release archive unpacked to contrib/boost -# 3. Full boost https://github.com/boostorg/boost +option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ${NOT_UNBUNDLED}) -# if boostorg/boost connected as submodule: Update all boost internal submodules to tag: -# git submodule foreach "git fetch --all && git checkout boost-1.66.0 || true" +if (USE_INTERNAL_BOOST_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost) -# -# Important boost patch: 094c18b -# + # filesystem -include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) + set (SRCS_FILESYSTEM + ${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp + ${LIBRARY_DIR}/libs/filesystem/src/operations.cpp + ${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp + ${LIBRARY_DIR}/libs/filesystem/src/path.cpp + ${LIBRARY_DIR}/libs/filesystem/src/portability.cpp + ${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp + ${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp + ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp + ) -set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost) + add_library (_boost_filesystem ${SRCS_FILESYSTEM}) + add_library (boost::filesystem ALIAS _boost_filesystem) + target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) -if(NOT MSVC) - add_definitions(-Wno-unused-variable -Wno-deprecated-declarations) -endif() + # headers-only -macro(add_boost_lib lib_name) - add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src) - add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources}) - target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) - target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED) -endmacro() + add_library (_boost_headers_only INTERFACE) + add_library (boost::headers_only ALIAS _boost_headers_only) + target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR}) -add_boost_lib(system) + # iostreams -add_boost_lib(program_options) + set (SRCS_IOSTREAMS + ${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp + ${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp + ${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp + ${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp + ) -add_boost_lib(filesystem) -target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal) + add_library (_boost_iostreams ${SRCS_IOSTREAMS}) + add_library (boost::iostreams ALIAS _boost_iostreams) + target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR}) + target_link_libraries (_boost_iostreams PRIVATE zlib) -#add_boost_lib(random) + # program_options -if (USE_INTERNAL_PARQUET_LIBRARY) - add_boost_lib(regex) -endif() + set (SRCS_PROGRAM_OPTIONS + ${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp + 
${LIBRARY_DIR}/libs/program_options/src/config_file.cpp + ${LIBRARY_DIR}/libs/program_options/src/convert.cpp + ${LIBRARY_DIR}/libs/program_options/src/options_description.cpp + ${LIBRARY_DIR}/libs/program_options/src/parsers.cpp + ${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp + ${LIBRARY_DIR}/libs/program_options/src/split.cpp + ${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp + ${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp + ${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp + ${LIBRARY_DIR}/libs/program_options/src/winmain.cpp + ) -if (USE_INTERNAL_AVRO_LIBRARY) - add_boost_lib(iostreams) - target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES}) - target_include_directories(boost_iostreams_internal SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) -endif() + add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS}) + add_library (boost::program_options ALIAS _boost_program_options) + target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}) + + # regex + + set (SRCS_REGEX + ${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/cregex.cpp + ${LIBRARY_DIR}/libs/regex/src/fileiter.cpp + ${LIBRARY_DIR}/libs/regex/src/icu.cpp + ${LIBRARY_DIR}/libs/regex/src/instances.cpp + ${LIBRARY_DIR}/libs/regex/src/internals.hpp + ${LIBRARY_DIR}/libs/regex/src/posix_api.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp + ${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp + ${LIBRARY_DIR}/libs/regex/src/regex.cpp + ${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp + ${LIBRARY_DIR}/libs/regex/src/usinstances.cpp + ${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp + ${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp + ${LIBRARY_DIR}/libs/regex/src/winstances.cpp + ) + + add_library (_boost_regex ${SRCS_REGEX}) + add_library (boost::regex ALIAS _boost_regex) + target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR}) + + # system + + set (SRCS_SYSTEM + ${LIBRARY_DIR}/libs/system/src/error_code.cpp + ) + + add_library (_boost_system ${SRCS_SYSTEM}) + add_library (boost::system ALIAS _boost_system) + target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) +else () + message (FATAL_ERROR "TODO: external Boost library is not supported!") +endif () diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index 2725eaf7a77..9f512974948 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -1,31 +1,33 @@ -set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka) +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka) set(SRCS - ${CPPKAFKA_DIR}/src/configuration.cpp - ${CPPKAFKA_DIR}/src/topic_configuration.cpp - ${CPPKAFKA_DIR}/src/configuration_option.cpp - ${CPPKAFKA_DIR}/src/exceptions.cpp - ${CPPKAFKA_DIR}/src/topic.cpp - ${CPPKAFKA_DIR}/src/buffer.cpp - ${CPPKAFKA_DIR}/src/queue.cpp - ${CPPKAFKA_DIR}/src/message.cpp - ${CPPKAFKA_DIR}/src/message_timestamp.cpp - ${CPPKAFKA_DIR}/src/message_internal.cpp - ${CPPKAFKA_DIR}/src/topic_partition.cpp - ${CPPKAFKA_DIR}/src/topic_partition_list.cpp - ${CPPKAFKA_DIR}/src/metadata.cpp - ${CPPKAFKA_DIR}/src/group_information.cpp - ${CPPKAFKA_DIR}/src/error.cpp - ${CPPKAFKA_DIR}/src/event.cpp - - ${CPPKAFKA_DIR}/src/kafka_handle_base.cpp - ${CPPKAFKA_DIR}/src/producer.cpp - 
${CPPKAFKA_DIR}/src/consumer.cpp + ${LIBRARY_DIR}/src/buffer.cpp + ${LIBRARY_DIR}/src/configuration_option.cpp + ${LIBRARY_DIR}/src/configuration.cpp + ${LIBRARY_DIR}/src/consumer.cpp + ${LIBRARY_DIR}/src/error.cpp + ${LIBRARY_DIR}/src/event.cpp + ${LIBRARY_DIR}/src/exceptions.cpp + ${LIBRARY_DIR}/src/group_information.cpp + ${LIBRARY_DIR}/src/kafka_handle_base.cpp + ${LIBRARY_DIR}/src/message_internal.cpp + ${LIBRARY_DIR}/src/message_timestamp.cpp + ${LIBRARY_DIR}/src/message.cpp + ${LIBRARY_DIR}/src/metadata.cpp + ${LIBRARY_DIR}/src/producer.cpp + ${LIBRARY_DIR}/src/queue.cpp + ${LIBRARY_DIR}/src/topic_configuration.cpp + ${LIBRARY_DIR}/src/topic_partition_list.cpp + ${LIBRARY_DIR}/src/topic_partition.cpp + ${LIBRARY_DIR}/src/topic.cpp ) add_library(cppkafka ${SRCS}) -target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY}) -target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) -target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) -target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) +target_link_libraries(cppkafka + PRIVATE + ${RDKAFKA_LIBRARY} + boost::headers_only +) +target_include_directories(cppkafka PRIVATE ${LIBRARY_DIR}/include/cppkafka) +target_include_directories(cppkafka SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include) diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt new file mode 100644 index 00000000000..bed774afdbf --- /dev/null +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -0,0 +1,252 @@ +option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES}) + +if (NOT HAVE_SSSE3) + set (ENABLE_HYPERSCAN OFF) +endif () + +if (ENABLE_HYPERSCAN) + option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ${NOT_UNBUNDLED}) + + if (USE_INTERNAL_HYPERSCAN_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan) + + set (SRCS + ${LIBRARY_DIR}/src/alloc.c + ${LIBRARY_DIR}/src/compiler/asserts.cpp + ${LIBRARY_DIR}/src/compiler/compiler.cpp + ${LIBRARY_DIR}/src/compiler/error.cpp + ${LIBRARY_DIR}/src/crc32.c + ${LIBRARY_DIR}/src/database.c + ${LIBRARY_DIR}/src/fdr/engine_description.cpp + ${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp + ${LIBRARY_DIR}/src/fdr/fdr_compile.cpp + ${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp + ${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp + ${LIBRARY_DIR}/src/fdr/fdr.c + ${LIBRARY_DIR}/src/fdr/flood_compile.cpp + ${LIBRARY_DIR}/src/fdr/teddy_compile.cpp + ${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp + ${LIBRARY_DIR}/src/fdr/teddy.c + ${LIBRARY_DIR}/src/grey.cpp + ${LIBRARY_DIR}/src/hs_valid_platform.c + ${LIBRARY_DIR}/src/hs_version.c + ${LIBRARY_DIR}/src/hs.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp + ${LIBRARY_DIR}/src/hwlm/hwlm.c + ${LIBRARY_DIR}/src/hwlm/noodle_build.cpp + ${LIBRARY_DIR}/src/hwlm/noodle_engine.c + ${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp + ${LIBRARY_DIR}/src/nfa/accel.c + ${LIBRARY_DIR}/src/nfa/accelcompile.cpp + ${LIBRARY_DIR}/src/nfa/castle.c + ${LIBRARY_DIR}/src/nfa/castlecompile.cpp + ${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp + ${LIBRARY_DIR}/src/nfa/dfa_min.cpp + ${LIBRARY_DIR}/src/nfa/gough.c + ${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp + ${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp + ${LIBRARY_DIR}/src/nfa/goughcompile.cpp + ${LIBRARY_DIR}/src/nfa/lbr.c + ${LIBRARY_DIR}/src/nfa/limex_64.c + ${LIBRARY_DIR}/src/nfa/limex_accel.c + ${LIBRARY_DIR}/src/nfa/limex_compile.cpp + ${LIBRARY_DIR}/src/nfa/limex_native.c + 
${LIBRARY_DIR}/src/nfa/limex_simd128.c + ${LIBRARY_DIR}/src/nfa/limex_simd256.c + ${LIBRARY_DIR}/src/nfa/limex_simd384.c + ${LIBRARY_DIR}/src/nfa/limex_simd512.c + ${LIBRARY_DIR}/src/nfa/mcclellan.c + ${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp + ${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp + ${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp + ${LIBRARY_DIR}/src/nfa/mcsheng_data.c + ${LIBRARY_DIR}/src/nfa/mcsheng.c + ${LIBRARY_DIR}/src/nfa/mpv.c + ${LIBRARY_DIR}/src/nfa/mpvcompile.cpp + ${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c + ${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp + ${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp + ${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp + ${LIBRARY_DIR}/src/nfa/rdfa.cpp + ${LIBRARY_DIR}/src/nfa/repeat.c + ${LIBRARY_DIR}/src/nfa/repeatcompile.cpp + ${LIBRARY_DIR}/src/nfa/sheng.c + ${LIBRARY_DIR}/src/nfa/shengcompile.cpp + ${LIBRARY_DIR}/src/nfa/shufti.c + ${LIBRARY_DIR}/src/nfa/shufticompile.cpp + ${LIBRARY_DIR}/src/nfa/tamarama.c + ${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp + ${LIBRARY_DIR}/src/nfa/truffle.c + ${LIBRARY_DIR}/src/nfa/trufflecompile.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_region.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_som.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_split.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_util.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp + ${LIBRARY_DIR}/src/nfagraph/ng_width.cpp + ${LIBRARY_DIR}/src/nfagraph/ng.cpp + 
${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp + ${LIBRARY_DIR}/src/parser/buildstate.cpp + ${LIBRARY_DIR}/src/parser/check_refs.cpp + ${LIBRARY_DIR}/src/parser/Component.cpp + ${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp + ${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp + ${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp + ${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp + ${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp + ${LIBRARY_DIR}/src/parser/ComponentByte.cpp + ${LIBRARY_DIR}/src/parser/ComponentClass.cpp + ${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp + ${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp + ${LIBRARY_DIR}/src/parser/ComponentEUS.cpp + ${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp + ${LIBRARY_DIR}/src/parser/ComponentSequence.cpp + ${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp + ${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp + ${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp + ${LIBRARY_DIR}/src/parser/control_verbs.cpp + ${LIBRARY_DIR}/src/parser/logical_combination.cpp + ${LIBRARY_DIR}/src/parser/parse_error.cpp + ${LIBRARY_DIR}/src/parser/parser_util.cpp + ${LIBRARY_DIR}/src/parser/Parser.cpp + ${LIBRARY_DIR}/src/parser/prefilter.cpp + ${LIBRARY_DIR}/src/parser/shortcut_literal.cpp + ${LIBRARY_DIR}/src/parser/ucp_table.cpp + ${LIBRARY_DIR}/src/parser/unsupported.cpp + ${LIBRARY_DIR}/src/parser/utf8_validate.cpp + ${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp + ${LIBRARY_DIR}/src/rose/block.c + ${LIBRARY_DIR}/src/rose/catchup.c + ${LIBRARY_DIR}/src/rose/init.c + ${LIBRARY_DIR}/src/rose/match.c + ${LIBRARY_DIR}/src/rose/program_runtime.c + ${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp + ${LIBRARY_DIR}/src/rose/rose_build_add.cpp + ${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp + ${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp + ${LIBRARY_DIR}/src/rose/rose_build_castle.cpp + ${LIBRARY_DIR}/src/rose/rose_build_compile.cpp + ${LIBRARY_DIR}/src/rose/rose_build_convert.cpp + ${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp + ${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp + ${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp + ${LIBRARY_DIR}/src/rose/rose_build_groups.cpp + ${LIBRARY_DIR}/src/rose/rose_build_infix.cpp + ${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp + ${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp + ${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp + ${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp + ${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp + ${LIBRARY_DIR}/src/rose/rose_build_merge.cpp + ${LIBRARY_DIR}/src/rose/rose_build_misc.cpp + ${LIBRARY_DIR}/src/rose/rose_build_program.cpp + ${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp + ${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp + ${LIBRARY_DIR}/src/rose/rose_build_width.cpp + ${LIBRARY_DIR}/src/rose/rose_in_util.cpp + ${LIBRARY_DIR}/src/rose/stream.c + ${LIBRARY_DIR}/src/runtime.c + ${LIBRARY_DIR}/src/scratch.c + ${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp + ${LIBRARY_DIR}/src/som/slot_manager.cpp + ${LIBRARY_DIR}/src/som/som_runtime.c + ${LIBRARY_DIR}/src/som/som_stream.c + ${LIBRARY_DIR}/src/stream_compress.c + ${LIBRARY_DIR}/src/util/alloc.cpp + ${LIBRARY_DIR}/src/util/charreach.cpp + ${LIBRARY_DIR}/src/util/clique.cpp + ${LIBRARY_DIR}/src/util/compile_context.cpp + ${LIBRARY_DIR}/src/util/compile_error.cpp + ${LIBRARY_DIR}/src/util/cpuid_flags.c + ${LIBRARY_DIR}/src/util/depth.cpp + ${LIBRARY_DIR}/src/util/fatbit_build.cpp + ${LIBRARY_DIR}/src/util/multibit_build.cpp + ${LIBRARY_DIR}/src/util/multibit.c + ${LIBRARY_DIR}/src/util/report_manager.cpp + 
${LIBRARY_DIR}/src/util/simd_utils.c + ${LIBRARY_DIR}/src/util/state_compress.c + ${LIBRARY_DIR}/src/util/target_info.cpp + ${LIBRARY_DIR}/src/util/ue2string.cpp + ) + + add_library (hyperscan ${SRCS}) + + target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) + target_compile_options (hyperscan + PRIVATE -g0 -march=corei7 # library has too much debug information + PUBLIC -Wno-documentation + ) + target_include_directories (hyperscan + PRIVATE + common + ${LIBRARY_DIR}/include + PUBLIC + ${LIBRARY_DIR}/src + ) + if (ARCH_AMD64) + target_include_directories (hyperscan PRIVATE x86_64) + endif () + target_link_libraries (hyperscan PRIVATE boost::headers_only) + else () + find_library (LIBRARY_HYPERSCAN hs) + find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder + + add_library (hyperscan UNKNOWN IMPORTED GLOBAL) + set_target_properties (hyperscan PROPERTIES IMPORTED_LOCATION ${LIBRARY_HYPERSCAN}) + set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN}) + set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1) + endif () + + message (STATUS "Using hyperscan") +else () + add_library (hyperscan INTERFACE) + target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0) + + message (STATUS "Not using hyperscan") +endif () diff --git a/contrib/hyperscan-cmake/common/hs_version.h b/contrib/hyperscan-cmake/common/hs_version.h new file mode 100644 index 00000000000..f6fa8cb209f --- /dev/null +++ b/contrib/hyperscan-cmake/common/hs_version.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef HS_VERSION_H_C6428FAF8E3713 +#define HS_VERSION_H_C6428FAF8E3713 + +/** + * A version string to identify this release of Hyperscan. 
+ */ +#define HS_VERSION_STRING "5.1.1 2000-01-01" + +#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0) + +#endif /* HS_VERSION_H_C6428FAF8E3713 */ + diff --git a/contrib/hyperscan-cmake/x86_64/config.h b/contrib/hyperscan-cmake/x86_64/config.h new file mode 100644 index 00000000000..4786e3f4e21 --- /dev/null +++ b/contrib/hyperscan-cmake/x86_64/config.h @@ -0,0 +1,106 @@ +/* used by cmake */ + +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* "Define if the build is 32 bit" */ +/* #undef ARCH_32_BIT */ + +/* "Define if the build is 64 bit" */ +#define ARCH_64_BIT + +/* "Define if building for IA32" */ +/* #undef ARCH_IA32 */ + +/* "Define if building for EM64T" */ +#define ARCH_X86_64 + +/* internal build, switch on dump support. */ +/* #undef DUMP_SUPPORT */ + +/* Define if building "fat" runtime. */ +/* #undef FAT_RUNTIME */ + +/* Define if building AVX-512 in the fat runtime. */ +/* #undef BUILD_AVX512 */ + +/* Define to 1 if `backtrace' works. */ +#define HAVE_BACKTRACE + +/* C compiler has __builtin_assume_aligned */ +#define HAVE_CC_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has __builtin_assume_aligned */ +#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED + +/* C++ compiler has x86intrin.h */ +#define HAVE_CXX_X86INTRIN_H + +/* C compiler has x86intrin.h */ +#define HAVE_C_X86INTRIN_H + +/* C++ compiler has intrin.h */ +/* #undef HAVE_CXX_INTRIN_H */ + +/* C compiler has intrin.h */ +/* #undef HAVE_C_INTRIN_H */ + +/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to + 0 if you don't. */ +/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */ + +/* #undef HAVE_PTHREAD_NP_H */ + +/* Define to 1 if you have the `malloc_info' function. */ +/* #undef HAVE_MALLOC_INFO */ + +/* Define to 1 if you have the `memmem' function. */ +/* #undef HAVE_MEMMEM */ + +/* Define to 1 if you have a working `mmap' system call. */ +#define HAVE_MMAP + +/* Define to 1 if `posix_memalign' works. */ +#define HAVE_POSIX_MEMALIGN + +/* Define to 1 if you have the `setrlimit' function. */ +#define HAVE_SETRLIMIT + +/* Define to 1 if you have the `shmget' function. */ +/* #undef HAVE_SHMGET */ + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION + +/* Define to 1 if you have the `sigaltstack' function. */ +#define HAVE_SIGALTSTACK + +/* Define if the sqlite3_open_v2 call is available */ +/* #undef HAVE_SQLITE3_OPEN_V2 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H + +/* Define to 1 if you have the `_aligned_malloc' function. */ +/* #undef HAVE__ALIGNED_MALLOC */ + +/* Define if compiler has __builtin_constant_p */ +#define HAVE__BUILTIN_CONSTANT_P + +/* Optimize, inline critical functions */ +#define HS_OPTIMIZE + +#define HS_VERSION +#define HS_MAJOR_VERSION +#define HS_MINOR_VERSION +#define HS_PATCH_VERSION + +#define BUILD_DATE + +/* define if this is a release build. */ +#define RELEASE_BUILD + +/* define if reverse_graph requires patch for boost 1.62.0 */ +/* #undef BOOST_REVGRAPH_PATCH */ + +#endif /* CONFIG_H_ */ diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt new file mode 100644 index 00000000000..57e9f254db5 --- /dev/null +++ b/contrib/libdivide/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library (libdivide INTERFACE) +target_include_directories (libdivide SYSTEM BEFORE INTERFACE .) 
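The two-line libdivide wrapper above is the smallest case of the pattern this refactoring moves to: an INTERFACE library carries no compiled objects, only usage requirements (here, the include path), and a consumer inherits them through target_link_libraries alone, which is exactly how src/CMakeLists.txt below attaches libdivide to dbms. A minimal sketch (the consumer target name "consumer" is illustrative, not part of the patch):

    add_library (libdivide INTERFACE)
    target_include_directories (libdivide SYSTEM BEFORE INTERFACE .)
    # Consumer side: no library file is linked; only the include path propagates.
    target_link_libraries (consumer PRIVATE libdivide)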
diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index e68f0bacf46..4c71770f5b6 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -209,9 +209,8 @@ endif() target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY}) # inherit from parent cmake -target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR}) -target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY}) +target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only) if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES) target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR}) target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES}) diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index d71a5432715..9304cb3644c 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -1,13 +1,10 @@ -if (HAVE_SSE42) # Not used. Pretty easy to port. - set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h) -endif () - -add_library(metrohash - src/metrohash.h - src/testvector.h - +set (SRCS src/metrohash64.cpp src/metrohash128.cpp - ${SOURCES_SSE42_ONLY}) +) +if (HAVE_SSE42) # Not used. Pretty easy to port. + list (APPEND SRCS src/metrohash128crc.cpp) +endif () -target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src) +add_library(metrohash ${SRCS}) +target_include_directories(metrohash PUBLIC src) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 93ef9d2357b..b8dcb0a9340 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -82,7 +82,7 @@ target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined) target_include_directories(rdkafka SYSTEM PUBLIC include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used. 
-target_link_libraries(rdkafka PRIVATE ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY}) +target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LIBGSASL_LIBRARY}) if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY) target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) endif() diff --git a/contrib/lz4-cmake/CMakeLists.txt b/contrib/lz4-cmake/CMakeLists.txt index 856389395ca..b8121976213 100644 --- a/contrib/lz4-cmake/CMakeLists.txt +++ b/contrib/lz4-cmake/CMakeLists.txt @@ -1,17 +1,28 @@ -SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib) +option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ${NOT_UNBUNDLED}) -add_library (lz4 - ${LIBRARY_DIR}/lz4.c - ${LIBRARY_DIR}/lz4hc.c - ${LIBRARY_DIR}/lz4frame.c - ${LIBRARY_DIR}/lz4frame.h - ${LIBRARY_DIR}/xxhash.c - ${LIBRARY_DIR}/xxhash.h +if (USE_INTERNAL_LZ4_LIBRARY) + set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4) - ${LIBRARY_DIR}/lz4.h - ${LIBRARY_DIR}/lz4hc.h) + set (SRCS + ${LIBRARY_DIR}/lib/lz4.c + ${LIBRARY_DIR}/lib/lz4hc.c + ${LIBRARY_DIR}/lib/lz4frame.c + ${LIBRARY_DIR}/lib/xxhash.c + ) -target_compile_definitions(lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1) -target_compile_options(lz4 PRIVATE -fno-sanitize=undefined) + add_library (lz4 ${SRCS}) -target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}) + target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1) + if (SANITIZE STREQUAL "undefined") + target_compile_options (lz4 PRIVATE -fno-sanitize=undefined) + endif () + target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}/lib) +else () + find_library (LIBRARY_LZ4 lz4) + find_path (INCLUDE_LZ4 lz4.h) + + add_library (lz4 UNKNOWN IMPORTED) + set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION ${LIBRARY_LZ4}) + set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4}) + set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0) +endif () diff --git a/docker/packager/packager b/docker/packager/packager index ccb01a4df92..85dd3cc421c 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -142,7 +142,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if unbundled: # TODO: fix build with ENABLE_RDKAFKA - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_BOOST_LIBRARY=1') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') diff --git a/programs/benchmark/CMakeLists.txt b/programs/benchmark/CMakeLists.txt index be999aafe80..3fa8deb6bd9 100644 --- a/programs/benchmark/CMakeLists.txt +++ b/programs/benchmark/CMakeLists.txt @@ -1,5 +1,12 @@ -set(CLICKHOUSE_BENCHMARK_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Benchmark.cpp) -set(CLICKHOUSE_BENCHMARK_LINK PRIVATE dbms clickhouse_aggregate_functions clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) +set (CLICKHOUSE_BENCHMARK_SOURCES Benchmark.cpp) + +set (CLICKHOUSE_BENCHMARK_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + dbms +) clickhouse_program_add(benchmark) diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index e273123afe0..6ded6a94f3a 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -1,10 +1,19 @@ 
-set(CLICKHOUSE_CLIENT_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/Client.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ConnectionParameters.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Suggest.cpp +set (CLICKHOUSE_CLIENT_SOURCES + Client.cpp + ConnectionParameters.cpp + Suggest.cpp ) -set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) +set (CLICKHOUSE_CLIENT_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + clickhouse_common_io + clickhouse_functions + clickhouse_parsers + string_utils +) # Always use internal readpassphrase add_subdirectory(readpassphrase) diff --git a/programs/compressor/CMakeLists.txt b/programs/compressor/CMakeLists.txt index c009bb55f76..ff642a32fd4 100644 --- a/programs/compressor/CMakeLists.txt +++ b/programs/compressor/CMakeLists.txt @@ -1,7 +1,12 @@ # Also in utils -set(CLICKHOUSE_COMPRESSOR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Compressor.cpp) -set(CLICKHOUSE_COMPRESSOR_LINK PRIVATE dbms clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_COMPRESSOR_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_COMPRESSOR_SOURCES Compressor.cpp) + +set (CLICKHOUSE_COMPRESSOR_LINK + PRIVATE + boost::program_options + clickhouse_parsers + dbms +) clickhouse_program_add(compressor) diff --git a/programs/extract-from-config/CMakeLists.txt b/programs/extract-from-config/CMakeLists.txt index b82cbb966ae..ff2d7937117 100644 --- a/programs/extract-from-config/CMakeLists.txt +++ b/programs/extract-from-config/CMakeLists.txt @@ -1,5 +1,11 @@ -set(CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ExtractFromConfig.cpp) -set(CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK PRIVATE clickhouse_common_config clickhouse_common_io clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES ExtractFromConfig.cpp) + +set (CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK + PRIVATE + boost::program_options + clickhouse_common_config + clickhouse_common_io + clickhouse_common_zookeeper +) clickhouse_program_add(extract-from-config) diff --git a/programs/format/CMakeLists.txt b/programs/format/CMakeLists.txt index aac72d641e6..ab06708cd3a 100644 --- a/programs/format/CMakeLists.txt +++ b/programs/format/CMakeLists.txt @@ -1,5 +1,11 @@ -set(CLICKHOUSE_FORMAT_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Format.cpp) -set(CLICKHOUSE_FORMAT_LINK PRIVATE dbms clickhouse_common_io clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_FORMAT_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_FORMAT_SOURCES Format.cpp) + +set (CLICKHOUSE_FORMAT_LINK + PRIVATE + boost::program_options + clickhouse_common_io + clickhouse_parsers + dbms +) clickhouse_program_add(format) diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index d066fd53277..b61f0ea33b7 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -1,6 +1,17 @@ -set(CLICKHOUSE_LOCAL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/LocalServer.cpp) -set(CLICKHOUSE_LOCAL_LINK PRIVATE clickhouse_storages_system clickhouse_dictionaries clickhouse_common_config clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions clickhouse_parsers clickhouse_table_functions ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_LOCAL_INCLUDE SYSTEM PRIVATE ...) 
+set (CLICKHOUSE_LOCAL_SOURCES LocalServer.cpp) + +set (CLICKHOUSE_LOCAL_LINK + PRIVATE + boost::program_options + clickhouse_aggregate_functions + clickhouse_common_config + clickhouse_common_io + clickhouse_dictionaries + clickhouse_functions + clickhouse_parsers + clickhouse_storages_system + clickhouse_table_functions +) clickhouse_program_add(local) diff --git a/programs/obfuscator/CMakeLists.txt b/programs/obfuscator/CMakeLists.txt index 19dba2be95c..d1179b3718c 100644 --- a/programs/obfuscator/CMakeLists.txt +++ b/programs/obfuscator/CMakeLists.txt @@ -1,5 +1,9 @@ -set(CLICKHOUSE_OBFUSCATOR_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/Obfuscator.cpp) -set(CLICKHOUSE_OBFUSCATOR_LINK PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) -#set(CLICKHOUSE_OBFUSCATOR_INCLUDE SYSTEM PRIVATE ...) +set (CLICKHOUSE_OBFUSCATOR_SOURCES Obfuscator.cpp) + +set (CLICKHOUSE_OBFUSCATOR_LINK + PRIVATE + boost::program_options + dbms +) clickhouse_program_add(obfuscator) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 13e8aac6906..4d947ecdae5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -161,12 +161,12 @@ add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Alg if (MAKE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE jemalloc) + target_link_libraries (dbms PRIVATE jemalloc libdivide) set (all_modules dbms) else() add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) target_link_libraries (dbms PUBLIC ${all_modules}) - target_link_libraries (clickhouse_interpreters PRIVATE jemalloc) + target_link_libraries (clickhouse_interpreters PRIVATE jemalloc libdivide) list (APPEND all_modules dbms) # force all split libs to be linked set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed") @@ -184,6 +184,9 @@ macro (dbms_target_link_libraries) endforeach () endmacro () +dbms_target_include_directories (PUBLIC ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) +target_include_directories (clickhouse_common_io PUBLIC ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src) + if (USE_EMBEDDED_COMPILER) dbms_target_link_libraries (PRIVATE ${REQUIRED_LLVM_LIBRARIES}) dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) @@ -240,8 +243,8 @@ target_link_libraries(clickhouse_common_io ${EXECINFO_LIBRARIES} cpuid PUBLIC - ${Boost_PROGRAM_OPTIONS_LIBRARY} - ${Boost_SYSTEM_LIBRARY} + boost::program_options + boost::system ${CITYHASH_LIBRARIES} ${ZLIB_LIBRARIES} pcg_random @@ -264,18 +267,18 @@ endif() dbms_target_link_libraries ( PRIVATE ${BTRIE_LIBRARIES} - ${Boost_PROGRAM_OPTIONS_LIBRARY} - ${Boost_FILESYSTEM_LIBRARY} - ${LZ4_LIBRARY} - clickhouse_parsers + boost::filesystem + boost::program_options clickhouse_common_config clickhouse_common_zookeeper clickhouse_dictionaries_embedded + clickhouse_parsers + lz4 Poco::JSON string_utils PUBLIC - ${Boost_SYSTEM_LIBRARY} ${MYSQLXX_LIBRARY} + boost::system clickhouse_common_io ) @@ -284,10 +287,6 @@ dbms_target_include_directories(PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/Core/include) dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${PDQSORT_INCLUDE_DIR}) -if (NOT USE_INTERNAL_LZ4_LIBRARY AND LZ4_INCLUDE_DIR) - dbms_target_include_directories(SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR}) -endif () - if (ZSTD_LIBRARY) dbms_target_link_libraries(PRIVATE ${ZSTD_LIBRARY}) if (NOT USE_INTERNAL_ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR) @@ -295,10 +294,6 @@ if (ZSTD_LIBRARY) endif () endif() -if (NOT 
USE_INTERNAL_BOOST_LIBRARY) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) -endif () - if (USE_ICU) dbms_target_link_libraries (PRIVATE ${ICU_LIBRARIES}) dbms_target_include_directories (SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) @@ -329,8 +324,6 @@ if (USE_LDAP) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${OPENLDAP_INCLUDE_DIR}) dbms_target_link_libraries (PRIVATE ${OPENLDAP_LIBRARIES}) endif () - -dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) if (USE_PROTOBUF) @@ -359,9 +352,6 @@ if (USE_OPENCL) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS}) endif () -dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) -target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) - target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${DOUBLE_CONVERSION_INCLUDE_DIR}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${MSGPACK_INCLUDE_DIR}) diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 44e74fb30b5..a7914fb17ec 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -7,12 +7,11 @@ set (SRCS add_library(clickhouse_common_config ${SRCS}) -target_include_directories(clickhouse_common_config PUBLIC ${DBMS_INCLUDE_DIR}) target_link_libraries(clickhouse_common_config PUBLIC + clickhouse_common_zookeeper common Poco::XML PRIVATE - clickhouse_common_zookeeper string_utils ) diff --git a/src/Common/StringUtils/CMakeLists.txt b/src/Common/StringUtils/CMakeLists.txt index c63e0f260ba..bd1282a08d5 100644 --- a/src/Common/StringUtils/CMakeLists.txt +++ b/src/Common/StringUtils/CMakeLists.txt @@ -6,4 +6,3 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_common_stringutils .) add_library(string_utils ${clickhouse_common_stringutils_headers} ${clickhouse_common_stringutils_sources}) -target_include_directories (string_utils PRIVATE ${DBMS_INCLUDE_DIR}) diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 90a75f1d9ec..ef32d9266c0 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -5,7 +5,6 @@ add_headers_and_sources(clickhouse_common_zookeeper .) 
add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) target_link_libraries (clickhouse_common_zookeeper PUBLIC clickhouse_common_io common PRIVATE string_utils) -target_include_directories(clickhouse_common_zookeeper PUBLIC ${DBMS_INCLUDE_DIR}) if (ENABLE_TESTS) add_subdirectory (tests) diff --git a/src/Common/tests/CMakeLists.txt b/src/Common/tests/CMakeLists.txt index b68e71c0b43..2653ab30c29 100644 --- a/src/Common/tests/CMakeLists.txt +++ b/src/Common/tests/CMakeLists.txt @@ -26,7 +26,6 @@ add_executable (int_hashes_perf int_hashes_perf.cpp) target_link_libraries (int_hashes_perf PRIVATE clickhouse_common_io) add_executable (simple_cache simple_cache.cpp) -target_include_directories (simple_cache PRIVATE ${DBMS_INCLUDE_DIR}) target_link_libraries (simple_cache PRIVATE common) add_executable (compact_array compact_array.cpp) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 069a63aa9e1..6e5f984f435 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -10,28 +10,29 @@ add_library(clickhouse_functions ${clickhouse_functions_sources}) target_link_libraries(clickhouse_functions PUBLIC - clickhouse_dictionaries - clickhouse_dictionaries_embedded - dbms - consistent-hashing - consistent-hashing-sumbur + ${BASE64_LIBRARY} ${CITYHASH_LIBRARIES} ${FARMHASH_LIBRARIES} - ${METROHASH_LIBRARIES} - murmurhash - ${BASE64_LIBRARY} ${FASTOPS_LIBRARY} + clickhouse_dictionaries + clickhouse_dictionaries_embedded + consistent-hashing + consistent-hashing-sumbur + dbms + metrohash + murmurhash PRIVATE ${ZLIB_LIBRARIES} - ${Boost_FILESYSTEM_LIBRARY} + boost::filesystem + libdivide ) if (OPENSSL_CRYPTO_LIBRARY) target_link_libraries(clickhouse_functions PUBLIC ${OPENSSL_CRYPTO_LIBRARY}) endif() -target_include_directories(clickhouse_functions SYSTEM PRIVATE ${DIVIDE_INCLUDE_DIR} ${METROHASH_INCLUDE_DIR} ${SPARSEHASH_INCLUDE_DIR}) +target_include_directories(clickhouse_functions SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR}) if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL") # Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size. 
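[Aside between hunks, not part of the patch: the recurring pattern in these CMake changes is replacing variable-based linking, such as `${HYPERSCAN_LIBRARY}` plus a separate `target_include_directories` call, with a single target (`hyperscan`, `lz4`, `metrohash`, `boost::filesystem`) whose usage requirements propagate automatically. A minimal sketch of such an IMPORTED target, with a hypothetical name and illustrative paths, mirroring the `lz4` fallback branch earlier in this series:

    # Hypothetical "foo" library; the name and paths are illustrative only.
    add_library (foo UNKNOWN IMPORTED)
    set_property (TARGET foo PROPERTY IMPORTED_LOCATION /usr/lib/libfoo.a)
    set_property (TARGET foo PROPERTY INTERFACE_INCLUDE_DIRECTORIES /usr/include)
    set_property (TARGET foo APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_FOO=1)

    # A consumer picks up the include path and definition transitively:
    add_executable (app main.cpp)
    target_link_libraries (app PRIVATE foo)

This is why hunks below can simply delete the explicit `${..._INCLUDE_DIR}` lines: the include directories and compile definitions now travel with the target.]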
@@ -60,20 +61,14 @@ if(USE_BASE64) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) endif() -if(USE_XXHASH) - target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions PRIVATE lz4) if (USE_H3) target_link_libraries(clickhouse_functions PRIVATE ${H3_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PRIVATE ${H3_INCLUDE_DIR}) endif() -if(USE_HYPERSCAN) - target_link_libraries(clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY}) - target_include_directories(clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions PRIVATE hyperscan) if(USE_SIMDJSON) target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY}) diff --git a/src/Functions/URL/CMakeLists.txt b/src/Functions/URL/CMakeLists.txt index fabfccae230..21f0adb6594 100644 --- a/src/Functions/URL/CMakeLists.txt +++ b/src/Functions/URL/CMakeLists.txt @@ -9,10 +9,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW endif () # TODO: move Functions/Regexps.h to some lib and use here -if(USE_HYPERSCAN) - target_link_libraries(clickhouse_functions_url PRIVATE ${HYPERSCAN_LIBRARY}) - target_include_directories(clickhouse_functions_url SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR}) -endif() +target_link_libraries(clickhouse_functions_url PRIVATE hyperscan) if (USE_GPERF) # Only for regenerate diff --git a/src/Functions/config_functions.h.in b/src/Functions/config_functions.h.in index 46664caaa3f..eb96c13c355 100644 --- a/src/Functions/config_functions.h.in +++ b/src/Functions/config_functions.h.in @@ -3,8 +3,6 @@ // .h autogenerated by cmake! 
#cmakedefine01 USE_BASE64 -#cmakedefine01 USE_XXHASH -#cmakedefine01 USE_HYPERSCAN #cmakedefine01 USE_SIMDJSON #cmakedefine01 USE_RAPIDJSON #cmakedefine01 USE_H3 diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 725af95563a..324a38b1a17 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -7,8 +7,7 @@ target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARSEHASH_I target_link_libraries (hash_map_lookup PRIVATE dbms) add_executable (hash_map3 hash_map3.cpp) -target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) -target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} metrohash) add_executable (hash_map_string hash_map_string.cpp) target_include_directories (hash_map_string SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) @@ -18,8 +17,7 @@ add_executable (hash_map_string_2 hash_map_string_2.cpp) target_link_libraries (hash_map_string_2 PRIVATE dbms) add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_include_directories(hash_map_string_3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) -target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) +target_link_libraries (hash_map_string_3 PRIVATE dbms ${FARMHASH_LIBRARIES} metrohash) add_executable (hash_map_string_small hash_map_string_small.cpp) target_include_directories (hash_map_string_small SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 6e3ab9decb7..b83fc20e818 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -2,7 +2,6 @@ include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_parsers .) 
add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io) -target_include_directories(clickhouse_parsers PUBLIC ${DBMS_INCLUDE_DIR}) if (USE_DEBUG_HELPERS) set (INCLUDE_DEBUG_HELPERS "-I${ClickHouse_SOURCE_DIR}/base -include ${ClickHouse_SOURCE_DIR}/src/Parsers/iostream_debug_helpers.h") diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 9f73c00d22b..97998e11ea5 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -44,14 +44,14 @@ const char * auto_config_build[] "USE_RDKAFKA", "@USE_RDKAFKA@", "USE_CAPNP", "@USE_CAPNP@", "USE_BASE64", "@USE_BASE64@", - "USE_XXHASH", "@USE_XXHASH@", + "USE_XXHASH", "@USE_INTERNAL_LZ4_LIBRARY@", "USE_HDFS", "@USE_HDFS@", "USE_SNAPPY", "@USE_SNAPPY@", "USE_PARQUET", "@USE_PARQUET@", "USE_PROTOBUF", "@USE_PROTOBUF@", "USE_BROTLI", "@USE_BROTLI@", "USE_SSL", "@USE_SSL@", - "USE_HYPERSCAN", "@USE_HYPERSCAN@", + "USE_HYPERSCAN", "@ENABLE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", "USE_GRPC", "@USE_GRPC@", diff --git a/src/Storages/tests/CMakeLists.txt b/src/Storages/tests/CMakeLists.txt index a6ad49e562e..292f7603838 100644 --- a/src/Storages/tests/CMakeLists.txt +++ b/src/Storages/tests/CMakeLists.txt @@ -17,7 +17,14 @@ add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_a target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) add_executable (transform_part_zk_nodes transform_part_zk_nodes.cpp) -target_link_libraries (transform_part_zk_nodes PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (transform_part_zk_nodes + PRIVATE + boost::program_options + clickhouse_common_config + clickhouse_common_zookeeper + dbms + string_utils +) if (ENABLE_FUZZING) add_executable (mergetree_checksum_fuzzer mergetree_checksum_fuzzer.cpp) diff --git a/utils/check-marks/CMakeLists.txt b/utils/check-marks/CMakeLists.txt index bfb200b8d28..2fc22a925b1 100644 --- a/utils/check-marks/CMakeLists.txt +++ b/utils/check-marks/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (check-marks main.cpp) -target_link_libraries(check-marks PRIVATE dbms ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(check-marks PRIVATE dbms boost::program_options) diff --git a/utils/compressor/CMakeLists.txt b/utils/compressor/CMakeLists.txt index df32330a137..43cde973846 100644 --- a/utils/compressor/CMakeLists.txt +++ b/utils/compressor/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (decompress_perf decompress_perf.cpp) -target_link_libraries(decompress_perf PRIVATE dbms ${LZ4_LIBRARY}) +target_link_libraries(decompress_perf PRIVATE dbms lz4) diff --git a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt index abfd60a07a0..14853590c76 100644 --- a/utils/convert-month-partitioned-parts/CMakeLists.txt +++ b/utils/convert-month-partitioned-parts/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (convert-month-partitioned-parts main.cpp) -target_link_libraries(convert-month-partitioned-parts PRIVATE dbms clickhouse_parsers ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(convert-month-partitioned-parts PRIVATE dbms clickhouse_parsers boost::program_options) diff --git 
a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt index 758c3cdc0ce..20c37854c0a 100644 --- a/utils/test-data-generator/CMakeLists.txt +++ b/utils/test-data-generator/CMakeLists.txt @@ -6,7 +6,7 @@ if (USE_PROTOBUF) protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs2 ProtobufDelimitedMessagesSerializer_Hdrs2 ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format_syntax2.proto) add_executable (ProtobufDelimitedMessagesSerializer ProtobufDelimitedMessagesSerializer.cpp ${ProtobufDelimitedMessagesSerializer_Srcs} ${ProtobufDelimitedMessagesSerializer_Hdrs} ${ProtobufDelimitedMessagesSerializer_Srcs2} ${ProtobufDelimitedMessagesSerializer_Hdrs2}) target_include_directories (ProtobufDelimitedMessagesSerializer SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) - target_link_libraries (ProtobufDelimitedMessagesSerializer PRIVATE ${Protobuf_LIBRARY} ${Boost_PROGRAM_OPTIONS_LIBRARY}) + target_link_libraries (ProtobufDelimitedMessagesSerializer PRIVATE ${Protobuf_LIBRARY} boost::program_options) get_filename_component(ProtobufDelimitedMessagesSerializer_OutputDir "${CMAKE_CURRENT_LIST_DIR}/../../tests/queries/0_stateless" REALPATH) target_compile_definitions(ProtobufDelimitedMessagesSerializer PRIVATE OUTPUT_DIR="${ProtobufDelimitedMessagesSerializer_OutputDir}") endif () diff --git a/utils/wikistat-loader/CMakeLists.txt b/utils/wikistat-loader/CMakeLists.txt index 7f72cbb9f46..96567e73790 100644 --- a/utils/wikistat-loader/CMakeLists.txt +++ b/utils/wikistat-loader/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (wikistat-loader main.cpp ${SRCS}) -target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (wikistat-loader PRIVATE clickhouse_common_io boost::program_options) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt index 2fdd87a4412..08907e1c5b9 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt +++ b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt @@ -1,3 +1,3 @@ add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS}) target_compile_options(zookeeper-adjust-block-numbers-to-parts PRIVATE -Wno-format) -target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt index 34f2e608ef9..7fe7fb94fa4 100644 --- a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt +++ b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-create-entry-to-download-part main.cpp ${SRCS}) -target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index d2947fa8932..9f5da351068 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable 
(zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper clickhouse_common_io boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index ba112bab9cf..c31b1ec3388 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper boost::program_options) From a5035d2d98388ce7159fc3a6b6042c3de4fb3d5a Mon Sep 17 00:00:00 2001 From: stepenhu Date: Tue, 9 Jun 2020 11:12:18 +0000 Subject: [PATCH 0443/2229] Update index.md (#11544) --- docs/zh/index.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/zh/index.md b/docs/zh/index.md index 522affa6250..926c4ce2fdf 100644 --- a/docs/zh/index.md +++ b/docs/zh/index.md @@ -4,7 +4,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在传统的行式数据库系统中,数据按如下顺序存储: -| 行 | 小心点 | JavaEnable | 标题 | GoodEvent | 活动时间 | +| row | watchID | JavaEnable | title | GoodEvent | EventTime | |-----|-------------|------------|------------|-----------|---------------------| | \#0 | 89354350662 | 1 | 投资者关系 | 1 | 2016-05-18 05:19:20 | | \#1 | 90329509958 | 0 | 联系我们 | 1 | 2016-05-18 08:10:20 | @@ -18,23 +18,23 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) 在列式数据库系统中,数据按如下的顺序存储: -| 行: | \#0 | \#1 | \#2 | \#N | +| row: | \#0 | \#1 | \#2 | \#N | |-------------|---------------------|---------------------|---------------------|-----| -| 小心点: | 89354350662 | 90329509958 | 89953706054 | … | +| watchID: | 89354350662 | 90329509958 | 89953706054 | … | | JavaEnable: | 1 | 0 | 1 | … | -| 标题: | 投资者关系 | 联系我们 | 任务 | … | +| title: | 投资者关系 | 联系我们 | 任务 | … | | GoodEvent: | 1 | 1 | 1 | … | -| 活动时间: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | +| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | -该示例中只展示了数据在列式数据库中数据的排列顺序。 +该示例中只展示了数据在列式数据库中数据的排列方式。 对于存储而言,列式数据库总是将同一列的数据存储在一起,不同列的数据也总是分开存储。 常见的列式数据库有: Vertica、 Paraccel (Actian Matrix,Amazon Redshift)、 Sybase IQ、 Exasol、 Infobright、 InfiniDB、 MonetDB (VectorWise, Actian Vector)、 LucidDB、 SAP HANA、 Google Dremel、 Google PowerDrill、 Druid、 kdb+。 {: .灰色 } -不同的存储方式适合不同的场景,这里的查询场景包括: 进行了哪些查询,多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 +不同的数据存储方式适用不同的业务场景,数据访问的场景包括:进行了何种查询、多久查询一次以及各类查询的比例; 每种查询读取多少数据————行、列和字节;读取数据和写入数据之间的关系;使用的数据集大小以及如何使用本地的数据集;是否使用事务,以及它们是如何进行隔离的;数据的复制机制与数据的完整性要求;每种类型的查询要求的延迟与吞吐量等等。 -系统负载越高,根据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统同样适用于明显不同的场景。如果系统适用于广泛的场景,在负载高的情况下,所有的场景可以会被公平但低效处理,或者高效处理一小部分场景。 +系统负载越高,依据使用场景进行定制化就越重要,并且定制将会变的越精细。没有一个系统能够同时适用所有明显不同的业务场景。如果系统适用于广泛的场景,在负载高的情况下,要兼顾所有的场景,那么将不得不做出选择。是要平衡还是要效率? 
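[Aside, not part of the patch: an illustration of the row-versus-column contrast discussed above. It assumes a hypothetical wide `hits` table with the columns from the sample tables; an analytical query usually touches only a handful of columns, so a columnar engine reads just those column files.

    -- Only EventTime and JavaEnable are read from disk in a column store;
    -- the table's remaining columns are never touched.
    SELECT
        toDate(EventTime) AS day,
        count() AS views
    FROM hits
    WHERE JavaEnable = 1
    GROUP BY day
    ORDER BY day;

A row store must fetch whole rows to evaluate the same query, which is the core of the OLAP argument developed in the next section.]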
## OLAP场景的关键特征 {#olapchang-jing-de-guan-jian-te-zheng} @@ -52,7 +52,7 @@ ClickHouse是一个用于联机分析(OLAP)的列式数据库管理系统(DBMS) - 每一个查询除了一个大表外都很小 - 查询结果明显小于源数据,换句话说,数据被过滤或聚合后能够被盛放在单台服务器的内存中 -很容易可以看出,OLAP场景与其他流行场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询是没有意义的,例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。 +很容易可以看出,OLAP场景与其他通常业务场景(例如,OLTP或K/V)有很大的不同, 因此想要使用OLTP或Key-Value数据库去高效的处理分析查询场景,并不是非常完美的适用方案。例如,使用OLAP数据库去处理分析请求通常要优于使用MongoDB或Redis去处理分析请求。 ## 列式数据库更适合OLAP场景的原因 {#lie-shi-shu-ju-ku-geng-gua-he-olapchang-jing-de-yuan-yin} From af850c8123a69e3d0ddfbaf089d947666286460d Mon Sep 17 00:00:00 2001 From: alex-zaitsev Date: Tue, 9 Jun 2020 14:14:05 +0300 Subject: [PATCH 0444/2229] Fixed function compatibility matrix for 'like' (#11545) * Fixed function compatibility matrix for 'like' Bloom filters support 'like' for positive searches, and do not support 'notLike' for negative ones. There is a note under the table but the table itself was inconsistent. * Update docs/en/engines/table-engines/mergetree-family/mergetree.md Co-authored-by: Ivan Blinkov --- .../engines/table-engines/mergetree-family/mergetree.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 611b421d18d..33d12293172 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -340,8 +340,8 @@ The `set` index can be used with all functions. Function subsets for other index |------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| | [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | | [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✗ | ✗ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | | [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | | [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | | [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | @@ -357,7 +357,8 @@ The `set` index can be used with all functions. Function subsets for other index Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. -Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: +!!! 
note "Note" + Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can’t be used for optimizing queries where the result of a function is expected to be false, for example: - Can be optimized: - `s LIKE '%test%'` @@ -660,4 +661,3 @@ After the completion of background merges and mutations, old parts are removed o During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space. [Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) - From 966593e0a8e42266138cbd5af917ec0be60a3c0a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 9 Jun 2020 15:50:18 +0300 Subject: [PATCH 0445/2229] try to completely remove sentry from odbc-bridge --- programs/odbc-bridge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index ab8d94f2a0c..51abf4a9adb 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -10,7 +10,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES PingHandler.cpp validateODBCConnectionString.cpp ) - +set (USE_SENTRY OFF CACHE BOOL "" FORCE) set (CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE clickhouse_parsers From 5719549ee169bccf6c0b19000c96895057c224e6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 9 Jun 2020 16:29:07 +0300 Subject: [PATCH 0446/2229] fixup --- docker/test/performance-comparison/compare.sh | 36 +++++++++++++++++-- docker/test/performance-comparison/report.py | 6 ++-- tests/performance/base64_hits.xml | 8 ++--- tests/performance/basename.xml | 6 ++-- tests/performance/int_parsing.xml | 13 ------- tests/performance/set_index.xml | 9 +++-- 6 files changed, 50 insertions(+), 28 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 835e1424fd9..983db608b47 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -421,9 +421,39 @@ create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, select test, query_index from queries where unstable_show or changed_show ; -create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as - select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries - group by test having s > 0 order by s desc; +create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as + select test, queries, average_time_change from ( + select test, count(*) queries, + sum(left) as left, sum(right) as right, + (right - left) / right average_time_change + from queries + group by test + order by abs(average_time_change) desc + ) + ; + +create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as + select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed + from queries + group by test + order by total_unstable + total_changed desc + ; + +create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as + select test, + queries, + coalesce(total_unstable, 0) total_unstable, + coalesce(total_changed, 0) total_changed, + total_unstable + total_changed total_bad, + coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str + from test_time_changes_tsv + full join unstable_tests_tsv + using test + where (abs(average_time_change) > 0.05 and queries > 5) + 
or (total_bad > 0) + order by total_bad desc, average_time_change desc + settings join_use_nulls = 1 + ; create table query_time engine Memory as select * from file('analyze/client-times.tsv', TSV, diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 5d4d30e0098..98602bac546 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -274,9 +274,9 @@ if args.report == 'main': skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv') printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows) - printSimpleTable('Tests with most unstable queries', - ['Test', 'Unstable', 'Changed perf', 'Total not OK'], - tsvRows('report/bad-tests.tsv')) + printSimpleTable('Test performance changes', + ['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'], + tsvRows('report/test-perf-changes.tsv')) def print_test_times(): global slow_average_tests diff --git a/tests/performance/base64_hits.xml b/tests/performance/base64_hits.xml index 65828d6ca1f..7e7ffc6bd84 100644 --- a/tests/performance/base64_hits.xml +++ b/tests/performance/base64_hits.xml @@ -1,6 +1,6 @@ - hits_100m_single + hits_10m_single @@ -19,7 +19,7 @@ - SELECT count() FROM hits_100m_single WHERE NOT ignore(base64Encode({string})) - SELECT count() FROM hits_100m_single WHERE base64Decode(base64Encode({string})) != {string} - SELECT count() FROM hits_100m_single WHERE tryBase64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_10m_single WHERE NOT ignore(base64Encode({string})) + SELECT count() FROM hits_10m_single WHERE base64Decode(base64Encode({string})) != {string} + SELECT count() FROM hits_10m_single WHERE tryBase64Decode(base64Encode({string})) != {string} diff --git a/tests/performance/basename.xml b/tests/performance/basename.xml index 4ccc7ddf3a0..daa0e9605b3 100644 --- a/tests/performance/basename.xml +++ b/tests/performance/basename.xml @@ -1,12 +1,12 @@ - hits_100m_single + hits_10m_single 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(basename(URL)) - SELECT count() FROM hits_100m_single WHERE NOT ignore(basename(Referer)) + SELECT count() FROM hits_10m_single WHERE NOT ignore(basename(URL)) + SELECT count() FROM hits_10m_single WHERE NOT ignore(basename(Referer)) diff --git a/tests/performance/int_parsing.xml b/tests/performance/int_parsing.xml index 1a242c28393..3008ed06684 100644 --- a/tests/performance/int_parsing.xml +++ b/tests/performance/int_parsing.xml @@ -14,7 +14,6 @@ SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(OS)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(UserAgent)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(Refresh)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(IsRobot)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionWidth)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionHeight)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResolutionDepth)) SETTINGS max_threads = 1 @@ -67,15 +66,6 @@ SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseStartTiming)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ResponseEndTiming)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single 
WHERE NOT ignore(toString(FetchTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RedirectTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMInteractiveTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMContentLoadedTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(DOMCompleteTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(LoadEventStartTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(LoadEventEndTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(NSToDOMContentLoadedTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(FirstPaintTiming)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RedirectCount)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(SocialSourceNetworkID)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamPrice)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(ParamCurrencyID)) SETTINGS max_threads = 1 @@ -83,7 +73,4 @@ SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RefererHash)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(URLHash)) SETTINGS max_threads = 1 SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(CLID)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(YCLID)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RequestNum)) SETTINGS max_threads = 1 - SELECT count() FROM hits_100m_single WHERE NOT ignore(toString(RequestTry)) SETTINGS max_threads = 1 diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml index 46fe32e5486..e85ded1d8fa 100644 --- a/tests/performance/set_index.xml +++ b/tests/performance/set_index.xml @@ -14,8 +14,13 @@ SELECT count() FROM test_in WHERE -toInt64(a) NOT IN (SELECT toInt64(rand(1)) FROM numbers(100000)) settings max_rows_to_read=1, read_overflow_mode='break' - - SELECT count() FROM numbers(10000) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', 
'30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', 
'9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', 
'89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', + + SELECT count() FROM numbers(100) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', 
'48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', 
'56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', 
'21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', '68446', '58896', '60969', '19849', '6722', '91854', '49519', '13949', '67109', '48824', '31723', '75554', '69575', '94986', '75350', '18628', '15284', '41943', '15433', '52607', '41', '22340', '29528', '24059', '34145', '72517', '46316', '10667', '54510', '19882', '47764', '69124', '41963', '84350', '48420', '4646', '24958', '69020', '97121', '26178', '62664', '50622', '32554', '49655', '45398', '11267', '72222', '73363', '89554', '89046', '57384', '29259', '37888', '24850', '74353', '57343', '34762', '2900', '11393', '42154', '94306', '70552', '75265', '921', '26003', '64352', '89857', '83171', '58249', '48940', '53512', '66335', '44865', '68729', '19299', '58003', '39854', '99122', '3860', '80173', '52242', '90966', '53183', '71982', '82325', '87842', '15000', '55627', '71132', '6354', '42402', '91719', '91644', '94533', '74925', '66278', '66911', '85576', '40495', '70919', '71797', '87835', '29845', '71832', '3390', '7994', '33499', '70715', '54897', '82710', '63077', '78105', '24758', '89585', '84607', '46477', '78618', '10989', '39222', '98749', '51685', '94664', '31008', '32823', '89521', '72160', '26952', '4001', '21421', '5737', '74027', '88179', '45553', '83743', '19913', '49435', '65616', '82641', '5149', '76959', '40681', '73023', '2670', '30845', '18863', '35094', '88400', '80963', '9154', '16571', '64192', '59694', '41317', '59942', '58856', '99281', '67260', '66971', '22716', '76089', '58047', '67071', '53707', '462', '52518', '72277', '10681', '69', '98855', '12593', '88842', '67242', '73543', '37439', '18413', '67211', '93495', '45576', '70614', '27988', '53210', '18618', '21318', '68059', '25518', '55917', '56522', '16548', '2404', '93538', '61452', '66358', '3709', '23914', '92426', '81439', '38070', '28988', '29939', '2948', '85720', '45628', '51101', '89431', '86365', '17571', '50987', '83849', '11015', '83812', '66187', '26362', '66786', '22024', '93866', '36161', '90080', '64874', '37294', '83860', '73821', '80279', '36766', '73117', '44620', '84556', '42070', '90383', '27862', '20665', '67576', '34997', '57958', '80638', '84351', '63961', '1362', '14338', '80377', '24192', '41294', '57368', '51189', '27287', '45764', '86289', '65600', '708', '84090', '96005', '55676', '84855', '72385', '70018', '9336', '82701', '3710', '52083', '74045', '96454', '30956', '67369', '78941', '81810', '71906', '23194', '33042', '50794', '61256', '24449', '48639', '22916', '78303', '13666', '40762', '43942', '51075', '89783', '95786', '90462', '6181', '36482', 
'40675', '4970', '6388', '91849', '72579', '94983', '86084', '20140', '68427', '48123', '43122', '98066', '37560', '6927', '72803', '5546', '62259', '98439', '6457', '98568', '70499', '33022', '28226', '29675', '20917', '75365', '20900', '8190', '56736', '99153', '77779', '49333', '50293', '97650', '4067', '47278', '42761', '71875', '13966', '11223', '46783', '18059', '61355', '29638', '75681', '24466', '89634', '20759', '83252', '37780', '15931', '74893', '6703', '64524', '80656', '85990', '78427', '18411', '20696', '86432', '93176', '69889', '15072', '15180', '9935', '10467', '60248', '42430', '62590', '89596', '27743', '26398', '79912', '60048', '50943', '38870', '69383', '72261', '98059', '55242', '74905', '5667', '54321', '70415', '39903', '49711', '85318', '79979', '59262', '82321', '15263', '17416', '74554', '94733', '72112', '49872', '54849', '73883', '78250', '74935', '68559', '57564', '50541', '45730', '41595', '5588', '83723', '42891', '11898', '14348', '99732', '14481', '85233', '21277', '94508', '52551', '74187', '7634', '42912', '25100', '43536', '35798', '48190', '86477', '22680', '48148', '59501', '56563', '16802', '81496', '97568', '68657', '51462', '67953', '99660', '39002', '54170', '57190', '68086', '52700', '6487', '55709', '70418', '62629', '70420', '35695', '36152', '45360', '53503', '46623', '76000', '50648', '97876', '44815', '29163', '1356', '64123', '71388', '17658', '99084', '58727', '59437', '38773', '71254', '81286', '97545', '18786', '56834', '20346', '36401', '62316', '58082', '67959', '99876', '69895', '80099', '62747', '20517', '99777', '6472', '49189', '31321', '39992', '68073', '13378', '51806', '21776', '52060', '96983', '25754', '93709', '96627', '8644', '93726', '14002', '37716', '87620', '34507', '76339', '24491', '5849', '44110', '522', '66521', '12776', '44887', '80535', '14548', '75248', '671', '73071', '35715', '59474', '7061', '82243', '56170', '20179', '59717', '1725', '24634', '11270', '77023', '63840', '46608', '44667', '22422', '59771', '94768', '73033', '82905', '16463', '40971', '22204', '58366', '28721', '14907', '76468', '81872', '38418', '36989', '61439', '10610', '131', '44296', '35453', '10117', '75856', '94603', '99602', '68075', '35949', '13599', '50030', '69633', '55956', '85465', '16429', '86081', '11145', '6195', '82207', '90598', '92814', '23725', '83204', '80346', '71542', '46634', '15820', '54123', '45397', '15322', '61743', '9273', '71347', '6835', '64006', '91718', '43677', '32923', '21486', '17098', '61694', '43347', '40019', '4071', '52443', '42386', '56839', '83514', '27633', '40780', '51749', '92101', '62384', '92206', '56044', '66174', '11137', '73966', '78471', '30468', '31643', '33197', '6888', '8066', '86603', '74383', '6098', '54411', '98819', '89862', '88639', '94422', '89371', '80526', '91747', '91220', '64944', '76658', '42046', '58518', '27249', '6646', '3028', '1346', '33763', '9734', '31737', '65527', '5892', '60813', '3410', '35464', '43009', '98382', '70580', '93898', '56404', '32995', '62771', '71556', '40538', '55612', '45656', '10758', '20268', '33603', '38310', '14242', '74397', '10722', '71575', '22590', '49043', '91439', '9055', '23668', '9101', '5268', '64133', '77501', '64684', '11337', '47575', '50732', '88680', '93730', '46785', '17589', '3520', '57595', '71241', '34994', '8753', '36147', '88844', '41914', '11250', '94632', '71927', '4624', '86279', '7664', '2659', '94853', '65386', '30438', '86005', '92883', '84629', '59910', '44484', '1306', '8404', '56962', '29990', '38445', '96191', '73013', 
'66590', '40951', '24712', '18825', '37268', '87843', '18972', '12154', '7779', '52149', '76152', '65799', '86011', '35475', '78083', '88232', '91551', '65532', '93516', '73827', '24227', '44687', '55759', '83819', '45088', '10856', '60488', '39051', '14103', '76650', '81181', '46731', '737', '58788', '78945', '42096', '66731', '66740', '72273', '88969', '5655', '86590', '41096', '80038', '32430', '51877', '23970', '91900', '13082', '45880', '94367', '19739', '61998', '71665', '16083', '57035', '26916', '10166', '18834', '46798', '66881', '28444', '68840', '10459', '81087', '4728', '76224', '39257', '23470', '93524', '37345', '30074', '49856', '22022', '55279', '5159', '5193', '58030', '57539', '12514', '49759', '96222', '52597', '67192', '88187', '53614', '16084', '79915', '28212', '79334', '85283', '32306', '31058', '43113', '74707', '74869', '2213', '32134', '6379', '85426', '87098', '35984', '51105', '69287', '16803', '83337', '14913', '62531', '58098', '7914', '20105', '28850', '1384', '43173', '62983', '87113', '76066', '86320', '77684', '45191', '95225', '41503', '36713', '48404', '91228', '53865', '98981', '59161', '61237', '84561', '17455', '14379', '57789', '80895', '99260', '84595', '72942', '53220', '84448', '81332', '49437', '83086', '93414', '54519', '52288', '74772', '22460', '49324', '11168', '96071', '61985', '38284', '6405', '54698', '71727', '60093', '37340', '87884', '83403', '4542', '94949', '19636', '15855', '39105', '10424', '67418', '91022', '69254', '8481', '38411', '3832', '44354', '93548', '57172', '28481', '372', '81497', '52179', '41060', '72141', '41396', '65590', '70432', '82819', '93814', '26118', '84780', '88485', '70821', '8222', '83000', '47067', '38516', '33347', '47681', '48202', '60749', '52112', '7937', '28105', '11394', '45746', '43252', '34494', '2979', '69715', '42486', '82315', '71760', '97413', '66137', '94487', '7429', '74434', '22964', '55251', '3448', '53534', '2574', '9693', '96157', '2955', '4348', '19566', '56930', '83319', '31310', '53905', '1148', '41726', '22233', '76045', '37351', '10545', '17581', '28047', '30199', '4741', '58111', '33497', '67796', '67730', '31247', '43772', '29461', '45970', '73353', '22534', '53962', '32147', '71392', '62579', '66345', '58246', '33442', '9581', '29705', '14058', '86471', '76125', '59363', '94982', '74810', '89149', '20066', '3366', '3568', '25752', '80036', '64119', '27270', '40061', '91052', '69022', '9852', '77112', '83075', '43924', '61661', '56133', '96652', '57944', '72576', '82170', '79236', '55745', '15309', '88878', '72761', '37647', '67465', '12777', '97309', '93202', '41470', '8787', '64920', '48514', '18917', '35157', '59151', '4640', '5317', '38134', '76548', '82788', '9214', '58418', '73185', '90554', '10543', '47182', '62936', '91765', '89751', '68931', '48865', '64607', '7150', '77862', '14297', '14828', '33013', '91698', '67593', '98096', '16595', '51639', '86531', '24719', '1703', '78788', '43810', '38918', '95491', '99903', '82671', '8291', '68288', '31224', '39863', '4265', '77798', '7698', '33804', '92286', '4744', '37038', '44203', '98212', '17369', '77442', '62879', '4145', '96881', '15646', '36824', '19959', '45451', '76049', '54272', '97577', '95298', '81115', '30204', '82041', '8037', '10052', '8756', '76833', '82851', '24276', '75574', '36037', '78079', '92807', '29064', '90000', '84150', '17102', '75092', '49424', '35597', '4693', '82853', '42511', '16119', '23478', '65240', '55585', '91762', '71671', '46682', '72479', '97696', '24615', '12579', '30274', '48255', '2336', 
'90202', '5808', '45426', '76308', '74639', '31245', '99894', '89638', '6233', '33893', '71899', '85273', '89429', '29761', '50231', '57249', '99347', '22642', '66972', '86221', '47514', '88274', '10819', '73150', '53754', '13304', '20478', '38099', '619', '14669', '8011', '97657', '26569', '65430', '13467', '38180', '23675', '72350', '42257', '39875', '23529', '53407', '11833', '29599', '95621', '7727', '59527', '86846', '22860', '5358', '3730', '87555', '362', '95755', '54565', '29935', '68950', '52349', '98344', '86576', '7420', '12236', '15844', '48099', '97535', '97081', '50261', '31187', '60496', '24123', '24042', '6376', '6679', '99806', '20306', '60676', '36881', '77309', '5247', '96569', '53417', '73252', '64179', '35318', '75732', '65119', '32621', '40464', '22887', '96152', '65161', '83381', '8915', '68142', '7328', '85031', '15688', '72519', '93992', '86927', '75538', '38205', '50877', '70039', '97538', '94822', '52131', '49643', '85206', '1347', '14574', '88736', '53442', '49991', '64925', '72283', '82213', '60905', '36118', '62963', '16983', '79185', '15111', '26059', '17792', '98218', '33214', '1094', '41754', '77275', '65173', '13190', '91004', '90422', '44387', '92672', '98641', '54609', '83295', '37395', '70104', '32986', '72524', '82478', '5837', '83916', '52736', '57112', '55985', '42642', '42136', '89642', '35712', '49489', '19726', '65824', '24384', '48112', '15366', '99206', '68384', '51389', '529', '21475', '75749', '95182', '60110', '70571', '74174', '38105', '78107', '4101', '8982', '11215', '23987', '3303', '28706', '54629', '98000', '67510', '30036', '99140', '48896', '40971', '7735', '79984', '50134', '94928', '57023', '52880', '83067', '41940', '62994', '89213', '38593', '19283', '68206', '22234', '19245', '26266', '32403', '65889', '17022', '64280', '42797', '27161', '57675', '42313', '93606', '93082', '20659', '90824', '1226', '66266', '12503', '57104', '15247', '51160', '92398', '71967', '59476', '44465', '35765', '10787', '47737', '45792', '2292', '47599', '89612', '8162', '87622', '69410', '45727', '31158', '99791', '89544', '27214', '99588', '40516', '75616', '36505', '46079', '95448', '97999', '47462', '47799', '82729', '34038', '60789', '96938', '22682', '79062', '93307', '36038', '49016', '90983', '48219', '50889', '32517', '72219', '71229', '82643', '1195', '70543', '17', '22178', '23544', '72371', '1163', '28527', '7336', '39846', '31956', '80963', '41804', '59791', '41831', '1940', '52377', '79494', '12531', '81112', '44320', '18746', '5774', '63869', '4085', '59922', '12751', '99443', '13530', '23872', '36026', '83360', '32711', '92980', '11140', '99323', '57263', '98149', '29265', '25548', '65995', '4818', '15593', '8535', '37863', '12217', '14474', '66584', '89272', '86690', '58777', '39666', '44756', '18442', '52586', '98030', '40850', '38708', '49304', '68923', '65008', '84388', '83639', '29866', '63675', '26793', '49227', '82099', '24090', '57535', '24201', '65776', '74054', '89833', '62979', '26613', '5851', '99766', '63484', '66605', '37179', '90760', '59336', '58390', '93239', '84578', '11396', '93994', '73818', '23972', '37720', '72369', '25063', '32952', '71036', '76612', '31285', '34090', '19136', '53783', '66436', '61478', '96749', '43658', '7399', '31574', '67073', '40480', '20727', '70993', '65549', '30800', '21507', '53785', '89574', '86381', '56492', '62603', '44856', '68687', '63794', '70996', '7475', '84238', '71939', '86886', '94792', '15036', '36936', '95722', '17771', '67850', '33371', '49314', '40744', '5432', '81057', 
'41201', '75986', '22961', '15323', '1570', '18657', '95219', '19130', '53127', '15867', '81135', '73206', '76668', '36386', '48828', '31417', '56916', '70891', '60534', '95777', '10022', '94053', '2928', '56326', '16559', '79656', '6414', '81247', '78270', '55687', '19151', '61597', '99857', '81142', '27725', '53493', '12185', '1455', '48501', '59425', '20591', '24900', '66079', '84889', '32024', '18919', '2043', '7076', '71201', '88258', '86521', '93348', '26395', '39646', '44145', '33911', '46231', '67054', '39979', '11630', '23020', '76278', '88056', '11480', '4723', '78612', '70211', '60622', '84687', '59092', '65675', '38479', '64399', '64699', '95964', '42764', '69060', '28189', '4193', '95805', '75462', '17245', '59640', '94773', '84292', '53092', '98507', '61353', '32483', '53027', '48912', '87221', '47788', '59263', '65196', '35567', '17494', '64253', '50223', '7057', '87467', '62414', '2523', '50910', '72353', '78986', '78104', '47719', '29108', '12957', '5114', '64435', '66707', '37449', '70399', '45334', '71606', '55338', '55072', '58765', '12151', '22012', '16954', '87366', '14240', '98041', '72296', '47408', '56879', '99584', '63172', '92316', '28071', '29880', '19608', '13839', '87484', '56541', '88662', '87098', '72124', '78282', '27653', '38993', '31870', '67239', '99445', '7376', '78487', '98880', '12180', '86773', '67773', '15416', '58172', '13075', '67559', '97510', '29705', '86985', '57024', '11827', '31236', '91920', '26116', '94614', '14486', '46252', '78847', '43786', '70048', '96739', '35240', '39933', '58209', '27852', '65669', '47323', '58150', '84444', '44344', '95882', '41258', '31314', '69060', '19916', '6979', '19436', '45572', '16259', '74566', '6306', '24705', '53422', '593', '97031', '22308', '26875', '23042', '78035', '34229', '61976', '23175', '50072', '90896', '50810', '71730', '86468', '94807', '8218', '36032', '58628', '60560', '51206', '37943', '27987', '15014', '49905', '70018', '66799', '80851', '23594', '29982', '6438', '97381', '47715', '96294', '17985', '48545', '12672', '5250', '9988', '24601', '3736', '97815', '54363', '64703', '44167', '68376', '16595', '38073', '29630', '59630', '1858', '71823', '75580', '70083', '14493', '93821', '93394', '85369', '3818', '8435', '59988', '43966', '13961', '15855', '83332', '80312', '27299', '88840', '76964', '56173', '62794', '79389', '82642', '85843', '47116', '43064', '16061', '28905', '54415', '72832', '91252', '93488', '79457', '99336', '70744', '80432', '6487', '880', '87701', '154', '86574', '86677', '17892', '81488', '95260', '12515', '43189', '9211', '55403', '41417', '60046', '54785', '83655', '28274', '65745', '63062', '44549', '36391', '48051', '7328', '3572', '33226', '49177', '25123', '59065', '19691', '15109', '10172', '95578', '29497', '48152', '20276', '36270', '78866', '48309', '53209', '55475', '30073', '19717', '16004', '45692', '83430', '9291', '45935', '57030', '92613', '91656', '67697', '34915', '28156', '56594', '3273', '11194', '98270', '34370', '2621', '66679', '97451', '97717', '87923', '48310', '37725', '69743', '75103', '84956', '75163', '16069', '65304', '19397', '18071', '27273', '49823', '57595', '98324', '82174', '10293', '80943', '64184', '19472', '4198', '9410', '25927', '65961', '33155', '95168', '33692', '61712', '69877', '13308', '17415', '10022', '2491', '67310', '96140', '68050', '76272', '17143', '76805', '57176', '7539', '22690', '95483', '87592', '27221', '90821', '51154', '99828', '68998', '54581', '74222', '10269', '65057', '45467', '96089', '55058', '89779', 
'60837', '74122', '52886', '58055', '14880', '93208', '66652', '68830', '24121', '62407', '87257', '18802', '14925', '45423', '98624', '55195', '59072', '41414', '77840', '66075', '62705', '26549', '19063', '57552', '2507', '52069', '57620', '66688', '14833', '33700', '90666', '98052', '5367', '2268', '43093', '69063', '22030', '85564', '92258', '1847', '24446', '65835', '38660', '91899', '87732', '52396', '31952', '36000', '86944', '16109', '80729', '53757', '60226', '59103', '84187', '36674', '72823', '29884', '4654', '69139', '20440', '57413', '3651', '39639', '44564', '57492', '84159', '751', '99748', '9659', '72661', '39220', '99742', '74734', '75729', '38071', '69934', '73640', '65294', '54524', '64372', '37927', '17187', '7863', '12732', '40296', '36197', '15821', '76831', '4400', '71933', '4040', '22072', '33064', '25702', '13324', '91275', '27388', '97729', '14620', '45989', '80737', '17934', '4219', '3032', '43457', '31051', '24469', '67041', '29328', '75499', '80951', '88212', '92595', '49969', '24612', '58732', '2718', '3805', '50918', '99426', '8614', '35580', '93273', '989', '24385', '41185', '25687', '47146', '25227', '95839', '56355', '98536', '79824', '31725', '46447', '26690', '68418', '47783', '33725', '21729', '70797', '59038', '60376', '25087', '68332', '67950', '12411', '95918', '64736', '65336', '74947', '64605', '4106', '42712', '96640', '28492', '28648', '42429', '821', '24333', '69677', '38959', '23484', '92005', '29352', '29159', '52873', '99947', '21834', '85347', '93479', '28298', '55608', '3226', '69714', '80283', '6577', '18849', '44605', '75286', '28139', '26541', '12867', '57500', '86617', '33005', '57498', '60223', '74954', '51401', '55246', '5648', '16513', '40930', '43821', '32090', '66002', '65530', '76083', '6047', '6879', '94987', '80787', '11688', '77161', '92670', '6696', '400', '28572', '47234', '51375', '88518', '762', '92617', '54260', '7560', '60180', '43331', '64059', '27616', '75839', '21392', '47756', '46254', '19486', '88533', '30130', '93694', '8557', '66534', '94447', '16910', '6480', '77440', '24366', '6195', '48946', '28597', '44429', '50300', '73556', '40638', '98709', '94413', '15987', '43860', '64871', '93953', '34506', '7296', '31753', '30626', '77510', '39829', '25696', '39776', '69185', '36540', '65413', '31528', '43446', '73532', '49776', '30282', '30004', '26725', '15200', '33958', '90320', '71836', '48051', '31970', '5326', '96194', '69695', '60898', '60945', '18271', '50868', '61468', '23593', '68985', '20628', '58044', '8942', '34849', '7384', '50500', '62895', '78780', '48946', '65278', '4067', '973', '34761', '15512', '73739', '23138', '47322', '55568', '32259', '71816', '49277', '75218', '76104', '19579', '68312', '67904', '33886', '53888', '26421', '43859', '40291', '39068', '31711', '36542', '10195', '39781', '72352', '13188', '34113', '9428', '60443', '4987', '13783', '80744', '63483', '18266', '11961', '87167', '46987', '28480', '74214', '39191', '8146', '38090', '75727', '79245', '47720', '52547', '45321', '4972', '49701', '74354', '69672', '63455', '41902', '5667', '54166', '4962', '25873', '44509', '73332', '73383', '29438', '21455', '12320', '11997', '16921', '49379', '63027', '86175', '8110', '76149', '2520', '11256', '25863', '50518', '69001', '79113', '9447', '91840', '5242', '10998', '46496', '2448', '56058', '20970', '10517', '17783', '25723', '97137', '62840', '1264', '78691', '81020', '55335', '48524', '2088', '90413', '76651', '26855', '16177', '14954', '62914', '21344', '5708', '75560', '39311', '95865', 
'28783', '64902', '95657', '46276', '33426', '4799', '11588', '57513', '73689', '77677', '63011', '97795', '34954', '76866', '32043', '32697', '26643', '36890', '53476', '3011', '13963', '49551', '87671', '67761', '17488', '94770', '50599', '33272', '23091', '38079', '41177', '22395', '91656', '79679', '38687', '57384', '80118', '42507', '4098', '78949', '45669', '48802', '83915', '78292', '4369', '57657', '49146', '45192', '98491', '72457', '46331', '207', '81601', '7409', '70856', '91605', '70295', '9171', '72293', '32997', '78025', '16795', '73534', '68780', '21284', '31767', '94381', '86439', '12420', '53285', '99563', '60502', '67954', '55012', '99809', '5431', '69978', '99712', '14401', '79498', '4495', '3045', '528', '72542', '91604', '72725', '39378', '80378', '41996', '20138', '54545', '59730', '36951', '45157', '37964', '97690', '12184', '4944', '53803', '93605', '60851', '68938', '46285', '89663', '90309', '6907', '87239', '81791', '83292', '90013', '68927', '14725', '81840', '63836', '52068', '43830', '4794', '931', '59255', '8263', '99057', '94401', '69033', '7437', '20364', '92884', '28193', '43932', '37629', '59426', '18891', '8583', '79551', '87242', '1483', '6725', '65786', '16844', '12650', '99305', '42841', '9811', '18800', '39313', '51373', '31874', '84558', '27831', '48614', '48975', '55509', '83363', '31854', '64001', '94028', '76125', '79314', '24893', '81132', '9441', '86015', '28356', '40358', '10160', '23328', '7330', '76538', '37611', '89351', '84132', '97047', '26109', '95222', '35130', '75600', '88602', '15073', '87835', '71649', '28948', '81615', '37498', '28674', '59776', '44095', '65924', '64368', '94536', '12518', '61711', '55619', '82949', '4114', '21540', '70544', '28022', '79983', '28781', '7749', '97873', '4951', '50076', '47611', '99522', '56820', '38653', '49047', '36283', '83908', '72452', '85625', '10811', '36998', '44083', '34864', '44975', '39057', '4551', '68450', '24781', '1503', '9871', '46885', '11424', '21259', '54900', '97669', '85669', '6015', '2521', '37661', '14915', '57423', '91903', '94789', '32059', '64972', '4600', '61465', '27118', '79785', '13547', '49766', '38410', '68860', '63756', '23621', '64387', '46255', '63408', '11297', '41081', '56326', '58349', '98703', '72268', '73574', '32098', '42534', '91502', '38083', '11241', '56828', '12098', '25377', '37054', '56328', '30034', '26922', '68401', '93478', '63275', '62650', '81407', '773', '79499', '14970', '47217', '1187', '57428', '69980', '77764', '74791', '22107', '54363', '39247', '56028', '56982', '84244', '21464', '18716', '25533', '94589', '94768', '21537', '18436', '81135', '27654', '79713', '56630', '61571', '58453', '26758', '68450', '68449', '2994', '15347', '83954', '71823', '6428', '44210', '79597', '95144', '32871', '1991', '320', '77157', '63607', '31154', '48846', '71125', '61750', '59608', '33038', '35733', '68915', '94127', '50383', '64242', '49708', '57270', '65019', '8581', '12111', '18487', '50013', '58664', '22214', '19033', '33681', '44754', '28830', '10381', '52318', '34959', '20682', '55453', '53800', '65774', '99164', '72102', '36986', '44157', '56716', '7974', '81475', '25926', '39402', '33688', '99671', '95312', '42268', '26536', '14482', '67377', '57993', '89147', '15834', '64995', '4700', '18714', '30221', '39095', '32749', '69257', '55204', '30497', '31839', '63045', '30009', '62683', '31232', '77680', '93551', '63589', '6989', '77246', '42169', '46117', '73226', '37427', '1858', '83649', '37410', '86369', '4641', '74481', '66168', '48041', '22597', 
'14670', '27464', '57165', '20939', '36282', '76940', '73358', '50521', '69603', '8895', '81793', '57743', '81903', '64025', '91641', '25276', '34040', '62642', '64015', '57657', '84890', '73832', '782', '60160', '16998', '40023', '24590', '88613', '76640', '53091', '67600', '80183', '45674', '64464', '25163', '42384', '66972', '13953', '41966', '66048', '15135', '73745', '19466', '53657', '34619', '13462', '15905', '48257', '73297', '238', '93525', '80556', '5942', '5411', '66169', '9090', '95130', '74316', '57321', '48083', '62355', '68113', '15239', '36644', '80326', '65817', '54428', '61955', '58849', '77206', '16073', '98261', '92091', '39178', '35464', '85109', '85452', '21128', '25665', '81860', '44664', '24024', '56960', '95124', '39786', '18836', '11121', '44163', '81074', '79064', '46219', '94694', '44233', '81469', '24642', '15030', '21995', '13587', '40755', '6669', '81093', '74305', '1881', '55649', '37273', '80827', '98643', '46694', '59281', '79231', '42813', '84984', '7052', '98113', '17296', '84434', '31205', '46894', '71219', '74530', '44686', '70744', '91388', '20692', '96853', '73803', '15836', '18126', '49686', '4179', '47588', '87892', '65425', '68012', '97468', '92510', '99271', '58694', '11918', '37051', '18644', '57228', '14265', '57572', '57022', '52186', '30193', '93570', '87872', '5257', '26784', '6476', '61746', '68559', '1720', '26202', '16519', '27688', '10645', '87174', '60845', '73385', '82075', '6933', '98828', '56895', '17344', '84253', '36561', '51648', '24939', '63470', '31034', '95052', '51090', '51465', '87979', '68650', '30181', '29598', '19137', '43221', '81353', '90170', '96985', '61115', '17385', '92314', '80650', '55821', '17874', '84333', '93272', '48260', '87272', '22764', '59957', '51870', '85988', '39222', '77241', '62535', '28344', '6011', '80831', '64551', '46299', '75195', '71177', '8660', '58943', '57003', '3306', '74413', '74068', '15073', '89016', '93140', '13911', '57170', '19880', '41870', '9131', '57495', '73032', '86979', '60094', '87026', '30880', '4736', '86301', '92707', '21689', '83565', '71275', '47665', '65687', '71184', '89897', '32490', '97577', '38723', '79113', '37531', '97500', '94450', '15699', '58019', '84423', '27057', '56017', '97148', '47365', '30669', '33818', '80406', '99690', '33012', '95178', '46809', '48448', '79350', '9146', '99701', '98976', '71197', '44161', '75069', '36602', '79650', '97301', '12020', '56658', '25701', '46392', '78609', '63073', '69419', '57736', '20102', '42415', '79044', '20277', '56280', '47903', '94311', '25558', '40336', '91305', '90505', '66769', '64562', '83737', '62892', '10375', '71024', '19988', '56946', '76110', '21847', '43162', '50578', '46086', '54167', '61722', '53463', '63134', '69288', '12838', '14116', '71687', '50846', '59810', '24826', '84138', '82885', '91496', '98600', '82769', '40049', '4125', '50694', '1294', '2805', '29691', '82321', '76462', '85945', '115', '29188', '66918', '71340', '31585', '61638', '95472', '52978', '50622', '81990', '60955', '70519', '22270', '35610', '95871', '89222', '41038', '52546', '1163', '67943', '1793', '92010', '35755', '74509', '66665', '95759', '8568', '44299', '67822', '5806', '85839', '13895', '87675', '31357', '88014', '40026', '53050', '28951', '31992', '42495', '82892', '51567', '2869', '45808', '20238', '20781', '56098', '66307', '95701', '614', '60833', '3091', '81339', '24195', '65639', '85976', '28116', '66224', '51502', '73637', '13207', '88302', '36488', '65518', '98187', '26', '74367', '64706', '53943', '86760', '25783', 
'82112', '34958', '86621', '20848', '63459', '14049', '84943', '91873', '50238', '77773', '64109', '8602', '87934', '47583', '66053', '30287', '5507', '80312', '37464', '57457', '86200', '17806', '16522', '38843', '94334', '59958', '63864', '53427', '74506', '33980', '90449', '30842', '53616', '36738', '52', '13595', '53051', '13174', '60163', '71420', '73835', '67119', '79018', '42782', '45059', '952', '46360', '85879', '71552', '84741', '29746', '32577', '10041', '7208', '97528', '51256', '916', '55973', '17684', '99046', '38782', '58660', '97798', '66032', '48339', '51329', '12532', '97904', '95454', '42737', '62541', '96702', '82953', '94610', '26645', '86813', '25480', '99713', '26078', '23028', '93056', '21445', '73209', '89318', '69987', '34705', '30064', '17094', '51135', '54141', '26625', '1086', '13082', '30843', '98672', '56864', '42605', '5833', '60850', '69366', '27351', '16456', '92609', '48030', '54322', '69891', '46502', '34578', '77918', '63276', '75958', '42519', '60266', '85576', '4855', '14258', '67017', '10545', '35078', '53012', '71922', '85784', '73402', '74363', '58457', '94102', '23510', '51559', '39482', '87057', '9377', '10106', '82985', '33931', '16523', '6484', '97749', '83172', '53753', '27466', '23073', '96083', '67302', '57465', '21877', '18013', '99804', '32873', '43123', '72365', '53197', '80578', '69770', '97471', '86954', '67183', '98497', '78474', '28450', '63183', '98699', '42738', '61433', '3491', '27304', '49311', '94980', '92740', '43272', '86549', '11406', '79636', '85582', '38086', '657', '2354', '26567', '77450', '42086', '21600', '49011', '44059', '47872', '75761', '96577', '11642', '83471', '79616', '23749', '77082', '96876', '65302', '84027', '48955', '59887', '20657', '75090', '9058', '50347', '66088', '70745', '76342', '58026', '95568', '61504', '93473', '84590', '47089', '74717', '93090', '46334', '68273', '59500', '54345', '72608', '54048', '86156', '40296', '74046', '6813', '36369', '74543', '18305', '85236', '31316', '37061', '96893', '23112', '5529', '10166', '19037', '1467', '70810', '30932', '18410', '92837', '81324', '12268', '54705', '25207', '90366', '56528', '3392', '88747', '39951', '97957', '99404', '23685', '13533', '15640', '11434', '66516', '71025', '65770', '88000', '52232', '32360', '10787', '37438', '2264', '94460', '80214', '42288', '59062', '29010', '64093', '21225', '22297', '36935', '19202', '5925', '85373', '27414', '28991', '9191', '42273', '56587', '89719', '77191', '64334', '61542', '28763', '28978', '79184', '59815', '95200', '30246', '54022', '287', '91808', '66347', '50833', '15356', '78614', From bdcf1170ced2f334d9a273815b9ac8064f4046ba Mon Sep 17 00:00:00 2001 From: Ivan Starkov Date: Tue, 9 Jun 2020 16:32:49 +0300 Subject: [PATCH 0447/2229] Instead of forcing IPv4 protocol force wget to use protocols in order --- docker/server/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 6111b0057ed..059f3cb631b 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # check if clickhouse is ready to accept connections # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) - if ! wget --spider --quiet -4 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + if ! 
wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then echo >&2 'ClickHouse init process failed.' exit 1 fi From e466c6b38724effdb2c93bbb052cfa1f2b2f28fb Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 9 Jun 2020 22:38:04 +0800 Subject: [PATCH 0448/2229] Fix spelling and improve translation --- docs/zh/operations/configuration-files.md | 28 +++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/docs/zh/operations/configuration-files.md b/docs/zh/operations/configuration-files.md index 16a903b6528..aa0da86e8d0 100644 --- a/docs/zh/operations/configuration-files.md +++ b/docs/zh/operations/configuration-files.md @@ -1,27 +1,31 @@ # 配置文件 {#configuration_files} -主服务器配置文件是 `config.xml`. 它驻留在 `/etc/clickhouse-server/` 目录。 +ClickHouse支持多配置文件管理。主配置文件是`/etc/clickhouse-server/config.xml`。其余配置文件须放在`/etc/clickhouse-server/config.d`目录中。 -单个设置可以在复盖 `*.xml` 和 `*.conf` 在文件 `conf.d` 和 `config.d` 配置文件旁边的目录。 +!!! 注意: + 所有配置文件必须是XML格式。此外,配置文件须有相同的根元素,通常是`<yandex>`。 -该 `replace` 或 `remove` 可以为这些配置文件的元素指定属性。 +主配置文件中的一些配置可以通过`replace`或`remove`属性被配置文件覆盖。 -如果两者都未指定,则递归组合元素的内容,替换重复子项的值。 +如果两者都未指定,则递归组合配置的内容,替换重复子项的值。 -如果 `replace` 如果指定,则将整个元素替换为指定的元素。 +如果指定`replace`属性,则将整个元素替换为指定的元素。 -如果 `remove` 如果指定,则删除该元素。 +如果指定`remove`属性,则删除该元素。 -The config can also define «substitutions». If an element has the `incl` 属性时,从文件中的相应替换将被用作该值。 默认情况下,具有替换的文件的路径为 `/etc/metrika.xml`. 这可以在改变 [包括\_从](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) 服务器配置中的元素。 替换值在指定 `/yandex/substitution_name` 这个文件中的元素。 如果在指定的替换 `incl` 不存在,则将其记录在日志中。 要防止ClickHouse记录丢失的替换,请指定 `optional="true"` 属性(例如,设置 [宏](#macros) server\_settings/settings.md))。 +此外,配置文件还可指定"substitutions"。如果一个元素有`incl`属性,则文件中的相应替换值将被使用。默认情况下,具有替换的文件的路径为`/etc/metrika.xml`。这可以在服务配置中的[include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from)元素中被修改。替换值在这个文件的`/yandex/substitution_name`元素中被指定。如果`incl`中指定的替换值不存在,则将其记录在日志中。为防止ClickHouse记录丢失的替换,请指定`optional="true"`属性(例如,[宏](server-configuration-parameters/settings.md)设置)。 -替换也可以从ZooKeeper执行。 为此,请指定属性 `from_zk = "/path/to/node"`. 元素值被替换为节点的内容 `/path/to/node` 在动物园管理员。 您还可以将整个XML子树放在ZooKeeper节点上,并将其完全插入到源元素中。 +替换也可以从ZooKeeper执行。为此,请指定属性`from_zk = "/path/to/node"`。元素值被替换为ZooKeeper节点`/path/to/node`的内容。您还可以将整个XML子树放在ZooKeeper节点上,并将其完全插入到源元素中。 -该 `config.xml` 文件可以指定具有用户设置、配置文件和配额的单独配置。 这个配置的相对路径在 ‘users\_config’ 元素。 默认情况下,它是 `users.xml`. 如果 `users_config` 被省略,用户设置,配置文件和配额直接在指定 `config.xml`. 
+`config.xml` 文件可以指定单独的配置文件用于配置用户设置、配置文件及配额。可在`users_config`元素中指定其配置文件相对路径。其默认值是`users.xml`。如果`users_config`被省略,用户设置,配置文件和配额则直接在`config.xml`中指定。 -此外, `users_config` 可以从文件中复盖 `users_config.d` 目录(例如, `users.d`)和替换。 例如,您可以为每个用户提供单独的配置文件,如下所示: +用户配置可以像`config.xml`和`config.d/`那样拆分为单独的配置文件。目录名称为`users_config`配置的值去掉`.xml`后缀并添加`.d`后缀。由于`users_config`配置默认值为`users.xml`,所以目录名默认使用`users.d`。例如,您可以为每个用户使用单独的配置文件,如下所示: + +``` bash +$ cat /etc/clickhouse-server/users.d/alice.xml +``` ``` xml -$ cat /etc/clickhouse-server/users.d/alice.xml @@ -36,7 +40,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ``` -对于每个配置文件,服务器还会生成 `file-preprocessed.xml` 启动时的文件。 这些文件包含所有已完成的替换和复盖,并且它们旨在提供信息。 如果zookeeper替换在配置文件中使用,但ZooKeeper在服务器启动时不可用,则服务器将从预处理的文件中加载配置。 +对于每个配置文件,服务器还会在启动时生成 `file-preprocessed.xml` 文件。这些文件包含所有已完成的替换和覆盖,并且它们旨在提供信息。如果ZooKeeper替换在配置文件中使用,但ZooKeeper在服务器启动时不可用,则服务器将从预处理的文件中加载配置。 服务器跟踪配置文件中的更改,以及执行替换和复盖时使用的文件和ZooKeeper节点,并动态重新加载用户和集群的设置。 这意味着您可以在不重新启动服务器的情况下修改群集、用户及其设置。 From 981462db041e001573ffff35c0809aa0d8482ef2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:01:34 +0300 Subject: [PATCH 0449/2229] Remove dump flag from ReadBufferAIO --- src/IO/ReadBufferAIO.cpp | 11 ++++------- src/IO/ReadBufferAIO.h | 2 -- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/IO/ReadBufferAIO.cpp b/src/IO/ReadBufferAIO.cpp index ffe8183f005..4a52b1825f7 100644 --- a/src/IO/ReadBufferAIO.cpp +++ b/src/IO/ReadBufferAIO.cpp @@ -95,11 +95,8 @@ bool ReadBufferAIO::nextImpl() if (profile_callback) watch.emplace(clock_type); - if (!is_aio) - { + if (!is_pending_read) synchronousRead(); - is_aio = true; - } else receive(); @@ -215,7 +212,9 @@ void ReadBufferAIO::synchronousRead() void ReadBufferAIO::receive() { if (!waitForAIOCompletion()) - return; + { + throw Exception("Trying to receive data from AIO, but nothing was queued. It's a bug", ErrorCodes::LOGICAL_ERROR); + } finalize(); } @@ -224,8 +223,6 @@ void ReadBufferAIO::skip() if (!waitForAIOCompletion()) return; - is_aio = false; - /// @todo I presume this assignment is redundant since waitForAIOCompletion() performs a similar one // bytes_read = future_bytes_read.get(); if ((bytes_read < 0) || (static_cast(bytes_read) < region_left_padding)) diff --git a/src/IO/ReadBufferAIO.h b/src/IO/ReadBufferAIO.h index 77274c47073..5b2cf247a45 100644 --- a/src/IO/ReadBufferAIO.h +++ b/src/IO/ReadBufferAIO.h @@ -100,8 +100,6 @@ private: bool is_eof = false; /// At least one read request was sent. bool is_started = false; - /// Is the operation asynchronous? - bool is_aio = false; /// Did the asynchronous operation fail? 
bool aio_failed = false; From e3ae0734c9bbf234ed7cb8542e49f3b3778c8069 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:04:56 +0300 Subject: [PATCH 0450/2229] Better test --- tests/queries/0_stateless/01304_direct_io.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh index 0b6af15aa3b..32091acd5eb 100755 --- a/tests/queries/0_stateless/01304_direct_io.sh +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -9,8 +9,9 @@ $CLICKHOUSE_CLIENT --multiquery --query " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" -$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" >/dev/null 2>$CLICKHOUSE_TMP/err -cat $CLICKHOUSE_TMP/err +$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>$CLICKHOUSE_TMP/err +cat $CLICKHOUSE_TMP/err | grep Exception +cat $CLICKHOUSE_TMP/err | grep Loaded $CLICKHOUSE_CLIENT --multiquery --query " DROP TABLE bug;" From a6feb81af194376b8d6cdcdc46798418b425e368 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:11:11 +0300 Subject: [PATCH 0451/2229] Fix under msan --- src/IO/ReadBufferAIO.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/IO/ReadBufferAIO.cpp b/src/IO/ReadBufferAIO.cpp index 4a52b1825f7..8b01b67c0c0 100644 --- a/src/IO/ReadBufferAIO.cpp +++ b/src/IO/ReadBufferAIO.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,9 @@ void ReadBufferAIO::prepare() region_aligned_size = region_aligned_end - region_aligned_begin; buffer_begin = fill_buffer.internalBuffer().begin(); + + /// Unpoison because msan doesn't instrument linux AIO + __msan_unpoison(buffer_begin, fill_buffer.internalBuffer().size()); } void ReadBufferAIO::finalize() From f623ef635b3be124d218e1c480df837f1d539abe Mon Sep 17 00:00:00 2001 From: Pavel Kovalenko Date: Tue, 9 Jun 2020 18:26:51 +0300 Subject: [PATCH 0452/2229] Avoid copying whole response stream into memory in S3 HTTP client. --- contrib/aws | 2 +- src/IO/S3/PocoHTTPClient.cpp | 5 +++-- src/IO/S3/PocoHTTPClientFactory.cpp | 6 ++++-- src/IO/S3/PocoHTTPClientFactory.h | 11 +++++++---- src/IO/S3/PocoHTTPResponseStream.cpp | 12 ++++++++++++ src/IO/S3/PocoHTTPResponseStream.h | 21 +++++++++++++++++++++ 6 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 src/IO/S3/PocoHTTPResponseStream.cpp create mode 100644 src/IO/S3/PocoHTTPResponseStream.h diff --git a/contrib/aws b/contrib/aws index f7d9ce39f41..17e10c0fc77 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit f7d9ce39f41323300044567be007c233338bb94a +Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56 diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index b8de483a5a8..0dfa80ca107 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -149,8 +151,7 @@ void PocoHTTPClient::MakeRequestInternal( response->SetClientErrorMessage(error_message); } else - /// TODO: Do not copy whole stream. 
- Poco::StreamCopier::copyStream(response_body_stream, response->GetResponseBody()); + response->GetResponseStream().SetUnderlyingStream(std::make_shared(session, response_body_stream)); break; } diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index 033ad4af37c..e4b86593ec1 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -21,10 +21,12 @@ std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( - const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const + const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory &) const { auto request = Aws::MakeShared("PocoHTTPClientFactory", uri, method); - request->SetResponseStreamFactory(streamFactory); + + /// Don't create default response stream. Actual response stream will be set later in PocoHTTPClient. + request->SetResponseStreamFactory(null_factory); return request; } diff --git a/src/IO/S3/PocoHTTPClientFactory.h b/src/IO/S3/PocoHTTPClientFactory.h index ac586289113..4e555f05502 100644 --- a/src/IO/S3/PocoHTTPClientFactory.h +++ b/src/IO/S3/PocoHTTPClientFactory.h @@ -4,22 +4,25 @@ namespace Aws::Http { - class HttpClient; - class HttpRequest; +class HttpClient; +class HttpRequest; } namespace DB::S3 { - class PocoHTTPClientFactory : public Aws::Http::HttpClientFactory { public: ~PocoHTTPClientFactory() override = default; - [[nodiscard]] std::shared_ptr CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override; + [[nodiscard]] std::shared_ptr + CreateHttpClient(const Aws::Client::ClientConfiguration & clientConfiguration) const override; [[nodiscard]] std::shared_ptr CreateHttpRequest(const Aws::String & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override; [[nodiscard]] std::shared_ptr CreateHttpRequest(const Aws::Http::URI & uri, Aws::Http::HttpMethod method, const Aws::IOStreamFactory & streamFactory) const override; + +private: + const Aws::IOStreamFactory null_factory = []() { return nullptr; }; }; } diff --git a/src/IO/S3/PocoHTTPResponseStream.cpp b/src/IO/S3/PocoHTTPResponseStream.cpp new file mode 100644 index 00000000000..0a198268f2e --- /dev/null +++ b/src/IO/S3/PocoHTTPResponseStream.cpp @@ -0,0 +1,12 @@ +#include "PocoHTTPResponseStream.h" + +#include + +namespace DB::S3 +{ +PocoHTTPResponseStream::PocoHTTPResponseStream(std::shared_ptr session_, std::istream & response_stream_) + : Aws::IStream(response_stream_.rdbuf()), session(std::move(session_)) +{ +} + +} diff --git a/src/IO/S3/PocoHTTPResponseStream.h b/src/IO/S3/PocoHTTPResponseStream.h new file mode 100644 index 00000000000..8167ddc4346 --- /dev/null +++ b/src/IO/S3/PocoHTTPResponseStream.h @@ -0,0 +1,21 @@ +#pragma once + +#include +#include + +namespace DB::S3 +{ +/** + * Wrapper of IStream to store response stream and corresponding HTTP session. + */ +class PocoHTTPResponseStream : public Aws::IStream +{ +public: + PocoHTTPResponseStream(std::shared_ptr session_, std::istream & response_stream_); + +private: + /// Poco HTTP session is holder of response stream. 
+ std::shared_ptr session; +}; + +} From 2909b53fff48a2a8d206d3778a72901bd33905d8 Mon Sep 17 00:00:00 2001 From: "Fan()" <18501341937@163.com> Date: Tue, 9 Jun 2020 23:29:52 +0800 Subject: [PATCH 0453/2229] Update clickhouse-copier.md (#11559) --- docs/zh/operations/utilities/clickhouse-copier.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/operations/utilities/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md index a5364bcaa71..3dc29fe16fa 100644 --- a/docs/zh/operations/utilities/clickhouse-copier.md +++ b/docs/zh/operations/utilities/clickhouse-copier.md @@ -24,7 +24,7 @@ 该实用程序应手动运行: ``` bash -clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir +clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir ``` 参数: From f011ca02cf609e2c45395ed51c70fda58fc14e2b Mon Sep 17 00:00:00 2001 From: vxider Date: Tue, 9 Jun 2020 23:52:53 +0800 Subject: [PATCH 0454/2229] fix punctuation (#11553) --- docs/en/operations/configuration-files.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 30ea1f2e562..e1f9e427413 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -18,7 +18,7 @@ If `replace` is specified, it replaces the entire element with the specified one If `remove` is specified, it deletes the element. -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). +The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. 
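To make the substitution mechanics that both documentation patches above describe concrete, here is a minimal sketch of an `incl` substitution pair. The `my_macros` name and the values are illustrative assumptions, not taken from any patch in this series:

``` xml
<!-- /etc/metrika.xml (the default substitution file):
     values live under /yandex/<substitution_name> -->
<yandex>
    <my_macros>
        <shard>01</shard>
        <replica>replica-1</replica>
    </my_macros>
</yandex>

<!-- Server config: the element marked with incl receives the substitution's content;
     optional="true" keeps a missing substitution from being logged. -->
<yandex>
    <macros incl="my_macros" optional="true" />
</yandex>
```

In the same spirit, `from_zk="/path/to/node"` would fill the element from that ZooKeeper node's content instead of reading it from the substitution file.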
From d91f0bd580aa8632cc89aae0531281f300b48740 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 9 Jun 2020 19:07:40 +0300 Subject: [PATCH 0455/2229] Switch back to sentry upstream --- .gitmodules | 2 +- contrib/sentry-native | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index ff4e644f657..4175eb223db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/blinkov/sentry-native.git + url = https://github.com/getsentry/sentry-native.git diff --git a/contrib/sentry-native b/contrib/sentry-native index 18835dd8c49..9651561d45e 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit 18835dd8c496f22859bd6a1a7054a2bd4762e7ed +Subproject commit 9651561d45e4d00e9fe708275c086a3cfeb496bd From 6f673b0981c1b5c1a90df917d1f8d479309a5b0f Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 19:33:28 +0300 Subject: [PATCH 0456/2229] Fix comments bug --- src/Storages/IStorage.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 2ad89f559b5..31dd87a7ce4 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -382,8 +382,8 @@ void IStorage::alter( auto table_id = getStorageID(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); params.apply(old_metadata, context); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); - setColumns(std::move(metadata.columns)); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, old_metadata); + setColumns(std::move(old_metadata.columns)); } From e0de3aa7c98659f546761d579cedb74f38b41dd2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Jun 2020 19:39:26 +0300 Subject: [PATCH 0457/2229] Revert "Addition to #11184" --- docker/test/stateful/Dockerfile | 2 ++ docker/test/stateful_with_coverage/run.sh | 2 ++ docker/test/stateless/Dockerfile | 2 ++ docker/test/stateless_with_coverage/run.sh | 2 ++ docker/test/stress/Dockerfile | 1 + tests/config/log_queries.xml | 7 +++++++ tests/config/metric_log.xml | 8 ++++++++ 7 files changed, 24 insertions(+) create mode 100644 tests/config/log_queries.xml create mode 100644 tests/config/metric_log.xml diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index d751a2532bc..3aff49bf5a1 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -24,6 +24,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index 
5530aadb4ca..b946f5b187d 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -59,7 +59,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 8fbaffe88bc..41a53f8a3f5 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -62,7 +62,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 12ed7a25b75..185dc95c783 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -50,7 +50,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 66f5135a4a4..a5aa3bbf004 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -31,6 +31,7 @@ CMD dpkg -i 
package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ dpkg -i package_folder/clickhouse-client_*.deb; \ dpkg -i package_folder/clickhouse-test_*.deb; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \ echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \ diff --git a/tests/config/log_queries.xml b/tests/config/log_queries.xml new file mode 100644 index 00000000000..25261072ade --- /dev/null +++ b/tests/config/log_queries.xml @@ -0,0 +1,7 @@
+<yandex>
+    <profiles>
+        <default>
+            <log_queries>1</log_queries>
+        </default>
+    </profiles>
+</yandex>
diff --git a/tests/config/metric_log.xml b/tests/config/metric_log.xml new file mode 100644 index 00000000000..0ca9f162416 --- /dev/null +++ b/tests/config/metric_log.xml @@ -0,0 +1,8 @@
+<yandex>
+    <metric_log>
+        <database>system</database>
+        <table>metric_log</table>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    </metric_log>
+</yandex>
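The two files restored by this revert enable query logging and the metric log in the test images. A hypothetical sanity check from `clickhouse-client` (not part of the patch) might look like this:

``` sql
-- log_queries.xml sets log_queries = 1 for the default profile,
-- so executed statements should land in system.query_log after a flush.
SELECT 1;
SYSTEM FLUSH LOGS;
SELECT query FROM system.query_log WHERE query LIKE 'SELECT 1%' LIMIT 1;

-- metric_log.xml enables periodic collection into system.metric_log
-- (collected every 1000 ms, flushed every 7500 ms).
SELECT count() > 0 FROM system.metric_log;
```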
From 4c2e0561f501e7d2c8593aabf80495a1a0386529 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 20:21:38 +0300 Subject: [PATCH 0458/2229] Fix primary key update --- src/Storages/MergeTree/MergeTreeData.cpp | 28 ++++++++++++------------ src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8ca125eff64..0f142089bc2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -275,22 +275,22 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam } } -void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool only_check, bool attach) +void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool only_check, bool attach) { - KeyDescription new_primary_key = metadata.primary_key; + KeyDescription new_primary_key = new_metadata.primary_key; - if (!metadata.sorting_key.definition_ast) + if (!new_metadata.sorting_key.definition_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); KeyDescription new_sorting_key; if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) new_sorting_key = KeyDescription::getKeyFromAST( - metadata.sorting_key.definition_ast, - metadata.columns, + new_metadata.sorting_key.definition_ast, + new_metadata.columns, global_context, std::make_shared(merging_params.version_column)); else - new_sorting_key = metadata.sorting_key; + new_sorting_key = new_metadata.sorting_key; /// Primary key not defined at all if (new_primary_key.definition_ast == nullptr) @@ -299,7 +299,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool /// because in merge tree code we sometimes chech, that our primary key /// is fake (copied from sorting key, i.e. isPrimaryKeyDefined() == /// false, but hasSortingKey() == true) - new_primary_key = metadata.sorting_key; + new_primary_key = new_metadata.sorting_key; new_primary_key.definition_ast = nullptr; } @@ -330,7 +330,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool } } - auto all_columns = metadata.columns.getAllPhysical(); + auto all_columns = new_metadata.columns.getAllPhysical(); /// Order by check AST if (hasSortingKey() && only_check) @@ -372,7 +372,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool "added to the sorting key. 
You can add expressions that use only the newly added columns", ErrorCodes::BAD_ARGUMENTS); - if (metadata.columns.getDefaults().count(col)) + if (new_metadata.columns.getDefaults().count(col)) throw Exception("Newly added column " + col + " has a default expression, so adding " "expressions that use it to the sorting key is forbidden", ErrorCodes::BAD_ARGUMENTS); @@ -380,11 +380,11 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool } } - if (!metadata.secondary_indices.empty()) + if (!new_metadata.secondary_indices.empty()) { std::unordered_set indices_names; - for (const auto & index : metadata.secondary_indices) + for (const auto & index : new_metadata.secondary_indices) { MergeTreeIndexFactory::instance().validate(index, attach); @@ -403,9 +403,9 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool if (!only_check) { /// Other parts of metadata initialized is separate methods - setColumns(std::move(metadata.columns)); - setSecondaryIndices(std::move(metadata.secondary_indices)); - setConstraints(std::move(metadata.constraints)); + setColumns(std::move(new_metadata.columns)); + setSecondaryIndices(std::move(new_metadata.secondary_indices)); + setConstraints(std::move(new_metadata.constraints)); setSortingKey(std::move(new_sorting_key)); setPrimaryKey(std::move(new_primary_key)); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8b24bea7830..4080a898106 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -780,7 +780,7 @@ protected: /// The same for clearOldTemporaryDirectories. std::mutex clear_old_temporary_directories_mutex; - void setProperties(const StorageInMemoryMetadata & metadata, bool only_check = false, bool attach = false); + void setProperties(const StorageInMemoryMetadata & new_metadata, bool only_check = false, bool attach = false); void initPartitionKey(const KeyDescription & new_partition_key); From 936cc9d57376d1e307a75cb880b93dab2c352fe2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 20:22:03 +0300 Subject: [PATCH 0459/2229] Update key --- src/Storages/AlterCommands.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 33a62d9bd1c..5652d1717ec 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -313,6 +313,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con column.default_desc.expression = default_expression; } }); + + if (metadata.sorting_key.sample_block.has(column_name)) + metadata.sorting_key = KeyDescription::getKeyFromAST(metadata.sorting_key.definition_ast, metadata.columns, context); } else if (type == MODIFY_ORDER_BY) { From d2fcf5aea5421508f5b9ce3dec1b282dc9f6ecf1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 20:28:29 +0300 Subject: [PATCH 0460/2229] Fixes for gcc --- src/Storages/MergeTree/MergeTreeData.cpp | 16 ++++++++-------- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/StorageBuffer.cpp | 8 ++++---- src/Storages/StorageDistributed.cpp | 8 ++++---- src/Storages/StorageMergeTree.cpp | 24 ++++++++++++------------ src/Storages/StorageNull.cpp | 8 ++++---- 6 files changed, 33 insertions(+), 33 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0f142089bc2..dd70e7b2b5c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/src/Storages/MergeTree/MergeTreeData.cpp @@ -118,7 +118,7 @@ const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt"; MergeTreeData::MergeTreeData( const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, @@ -142,21 +142,21 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - setSettingsChanges(metadata.settings_changes); + setSettingsChanges(metadata_.settings_changes); const auto settings = getSettings(); - setProperties(metadata, /*only_check*/ false, attach); + setProperties(metadata_, /*only_check*/ false, attach); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); - if (metadata.sampling_key.definition_ast != nullptr) + if (metadata_.sampling_key.definition_ast != nullptr) { const auto & pk_sample_block = getPrimaryKey().sample_block; - if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !attach + if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - setSamplingKey(metadata.sampling_key); + setSamplingKey(metadata_.sampling_key); } MergeTreeDataFormatVersion min_format_version(0); @@ -181,11 +181,11 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - initPartitionKey(metadata.partition_key); + initPartitionKey(metadata_.partition_key); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } - setTTLExpressions(metadata.columns, metadata.table_ttl); + setTTLExpressions(metadata_.columns, metadata_.table_ttl); /// format_file always contained on any data path PathWithDisk version_file; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4080a898106..0e2e9c71324 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -323,7 +323,7 @@ public: /// attach - whether the existing table is attached or the new table is created. MergeTreeData(const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 2d8c3fd9a2f..1f2c05bf7ad 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -775,10 +775,10 @@ void StorageBuffer::alter(const AlterCommands & params, const Context & context, /// So that no blocks of the old structure remain. 
optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, context); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); - setColumns(std::move(metadata.columns)); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setColumns(std::move(new_metadata.columns)); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d80fee1e4dc..7e7460a013f 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -559,10 +559,10 @@ void StorageDistributed::alter(const AlterCommands & params, const Context & con auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, context); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); - setColumns(std::move(metadata.columns)); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setColumns(std::move(new_metadata.columns)); } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 8ccd1712232..76f752abb68 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -52,7 +52,7 @@ namespace ActionLocks StorageMergeTree::StorageMergeTree( const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata, + const StorageInMemoryMetadata & metadata_, bool attach, Context & context_, const String & date_column_name, @@ -62,7 +62,7 @@ StorageMergeTree::StorageMergeTree( : MergeTreeData( table_id_, relative_data_path_, - metadata, + metadata_, context_, date_column_name, merging_params_, @@ -256,20 +256,20 @@ void StorageMergeTree::alter( { auto table_id = getStorageID(); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - auto maybe_mutation_commands = commands.getMutationCommands(metadata, context.getSettingsRef().materialize_ttl_after_modify, context); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, context.getSettingsRef().materialize_ttl_after_modify, context); String mutation_file_name; Int64 mutation_version = -1; - commands.apply(metadata, context); + commands.apply(new_metadata, context); - /// This alter can be performed at metadata level only + /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(metadata.settings_changes, table_lock_holder); + changeSettings(new_metadata.settings_changes, table_lock_holder); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); } else { @@ -278,13 +278,13 @@ void StorageMergeTree::alter( auto merges_block = getActionLock(ActionLocks::PartsMerge); 
lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(metadata.settings_changes, table_lock_holder); + changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. - setProperties(metadata); + setProperties(new_metadata); - setTTLExpressions(metadata.columns, metadata.table_ttl); + setTTLExpressions(new_metadata.columns, new_metadata.table_ttl); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); if (!maybe_mutation_commands.empty()) mutation_version = startMutation(maybe_mutation_commands, mutation_file_name); diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index c29562325fa..b6e4605530d 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -51,10 +51,10 @@ void StorageNull::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, context); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); - setColumns(std::move(metadata.columns)); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setColumns(std::move(new_metadata.columns)); } } From 4c77ba664a95e7c474af18a79a09a5733f09a740 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 20:42:04 +0300 Subject: [PATCH 0461/2229] Better assignment --- src/Storages/IndicesDescription.cpp | 3 +++ src/Storages/KeyDescription.cpp | 3 +++ src/Storages/SelectQueryDescription.cpp | 3 +++ src/Storages/StorageInMemoryMetadata.cpp | 3 +++ src/Storages/TTLDescription.cpp | 3 +++ 5 files changed, 15 insertions(+) diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 6f8406d9a3f..d59aef2ecaa 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -36,6 +36,9 @@ IndexDescription::IndexDescription(const IndexDescription & other) IndexDescription & IndexDescription::operator=(const IndexDescription & other) { + if (&other == this) + return *this; + if (other.definition_ast) definition_ast = other.definition_ast->clone(); else diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index e59693f5343..5d44ced78db 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -22,6 +22,9 @@ KeyDescription::KeyDescription(const KeyDescription & other) KeyDescription & KeyDescription::operator=(const KeyDescription & other) { + if (&other == this) + return *this; + if (other.definition_ast) definition_ast = other.definition_ast->clone(); else diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 3d2ba27a62c..87ba4ce74a9 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -24,6 +24,9 @@ SelectQueryDescription::SelectQueryDescription(const SelectQueryDescription & ot SelectQueryDescription & SelectQueryDescription::SelectQueryDescription::operator=(const 
SelectQueryDescription & other) { + if (&other == this) + return *this; + select_table_id = other.select_table_id; if (other.select_query) select_query = other.select_query->clone(); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 0f8b88e691b..e83a41a3877 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -30,6 +30,9 @@ StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemoryMetadata & other) { + if (&other == this) + return *this; + columns = other.columns; secondary_indices = other.secondary_indices; constraints = other.constraints; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index ac77bd51b69..e241b7676a0 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -72,6 +72,9 @@ TTLDescription::TTLDescription(const TTLDescription & other) TTLDescription & TTLDescription::operator=(const TTLDescription & other) { + if (&other == this) + return *this; + mode = other.mode; if (other.expression_ast) expression_ast = other.expression_ast->clone(); From f872c639ed6893d0731ed61c1927f3c6f313f0d2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 9 Jun 2020 20:44:56 +0300 Subject: [PATCH 0462/2229] Try to disable linker options from sentry --- .gitmodules | 2 +- cmake/find/sentry.cmake | 1 + contrib/sentry-native | 2 +- programs/odbc-bridge/CMakeLists.txt | 1 - 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4175eb223db..ff4e644f657 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/getsentry/sentry-native.git + url = https://github.com/blinkov/sentry-native.git diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index e1cd28c1d59..7fa384cb906 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -11,6 +11,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) set (SENTRY_TRANSPORT "curl" CACHE STRING "") set (SENTRY_BACKEND "none" CACHE STRING "") + set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "") if (OS_LINUX AND NOT_UNBUNDLED) set (BUILD_SHARED_LIBS OFF) diff --git a/contrib/sentry-native b/contrib/sentry-native index 9651561d45e..78fb54989cd 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit 9651561d45e4d00e9fe708275c086a3cfeb496bd +Subproject commit 78fb54989cd61cf11dcea142e12d1ecc6940c962 diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 51abf4a9adb..af59383d030 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -10,7 +10,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES PingHandler.cpp validateODBCConnectionString.cpp ) -set (USE_SENTRY OFF CACHE BOOL "" FORCE) set (CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE clickhouse_parsers From 8a78cffe5c3e8b9ac2e26e0731c40306cad4c9e1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 21:11:08 +0300 Subject: [PATCH 0463/2229] Force table creation on SYSTEM FLUSH LOGS --- src/Interpreters/InterpreterSystemQuery.cpp | 12 +++--- src/Interpreters/SystemLog.h | 14 +++++-- 
.../test_SYSTEM_FLUSH_LOGS/test.py | 38 +++++++++++++++++++ 3 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 tests/integration/test_SYSTEM_FLUSH_LOGS/test.py diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9ebdb155643..1480651b4b6 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -301,12 +301,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: context.checkAccess(AccessType::SYSTEM_FLUSH_LOGS); executeCommandsAndThrowIfError( - [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(); }, - [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(); }, - [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(); }, - [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(); }, - [&] () { if (auto text_log = context.getTextLog()) text_log->flush(); }, - [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(); } + [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(true); }, + [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(true); }, + [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(true); }, + [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(true); }, + [&] () { if (auto text_log = context.getTextLog()) text_log->flush(true); }, + [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(true); } ); break; case Type::STOP_LISTEN_QUERIES: diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index dd2f815ce92..1f889d53aec 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -76,7 +76,8 @@ class ISystemLog public: virtual String getName() = 0; virtual ASTPtr getCreateTableQuery() = 0; - virtual void flush() = 0; + //// force -- force table creation (used for SYSTEM FLUSH LOGS) + virtual void flush(bool force = false) = 0; virtual void prepareTable() = 0; virtual void startup() = 0; virtual void shutdown() = 0; @@ -133,7 +134,7 @@ public: void stopFlushThread(); /// Flush data in the buffer to disk - void flush() override; + void flush(bool force = false) override; /// Start the background thread. void startup() override; @@ -166,6 +167,8 @@ private: /* Data shared between callers of add()/flush()/shutdown(), and the saving thread */ std::mutex mutex; + /* prepareTable() guard */ + std::mutex prepare_mutex; // Queue is bounded. But its size is quite large to not block in all normal cases. std::vector queue; // An always-incrementing index of the first message currently in the queue. 
@@ -272,13 +275,16 @@ void SystemLog::add(const LogElement & element) template -void SystemLog::flush() +void SystemLog::flush(bool force) { std::unique_lock lock(mutex); if (is_shutdown) return; + if (force) + prepareTable(); + const uint64_t queue_end = queue_front_index + queue.size(); if (requested_flush_before < queue_end) @@ -429,6 +435,8 @@ void SystemLog::flushImpl(const std::vector & to_flush, template void SystemLog::prepareTable() { + std::unique_lock prepare_lock(prepare_mutex); + String description = table_id.getNameForLogs(); table = DatabaseCatalog::instance().tryGetTable(table_id, context); diff --git a/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py b/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py new file mode 100644 index 00000000000..2329094e150 --- /dev/null +++ b/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py @@ -0,0 +1,38 @@ +# pylint: disable=line-too-long +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node_default') + +system_logs = [ + # disabled by default + ('system.part_log', 0), + ('system.text_log', 0), + + # enabled by default + ('system.query_log', 1), + ('system.query_thread_log', 1), + ('system.trace_log', 1), + ('system.metric_log', 1), +] + +@pytest.fixture(scope='module') +def start_cluster(): + try: + cluster.start() + node.query('SYSTEM FLUSH LOGS') + yield cluster + finally: + cluster.shutdown() + +@pytest.mark.parametrize('table,exists', system_logs) +def test_system_logs(start_cluster, table, exists): + q = 'SELECT * FROM {}'.format(table) + if exists: + node.query(q) + else: + assert "Table {} doesn't exist".format(table) in node.query_and_get_error(q) From 75977bd939887daa55f320951b431f4392a0acb6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 00:22:01 +0300 Subject: [PATCH 0464/2229] Gcc fixed --- src/Storages/MergeTree/MergeTreeData.cpp | 12 ++++++------ src/Storages/StorageMaterializedView.cpp | 10 +++++----- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index dd70e7b2b5c..50971bbc881 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1273,9 +1273,9 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) { /// Check that needed transformations can be applied to the list of columns without considering type conversions. 
- StorageInMemoryMetadata metadata = getInMemoryMetadata(); - commands.apply(metadata, global_context); - if (getSecondaryIndices().empty() && !metadata.secondary_indices.empty() && + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + commands.apply(new_metadata, global_context); + if (getSecondaryIndices().empty() && !new_metadata.secondary_indices.empty() && !settings.allow_experimental_data_skipping_indices) throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); @@ -1365,15 +1365,15 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S } } - setProperties(metadata, /* only_check = */ true); + setProperties(new_metadata, /* only_check = */ true); - setTTLExpressions(metadata.columns, metadata.table_ttl, /* only_check = */ true); + setTTLExpressions(new_metadata.columns, new_metadata.table_ttl, /* only_check = */ true); if (hasSettingsChanges()) { const auto & current_changes = getSettingsChanges()->as().changes; - const auto & new_changes = metadata.settings_changes->as().changes; + const auto & new_changes = new_metadata.settings_changes->as().changes; for (const auto & changed_setting : new_changes) { if (MergeTreeSettings::findIndex(changed_setting.name) == MergeTreeSettings::npos) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a7a59f0e9b9..08984a6e1f3 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -193,13 +193,13 @@ void StorageMaterializedView::alter( { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); - StorageInMemoryMetadata metadata = getInMemoryMetadata(); - params.apply(metadata, context); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); /// start modify query if (context.getSettingsRef().allow_experimental_alter_materialized_view_structure) { - const auto & new_select = metadata.select; + const auto & new_select = new_metadata.select; const auto & old_select = getSelectQuery(); DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); @@ -208,8 +208,8 @@ void StorageMaterializedView::alter( } /// end modify query - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, metadata); - setColumns(std::move(metadata.columns)); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setColumns(std::move(new_metadata.columns)); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f4941a05bf2..8efe22e03f9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -415,12 +415,12 @@ void StorageReplicatedMergeTree::createTableIfNotExists() zookeeper->createAncestors(zookeeper_path); /// We write metadata of table so that the replicas can check table parameters with them. 
- String metadata = ReplicatedMergeTreeTableMetadata(*this).toString(); + String metadata_str = ReplicatedMergeTreeTableMetadata(*this).toString(); Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata, + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(), zkutil::CreateMode::Persistent)); From c95d5786fdda6cb8057e2eff885b750f07839315 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2020 01:52:25 +0300 Subject: [PATCH 0465/2229] Fixup --- tests/performance/generate_table_function.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index 74d4ad4d014..f09753c963a 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -3,7 +3,7 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('ui64 UInt64, i64 Int64, ui32 UInt32, i32 Int32, ui16 UInt16, i16 Int16, ui8 UInt8, i8 Int8', 0, 10, 10) LIMIT 10000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 10000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 10000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200)))', 0, 10, 10) LIMIT 10000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 10000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 10000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 10000000); From 898d83491dcf3592ceba4bf66eacf975588f26a6 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 10 Jun 2020 02:37:20 +0300 Subject: [PATCH 0466/2229] fixup --- tests/performance/set_hits.xml | 12 ++++++------ tests/performance/set_index.xml | 4 +--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/performance/set_hits.xml b/tests/performance/set_hits.xml index 8b9ae1da83b..f788bb43196 100644 --- a/tests/performance/set_hits.xml +++ b/tests/performance/set_hits.xml @@ -1,16 +1,16 @@ - + hits_10m_single hits_100m_single - - - SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single WHERE AdvEngineID != 0) + + SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single WHERE AdvEngineID != 0) SETTINGS max_threads = 1 SELECT count() FROM hits_10m_single WHERE UserID IN (SELECT UserID FROM hits_10m_single) SELECT count() FROM hits_10m_single WHERE SearchPhrase IN (SELECT SearchPhrase FROM hits_10m_single) - SELECT count() FROM hits_10m_single WHERE URL IN (SELECT URL FROM hits_10m_single WHERE AdvEngineID != 0) + SELECT count() FROM hits_100m_single WHERE URL IN (SELECT URL FROM hits_100m_single WHERE 
AdvEngineID != 0) SELECT count() FROM hits_10m_single WHERE URL IN (SELECT URL FROM hits_10m_single WHERE SearchEngineID != 0) - SELECT count() FROM hits_10m_single WHERE RegionID IN (SELECT RegionID FROM hits_10m_single) + SELECT count() FROM hits_100m_single WHERE RegionID IN (SELECT RegionID FROM hits_100m_single) diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml index e85ded1d8fa..021f6dc60f3 100644 --- a/tests/performance/set_index.xml +++ b/tests/performance/set_index.xml @@ -1,9 +1,7 @@ - CREATE TABLE test_in (`a` UInt32) ENGINE = MergeTree() ORDER BY a INSERT INTO test_in SELECT number FROM numbers(500000000) - SELECT count() FROM test_in WHERE a IN (SELECT rand(1) FROM numbers(100000)) SETTINGS max_rows_to_read = 1, read_overflow_mode = 'break' @@ -20,7 +18,7 @@ comparison), because it tests that the big set is parsed fast enough, not the lookups we do in this set. --> - SELECT count() FROM numbers(100) WHERE toString(number) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', 
'66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', 
'90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', 
'89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', + SELECT toString(rand()) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012', '46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', 
'60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', '89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', 
'60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', '68446', '58896', '60969', '19849', '6722', '91854', '49519', '13949', '67109', '48824', '31723', '75554', '69575', '94986', '75350', '18628', '15284', '41943', '15433', '52607', '41', '22340', '29528', '24059', '34145', '72517', '46316', '10667', '54510', '19882', '47764', '69124', '41963', '84350', '48420', '4646', '24958', '69020', '97121', '26178', '62664', '50622', '32554', '49655', '45398', '11267', '72222', '73363', '89554', 
'89046', '57384', '29259', '37888', '24850', '74353', '57343', '34762', '2900', '11393', '42154', '94306', '70552', '75265', '921', '26003', '64352', '89857', '83171', '58249', '48940', '53512', '66335', '44865', '68729', '19299', '58003', '39854', '99122', '3860', '80173', '52242', '90966', '53183', '71982', '82325', '87842', '15000', '55627', '71132', '6354', '42402', '91719', '91644', '94533', '74925', '66278', '66911', '85576', '40495', '70919', '71797', '87835', '29845', '71832', '3390', '7994', '33499', '70715', '54897', '82710', '63077', '78105', '24758', '89585', '84607', '46477', '78618', '10989', '39222', '98749', '51685', '94664', '31008', '32823', '89521', '72160', '26952', '4001', '21421', '5737', '74027', '88179', '45553', '83743', '19913', '49435', '65616', '82641', '5149', '76959', '40681', '73023', '2670', '30845', '18863', '35094', '88400', '80963', '9154', '16571', '64192', '59694', '41317', '59942', '58856', '99281', '67260', '66971', '22716', '76089', '58047', '67071', '53707', '462', '52518', '72277', '10681', '69', '98855', '12593', '88842', '67242', '73543', '37439', '18413', '67211', '93495', '45576', '70614', '27988', '53210', '18618', '21318', '68059', '25518', '55917', '56522', '16548', '2404', '93538', '61452', '66358', '3709', '23914', '92426', '81439', '38070', '28988', '29939', '2948', '85720', '45628', '51101', '89431', '86365', '17571', '50987', '83849', '11015', '83812', '66187', '26362', '66786', '22024', '93866', '36161', '90080', '64874', '37294', '83860', '73821', '80279', '36766', '73117', '44620', '84556', '42070', '90383', '27862', '20665', '67576', '34997', '57958', '80638', '84351', '63961', '1362', '14338', '80377', '24192', '41294', '57368', '51189', '27287', '45764', '86289', '65600', '708', '84090', '96005', '55676', '84855', '72385', '70018', '9336', '82701', '3710', '52083', '74045', '96454', '30956', '67369', '78941', '81810', '71906', '23194', '33042', '50794', '61256', '24449', '48639', '22916', '78303', '13666', '40762', '43942', '51075', '89783', '95786', '90462', '6181', '36482', '40675', '4970', '6388', '91849', '72579', '94983', '86084', '20140', '68427', '48123', '43122', '98066', '37560', '6927', '72803', '5546', '62259', '98439', '6457', '98568', '70499', '33022', '28226', '29675', '20917', '75365', '20900', '8190', '56736', '99153', '77779', '49333', '50293', '97650', '4067', '47278', '42761', '71875', '13966', '11223', '46783', '18059', '61355', '29638', '75681', '24466', '89634', '20759', '83252', '37780', '15931', '74893', '6703', '64524', '80656', '85990', '78427', '18411', '20696', '86432', '93176', '69889', '15072', '15180', '9935', '10467', '60248', '42430', '62590', '89596', '27743', '26398', '79912', '60048', '50943', '38870', '69383', '72261', '98059', '55242', '74905', '5667', '54321', '70415', '39903', '49711', '85318', '79979', '59262', '82321', '15263', '17416', '74554', '94733', '72112', '49872', '54849', '73883', '78250', '74935', '68559', '57564', '50541', '45730', '41595', '5588', '83723', '42891', '11898', '14348', '99732', '14481', '85233', '21277', '94508', '52551', '74187', '7634', '42912', '25100', '43536', '35798', '48190', '86477', '22680', '48148', '59501', '56563', '16802', '81496', '97568', '68657', '51462', '67953', '99660', '39002', '54170', '57190', '68086', '52700', '6487', '55709', '70418', '62629', '70420', '35695', '36152', '45360', '53503', '46623', '76000', '50648', '97876', '44815', '29163', '1356', '64123', '71388', '17658', '99084', '58727', '59437', '38773', '71254', '81286', '97545', 
'18786', '56834', '20346', '36401', '62316', '58082', '67959', '99876', '69895', '80099', '62747', '20517', '99777', '6472', '49189', '31321', '39992', '68073', '13378', '51806', '21776', '52060', '96983', '25754', '93709', '96627', '8644', '93726', '14002', '37716', '87620', '34507', '76339', '24491', '5849', '44110', '522', '66521', '12776', '44887', '80535', '14548', '75248', '671', '73071', '35715', '59474', '7061', '82243', '56170', '20179', '59717', '1725', '24634', '11270', '77023', '63840', '46608', '44667', '22422', '59771', '94768', '73033', '82905', '16463', '40971', '22204', '58366', '28721', '14907', '76468', '81872', '38418', '36989', '61439', '10610', '131', '44296', '35453', '10117', '75856', '94603', '99602', '68075', '35949', '13599', '50030', '69633', '55956', '85465', '16429', '86081', '11145', '6195', '82207', '90598', '92814', '23725', '83204', '80346', '71542', '46634', '15820', '54123', '45397', '15322', '61743', '9273', '71347', '6835', '64006', '91718', '43677', '32923', '21486', '17098', '61694', '43347', '40019', '4071', '52443', '42386', '56839', '83514', '27633', '40780', '51749', '92101', '62384', '92206', '56044', '66174', '11137', '73966', '78471', '30468', '31643', '33197', '6888', '8066', '86603', '74383', '6098', '54411', '98819', '89862', '88639', '94422', '89371', '80526', '91747', '91220', '64944', '76658', '42046', '58518', '27249', '6646', '3028', '1346', '33763', '9734', '31737', '65527', '5892', '60813', '3410', '35464', '43009', '98382', '70580', '93898', '56404', '32995', '62771', '71556', '40538', '55612', '45656', '10758', '20268', '33603', '38310', '14242', '74397', '10722', '71575', '22590', '49043', '91439', '9055', '23668', '9101', '5268', '64133', '77501', '64684', '11337', '47575', '50732', '88680', '93730', '46785', '17589', '3520', '57595', '71241', '34994', '8753', '36147', '88844', '41914', '11250', '94632', '71927', '4624', '86279', '7664', '2659', '94853', '65386', '30438', '86005', '92883', '84629', '59910', '44484', '1306', '8404', '56962', '29990', '38445', '96191', '73013', '66590', '40951', '24712', '18825', '37268', '87843', '18972', '12154', '7779', '52149', '76152', '65799', '86011', '35475', '78083', '88232', '91551', '65532', '93516', '73827', '24227', '44687', '55759', '83819', '45088', '10856', '60488', '39051', '14103', '76650', '81181', '46731', '737', '58788', '78945', '42096', '66731', '66740', '72273', '88969', '5655', '86590', '41096', '80038', '32430', '51877', '23970', '91900', '13082', '45880', '94367', '19739', '61998', '71665', '16083', '57035', '26916', '10166', '18834', '46798', '66881', '28444', '68840', '10459', '81087', '4728', '76224', '39257', '23470', '93524', '37345', '30074', '49856', '22022', '55279', '5159', '5193', '58030', '57539', '12514', '49759', '96222', '52597', '67192', '88187', '53614', '16084', '79915', '28212', '79334', '85283', '32306', '31058', '43113', '74707', '74869', '2213', '32134', '6379', '85426', '87098', '35984', '51105', '69287', '16803', '83337', '14913', '62531', '58098', '7914', '20105', '28850', '1384', '43173', '62983', '87113', '76066', '86320', '77684', '45191', '95225', '41503', '36713', '48404', '91228', '53865', '98981', '59161', '61237', '84561', '17455', '14379', '57789', '80895', '99260', '84595', '72942', '53220', '84448', '81332', '49437', '83086', '93414', '54519', '52288', '74772', '22460', '49324', '11168', '96071', '61985', '38284', '6405', '54698', '71727', '60093', '37340', '87884', '83403', '4542', '94949', '19636', '15855', '39105', '10424', '67418', 
'91022', '69254', '8481', '38411', '3832', '44354', '93548', '57172', '28481', '372', '81497', '52179', '41060', '72141', '41396', '65590', '70432', '82819', '93814', '26118', '84780', '88485', '70821', '8222', '83000', '47067', '38516', '33347', '47681', '48202', '60749', '52112', '7937', '28105', '11394', '45746', '43252', '34494', '2979', '69715', '42486', '82315', '71760', '97413', '66137', '94487', '7429', '74434', '22964', '55251', '3448', '53534', '2574', '9693', '96157', '2955', '4348', '19566', '56930', '83319', '31310', '53905', '1148', '41726', '22233', '76045', '37351', '10545', '17581', '28047', '30199', '4741', '58111', '33497', '67796', '67730', '31247', '43772', '29461', '45970', '73353', '22534', '53962', '32147', '71392', '62579', '66345', '58246', '33442', '9581', '29705', '14058', '86471', '76125', '59363', '94982', '74810', '89149', '20066', '3366', '3568', '25752', '80036', '64119', '27270', '40061', '91052', '69022', '9852', '77112', '83075', '43924', '61661', '56133', '96652', '57944', '72576', '82170', '79236', '55745', '15309', '88878', '72761', '37647', '67465', '12777', '97309', '93202', '41470', '8787', '64920', '48514', '18917', '35157', '59151', '4640', '5317', '38134', '76548', '82788', '9214', '58418', '73185', '90554', '10543', '47182', '62936', '91765', '89751', '68931', '48865', '64607', '7150', '77862', '14297', '14828', '33013', '91698', '67593', '98096', '16595', '51639', '86531', '24719', '1703', '78788', '43810', '38918', '95491', '99903', '82671', '8291', '68288', '31224', '39863', '4265', '77798', '7698', '33804', '92286', '4744', '37038', '44203', '98212', '17369', '77442', '62879', '4145', '96881', '15646', '36824', '19959', '45451', '76049', '54272', '97577', '95298', '81115', '30204', '82041', '8037', '10052', '8756', '76833', '82851', '24276', '75574', '36037', '78079', '92807', '29064', '90000', '84150', '17102', '75092', '49424', '35597', '4693', '82853', '42511', '16119', '23478', '65240', '55585', '91762', '71671', '46682', '72479', '97696', '24615', '12579', '30274', '48255', '2336', '90202', '5808', '45426', '76308', '74639', '31245', '99894', '89638', '6233', '33893', '71899', '85273', '89429', '29761', '50231', '57249', '99347', '22642', '66972', '86221', '47514', '88274', '10819', '73150', '53754', '13304', '20478', '38099', '619', '14669', '8011', '97657', '26569', '65430', '13467', '38180', '23675', '72350', '42257', '39875', '23529', '53407', '11833', '29599', '95621', '7727', '59527', '86846', '22860', '5358', '3730', '87555', '362', '95755', '54565', '29935', '68950', '52349', '98344', '86576', '7420', '12236', '15844', '48099', '97535', '97081', '50261', '31187', '60496', '24123', '24042', '6376', '6679', '99806', '20306', '60676', '36881', '77309', '5247', '96569', '53417', '73252', '64179', '35318', '75732', '65119', '32621', '40464', '22887', '96152', '65161', '83381', '8915', '68142', '7328', '85031', '15688', '72519', '93992', '86927', '75538', '38205', '50877', '70039', '97538', '94822', '52131', '49643', '85206', '1347', '14574', '88736', '53442', '49991', '64925', '72283', '82213', '60905', '36118', '62963', '16983', '79185', '15111', '26059', '17792', '98218', '33214', '1094', '41754', '77275', '65173', '13190', '91004', '90422', '44387', '92672', '98641', '54609', '83295', '37395', '70104', '32986', '72524', '82478', '5837', '83916', '52736', '57112', '55985', '42642', '42136', '89642', '35712', '49489', '19726', '65824', '24384', '48112', '15366', '99206', '68384', '51389', '529', '21475', '75749', '95182', '60110', 
'70571', '74174', '38105', '78107', '4101', '8982', '11215', '23987', '3303', '28706', '54629', '98000', '67510', '30036', '99140', '48896', '40971', '7735', '79984', '50134', '94928', '57023', '52880', '83067', '41940', '62994', '89213', '38593', '19283', '68206', '22234', '19245', '26266', '32403', '65889', '17022', '64280', '42797', '27161', '57675', '42313', '93606', '93082', '20659', '90824', '1226', '66266', '12503', '57104', '15247', '51160', '92398', '71967', '59476', '44465', '35765', '10787', '47737', '45792', '2292', '47599', '89612', '8162', '87622', '69410', '45727', '31158', '99791', '89544', '27214', '99588', '40516', '75616', '36505', '46079', '95448', '97999', '47462', '47799', '82729', '34038', '60789', '96938', '22682', '79062', '93307', '36038', '49016', '90983', '48219', '50889', '32517', '72219', '71229', '82643', '1195', '70543', '17', '22178', '23544', '72371', '1163', '28527', '7336', '39846', '31956', '80963', '41804', '59791', '41831', '1940', '52377', '79494', '12531', '81112', '44320', '18746', '5774', '63869', '4085', '59922', '12751', '99443', '13530', '23872', '36026', '83360', '32711', '92980', '11140', '99323', '57263', '98149', '29265', '25548', '65995', '4818', '15593', '8535', '37863', '12217', '14474', '66584', '89272', '86690', '58777', '39666', '44756', '18442', '52586', '98030', '40850', '38708', '49304', '68923', '65008', '84388', '83639', '29866', '63675', '26793', '49227', '82099', '24090', '57535', '24201', '65776', '74054', '89833', '62979', '26613', '5851', '99766', '63484', '66605', '37179', '90760', '59336', '58390', '93239', '84578', '11396', '93994', '73818', '23972', '37720', '72369', '25063', '32952', '71036', '76612', '31285', '34090', '19136', '53783', '66436', '61478', '96749', '43658', '7399', '31574', '67073', '40480', '20727', '70993', '65549', '30800', '21507', '53785', '89574', '86381', '56492', '62603', '44856', '68687', '63794', '70996', '7475', '84238', '71939', '86886', '94792', '15036', '36936', '95722', '17771', '67850', '33371', '49314', '40744', '5432', '81057', '41201', '75986', '22961', '15323', '1570', '18657', '95219', '19130', '53127', '15867', '81135', '73206', '76668', '36386', '48828', '31417', '56916', '70891', '60534', '95777', '10022', '94053', '2928', '56326', '16559', '79656', '6414', '81247', '78270', '55687', '19151', '61597', '99857', '81142', '27725', '53493', '12185', '1455', '48501', '59425', '20591', '24900', '66079', '84889', '32024', '18919', '2043', '7076', '71201', '88258', '86521', '93348', '26395', '39646', '44145', '33911', '46231', '67054', '39979', '11630', '23020', '76278', '88056', '11480', '4723', '78612', '70211', '60622', '84687', '59092', '65675', '38479', '64399', '64699', '95964', '42764', '69060', '28189', '4193', '95805', '75462', '17245', '59640', '94773', '84292', '53092', '98507', '61353', '32483', '53027', '48912', '87221', '47788', '59263', '65196', '35567', '17494', '64253', '50223', '7057', '87467', '62414', '2523', '50910', '72353', '78986', '78104', '47719', '29108', '12957', '5114', '64435', '66707', '37449', '70399', '45334', '71606', '55338', '55072', '58765', '12151', '22012', '16954', '87366', '14240', '98041', '72296', '47408', '56879', '99584', '63172', '92316', '28071', '29880', '19608', '13839', '87484', '56541', '88662', '87098', '72124', '78282', '27653', '38993', '31870', '67239', '99445', '7376', '78487', '98880', '12180', '86773', '67773', '15416', '58172', '13075', '67559', '97510', '29705', '86985', '57024', '11827', '31236', '91920', '26116', '94614', 
'14486', '46252', '78847', '43786', '70048', '96739', '35240', '39933', '58209', '27852', '65669', '47323', '58150', '84444', '44344', '95882', '41258', '31314', '69060', '19916', '6979', '19436', '45572', '16259', '74566', '6306', '24705', '53422', '593', '97031', '22308', '26875', '23042', '78035', '34229', '61976', '23175', '50072', '90896', '50810', '71730', '86468', '94807', '8218', '36032', '58628', '60560', '51206', '37943', '27987', '15014', '49905', '70018', '66799', '80851', '23594', '29982', '6438', '97381', '47715', '96294', '17985', '48545', '12672', '5250', '9988', '24601', '3736', '97815', '54363', '64703', '44167', '68376', '16595', '38073', '29630', '59630', '1858', '71823', '75580', '70083', '14493', '93821', '93394', '85369', '3818', '8435', '59988', '43966', '13961', '15855', '83332', '80312', '27299', '88840', '76964', '56173', '62794', '79389', '82642', '85843', '47116', '43064', '16061', '28905', '54415', '72832', '91252', '93488', '79457', '99336', '70744', '80432', '6487', '880', '87701', '154', '86574', '86677', '17892', '81488', '95260', '12515', '43189', '9211', '55403', '41417', '60046', '54785', '83655', '28274', '65745', '63062', '44549', '36391', '48051', '7328', '3572', '33226', '49177', '25123', '59065', '19691', '15109', '10172', '95578', '29497', '48152', '20276', '36270', '78866', '48309', '53209', '55475', '30073', '19717', '16004', '45692', '83430', '9291', '45935', '57030', '92613', '91656', '67697', '34915', '28156', '56594', '3273', '11194', '98270', '34370', '2621', '66679', '97451', '97717', '87923', '48310', '37725', '69743', '75103', '84956', '75163', '16069', '65304', '19397', '18071', '27273', '49823', '57595', '98324', '82174', '10293', '80943', '64184', '19472', '4198', '9410', '25927', '65961', '33155', '95168', '33692', '61712', '69877', '13308', '17415', '10022', '2491', '67310', '96140', '68050', '76272', '17143', '76805', '57176', '7539', '22690', '95483', '87592', '27221', '90821', '51154', '99828', '68998', '54581', '74222', '10269', '65057', '45467', '96089', '55058', '89779', '60837', '74122', '52886', '58055', '14880', '93208', '66652', '68830', '24121', '62407', '87257', '18802', '14925', '45423', '98624', '55195', '59072', '41414', '77840', '66075', '62705', '26549', '19063', '57552', '2507', '52069', '57620', '66688', '14833', '33700', '90666', '98052', '5367', '2268', '43093', '69063', '22030', '85564', '92258', '1847', '24446', '65835', '38660', '91899', '87732', '52396', '31952', '36000', '86944', '16109', '80729', '53757', '60226', '59103', '84187', '36674', '72823', '29884', '4654', '69139', '20440', '57413', '3651', '39639', '44564', '57492', '84159', '751', '99748', '9659', '72661', '39220', '99742', '74734', '75729', '38071', '69934', '73640', '65294', '54524', '64372', '37927', '17187', '7863', '12732', '40296', '36197', '15821', '76831', '4400', '71933', '4040', '22072', '33064', '25702', '13324', '91275', '27388', '97729', '14620', '45989', '80737', '17934', '4219', '3032', '43457', '31051', '24469', '67041', '29328', '75499', '80951', '88212', '92595', '49969', '24612', '58732', '2718', '3805', '50918', '99426', '8614', '35580', '93273', '989', '24385', '41185', '25687', '47146', '25227', '95839', '56355', '98536', '79824', '31725', '46447', '26690', '68418', '47783', '33725', '21729', '70797', '59038', '60376', '25087', '68332', '67950', '12411', '95918', '64736', '65336', '74947', '64605', '4106', '42712', '96640', '28492', '28648', '42429', '821', '24333', '69677', '38959', '23484', '92005', '29352', '29159', 
'52873', '99947', '21834', '85347', '93479', '28298', '55608', '3226', '69714', '80283', '6577', '18849', '44605', '75286', '28139', '26541', '12867', '57500', '86617', '33005', '57498', '60223', '74954', '51401', '55246', '5648', '16513', '40930', '43821', '32090', '66002', '65530', '76083', '6047', '6879', '94987', '80787', '11688', '77161', '92670', '6696', '400', '28572', '47234', '51375', '88518', '762', '92617', '54260', '7560', '60180', '43331', '64059', '27616', '75839', '21392', '47756', '46254', '19486', '88533', '30130', '93694', '8557', '66534', '94447', '16910', '6480', '77440', '24366', '6195', '48946', '28597', '44429', '50300', '73556', '40638', '98709', '94413', '15987', '43860', '64871', '93953', '34506', '7296', '31753', '30626', '77510', '39829', '25696', '39776', '69185', '36540', '65413', '31528', '43446', '73532', '49776', '30282', '30004', '26725', '15200', '33958', '90320', '71836', '48051', '31970', '5326', '96194', '69695', '60898', '60945', '18271', '50868', '61468', '23593', '68985', '20628', '58044', '8942', '34849', '7384', '50500', '62895', '78780', '48946', '65278', '4067', '973', '34761', '15512', '73739', '23138', '47322', '55568', '32259', '71816', '49277', '75218', '76104', '19579', '68312', '67904', '33886', '53888', '26421', '43859', '40291', '39068', '31711', '36542', '10195', '39781', '72352', '13188', '34113', '9428', '60443', '4987', '13783', '80744', '63483', '18266', '11961', '87167', '46987', '28480', '74214', '39191', '8146', '38090', '75727', '79245', '47720', '52547', '45321', '4972', '49701', '74354', '69672', '63455', '41902', '5667', '54166', '4962', '25873', '44509', '73332', '73383', '29438', '21455', '12320', '11997', '16921', '49379', '63027', '86175', '8110', '76149', '2520', '11256', '25863', '50518', '69001', '79113', '9447', '91840', '5242', '10998', '46496', '2448', '56058', '20970', '10517', '17783', '25723', '97137', '62840', '1264', '78691', '81020', '55335', '48524', '2088', '90413', '76651', '26855', '16177', '14954', '62914', '21344', '5708', '75560', '39311', '95865', '28783', '64902', '95657', '46276', '33426', '4799', '11588', '57513', '73689', '77677', '63011', '97795', '34954', '76866', '32043', '32697', '26643', '36890', '53476', '3011', '13963', '49551', '87671', '67761', '17488', '94770', '50599', '33272', '23091', '38079', '41177', '22395', '91656', '79679', '38687', '57384', '80118', '42507', '4098', '78949', '45669', '48802', '83915', '78292', '4369', '57657', '49146', '45192', '98491', '72457', '46331', '207', '81601', '7409', '70856', '91605', '70295', '9171', '72293', '32997', '78025', '16795', '73534', '68780', '21284', '31767', '94381', '86439', '12420', '53285', '99563', '60502', '67954', '55012', '99809', '5431', '69978', '99712', '14401', '79498', '4495', '3045', '528', '72542', '91604', '72725', '39378', '80378', '41996', '20138', '54545', '59730', '36951', '45157', '37964', '97690', '12184', '4944', '53803', '93605', '60851', '68938', '46285', '89663', '90309', '6907', '87239', '81791', '83292', '90013', '68927', '14725', '81840', '63836', '52068', '43830', '4794', '931', '59255', '8263', '99057', '94401', '69033', '7437', '20364', '92884', '28193', '43932', '37629', '59426', '18891', '8583', '79551', '87242', '1483', '6725', '65786', '16844', '12650', '99305', '42841', '9811', '18800', '39313', '51373', '31874', '84558', '27831', '48614', '48975', '55509', '83363', '31854', '64001', '94028', '76125', '79314', '24893', '81132', '9441', '86015', '28356', '40358', '10160', '23328', '7330', '76538', 
'37611', '89351', '84132', '97047', '26109', '95222', '35130', '75600', '88602', '15073', '87835', '71649', '28948', '81615', '37498', '28674', '59776', '44095', '65924', '64368', '94536', '12518', '61711', '55619', '82949', '4114', '21540', '70544', '28022', '79983', '28781', '7749', '97873', '4951', '50076', '47611', '99522', '56820', '38653', '49047', '36283', '83908', '72452', '85625', '10811', '36998', '44083', '34864', '44975', '39057', '4551', '68450', '24781', '1503', '9871', '46885', '11424', '21259', '54900', '97669', '85669', '6015', '2521', '37661', '14915', '57423', '91903', '94789', '32059', '64972', '4600', '61465', '27118', '79785', '13547', '49766', '38410', '68860', '63756', '23621', '64387', '46255', '63408', '11297', '41081', '56326', '58349', '98703', '72268', '73574', '32098', '42534', '91502', '38083', '11241', '56828', '12098', '25377', '37054', '56328', '30034', '26922', '68401', '93478', '63275', '62650', '81407', '773', '79499', '14970', '47217', '1187', '57428', '69980', '77764', '74791', '22107', '54363', '39247', '56028', '56982', '84244', '21464', '18716', '25533', '94589', '94768', '21537', '18436', '81135', '27654', '79713', '56630', '61571', '58453', '26758', '68450', '68449', '2994', '15347', '83954', '71823', '6428', '44210', '79597', '95144', '32871', '1991', '320', '77157', '63607', '31154', '48846', '71125', '61750', '59608', '33038', '35733', '68915', '94127', '50383', '64242', '49708', '57270', '65019', '8581', '12111', '18487', '50013', '58664', '22214', '19033', '33681', '44754', '28830', '10381', '52318', '34959', '20682', '55453', '53800', '65774', '99164', '72102', '36986', '44157', '56716', '7974', '81475', '25926', '39402', '33688', '99671', '95312', '42268', '26536', '14482', '67377', '57993', '89147', '15834', '64995', '4700', '18714', '30221', '39095', '32749', '69257', '55204', '30497', '31839', '63045', '30009', '62683', '31232', '77680', '93551', '63589', '6989', '77246', '42169', '46117', '73226', '37427', '1858', '83649', '37410', '86369', '4641', '74481', '66168', '48041', '22597', '14670', '27464', '57165', '20939', '36282', '76940', '73358', '50521', '69603', '8895', '81793', '57743', '81903', '64025', '91641', '25276', '34040', '62642', '64015', '57657', '84890', '73832', '782', '60160', '16998', '40023', '24590', '88613', '76640', '53091', '67600', '80183', '45674', '64464', '25163', '42384', '66972', '13953', '41966', '66048', '15135', '73745', '19466', '53657', '34619', '13462', '15905', '48257', '73297', '238', '93525', '80556', '5942', '5411', '66169', '9090', '95130', '74316', '57321', '48083', '62355', '68113', '15239', '36644', '80326', '65817', '54428', '61955', '58849', '77206', '16073', '98261', '92091', '39178', '35464', '85109', '85452', '21128', '25665', '81860', '44664', '24024', '56960', '95124', '39786', '18836', '11121', '44163', '81074', '79064', '46219', '94694', '44233', '81469', '24642', '15030', '21995', '13587', '40755', '6669', '81093', '74305', '1881', '55649', '37273', '80827', '98643', '46694', '59281', '79231', '42813', '84984', '7052', '98113', '17296', '84434', '31205', '46894', '71219', '74530', '44686', '70744', '91388', '20692', '96853', '73803', '15836', '18126', '49686', '4179', '47588', '87892', '65425', '68012', '97468', '92510', '99271', '58694', '11918', '37051', '18644', '57228', '14265', '57572', '57022', '52186', '30193', '93570', '87872', '5257', '26784', '6476', '61746', '68559', '1720', '26202', '16519', '27688', '10645', '87174', '60845', '73385', '82075', '6933', '98828', '56895', 
'17344', '84253', '36561', '51648', '24939', '63470', '31034', '95052', '51090', '51465', '87979', '68650', '30181', '29598', '19137', '43221', '81353', '90170', '96985', '61115', '17385', '92314', '80650', '55821', '17874', '84333', '93272', '48260', '87272', '22764', '59957', '51870', '85988', '39222', '77241', '62535', '28344', '6011', '80831', '64551', '46299', '75195', '71177', '8660', '58943', '57003', '3306', '74413', '74068', '15073', '89016', '93140', '13911', '57170', '19880', '41870', '9131', '57495', '73032', '86979', '60094', '87026', '30880', '4736', '86301', '92707', '21689', '83565', '71275', '47665', '65687', '71184', '89897', '32490', '97577', '38723', '79113', '37531', '97500', '94450', '15699', '58019', '84423', '27057', '56017', '97148', '47365', '30669', '33818', '80406', '99690', '33012', '95178', '46809', '48448', '79350', '9146', '99701', '98976', '71197', '44161', '75069', '36602', '79650', '97301', '12020', '56658', '25701', '46392', '78609', '63073', '69419', '57736', '20102', '42415', '79044', '20277', '56280', '47903', '94311', '25558', '40336', '91305', '90505', '66769', '64562', '83737', '62892', '10375', '71024', '19988', '56946', '76110', '21847', '43162', '50578', '46086', '54167', '61722', '53463', '63134', '69288', '12838', '14116', '71687', '50846', '59810', '24826', '84138', '82885', '91496', '98600', '82769', '40049', '4125', '50694', '1294', '2805', '29691', '82321', '76462', '85945', '115', '29188', '66918', '71340', '31585', '61638', '95472', '52978', '50622', '81990', '60955', '70519', '22270', '35610', '95871', '89222', '41038', '52546', '1163', '67943', '1793', '92010', '35755', '74509', '66665', '95759', '8568', '44299', '67822', '5806', '85839', '13895', '87675', '31357', '88014', '40026', '53050', '28951', '31992', '42495', '82892', '51567', '2869', '45808', '20238', '20781', '56098', '66307', '95701', '614', '60833', '3091', '81339', '24195', '65639', '85976', '28116', '66224', '51502', '73637', '13207', '88302', '36488', '65518', '98187', '26', '74367', '64706', '53943', '86760', '25783', '82112', '34958', '86621', '20848', '63459', '14049', '84943', '91873', '50238', '77773', '64109', '8602', '87934', '47583', '66053', '30287', '5507', '80312', '37464', '57457', '86200', '17806', '16522', '38843', '94334', '59958', '63864', '53427', '74506', '33980', '90449', '30842', '53616', '36738', '52', '13595', '53051', '13174', '60163', '71420', '73835', '67119', '79018', '42782', '45059', '952', '46360', '85879', '71552', '84741', '29746', '32577', '10041', '7208', '97528', '51256', '916', '55973', '17684', '99046', '38782', '58660', '97798', '66032', '48339', '51329', '12532', '97904', '95454', '42737', '62541', '96702', '82953', '94610', '26645', '86813', '25480', '99713', '26078', '23028', '93056', '21445', '73209', '89318', '69987', '34705', '30064', '17094', '51135', '54141', '26625', '1086', '13082', '30843', '98672', '56864', '42605', '5833', '60850', '69366', '27351', '16456', '92609', '48030', '54322', '69891', '46502', '34578', '77918', '63276', '75958', '42519', '60266', '85576', '4855', '14258', '67017', '10545', '35078', '53012', '71922', '85784', '73402', '74363', '58457', '94102', '23510', '51559', '39482', '87057', '9377', '10106', '82985', '33931', '16523', '6484', '97749', '83172', '53753', '27466', '23073', '96083', '67302', '57465', '21877', '18013', '99804', '32873', '43123', '72365', '53197', '80578', '69770', '97471', '86954', '67183', '98497', '78474', '28450', '63183', '98699', '42738', '61433', '3491', '27304', '49311', 
'94980', '92740', '43272', '86549', '11406', '79636', '85582', '38086', '657', '2354', '26567', '77450', '42086', '21600', '49011', '44059', '47872', '75761', '96577', '11642', '83471', '79616', '23749', '77082', '96876', '65302', '84027', '48955', '59887', '20657', '75090', '9058', '50347', '66088', '70745', '76342', '58026', '95568', '61504', '93473', '84590', '47089', '74717', '93090', '46334', '68273', '59500', '54345', '72608', '54048', '86156', '40296', '74046', '6813', '36369', '74543', '18305', '85236', '31316', '37061', '96893', '23112', '5529', '10166', '19037', '1467', '70810', '30932', '18410', '92837', '81324', '12268', '54705', '25207', '90366', '56528', '3392', '88747', '39951', '97957', '99404', '23685', '13533', '15640', '11434', '66516', '71025', '65770', '88000', '52232', '32360', '10787', '37438', '2264', '94460', '80214', '42288', '59062', '29010', '64093', '21225', '22297', '36935', '19202', '5925', '85373', '27414', '28991', '9191', '42273', '56587', '89719', '77191', '64334', '61542', '28763', '28978', '79184', '59815', '95200', '30246', '54022', '287', '91808', '66347', '50833', '15356', '78614',

From af1c8f8a5f1dcbc9bf12cef3dc55b02f87edb212 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Wed, 10 Jun 2020 02:57:31 +0300
Subject: [PATCH 0467/2229] fixup

---
 docker/test/performance-comparison/compare.sh |  8 +++++---
 tests/performance/set_index.xml               | 13 ++++++++-----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 983db608b47..f7986689020 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -347,9 +347,11 @@ create table query_metric_stats engine File(TSVWithNamesAndTypes,
 create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
     as select
         -- FIXME Comparison mode doesn't make sense for queries that complete
-        -- immediately, so for now we pretend they don't exist. We don't want to
-        -- remove them altogether because we want to be able to detect regressions,
-        -- but the right way to do this is not yet clear.
+        -- immediately (on the same order of time as noise). We compute average
+        -- run time between old and new version, and if it is below a threshold,
+        -- we just skip the query. If there is a significant regression, the
+        -- average will be above threshold, we'll process it normally and will
+        -- detect the regression.
         (left + right) / 2 < 0.02 as short,
 
         not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
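The effect of the new check can be sketched in plain ClickHouse SQL. The columns `left`, `right`, and `diff` and the 0.02 s threshold come from the script above; the run times below are made-up example values, not measurements from any report:

-- A query whose average run time across the two server versions stays under
-- the 0.02 s noise threshold is marked short and skipped; a regression large
-- enough to matter pushes the average over the threshold, so it is still
-- processed and detected. Mirrors "(left + right) / 2 < 0.02 as short".
SELECT
    (0.010 + 0.012) / 2 < 0.02 AS noisy_query_is_short,     -- 1: avg 0.011 s, skipped
    (0.010 + 0.050) / 2 < 0.02 AS regressed_query_is_short  -- 0: avg 0.030 s, processed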
diff --git a/tests/performance/set_index.xml b/tests/performance/set_index.xml
index 021f6dc60f3..88dbf457cba 100644
--- a/tests/performance/set_index.xml
+++ b/tests/performance/set_index.xml
@@ -14,9 +14,9 @@
SELECT toString(rand()) IN ('41577', '83972', '51697', '50014', '37553', '93459', '87438', '95971', '83186', '74326', '67871', '50406', '83678', '29655', '18580', '83905', '61518', '29059', '56700', '82787', '98672', '30884', '81822', '39850', '80852', '57627', '91346', '64522', '17781', '49467', '41099', '41929', '85618', '91389', '68564', '91769', '81219', '52218', '37220', '97097', '2129', '9886', '52049', '34847', '25364', '36429', '76897', '71868', '58121', '71199', '84819', '69991', '34046', '64507', '34892', '24228', '36986', '28588', '51159', '53444', '80531', '9941', '20256', '48103', '32565', '62890', '5379', '60302', '46434', '3205', '18821', '31030', '19794', '71557', '71703', '15024', '14004', '82164', '95659', '40227', '83358', '24395', '9610', '19814', '48491', '66412', '16012', '71586', '42143', '51103', '24463', '89949', '35694', '39193', '63904', '40489', '77144', '94014', '84836', '9980', '46554', '43905', '25588', '25205', '72624', '10249', '35888', '98478', '99030', '26834', '31', '81499', '14847', '82997', '92357', '92893', '17426', '56630', '22252', '68119', '62710', '8740', '82144', '79916', '23391', '30192', '99271', '96435', '44237', '98327', '69481', '16691', '13643', '84554', '38571', '70926', '99283', '79000', '20926', '86495', '4834', '1222', '39486', '57697', '58002', '40790', '15623', '3999', '31515', '12694', '26143', '35951', '54085', '97534', '35329', '73535', '88715', '29572', '75799', '45166', '32066', '48023', '69523', '93150', '8740', '96790', '15534', '63252', '5142', '67045', '93992', '16663', '292', '63924', '6588', '12190', '31506', '69590', '35394', '55168', '65223', '79183', '32600', '69676', '28316', '72111', '53531', '15073', '41127', '73451', '24725', '61647', '65315', '41143', '26493', '95608', '34407', '76098', '53105', '83691', '48755', '35696', '62587', '81826', '3963', '45766', '82751', '12430', '97685', '29919', '78155', '71636', '50215', '89734', '9892', '47151', '54855', '3428', '9712', '52592', '2403', '79602', '81243', '79859', '57361', '82000', '42107', '28860', '99591', '28296', '57337', '64969', '32332', '25535', '30924', '21313', '32554', '17342', '87311', '19825', '24898', '61323', '83209', '79322', '79009', '50746', '33396', '62033', '16548', '17427', '24073', '34640', '52368', '4724', '80408', '40', '33787', '16666', '19665', '86751', '27264', '2241', '88134', '53566', '10589', '79711', '92823', '58972', '91767', '60885', '51659', '7867', '96849', '30360', '20914', '9584', '1250', '22871', '23282', '99312', '4683', '33429', '68361', '82614', '81440', '47863', '69790', '11968', '75210', '66854', '37002', '61142', '71514', '1588', '42336', '11069', '26291', '2261', '71056', '13492', '9133', '91216', '72207', '71586', '86535', '83898', '24392', '45384', '48545', '61972', '503', '80180', '35834', '97025', '70411', '55039', '35430', '27631', '82533', '96831', '74077', '42533', '14451', '26943', '53783', '69489', '71969', '8432', '37230', '61348', '19472', '59115', '9886', '50951', '57109', '7141', '1902', '84130', '4323', '55889', '47784', '2220', '75988', '66988', '63721', '8131', '95601', '95207', '2311', '26541', '50991', '6717', '2969', '71857', '51034', '65958', '94716', '90275', '21012',
'46859', '7984', '31131', '46457', '69578', '44540', '7294', '80117', '9925', '60155', '90608', '82684', '32193', '87071', '28006', '87604', '24501', '79087', '2848', '29237', '11221', '81319', '40966', '87641', '35325', '78705', '88636', '78717', '62831', '56390', '99271', '43821', '14453', '17923', '62695', '77322', '21038', '67677', '41271', '4376', '65426', '46091', '19887', '97251', '55583', '58763', '3826', '35037', '73533', '64267', '82319', '9836', '42622', '96829', '16363', '10455', '49290', '99992', '98229', '66356', '59087', '73998', '25986', '4279', '56790', '69540', '588', '36620', '60358', '45056', '89297', '42740', '8323', '19245', '82417', '41431', '699', '11554', '73910', '44491', '56019', '68901', '45816', '68126', '89379', '23885', '13263', '56395', '73130', '19089', '23771', '10335', '48547', '16903', '6453', '33560', '89668', '38159', '43177', '90655', '49712', '62', '66920', '34180', '12150', '48564', '39538', '85026', '87195', '14928', '8956', '71157', '53287', '39161', '67583', '83309', '92054', '86977', '56188', '15229', '88170', '60894', '58497', '89254', '40082', '86890', '60161', '97291', '45878', '23368', '14577', '92870', '37017', '97356', '99426', '76061', '89186', '99751', '85153', '61580', '39360', '90107', '25603', '26798', '76224', '6469', '7912', '69838', '16404', '67497', '28965', '80836', '80365', '91249', '48713', '17113', '33090', '40793', '70450', '66689', '83698', '17802', '43869', '13355', '18959', '79411', '87930', '9265', '37504', '44876', '97234', '94149', '35040', '22049', '49248', '6535', '36080', '28346', '94437', '78319', '17961', '89056', '56161', '35810', '41632', '45494', '53351', '89729', '99510', '51584', '59688', '6193', '70809', '51093', '92589', '90247', '34910', '78235', '17362', '49423', '63324', '525', '37638', '72325', '89356', '15298', '59116', '17848', '65429', '27029', '84781', '70247', '8825', '35082', '70451', '22522', '58125', '91879', '90531', '2478', '463', '37902', '54405', '87267', '72688', '22803', '33134', '35177', '84551', '44974', '88375', '76407', '27774', '33849', '19915', '82014', '80434', '26380', '48777', '53811', '14838', '26829', '56441', '99869', '49574', '85476', '19723', '16907', '4018', '37338', '78510', '47912', '13030', '65277', '95716', '67363', '21393', '89887', '78842', '81650', '903', '17436', '30704', '49223', '27198', '25500', '52214', '54258', '70082', '53950', '49312', '43615', '99473', '94348', '53661', '96213', '96346', '62010', '38268', '32861', '75660', '10392', '89491', '68335', '29817', '88706', '24184', '36298', '43440', '21626', '26535', '44560', '46363', '12534', '99070', '95606', '33714', '73070', '8303', '29853', '23014', '99982', '4530', '14955', '45803', '50', '90750', '30394', '81276', '95563', '47314', '58520', '91299', '88944', '54402', '67405', '29253', '47079', '71734', '99728', '17652', '13307', '35556', '18962', '26780', '17771', '53712', '60055', '37628', '35830', '90739', '61151', '41309', '27652', '3051', '53167', '98417', '19382', '36833', '75085', '65374', '87732', '30352', '31776', '32765', '97565', '92199', '49050', '29503', '51024', '18834', '8515', '24069', '96216', '10777', '90680', '18974', '68884', '85305', '36007', '56707', '4212', '47352', '34426', '13185', '92939', '95782', '70577', '58080', '98279', '3906', '5065', '56896', '16382', '31273', '17117', '98602', '12786', '24086', '63970', '72756', '35798', '82367', '7356', '53398', '68503', '2962', '16425', '67334', '68461', '65439', '15620', '70906', '29649', '46461', '74602', '38012', '71714', '16825', 
'89480', '53386', '88532', '35104', '28556', '82120', '23155', '23347', '24797', '60061', '54962', '99427', '82248', '82447', '39968', '63727', '27431', '81511', '91168', '71425', '80740', '84127', '40717', '15503', '15419', '46594', '61263', '19212', '53175', '70724', '74445', '23034', '71818', '40246', '18886', '53066', '4880', '83701', '86107', '87862', '44751', '392', '73440', '90291', '93395', '20894', '38463', '32664', '55158', '20090', '50004', '79070', '98471', '85478', '96615', '68149', '78334', '97752', '73207', '71678', '91238', '96757', '82598', '194', '35797', '45120', '60782', '28721', '17676', '78066', '60957', '11826', '51563', '50516', '16485', '47053', '31738', '48923', '23554', '96850', '42033', '73701', '78607', '45979', '54571', '12415', '31693', '15356', '36902', '9126', '3767', '3295', '90402', '24005', '95350', '67033', '49137', '72606', '51899', '17522', '31957', '44641', '53982', '23767', '68257', '15766', '19995', '2107', '48788', '11765', '91055', '46576', '54651', '50381', '62827', '73636', '46606', '98753', '37631', '70441', '87916', '66983', '33870', '31125', '12904', '57040', '4874', '58632', '42037', '18782', '5998', '18974', '57949', '81010', '90407', '99874', '20462', '89949', '10952', '71454', '95130', '46115', '3518', '13384', '69039', '79482', '22076', '59782', '32042', '40930', '60243', '29298', '6790', '46985', '44398', '85631', '14380', '66179', '2629', '32126', '49833', '14118', '58492', '31493', '81172', '96638', '8745', '89663', '76842', '78633', '41373', '83721', '42886', '11123', '32739', '11051', '1303', '92314', '83324', '85600', '44276', '69064', '56125', '84650', '31028', '12628', '14502', '64764', '39405', '44855', '79046', '51716', '46824', '83389', '1941', '1257', '9280', '73176', '84729', '2579', '63366', '22606', '35541', '51096', '13447', '18355', '68037', '28436', '94116', '81070', '78355', '67897', '5296', '32742', '77645', '91853', '18767', '67949', '40963', '5792', '17278', '25597', '41884', '80829', '7099', '18645', '60295', '12082', '81800', '78415', '18082', '38789', '16295', '72377', '74949', '55583', '66853', '15402', '72977', '15123', '99434', '34999', '21687', '76049', '42987', '83748', '88256', '66688', '21766', '20304', '29271', '10069', '19822', '11792', '42526', '74143', '17289', '30253', '6367', '20888', '12975', '94073', '98639', '30134', '26320', '65507', '69002', '53120', '4550', '38893', '18954', '38283', '54863', '17698', '99670', '10521', '92467', '60994', '18052', '48673', '35811', '87282', '62706', '16061', '53112', '22652', '37780', '55662', '26331', '49410', '79074', '10623', '69577', '79613', '9491', '31229', '43922', '84231', '58409', '36386', '46875', '74431', '76735', '38776', '23350', '7314', '9079', '51519', '98544', '70216', '63380', '90381', '1295', '46901', '58225', '55339', '89918', '75522', '35431', '89460', '49552', '89302', '23068', '28493', '3042', '25194', '59520', '9810', '95706', '81297', '89638', '54794', '94527', '45262', '97932', '78685', '6947', '22818', '48700', '9153', '12289', '22011', '58825', '93854', '65438', '4509', '33741', '28208', '69061', '48578', '40247', '77725', '31837', '39003', '69363', '78113', '76398', '97262', '67795', '68446', '58896', '60969', '19849', '6722', '91854', '49519', '13949', '67109', '48824', '31723', '75554', '69575', '94986', '75350', '18628', '15284', '41943', '15433', '52607', '41', '22340', '29528', '24059', '34145', '72517', '46316', '10667', '54510', '19882', '47764', '69124', '41963', '84350', '48420', '4646', '24958', '69020', '97121', '26178', 
'62664', '50622', '32554', '49655', '45398', '11267', '72222', '73363', '89554', '89046', '57384', '29259', '37888', '24850', '74353', '57343', '34762', '2900', '11393', '42154', '94306', '70552', '75265', '921', '26003', '64352', '89857', '83171', '58249', '48940', '53512', '66335', '44865', '68729', '19299', '58003', '39854', '99122', '3860', '80173', '52242', '90966', '53183', '71982', '82325', '87842', '15000', '55627', '71132', '6354', '42402', '91719', '91644', '94533', '74925', '66278', '66911', '85576', '40495', '70919', '71797', '87835', '29845', '71832', '3390', '7994', '33499', '70715', '54897', '82710', '63077', '78105', '24758', '89585', '84607', '46477', '78618', '10989', '39222', '98749', '51685', '94664', '31008', '32823', '89521', '72160', '26952', '4001', '21421', '5737', '74027', '88179', '45553', '83743', '19913', '49435', '65616', '82641', '5149', '76959', '40681', '73023', '2670', '30845', '18863', '35094', '88400', '80963', '9154', '16571', '64192', '59694', '41317', '59942', '58856', '99281', '67260', '66971', '22716', '76089', '58047', '67071', '53707', '462', '52518', '72277', '10681', '69', '98855', '12593', '88842', '67242', '73543', '37439', '18413', '67211', '93495', '45576', '70614', '27988', '53210', '18618', '21318', '68059', '25518', '55917', '56522', '16548', '2404', '93538', '61452', '66358', '3709', '23914', '92426', '81439', '38070', '28988', '29939', '2948', '85720', '45628', '51101', '89431', '86365', '17571', '50987', '83849', '11015', '83812', '66187', '26362', '66786', '22024', '93866', '36161', '90080', '64874', '37294', '83860', '73821', '80279', '36766', '73117', '44620', '84556', '42070', '90383', '27862', '20665', '67576', '34997', '57958', '80638', '84351', '63961', '1362', '14338', '80377', '24192', '41294', '57368', '51189', '27287', '45764', '86289', '65600', '708', '84090', '96005', '55676', '84855', '72385', '70018', '9336', '82701', '3710', '52083', '74045', '96454', '30956', '67369', '78941', '81810', '71906', '23194', '33042', '50794', '61256', '24449', '48639', '22916', '78303', '13666', '40762', '43942', '51075', '89783', '95786', '90462', '6181', '36482', '40675', '4970', '6388', '91849', '72579', '94983', '86084', '20140', '68427', '48123', '43122', '98066', '37560', '6927', '72803', '5546', '62259', '98439', '6457', '98568', '70499', '33022', '28226', '29675', '20917', '75365', '20900', '8190', '56736', '99153', '77779', '49333', '50293', '97650', '4067', '47278', '42761', '71875', '13966', '11223', '46783', '18059', '61355', '29638', '75681', '24466', '89634', '20759', '83252', '37780', '15931', '74893', '6703', '64524', '80656', '85990', '78427', '18411', '20696', '86432', '93176', '69889', '15072', '15180', '9935', '10467', '60248', '42430', '62590', '89596', '27743', '26398', '79912', '60048', '50943', '38870', '69383', '72261', '98059', '55242', '74905', '5667', '54321', '70415', '39903', '49711', '85318', '79979', '59262', '82321', '15263', '17416', '74554', '94733', '72112', '49872', '54849', '73883', '78250', '74935', '68559', '57564', '50541', '45730', '41595', '5588', '83723', '42891', '11898', '14348', '99732', '14481', '85233', '21277', '94508', '52551', '74187', '7634', '42912', '25100', '43536', '35798', '48190', '86477', '22680', '48148', '59501', '56563', '16802', '81496', '97568', '68657', '51462', '67953', '99660', '39002', '54170', '57190', '68086', '52700', '6487', '55709', '70418', '62629', '70420', '35695', '36152', '45360', '53503', '46623', '76000', '50648', '97876', '44815', '29163', '1356', '64123', 
'71388', '17658', '99084', '58727', '59437', '38773', '71254', '81286', '97545', '18786', '56834', '20346', '36401', '62316', '58082', '67959', '99876', '69895', '80099', '62747', '20517', '99777', '6472', '49189', '31321', '39992', '68073', '13378', '51806', '21776', '52060', '96983', '25754', '93709', '96627', '8644', '93726', '14002', '37716', '87620', '34507', '76339', '24491', '5849', '44110', '522', '66521', '12776', '44887', '80535', '14548', '75248', '671', '73071', '35715', '59474', '7061', '82243', '56170', '20179', '59717', '1725', '24634', '11270', '77023', '63840', '46608', '44667', '22422', '59771', '94768', '73033', '82905', '16463', '40971', '22204', '58366', '28721', '14907', '76468', '81872', '38418', '36989', '61439', '10610', '131', '44296', '35453', '10117', '75856', '94603', '99602', '68075', '35949', '13599', '50030', '69633', '55956', '85465', '16429', '86081', '11145', '6195', '82207', '90598', '92814', '23725', '83204', '80346', '71542', '46634', '15820', '54123', '45397', '15322', '61743', '9273', '71347', '6835', '64006', '91718', '43677', '32923', '21486', '17098', '61694', '43347', '40019', '4071', '52443', '42386', '56839', '83514', '27633', '40780', '51749', '92101', '62384', '92206', '56044', '66174', '11137', '73966', '78471', '30468', '31643', '33197', '6888', '8066', '86603', '74383', '6098', '54411', '98819', '89862', '88639', '94422', '89371', '80526', '91747', '91220', '64944', '76658', '42046', '58518', '27249', '6646', '3028', '1346', '33763', '9734', '31737', '65527', '5892', '60813', '3410', '35464', '43009', '98382', '70580', '93898', '56404', '32995', '62771', '71556', '40538', '55612', '45656', '10758', '20268', '33603', '38310', '14242', '74397', '10722', '71575', '22590', '49043', '91439', '9055', '23668', '9101', '5268', '64133', '77501', '64684', '11337', '47575', '50732', '88680', '93730', '46785', '17589', '3520', '57595', '71241', '34994', '8753', '36147', '88844', '41914', '11250', '94632', '71927', '4624', '86279', '7664', '2659', '94853', '65386', '30438', '86005', '92883', '84629', '59910', '44484', '1306', '8404', '56962', '29990', '38445', '96191', '73013', '66590', '40951', '24712', '18825', '37268', '87843', '18972', '12154', '7779', '52149', '76152', '65799', '86011', '35475', '78083', '88232', '91551', '65532', '93516', '73827', '24227', '44687', '55759', '83819', '45088', '10856', '60488', '39051', '14103', '76650', '81181', '46731', '737', '58788', '78945', '42096', '66731', '66740', '72273', '88969', '5655', '86590', '41096', '80038', '32430', '51877', '23970', '91900', '13082', '45880', '94367', '19739', '61998', '71665', '16083', '57035', '26916', '10166', '18834', '46798', '66881', '28444', '68840', '10459', '81087', '4728', '76224', '39257', '23470', '93524', '37345', '30074', '49856', '22022', '55279', '5159', '5193', '58030', '57539', '12514', '49759', '96222', '52597', '67192', '88187', '53614', '16084', '79915', '28212', '79334', '85283', '32306', '31058', '43113', '74707', '74869', '2213', '32134', '6379', '85426', '87098', '35984', '51105', '69287', '16803', '83337', '14913', '62531', '58098', '7914', '20105', '28850', '1384', '43173', '62983', '87113', '76066', '86320', '77684', '45191', '95225', '41503', '36713', '48404', '91228', '53865', '98981', '59161', '61237', '84561', '17455', '14379', '57789', '80895', '99260', '84595', '72942', '53220', '84448', '81332', '49437', '83086', '93414', '54519', '52288', '74772', '22460', '49324', '11168', '96071', '61985', '38284', '6405', '54698', '71727', '60093', '37340', 
'87884', '83403', '4542', '94949', '19636', '15855', '39105', '10424', '67418', '91022', '69254', '8481', '38411', '3832', '44354', '93548', '57172', '28481', '372', '81497', '52179', '41060', '72141', '41396', '65590', '70432', '82819', '93814', '26118', '84780', '88485', '70821', '8222', '83000', '47067', '38516', '33347', '47681', '48202', '60749', '52112', '7937', '28105', '11394', '45746', '43252', '34494', '2979', '69715', '42486', '82315', '71760', '97413', '66137', '94487', '7429', '74434', '22964', '55251', '3448', '53534', '2574', '9693', '96157', '2955', '4348', '19566', '56930', '83319', '31310', '53905', '1148', '41726', '22233', '76045', '37351', '10545', '17581', '28047', '30199', '4741', '58111', '33497', '67796', '67730', '31247', '43772', '29461', '45970', '73353', '22534', '53962', '32147', '71392', '62579', '66345', '58246', '33442', '9581', '29705', '14058', '86471', '76125', '59363', '94982', '74810', '89149', '20066', '3366', '3568', '25752', '80036', '64119', '27270', '40061', '91052', '69022', '9852', '77112', '83075', '43924', '61661', '56133', '96652', '57944', '72576', '82170', '79236', '55745', '15309', '88878', '72761', '37647', '67465', '12777', '97309', '93202', '41470', '8787', '64920', '48514', '18917', '35157', '59151', '4640', '5317', '38134', '76548', '82788', '9214', '58418', '73185', '90554', '10543', '47182', '62936', '91765', '89751', '68931', '48865', '64607', '7150', '77862', '14297', '14828', '33013', '91698', '67593', '98096', '16595', '51639', '86531', '24719', '1703', '78788', '43810', '38918', '95491', '99903', '82671', '8291', '68288', '31224', '39863', '4265', '77798', '7698', '33804', '92286', '4744', '37038', '44203', '98212', '17369', '77442', '62879', '4145', '96881', '15646', '36824', '19959', '45451', '76049', '54272', '97577', '95298', '81115', '30204', '82041', '8037', '10052', '8756', '76833', '82851', '24276', '75574', '36037', '78079', '92807', '29064', '90000', '84150', '17102', '75092', '49424', '35597', '4693', '82853', '42511', '16119', '23478', '65240', '55585', '91762', '71671', '46682', '72479', '97696', '24615', '12579', '30274', '48255', '2336', '90202', '5808', '45426', '76308', '74639', '31245', '99894', '89638', '6233', '33893', '71899', '85273', '89429', '29761', '50231', '57249', '99347', '22642', '66972', '86221', '47514', '88274', '10819', '73150', '53754', '13304', '20478', '38099', '619', '14669', '8011', '97657', '26569', '65430', '13467', '38180', '23675', '72350', '42257', '39875', '23529', '53407', '11833',
@@ -28,8 +28,11 @@
'37674', '44977', '54370', '97381', '60218', '2423', '99591', '69913', '26507', '19708', '6279', '58955', '20126', '1495', '57894', '7638', '38700', '77148', '36844', '7539', '91452', '6914', '74349', '66850', '49104', '6516', '58535', '20851', '27859', '32881', '72919', '28203', '32882', '2419', '77583', '63822', '37703', '66793', '65784', '62281', '55867', '70703', '89344', '1498', '33770', '87176', '95636', '64891', '90736', '95521', '10989', '5237', '99010', '21106', '11422', '1831', '67239', '52557', '36468', '71713', '39637', '49574', '50455', '14953', '96900', '70852', '96982', '4341', '44585', '95651', '79669', '29652', '87294', '74692', '16221', '768', '35380', '21352', '50907', '27259', '11718', '5017', '55964', '94137', '52347', '10595', '12968', '85602', '97965', '18836', '90511', '70960', '97336', '44575', '23791', '42195', '64776', '29363', '42379', '1805', '28919', '6772', '78143', '54797', '27362', '56149', '59048', '38567', '6339', '27787', '42167', '45990', '95532',
'54839', '26572', '38496', '89797', '6634', '16468', '24898', '66814', '98126', '31762', '36133', '64539', '43167', '87022', '61295', '30364', '89249', '25756', '63570', '91484', '10564', '79648', '5756', '41376', '61897', '40388', '88927', '62891', '79708', '25495', '22204', '33892', '36871', '19879', '58646', '57061', '73100', '75831', '20029', '67462', '54675', '7766', '2409', '24506', '7877', '11720', '86252', '9897', '8080', '70684', '74497', '2242', '24604', '31969', '83999', '56635', '5283', '64971', '79152', '27470', '89042', '22835', '21476', '50292', '56081', '96342', '32763', '84487', '64856', '79152', '64656', '72169', '69971', '93094', '52804', '80917', '53152', '56016', '28496', '79110', '17133', '12581', '91742', '78929', '2676', '46700', '59528', '93808', '4535', '54035', '40161', '62796', '3598', '97088', '13599', '36337', '73395', '17494', '86275', '62058', '61937', '87747', '94883', '90677', '88544', '72553', '50210', '75481', '64378', '74464', '21659', '30970', '71989', '84846', '72289', '88716', '39143', '8487', '4912', '91013', '18623', '19122', '36507', '76438', '7516', '67970', '72350', '69873', '33635', '55983', '69008', '49545', '3134', '60056', '52509', '63304', '15560', '23651', '81090', '7027', '8317', '33060', '37295', '51961', '53037', '97431', '40512', '23536', '25168', '78455', '85613', '12304', '40733', '99890', '51238', '55439', '96201', '73559', '92533', '90173', '16721', '6078', '29854', '38894', '31117', '63040', '86795', '81786', '21149', '38998', '61811', '48622', '73019', '59296', '13576', '92559', '36300', '77294', '26794', '50912', '98380', '13176', '57746', '75286', '15330', '40921', '7337', '4664', '20384', '4674', '44516', '27633', '31950', '88210', '54536', '9839', '80137', '77491', '18434', '45152', '96942', '41005', '76103', '34825', '86869', '14772', '13384', '21051', '37348', '34434', '97210', '54960', '26598', '60981', '41889', '6446', '64492', '95310', '86236', '81885', '35684', '16539', '98476', '32028', '96470', '6318', '99576', '93935', '48609', '86090', '2476', '65576', '80636', '44817', '99646', '98963', '20486', '26261', '27334', '72946', '82023', '33506', '80193', '13762', '98133', '21134', '33268', '63477', '74609', '30454', '51477', '93391', '96805', '68653', '2714', '63642', '51520', '22972', '13305', '96058', '42336', '74461', '31597', '12050', '81712', '37977', '25718', '4834', '56608', '75731', '406', '28585', '63924', '23702', '29849', '16941', '91921', '65842', '76525', '68534', '50902', '17609', '23852', '53703', '31286', '58526', '9633', '87596', '10654', '2085', '52766', '22135', '76524', '32295', '90072', '70078', '77786', '93741', '87320', '70309', '44024', '95286', '12361', '29682', '59766', '26685', '90686', '81691', '49704', '23431', '53955', '39023', '47261', '1530', '58265', '80065', '95620', '90621', '63760', '90676', '81653', '36397', '20252', '81754', '20256', '67098', '7838', '49408', '88400', '87941', '84533', '6570', '22567', '18850', '55472', '40129', '48425', '23497', '39308', '34698', '53092', '89480', '47785', '57282', '25508', '19006', '50604', '86917', '9436', '88921', '3168', '70537', '3185', '34988', '5462', '69482', '45768', '91955', '56898', '15307', '99731', '89292', '19356', '20646', '66712', '7281', '12856', '31174', '19577', '8726', '62971', '33008', '37118', '59055', '84101', '68445', '91957', '47526', '15627', '79914', '20013', '26147', '80821', '56372', '74205', '28531', '25352', '51775', '93948', '55212', '17863', '91521', '74911', '88160', '2360', '98260', '18294', '62402', '84268', 
'9580', '42668', '1467', '40059', '5221', '4216', '9917', '35420', '16496', '34369', '50253', '95234', '95114', '84193', '28322', '37031', '81284', '88628', '36782', '42572', '73347', '66188', '43342', '77285', '16513', '89064', '63066', '72645', '67075', '48208', '18181', '77898', '65795', '53707', '39856', '92883', '92567', '49733', '30236', '10273', '53029', '69773', '78379', '72108', '47696', '97557', '95184', '14688', '29853', '62694', '70431', '88435', '58799', '21883', '99866', '69178', '55870', '14414', '85274', '27321', '55555', '613', '15067', '88217', '73655', '99548', '13631', '78789', '36690', '7952', '60830', '77438', '40059', '95602', '43097', '3429', '93731', '90537', '2932', '35702', '16125', '6652', '39632', '39349', '9910', '38103', '78608', '73565', '48556', '28978', '7128', '82326', '53980', '28059', '28212', '87101', '77752', '99170', '56753', '30484', '71470', '32607', '24674', '32687', '25098', '94712', '64024', '48239', '90408', '17316', '99243', '3656', '67402', '48009', '98427', '52800', '56024', '4417', '89747', '93338', '18758', '56411', '44810', '82456', '30808', '75470', '67115', '66876', '53906', '78403', '56059', '34383', '60056', '89136', '7237', '11129', '21351', '78662', '43606', '37454', '45465', '9292', '38099', '81699', '50195', '49368', '47503', '44605', '6523', '81478', '37910', '397', '20256', '6835', '2787', '80383', '4241', '65986', '83870', '21205', '10879', '26593', '44357', '72604', '56131', '43423', '80206', '26240', '87198', '99445', '53504', '10632', '2465', '31793', '89575', '64184', '39988', '60049', '87100', '37151', '61585', '82180', '52065', '72519', '72935', '3201', '5862', '20560', '95339', '21661', '17533', '17182', '71189', '91564', '57999', '35490', '94773', '95056', '51583', '59394', '10727', '8655', '48123', '10701', '25314', '20100', '6533', '46435', '43188', '23001', '23018', '76637', '32018', '36603', '18701', '9550', '61550', '47541', '36500', '67507', '81574', '95490', '69169', '32584', '30045', '64699', '83539', '89396', '42517', '61979', '41528', '8271', '88377', '61423', '1158', '89724', '70789', '14886', '64823', '56675', '97747', '23990', '58495', '82064', '17062', '90258', '86854', '93304', '12925', '49975', '45074', '87155', '72223', '67344', '42733', '42516', '40110', '15444', '88285', '39371', '23198', '61544', '90205', '6192', '15718', '19803', '92712', '20081', '31397', '5555', '70463', '19521', '80401', '74097', '32060', '26495', '20507', '40473', '1449', '57215', '46142', '39303', '50359', '35898', '46908', '90752', '7823', '27416', '73770', '98790', '17907', '29999', '76417', '49926', '76752', '21608', '26524', '88209', '6000', '88897', '19541', '41451', '59538', '56560', '1456', '67828', '82407', '45722', '93344', '54279', '78594', '38354', '93807', '10929', '91560', '60681', '70615', '32527', '10108', '48303', '63134', '28500', '18257', '57081', '24801', '99077', '52197', '15390', '52300', '57116', '417', '7503', '20054', '75315', '81359', '69091', '18853', '2465', '25600', '13522', '74575', '12661', '83071', '15191', '27543', '21730', '60853', '18961', '14773', '89185', '33694', '51143', '1449', '68831', '78062', '65173', '32697', '41674', '9429', '22156', '96022', '46305', '97534', '5685', '48870', '89988', '20686', '66705', '6865', '94250', '16872', '13178', '7420', '73531', '92723', '60620', '48843', '74207', '60016', '50943', '62699', '63507', '76537', '87066', '76922', '24711', '34809', '5021', '31293', '53854', '77607', '52322', '10934', '50284', '87804', '36730', '86946', '80749', '43325', '97958', 
'7362', '39582', '10042', '42053', '66236', '69931', '23463', '87996', '33563', '4468', '32905', '50815', '79478', '28658', '46018', '23186', '26080', '13494', '6237', '42762', '86440', '77407', '10426', '62902', '73251', '36861', '92357', '98754', '1839', '46391', '11420', '27132', '93028', '39609', '42015', '68218', '54228', '5456', '38705', '64307', '49483', '878', '54360', '54480', '66684', '55089', '4537', '82073', '72602', '96238', '56708', '58625', '32991', '74205', '72868', '79086', '64250', '56376', '10621', '76607', '47706', '72760', '70303', '60715', '14644', '44186', '36264', '29489', '14184', '62699', '30567', '16700', '31222', '15650', '1500', '22950', '54628', '41004', '96094', '70028', '74178', '65328', '26605', '63076', '75271', '79285', '8151', '42101', '56362', '25961', '87864', '972', '29510', '2747', '8877', '9780', '61052', '84105', '15573', '27475', '44570', '25334', '18517', '44237', '84094', '67524', '76761', '65678', '79284', '2462', '42631', '22696', '19223', '29728', '67742', '11883', '59027', '12377', '80538', '2165', '17377', '15030', '49838', '23920', '26025', '68179', '75894', '43783', '97106', '75558', '35528', '52081', '16951', '68855', '402', '21459', '97550', '16948', '5369', '4641', '2663', '15233', '79974', '71093', '15234', '42690', '22322', '54282', '95845', '90010', '40530', '88298', '41885', '7079', '6098', '72786', '36603', '77378', '48393', '45723', '41996', '96025', '89297', '75586', '8422', '24360', '170', '46036', '46725', '67944', '74029', '73069', '45371', '99916', '71085', '42608', '89904', '6393', '51274', '42729', '58924', '82497', '64143', '88622', '18818', '89041', '56090', '21369', '78224', '90450', '45488', '58830', '4133', '98062', '81113', '11285', '51457', '3183', '38800', '65278', '42169', '28602', '52648', '44683', '75647', '11778', '32151', '33528', '23773', '68268', '23367', '70964', '23548', '35575', '67570', '77681', '74158', '25374', '62714', '43100', '4977', '51678', '83460', '29755', '15890', '64626', '54044', '14793', '64339', '94008', '97126', '49202', '33889', '12601', '12275', '56123', '94557', '68226', '67200', '9374', '70687', '29211', '8039', '14598', '74548', '37433', '98991', '29933', '37203', '23973', '96482', '64774', '58350', '61781', '31824', '57193', '26476', '21814', '32297', '32627', '44277', '33876', '55468', '81715', '82505', '61462', '20324', '84293', '40116', '51087', '43594', '6854', '59077', '39841', '26023', '22777', '66859', '82460', '89515', '41712', '33711', '71875', '10685', '12655', '50138', '31063', '37040', '95819', '38919', '27391', '29833', '34350', '65646', '7697', '2688', '41146', '13241', '50305', '86568', '24487', '78741', '96370', '21015', '31719', '39750', '25014', '72415', '8486', '90668', '51143', '49488', '21057', '92803', '53528', '39550', '76039', '44185', '32404', '30217', '19796', '38084', '49161', '80140', '20241', '39357', '68908', '93083', '77231', '6952', '36322', '50790', '623', '29730', '13616', '57546', '17434', '93811', '35148', '81419', '40250', '40329', '89126', '72402', '16053', '27107', '28919', '16829', '96582', '65057', '28416', '30801', '77742', '27420', '73118', '89352', '54706', '23035', '88413', '64608', '61930', '15037', '47327', '59596', '18700', '57576', '63628', '56823', '60091', '68209', '21001', '14962', '72257', '83802', '33721', '86343', '11133', '65737', '68477', '90725', '86869', '98403', '47393', '25356', '61372', '8873', '19888', '48836', '66005', '23531', '72520', '26461', '78508', '28213', '96394', '22983', '37856', '71814', '27425', '72753', 
'27511', '65471', '38592', '3683', '24652', '64505', '92543', '53201', '40639', '99542', '53425', '35321', '47669', '14134', '47727', '48202', '71931', '32119', '50086', '50266', '67159', '89317', '81905', '30315', '49154', '8690', '69365', '56881', '46473', '64100', '38365', '59377', '65630', '54871', '52745', '91536', '16106', '70066', '62063', '84530', '88103', '33599', '51063', '87299', '41880', '25335', '51252', '42788', '13568', '1721', '62424', '83308', '36787', '91536', '92555', '27600', '24030', '12267', '66336', '30242', '7183', '67624', '28471', '48593', '79766', '31178', '47818', '94522', '88855', '45262', '43670', '18065', '25062', '44558', '37189', '69225', '35216', '42683', '26289', '72816', '31947', '65871', '45715', '59452', '22014', '56669', '60331', '33450', '60601', '95047', '30789', '90107', '81565', '32266', '3252', '5446', '58756', '55370', '34034', '81071', '2560', '39054', '39564', '15010', '5389', '60002', '53320', '49545', '48444', '31415', '39278', '79879', '30148', '10186', '60358', '29011', '14419', '95159', '94815', '55251', '90910', '80582', '92304', '11697', '60061', '38577', '84439', '76196', '34542', '50963', '36294', '11123', '59763', '29873', '47383', '12979', '22119', '21723', '64725', '48377', '77132', '9817', '79920', '47653', '60069', '12924', '53808', '55962', '66969', '13757', '60615', '10994', '9138', '34119', '58436', '64407', '75170', '73524', '51864', '94183', '86847', '15585', '57616', '96267', '5340', '52929', '49096', '50291', '5559', '32382', '84077', '6598', '87921', '59719', '31726', '44772', '63373', '75420', '66829', '47275', '98264', '61387', '94945', '44540', '50098', '13078', '44729', '95332', '63555', '30782', '63203', '15071', '60996', '72812', '17418', '80215', '37610', '30670', '44674', '74822', '15471', '25236', '16266', '76213', '35820', '19567', '8715', '72003', '90606', '1434', '53545', '88170', '75014', '62287', '35436', '38669', '12927', '83877', '38622', '28313', '82884', '73969', '38671', '10450', '24158', '22941', '73162', '86548', '42482', '95315', '92016', '96156', '44012', '35962', '6366', '3881', '74300', '26248', '30182', '19164', '67105', '66771', '52587', '69894', '61820', '16551', '50743', '10096', '69030', '24451', '89165', '23929', '96291', '30685', '64413', '19913', '9049', '71383', '61684', '45384', '45927', '81840', '49521', '89594', '30055', '83430', '14930', '60316', '86585', '99375', '80170', '14207', '19584', '20067', '82874', '30159', '46647', '6942', '66777', '32638', '55662', '75470', '77622', '26893', '96149', '14373', '33252', '50574', '7945', '20696', '56662', '94348', '3384', '20956', '89668', '99052', '65131', '56847', '17589', '16419', '2670', '10705', '59587', '92902', '92424', '48570', '11034', '69149', '35733', '17315', '84966', '69353', '69590', '52834', '32561', '6049', '50156', '71676', '76423', '32361', '61509', '8845', '75709', '35956', '21912', '31188', '59083', '43459', '38614', '92206', '55645', '38737', '34193', '6451', '94163', '24326', '49976', '71600', '58024', '67160', '4365', '38270', '59558', '80834', '60739', '54318', '19738', '42196', '43191', '13463', '88914', '99239', '66869', '75691', '33085', '4323', '7170', '46184', '41423', '89835', '46877', '20349', '14365', '32727', '35322', '841', '23597', '43370', '57527', '73250', '32553', '71489', '44617', '98323', '37672', '59549', '96023', '63176', '13524', '15621', '30448', '28136', '45549', '3513', '64153', '19839', '24219', '41987', '51083', '90268', '52052', '31430', '4727', '99409', '43595', '82374', '61251', '51470', 
'66562', '98724', '23529', '53895', '67562', '87573', '89964', '30821', '15733', '33062', '86963', '33450', '75338', '32570', '14453', '38080', '36335', '84226', '52790', '42883', '61156', '42789', '57846', '60096', '29946', '80178', '15882', '1971', '60722', '62458', '8754', '59991', '89321', '584', '70565', '36458', '21226', '23561', '9837', '39364', '23065', '30675', '9306', '40085', '52082', '89976', '73283', '77851', '36174', '54470', '63250', '72111', '70853', '26723', '42590', '91230', '47512', '13983', '70898', '70927', '40721', '30642', '41628', '90010', '27306', '1933', '43304', '44499', '87890', '22201', '89249', '63935', '48438', '58588', '1061', '70061', '63075', '9676', '65820', '82156', '82668', '111', '54350', '10328', '23466', '98936', '18285', '53919', '32422', '84859', '58387', '24022', '32423', '6010', '56417', '49452', '69999', '14885', '47102', '59577', '24999', '75984', '96464', '59088', '85987', '71442', '88789', '4753', '8229', '76883', '15284', '90610', '40507', '78882', '55575', '25315', '7214', '70602', '4796', '35767', '54657', '42153', '16050', '93607', '99249', '77236', '59949', '52871', '47837', '33534', '30023', '89137', '99938', '35824', '50775', '30282', '82798', '53312', '65277', '68375', '91445', '58166', '43344', '6589', '82515', '34632', '78588', '152', '67554', '15877', '74334', '32783', '45147', '39483', '92067', '59029', '38298', '55229', '28268', '85140', '33451', '15424', '46695', '23201', '83329', '28372', '19518', '89198', '33305', '43892', '470', '37662', '9407', '14376', '80310', '21459', '72381', '80414', '88305', '69073', '63101', '91054', '47190', '48595', '24696', '41426', '35133', '94399', '21790', '55040', '73279', '20809', '67805', '94115', '58633', '78053', '89444', '4112', '8', '34517', '22106', '85934', '86814', '53333', '93437', '85062', '32791', '72744', '99843', '51161', '22730', '34908', '82918', '92566', '22467', '41226', '98518', '29235', '94042', '84371', '79100', '25214', '7764', '59427', '47891', '61092', '23775', '13641', '30837', '77377', '43032', '38441', '29462', '20300', '19070', '20982', '73987', '87836', '68062', '6419', '51563', '40084', '85694', '86677', '47142', '27222', '17844', '19158', '45120', '88524', '74724', '73229', '42470', '38751', '1132', '28603', '61188', '55021', '88825', '58005', '62411', '8843', '94852', '93664', '39253', '27473', '247', '43824', '1804', '8905', '11509', '95659', '7811', '80691', '15779', '49794', '8991', '76099', '29223', '36060', '85399', '41369', '22885', '38473', '22376', '50446', '89578', '25818', '61333', '78787', '47605', '83654', '99068', '52120', '48367', '86381', '19803', '72600', '31998', '37755', '88031', '83969', '42319', '27974', '35780', '93662', '46808', '60529', '15491', '10447', '48829', '33886', '68333', '44855', '86554', '64794', '66376', '58222', '14021', '52043', '56375', '1300', '38105', '89159', '97456', '26800', '93124', '3673', '32279', '30658', '84475', '3708', '93952', '39245', '91980', '55333', '79440', '64407', '46559', '60759', '10688', '49872', '45810', '87405', '66932', '56530', '57751', '9619', '27361', '6356', '65848', '7524', '20273', '22362', '20504', '28042', '39475', '51677', '85733', '32426', '54558', '17222', '56485', '34928', '90917', '70528', '51732', '61014', '98420', '67265', '41383', '3883', '47642', '53324', '93679', '93088', '57534', '44449', '46779', '81482', '54279', '80135', '11216', '92545', '18426', '96005', '57801', '21898', '5104', '83467', '72015', '43783', '89674', '57468', '96686', '95167', '38507', '95187', '64923', '71214', 
'42834', '93219', '47342', '24476', '84834', '29080', '86533', '30687', '68400', '26933', '37396', '65169', '89767', '20642', '53843', '85167', '77306', '46723', '68501', '4243', '35044', '15950', '40388', '53630', '76125', '10816', '83285', '4120', '11402', '91344', '95169') - - SELECT count() FROM numbers(10000) WHERE (number, number) IN ((17258, 93148), (4508, 52749), (68660, 70017), (77797, 23528), (1136, 37393), (53237, 15379), (68370, 73211), (15782, 54962), (59432, 45415), (68396, 920), (96154, 21016), (12700, 26887), (88016, 43191), (68153, 51575), (91315, 40005), (18070, 73178), (86, 631), (77717, 20324), (3227, 76188), (74960, 43147), (77538, 19628), (82292, 6525), (24293, 12566), (85244, 96287), (93982, 1329), (38064, 54723), (83999, 45810), (71921, 53673), (88638, 9669), (1959, 39535), (82235, 95796), (27907, 90975), (42383, 91015), (9948, 91514), (81712, 47309), (400, 25808), (31791, 46948), (39740, 36098), (25943, 84598), (99598, 52939), (77134, 15845), (40313, 72174), (85017, 94036), (36595, 14303), (83961, 68078), (55792, 72759), (73574, 43606), (9853, 63560), (28580, 56721), (74804, 41025), (32095, 55657), (52881, 63416), (91368, 90310), (23922, 38883), (30592, 10758), (66448, 61183), (31880, 96697), (11362, 20633), (75331, 2015), (71129, 8785), (1115, 70955), (7886, 83698), (18961, 84556), (16677, 43028), (37347, 70220), (31699, 71244), (10578, 96159), (67600, 39041), (78791, 86687), (21545, 54174), (68774, 37637), (46132, 81768), (98413, 20605), (2960, 23665), (31507, 35719), (96209, 18368), (60558, 38035), (21952, 3264), (11834, 86458), (21651, 17650), (86276, 36087), (18818, 24849), (61951, 3390), (59637, 62545), (30346, 72253), (36281, 2992), (78340, 49872), (94326, 93723), (3416, 94405), (12272, 8741), (22600, 22095), (57636, 37106), (38702, 14889), (70238, 11276), (17325, 60648), (16492, 41271), (52100, 1304), (93416, 7795), (57209, 71008), (48010, 36078), (20384, 74420), (77440, 34439), (69224, 45099), (30374, 33884), (49038, 90140), (1154, 84725), (64926, 86985), (91746, 73472), (59757, 75755), (45860, 71557), (45833, 36526), (74618, 73598), (91360, 65168), (58029, 30793), (56332, 14973), (99943, 96877), (97454, 6450), (64502, 77301), (73182, 31853), (76809, 83964), (82916, 86188), (78736, 65427), (36495, 7422), (76196, 2804), (96117, 61093), (9177, 26099), (52942, 63007), (48578, 47876), (50638, 89903), (7113, 97316), (35301, 12750), (47807, 7254), (38217, 55418), (56970, 41687), (20527, 62886), (358, 14021), (64018, 18582), (91740, 21683), (81967, 53589), (45437, 38450), (45476, 67752), (76851, 72072), (7304, 60091), (40097, 12897), (39906, 29247), (84262, 58734), (30857, 43791), (56087, 78929), (20498, 45954), (48726, 500), (62723, 43763), (28368, 30756), (74048, 52403), (15045, 95926), (75542, 55384), (52543, 22525), (56001, 6935), (11431, 46745), (77731, 7310), (36718, 59909), (32235, 91254), (92417, 25917), (21782, 79277), (46378, 87536), (35324, 26075), (6310, 76915), (1551, 69473), (50642, 68865), (55190, 72934), (49780, 21873), (99466, 29686), (90761, 13179), (72959, 57033), (20020, 90200), (46186, 79105), (73871, 52382), (59559, 38801), (59916, 16082), (33610, 94966), (46001, 45225), (86679, 26469), (77245, 91929), (32887, 36623), (11179, 46898), (87881, 68087), (45438, 47991), (24950, 94525), (91664, 51656), (43914, 47805), (15736, 96156), (56346, 20283), (85053, 48931), (17790, 26179), (96195, 55728), (43765, 54807), (44988, 89269), (55911, 99411), (52446, 47397), (28346, 65442), (96669, 68226), (66194, 26848), (37276, 55864), (14116, 41583), (18058, 
16317), (93136, 85318), (35616, 86252), (29222, 29969), (33386, 85372), (71094, 44238), (27733, 31838), (64626, 16692), (52904, 97899), (97619, 12663), (50165, 4688), (67557, 44053), (69184, 66269), (73164, 89705), (39822, 15169), (65499, 72808), (30068, 63697), (30154, 64235), (97016, 58716), (94366, 36592), (1592, 16261), (87985, 52102), (12554, 23652), (15909, 25292), (2527, 91531), (92139, 36031), (28986, 30032), (3038, 56314), (32239, 26707), (15973, 34901), (70246, 39680), (82529, 38132), (45827, 74783), (53665, 64111), (55218, 84170), (20466, 16130), (55734, 71203), (31438, 96906), (66338, 85858), (35988, 68511), (78391, 15191), (80747, 59213), (5357, 11546), (16822, 16607), (36607, 41106), (74949, 30739), (45726, 64887), (1524, 54847), (37371, 89195), (28726, 27788), (22600, 44777), (53999, 63625), (84304, 98338), (49260, 76480), (74564, 53907), (89867, 97096), (60157, 61299), (17165, 10146), (56334, 36268), (62114, 49222), (22715, 23620), (42830, 11539), (41091, 69151), (75471, 68364), (18681, 43249), (42738, 63219), (35474, 98454), (76815, 46024), (66310, 36521), (86095, 77013), (63693, 77319), (80731, 63031), (95478, 92387), (23787, 63724), (46299, 68994), (4800, 2460), (9663, 80639), (77231, 85814), (81615, 11311), (35638, 27340), (13598, 14322), (30657, 17238), (90957, 96846), (69962, 52140), (41681, 65962), (96836, 58177), (36190, 11623), (4231, 40500), (43049, 41949), (71177, 98492), (30193, 39750), (19744, 33204), (63358, 30210), (45638, 58918), (43641, 38741), (35598, 40932), (33238, 36236), (50835, 20968), (25099, 34071), (84986, 88456), (35333, 1529), (79771, 23985), (647, 61658), (9424, 11743), (77766, 31528), (77811, 86973), (76403, 74377), (55568, 79251), (68858, 20762), (68520, 66773), (93598, 89823), (8080, 82539), (87760, 52247), (25191, 16905), (17837, 8339), (85177, 59050), (51680, 77374), (3287, 43018), (43479, 62141), (34909, 46322), (11869, 5885), (96193, 58417), (101, 47460), (34937, 88582), (83216, 88388), (28571, 15292), (66683, 62613), (34478, 8924), (2680, 89973), (62438, 44460), (11724, 4791), (5383, 72888), (88206, 67586), (8124, 21690), (28779, 75789), (66791, 4757), (6176, 47760), (6403, 78084), (78122, 35446), (99494, 73608), (39691, 89098), (59182, 19484), (25389, 98963), (96487, 3692), (76222, 67381), (21199, 50358), (95998, 58137), (28777, 43913), (14176, 60117), (52257, 81703), (14604, 13438), (71301, 14401), (19758, 66914), (15506, 29873), (87205, 29449), (93295, 15930), (63651, 11287), (19785, 15966), (30795, 75112), (69462, 37655), (18793, 85764), (36240, 31236), (98153, 73724), (72491, 4223), (66930, 35048), (25686, 13269), (13940, 13259), (69163, 11235), (1183, 86961), (54323, 67315), (85044, 60872), (48875, 3683), (43052, 92861), (87574, 32969), (92552, 80564), (94832, 47682), (72011, 80994), (60182, 917), (97788, 34169), (66432, 47940), (87468, 80954), (35385, 68758), (50555, 63710), (55311, 44337), (87065, 26514), (84581, 98736), (23212, 56499), (75120, 72447), (56087, 38285), (58171, 45629), (28401, 44319), (70432, 27883), (18891, 14646), (26206, 49924), (79957, 44914), (56064, 27529), (99090, 29197), (49435, 340), (53525, 65601), (76998, 88349), (50416, 70860), (42506, 75290), (34024, 13295), (86663, 46523), (88814, 231), (57809, 21), (84914, 84771), (43042, 66892), (17288, 33908), (4934, 63195), (50590, 1516), (97843, 80208), (20091, 86717), (71566, 15929), (19531, 23634), (41646, 45549), (89226, 82902), (96683, 63386), (31072, 53788), (51135, 41099), (78912, 65609), (36094, 23603), (88403, 51455), (73795, 47066), (26448, 82852), 
(22829, 2894), (30041, 92548), (27733, 20608), (70180, 19892), (51650, 63440), (76328, 13666), (40514, 6677), (2786, 51059), (40809, 16499), (10857, 82541), (78221, 61067), (17982, 51969), (85369, 66965), (47153, 47149), (43965, 75796), (82725, 60767), (42407, 97249), (51475, 81224), (60957, 89414), (33065, 21663), (36601, 5290), (95842, 67301), (64630, 60398), (55212, 35638), (41750, 44235), (75260, 82400), (91291, 25843), (6477, 8311), (14919, 52306), (66220, 33180), (45736, 2313), (37450, 64444), (98614, 61344), (75007, 50946), (56701, 28117), (66632, 5174), (92323, 76613), (6796, 73695), (33696, 76280), (86876, 5614), (50863, 67993), (36068, 17049), (91912, 34271), (70706, 1904), (97798, 41117), (68154, 72483), (83862, 25578), (61643, 17204), (69974, 64232), (77926, 19637), (64901, 88988), (71424, 91703), (91655, 17147), (46872, 56530), (44189, 98087), (95939, 54420), (72651, 68785), (67624, 84875), (92587, 87663), (65275, 81256), (53798, 2506), (14702, 3638), (71291, 50452), (14909, 13903), (66965, 26606), (14127, 60345), (35306, 1738), (77234, 10468), (53521, 41218), (80681, 82583), (44227, 26521), (32263, 21482), (82270, 56963), (50580, 80567), (11593, 22346), (20074, 26867), (73126, 28667), (62996, 24317), (20295, 57163), (1506, 57668), (69567, 45236), (43366, 26001), (88052, 40181), (1599, 89349), (36789, 1579), (39895, 46673), (30381, 3206), (31723, 5625), (19252, 31317), (16932, 77149), (48794, 34409), (55986, 30328), (47551, 75088), (57363, 78365), (95221, 63385), (26449, 5733), (96588, 53077), (52980, 41140), (8187, 85947), (36723, 26520), (23579, 38909), (33350, 19275), (63930, 19357), (43536, 59941), (31117, 77322), (44638, 94812), (44730, 99097), (95108, 48170), (57813, 49503), (79959, 89436), (86980, 62031), (8275, 44009), (36666, 94645), (22064, 38882), (40471, 16939), (31156, 11337), (13101, 96977), (17906, 26835), (89861, 51405), (73369, 67946), (99141, 58572), (27131, 98703), (15900, 43412), (51768, 93125), (78579, 46689), (23029, 13895), (60870, 55830), (22553, 8236), (76449, 96207), (83766, 51024), (27630, 50614), (53484, 90104), (77626, 21944), (46755, 41583), (53616, 34240), (94159, 44415), (13914, 90059), (44387, 89012), (27499, 64579), (83415, 30809), (77558, 82619), (88880, 9814), (8466, 4424), (43598, 91921), (24695, 3349), (46295, 65208), (51256, 82461), (49126, 93012), (16186, 96585), (43284, 22655), (93130, 90393), (77495, 34372), (85509, 65856), (86662, 61906), (50988, 44393), (29828, 17737), (91651, 35308), (29796, 49716), (14019, 87751), (29688, 71207), (82845, 19100), (11989, 50132), (21158, 99905), (54732, 42547), (32314, 12851), (46405, 43794), (87849, 45643), (53524, 21212), (61925, 75491), (12498, 21937), (30185, 69475), (48421, 52487), (15112, 90935), (33187, 17801), (61704, 25514), (17889, 23917), (18758, 57197), (7693, 47232), (47905, 24618), (11494, 78950), (95662, 54561), (8075, 33909), (90427, 46065), (73962, 19821), (50691, 79400), (58218, 4881), (94106, 2509), (60633, 55169), (49600, 83054), (23339, 13270), (70262, 58946), (48417, 97266), (27629, 46905), (74465, 75514), (41687, 2564), (12814, 19492), (78899, 30168), (17745, 35206), (37972, 35296), (22288, 80001), + + SELECT (rand(), rand()) IN ((17258, 93148), (4508, 52749), (68660, 70017), (77797, 23528), (1136, 37393), (53237, 15379), (68370, 73211), (15782, 54962), (59432, 45415), (68396, 920), (96154, 21016), (12700, 26887), (88016, 43191), (68153, 51575), (91315, 40005), (18070, 73178), (86, 631), (77717, 20324), (3227, 76188), (74960, 43147), (77538, 19628), (82292, 6525), (24293, 
12566), (85244, 96287), (93982, 1329), (38064, 54723), (83999, 45810), (71921, 53673), (88638, 9669), (1959, 39535), (82235, 95796), (27907, 90975), (42383, 91015), (9948, 91514), (81712, 47309), (400, 25808), (31791, 46948), (39740, 36098), (25943, 84598), (99598, 52939), (77134, 15845), (40313, 72174), (85017, 94036), (36595, 14303), (83961, 68078), (55792, 72759), (73574, 43606), (9853, 63560), (28580, 56721), (74804, 41025), (32095, 55657), (52881, 63416), (91368, 90310), (23922, 38883), (30592, 10758), (66448, 61183), (31880, 96697), (11362, 20633), (75331, 2015), (71129, 8785), (1115, 70955), (7886, 83698), (18961, 84556), (16677, 43028), (37347, 70220), (31699, 71244), (10578, 96159), (67600, 39041), (78791, 86687), (21545, 54174), (68774, 37637), (46132, 81768), (98413, 20605), (2960, 23665), (31507, 35719), (96209, 18368), (60558, 38035), (21952, 3264), (11834, 86458), (21651, 17650), (86276, 36087), (18818, 24849), (61951, 3390), (59637, 62545), (30346, 72253), (36281, 2992), (78340, 49872), (94326, 93723), (3416, 94405), (12272, 8741), (22600, 22095), (57636, 37106), (38702, 14889), (70238, 11276), (17325, 60648), (16492, 41271), (52100, 1304), (93416, 7795), (57209, 71008), (48010, 36078), (20384, 74420), (77440, 34439), (69224, 45099), (30374, 33884), (49038, 90140), (1154, 84725), (64926, 86985), (91746, 73472), (59757, 75755), (45860, 71557), (45833, 36526), (74618, 73598), (91360, 65168), (58029, 30793), (56332, 14973), (99943, 96877), (97454, 6450), (64502, 77301), (73182, 31853), (76809, 83964), (82916, 86188), (78736, 65427), (36495, 7422), (76196, 2804), (96117, 61093), (9177, 26099), (52942, 63007), (48578, 47876), (50638, 89903), (7113, 97316), (35301, 12750), (47807, 7254), (38217, 55418), (56970, 41687), (20527, 62886), (358, 14021), (64018, 18582), (91740, 21683), (81967, 53589), (45437, 38450), (45476, 67752), (76851, 72072), (7304, 60091), (40097, 12897), (39906, 29247), (84262, 58734), (30857, 43791), (56087, 78929), (20498, 45954), (48726, 500), (62723, 43763), (28368, 30756), (74048, 52403), (15045, 95926), (75542, 55384), (52543, 22525), (56001, 6935), (11431, 46745), (77731, 7310), (36718, 59909), (32235, 91254), (92417, 25917), (21782, 79277), (46378, 87536), (35324, 26075), (6310, 76915), (1551, 69473), (50642, 68865), (55190, 72934), (49780, 21873), (99466, 29686), (90761, 13179), (72959, 57033), (20020, 90200), (46186, 79105), (73871, 52382), (59559, 38801), (59916, 16082), (33610, 94966), (46001, 45225), (86679, 26469), (77245, 91929), (32887, 36623), (11179, 46898), (87881, 68087), (45438, 47991), (24950, 94525), (91664, 51656), (43914, 47805), (15736, 96156), (56346, 20283), (85053, 48931), (17790, 26179), (96195, 55728), (43765, 54807), (44988, 89269), (55911, 99411), (52446, 47397), (28346, 65442), (96669, 68226), (66194, 26848), (37276, 55864), (14116, 41583), (18058, 16317), (93136, 85318), (35616, 86252), (29222, 29969), (33386, 85372), (71094, 44238), (27733, 31838), (64626, 16692), (52904, 97899), (97619, 12663), (50165, 4688), (67557, 44053), (69184, 66269), (73164, 89705), (39822, 15169), (65499, 72808), (30068, 63697), (30154, 64235), (97016, 58716), (94366, 36592), (1592, 16261), (87985, 52102), (12554, 23652), (15909, 25292), (2527, 91531), (92139, 36031), (28986, 30032), (3038, 56314), (32239, 26707), (15973, 34901), (70246, 39680), (82529, 38132), (45827, 74783), (53665, 64111), (55218, 84170), (20466, 16130), (55734, 71203), (31438, 96906), (66338, 85858), (35988, 68511), (78391, 15191), (80747, 59213), (5357, 11546), (16822, 16607), 
(36607, 41106), (74949, 30739), (45726, 64887), (1524, 54847), (37371, 89195), (28726, 27788), (22600, 44777), (53999, 63625), (84304, 98338), (49260, 76480), (74564, 53907), (89867, 97096), (60157, 61299), (17165, 10146), (56334, 36268), (62114, 49222), (22715, 23620), (42830, 11539), (41091, 69151), (75471, 68364), (18681, 43249), (42738, 63219), (35474, 98454), (76815, 46024), (66310, 36521), (86095, 77013), (63693, 77319), (80731, 63031), (95478, 92387), (23787, 63724), (46299, 68994), (4800, 2460), (9663, 80639), (77231, 85814), (81615, 11311), (35638, 27340), (13598, 14322), (30657, 17238), (90957, 96846), (69962, 52140), (41681, 65962), (96836, 58177), (36190, 11623), (4231, 40500), (43049, 41949), (71177, 98492), (30193, 39750), (19744, 33204), (63358, 30210), (45638, 58918), (43641, 38741), (35598, 40932), (33238, 36236), (50835, 20968), (25099, 34071), (84986, 88456), (35333, 1529), (79771, 23985), (647, 61658), (9424, 11743), (77766, 31528), (77811, 86973), (76403, 74377), (55568, 79251), (68858, 20762), (68520, 66773), (93598, 89823), (8080, 82539), (87760, 52247), (25191, 16905), (17837, 8339), (85177, 59050), (51680, 77374), (3287, 43018), (43479, 62141), (34909, 46322), (11869, 5885), (96193, 58417), (101, 47460), (34937, 88582), (83216, 88388), (28571, 15292), (66683, 62613), (34478, 8924), (2680, 89973), (62438, 44460), (11724, 4791), (5383, 72888), (88206, 67586), (8124, 21690), (28779, 75789), (66791, 4757), (6176, 47760), (6403, 78084), (78122, 35446), (99494, 73608), (39691, 89098), (59182, 19484), (25389, 98963), (96487, 3692), (76222, 67381), (21199, 50358), (95998, 58137), (28777, 43913), (14176, 60117), (52257, 81703), (14604, 13438), (71301, 14401), (19758, 66914), (15506, 29873), (87205, 29449), (93295, 15930), (63651, 11287), (19785, 15966), (30795, 75112), (69462, 37655), (18793, 85764), (36240, 31236), (98153, 73724), (72491, 4223), (66930, 35048), (25686, 13269), (13940, 13259), (69163, 11235), (1183, 86961), (54323, 67315), (85044, 60872), (48875, 3683), (43052, 92861), (87574, 32969), (92552, 80564), (94832, 47682), (72011, 80994), (60182, 917), (97788, 34169), (66432, 47940), (87468, 80954), (35385, 68758), (50555, 63710), (55311, 44337), (87065, 26514), (84581, 98736), (23212, 56499), (75120, 72447), (56087, 38285), (58171, 45629), (28401, 44319), (70432, 27883), (18891, 14646), (26206, 49924), (79957, 44914), (56064, 27529), (99090, 29197), (49435, 340), (53525, 65601), (76998, 88349), (50416, 70860), (42506, 75290), (34024, 13295), (86663, 46523), (88814, 231), (57809, 21), (84914, 84771), (43042, 66892), (17288, 33908), (4934, 63195), (50590, 1516), (97843, 80208), (20091, 86717), (71566, 15929), (19531, 23634), (41646, 45549), (89226, 82902), (96683, 63386), (31072, 53788), (51135, 41099), (78912, 65609), (36094, 23603), (88403, 51455), (73795, 47066), (26448, 82852), (22829, 2894), (30041, 92548), (27733, 20608), (70180, 19892), (51650, 63440), (76328, 13666), (40514, 6677), (2786, 51059), (40809, 16499), (10857, 82541), (78221, 61067), (17982, 51969), (85369, 66965), (47153, 47149), (43965, 75796), (82725, 60767), (42407, 97249), (51475, 81224), (60957, 89414), (33065, 21663), (36601, 5290), (95842, 67301), (64630, 60398), (55212, 35638), (41750, 44235), (75260, 82400), (91291, 25843), (6477, 8311), (14919, 52306), (66220, 33180), (45736, 2313), (37450, 64444), (98614, 61344), (75007, 50946), (56701, 28117), (66632, 5174), (92323, 76613), (6796, 73695), (33696, 76280), (86876, 5614), (50863, 67993), (36068, 17049), (91912, 34271), (70706, 1904), 
(97798, 41117), (68154, 72483), (83862, 25578), (61643, 17204), (69974, 64232), (77926, 19637), (64901, 88988), (71424, 91703), (91655, 17147), (46872, 56530), (44189, 98087), (95939, 54420), (72651, 68785), (67624, 84875), (92587, 87663), (65275, 81256), (53798, 2506), (14702, 3638), (71291, 50452), (14909, 13903), (66965, 26606), (14127, 60345), (35306, 1738), (77234, 10468), (53521, 41218), (80681, 82583), (44227, 26521), (32263, 21482), (82270, 56963), (50580, 80567), (11593, 22346), (20074, 26867), (73126, 28667), (62996, 24317), (20295, 57163), (1506, 57668), (69567, 45236), (43366, 26001), (88052, 40181), (1599, 89349), (36789, 1579), (39895, 46673), (30381, 3206), (31723, 5625), (19252, 31317), (16932, 77149), (48794, 34409), (55986, 30328), (47551, 75088), (57363, 78365), (95221, 63385), (26449, 5733), (96588, 53077), (52980, 41140), (8187, 85947), (36723, 26520), (23579, 38909), (33350, 19275), (63930, 19357), (43536, 59941), (31117, 77322), (44638, 94812), (44730, 99097), (95108, 48170), (57813, 49503), (79959, 89436), (86980, 62031), (8275, 44009), (36666, 94645), (22064, 38882), (40471, 16939), (31156, 11337), (13101, 96977), (17906, 26835), (89861, 51405), (73369, 67946), (99141, 58572), (27131, 98703), (15900, 43412), (51768, 93125), (78579, 46689), (23029, 13895), (60870, 55830), (22553, 8236), (76449, 96207), (83766, 51024), (27630, 50614), (53484, 90104), (77626, 21944), (46755, 41583), (53616, 34240), (94159, 44415), (13914, 90059), (44387, 89012), (27499, 64579), (83415, 30809), (77558, 82619), (88880, 9814), (8466, 4424), (43598, 91921), (24695, 3349), (46295, 65208), (51256, 82461), (49126, 93012), (16186, 96585), (43284, 22655), (93130, 90393), (77495, 34372), (85509, 65856), (86662, 61906), (50988, 44393), (29828, 17737), (91651, 35308), (29796, 49716), (14019, 87751), (29688, 71207), (82845, 19100), (11989, 50132), (21158, 99905), (54732, 42547), (32314, 12851), (46405, 43794), (87849, 45643), (53524, 21212), (61925, 75491), (12498, 21937), (30185, 69475), (48421, 52487), (15112, 90935), (33187, 17801), (61704, 25514), (17889, 23917), (18758, 57197), (7693, 47232), (47905, 24618), (11494, 78950), (95662, 54561), (8075, 33909), (90427, 46065), (73962, 19821), (50691, 79400), (58218, 4881), (94106, 2509), (60633, 55169), (49600, 83054), (23339, 13270), (70262, 58946), (48417, 97266), (27629, 46905), (74465, 75514), (41687, 2564), (12814, 19492), (78899, 30168), (17745, 35206), (37972, 35296), (22288, 80001), (68026, 36558), (40187, 12234), (92380, 22866), (56488, 64402), (41404, 62562), (47802, 45287), (83302, 85215), (58999, 85776), (35158, 16804), (13416, 94146), (62953, 28243), (83290, 19103), (4564, 21789), (64468, 20927), (25582, 47206), (57810, 18693), (28938, 97986), (61704, 14838), (19214, 3232), (12911, 25438), (85802, 28837), (56506, 89458), (66392, 47773), (68190, 43841), (43044, 52214), (57886, 32830), (15943, 59771), (37081, 89294), (4032, 32960), (46931, 85790), (69656, 72737), (28217, 39872), (86170, 42776), (55116, 51495), (90485, 45274), (60773, 36788), (2193, 2636), (70222, 62086), (75720, 70712), (17549, 51460), (23609, 31515), (70254, 39825), (63762, 11061), (13107, 15394), (45916, 72130), (91558, 86662), (99524, 69106), (93073, 29881), (31724, 3007), (69051, 59452), (59701, 86760), (4967, 82028), (57404, 48226), (71829, 79910), (23714, 62439), (73881, 67618), (63269, 40085), (6164, 23415), (48156, 93907), (18627, 16570), (6676, 22991), (36916, 41488), (99079, 13264), (32533, 99243), (55505, 63339), (89564, 3290), (24886, 34916), (91310, 9343), 
(49779, 12740), (26320, 3406), (57661, 5702), (10765, 57881), (5518, 47638), (93148, 27438), (73451, 24477), (84075, 96822), (58883, 58883), (96812, 82388), (30659, 59654), (24498, 95808), (25591, 21834), (13090, 87704), (76495, 17249), (75975, 84318), (55459, 70426), (84256, 88604), (79438, 43104), (45331, 7495), (63619, 11123), (24772, 2601), (63343, 14138), (39957, 98339), (55595, 17823), (97676, 53933), (91867, 25023), (64677, 67859), (43737, 34315), (24800, 53968), (93157, 17507), (24264, 35273), (33889, 507), (10207, 40542), (40213, 57800), (38321, 74160), (42391, 7651), (80267, 94736), (52473, 79634), (17075, 2531), (8595, 75890), (31496, 50367), (16069, 79896), (70067, 200), (23420, 49517), (1628, 45646), (8916, 36794), (72294, 88976), (40603, 86008), (91871, 71098), (5447, 70998), (24152, 17561), (65046, 34951), (56950, 9292), (19244, 31385), (74693, 31813), (97343, 21572), (38834, 135), (79717, 62486), (38, 10308), (58035, 71344), (85802, 81079), (5943, 156), (38735, 38867), (3803, 99366), (15853, 19408), (62988, 62008), (8316, 44684), (17035, 71012), (48584, 2117), (75425, 37336), (2405, 50420), (43653, 28836), (12394, 69430), (54522, 4954), (33359, 148), (41018, 82851), (79995, 55417), (65008, 32342), (36547, 88185), (8131, 7054), (38980, 20146), (27976, 63039), (53119, 67009), (40043, 98393), (29333, 51980), (85818, 98405), (77956, 20099), (99747, 16916), (11597, 50181), (40961, 8262), (75103, 13912), (62339, 69155), (3869, 85481), (7053, 30956), (33563, 53272), (96178, 81751), (99365, 88728), (34447, 11164), (62856, 30939), (92486, 3357), (56605, 35330), (42180, 15137), (83946, 62984), (61869, 55711), (52880, 49871), (44588, 27387), (16332, 24496), (1781, 13508), (56674, 95773), (21328, 19628), (96455, 24155), (14302, 74435), (54053, 24590), (86642, 22177), (24089, 16186), (70281, 4601), (18552, 70708), (95442, 5895), (96714, 6293), (43803, 45857), (93257, 18497), (90032, 85086), (40566, 87233), (32674, 73822), (95599, 49334), (62745, 51898), (8245, 93882), (14093, 40977), (47215, 53001), (59737, 68452), (90937, 25354), (43805, 82571), (81953, 68572), (37298, 96262), (94899, 65066), (34772, 80762), (55469, 1186), (8734, 91665), (18622, 51150), (85200, 39575), (65381, 15979), (89734, 89656), (64712, 53691), (87187, 58256), (8476, 89694), (49935, 35239), (63730, 34982), (27687, 91571), (87543, 15350), (85208, 18781), (14783, 2574), (44699, 666), (56440, 87617), (32732, 49301), (76725, 3895), (10419, 90580), (34725, 69476), (14831, 81588), (93924, 38057), (38528, 99060), (57136, 44206), (74685, 99559), (43083, 87511), (43105, 35474), (35582, 17560), (5578, 98727), (78947, 53865), (32013, 95029), (61552, 42674), (52191, 49975), (71566, 16403), (78534, 16350), (18520, 80501), (29114, 46547), (11488, 5069), (89591, 82384), (13741, 42318), (74385, 58849), (49739, 63421), (83821, 6676), (51997, 93321), (36677, 81768), (37915, 73495), (47175, 6086), (39989, 83110), (6489, 48112), (88822, 20370), (12846, 13952), (28930, 20879), (25139, 84552), (76434, 2665), (55145, 31523), (21177, 18630), (81077, 96275), (61006, 30845), (77722, 62651), (61181, 72545), (93838, 84287), (59300, 19014), (75076, 97980), (76979, 1473), (48409, 13097), (51718, 5325), (36522, 72119), (60917, 18995), (61469, 42853), (34387, 37322), (38684, 28120), (64136, 8559), (15368, 99424), (97824, 7864), (33833, 72029), (7024, 9961), (49400, 66220), (63025, 97179), (6135, 98878), (19873, 8438), (3963, 35670), (65186, 89423), (26653, 65943), (83132, 67000), (82578, 35007), (42680, 60479), (71102, 98589), (74842, 94010), 
(22931, 33725), (46537, 42629), (75793, 48115), (21630, 92454), (97993, 81332), (25747, 31814), (91231, 65953), (91981, 12219), (64719, 16254), (60914, 8334), (15887, 96432), (42110, 28837), (7295, 83147), (50334, 7053), (3949, 33594), (1524, 98230), (17265, 98024), (75969, 36232), (89538, 5212), (13444, 55946), (69823, 81848), (32578, 74024), (52018, 98290), (59118, 40186), (61002, 16977), (69537, 44780), (92, 13937), (33715, 42663), (46347, 8312), (86196, 59301), (17128, 85014), (26429, 57682), (45888, 99588), (22750, 96110), (46809, 49251), (24521, 40071), (287, 22115), (11741, 36315), (22742, 17581), (35808, 3110), (98904, 30407), (4584, 13383), (28585, 69669), (94823, 29715), (9551, 36389), (77997, 45746), (49894, 55722), (23415, 69459), (58246, 85144), (74136, 18102), (97366, 85724), (34271, 51601), (47535, 70883), (59443, 90103), (45213, 45811), (62741, 86898), (17324, 50034), (62080, 25193), (89524, 4421), (13476, 51456), (69198, 56718), (58024, 22969), (65210, 67941), (32561, 44881), (62295, 67448), (66135, 95453), (9417, 20443), (82486, 23745), (19185, 99041), (40662, 91714), (3423, 58624), (4512, 74502), (67772, 98023), (69575, 75779), (69107, 62805), (517, 33801), (47406, 7581), (81108, 10546), (12976, 47001), (16742, 83811), (44593, 82124), (52731, 34642), (81725, 20555), (94126, 91919), (24800, 59302), (97253, 39249), (71692, 10769), (88721, 56321), (7019, 69771), (31464, 61774), (29597, 19263), (65557, 31875), (28653, 69636), (58074, 76848), (15906, 80620), (18259, 40193), (99991, 4769), (98935, 99269), (12123, 60124), (20787, 47346), (13526, 33592), (95370, 40350), (17479, 42884), (58368, 83218), (63290, 74406), (97030, 35102), (45298, 27660), (64593, 21262), (76268, 82641), (1107, 44044), (21427, 79959), (85180, 62412), (7359, 1318), (83618, 9762), (1425, 55804), (32874, 97943), (68191, 38742), (41715, 17902), (3771, 15032), (7848, 74950), (33881, 40904), (75295, 26151), (75775, 13760), (90262, 89822), (88169, 18679), (57506, 32356), (94983, 44281), (37385, 37432), (18248, 48162), (45573, 66278), (25277, 72788), (26977, 36778), (26254, 61758), (12860, 48026), (96819, 3339), (13134, 1173), (26822, 53374), (15989, 29698), (11258, 54515), (37866, 34928), (22996, 26577), (39952, 42732), (6754, 70595), (86245, 44669), (47044, 34170), (6789, 45220), (31706, 2090), (42582, 40023), (35147, 46591), (88210, 11307), (53644, 7680), (11280, 91075), (42961, 65122), (40066, 52185), (20050, 6154), (98440, 20393), (88992, 75432), (32386, 66731), (36952, 34149), (18453, 32715), (84413, 10378), (59440, 2374), (45354, 85009), (50382, 66510), (64428, 95401), (9336, 41760), (26317, 91416), (81941, 99504), (26600, 53522), (81069, 40236), (51126, 27911), (97144, 14243), (62738, 50287), (37372, 28962), (12053, 9090), (69492, 95524), (68141, 52931), (17276, 16487), (69227, 25949), (14143, 70193), (7077, 53032), (65463, 74082), (94997, 66496), (80443, 55832), (66796, 5970), (15852, 95662), (81559, 97272), (55851, 18977), (91142, 48976), (91143, 950), (79225, 31004), (61310, 20760), (74541, 90842), (80322, 11630), (84631, 544), (66785, 86591), (25650, 63252), (59635, 18586), (2964, 6741), (37091, 71148), (11984, 43077), (87505, 62049), (61925, 92290), (18808, 3937), (8300, 33268), (70850, 50661), (86024, 73730), (85161, 47116), (50193, 89155), (37773, 40845), (9251, 41688), (6940, 65399), (42479, 95630), (19401, 43102), (48069, 36040), (62760, 95013), (394, 2641), (32567, 29306), (13870, 58835), (98248, 47291), (49803, 4523), (40222, 12883), (53576, 73105), (88265, 23629), (67865, 67875), (33473, 
27144), (80219, 53893), (74878, 47341), (78070, 84803), (30003, 5600), (41103, 6145), (83490, 81076), (55059, 66736), (45015, 10239), (79555, 85819), (81808, 34970), (19235, 85480), (91807, 52177), (40887, 87009), (5003, 2687), (64964, 88122), (765, 94893), (93573, 20504), (28854, 38438), (94244, 93475), (72996, 84801), (75427, 81692), (63161, 98637), (18814, 61343), (22863, 60110), (8949, 12694), (19675, 94313), (43857, 74073), (15737, 58218), (48895, 68474), (22220, 92926), (69055, 50282), (40532, 74934), (59062, 66405), (85784, 87704), (58494, 88222), (2260, 20401), (73112, 99666), (46739, 95433), (21179, 85119), (11545, 38801), (59993, 50866), (10086, 4709), (70560, 29611), (27095, 89017), (6896, 2279), (92506, 5013), (48600, 90491), (18782, 54638), (54337, 82734), (52054, 13481), (38297, 56559), (15998, 30591), (89789, 7522), (18149, 28725), (3532, 28625), (70934, 49617), (84599, 55664), (74229, 52269), (55431, 11893), (32807, 72543), (83882, 53025), (11490, 83442), (14844, 88612), (12526, 45953), (906, 2231), (68240, 95612), (18818, 31535), (57774, 91290), (67250, 67400), (77332, 23550), (42332, 57775), (28792, 11539), (19108, 34608), (12399, 38591), (7329, 10740), (84288, 50928), (29461, 17629), (63884, 88489), (47479, 61085), (75357, 57255), (60107, 94046), (32934, 66312), (28615, 42600), (55553, 85213), (57838, 91426), (9783, 11513), (73677, 28821), (75408, 75561), (22995, 59224), (74874, 54145), (18513, 75901), (46440, 69414), (36072, 22263), (60560, 73325), (69967, 93358), (75949, 98634), (3688, 57991), (43482, 94541), (40922, 31011), (57763, 74497), (93576, 96392), (83038, 80656), (47757, 87045), (14061, 53465), (65619, 33775), (11341, 6702), (6249, 87358), (15766, 85937), (13135, 93945), (24495, 95900), (80359, 1739), (15468, 73426), (49240, 44999), (82839, 90808), (87438, 75613), (348, 73144), (99523, 85853), (21557, 70210), (64933, 1672), (38154, 17477), (97136, 67363), (96491, 8038), (97981, 3434), (54372, 27038), (88480, 86675), (21028, 21083), (43197, 4440), (31702, 78290), (66631, 24438), (11482, 17922), (90351, 39503), (46186, 32439), (73828, 6640), (56916, 26029), (62840, 1815), (20281, 28488), (18211, 30043), (65211, 93012), (43614, 58012), (90322, 77343), (64293, 94525), (59489, 39760), (93219, 78440), (74613, 9732), (38085, 19191), (58029, 48186), (88762, 1764), (28627, 21993), (49975, 41225), (70486, 43480), (82764, 96425), (27218, 78327), (17844, 73333), (70463, 37629), (10500, 33826), (97343, 66575), (82833, 51210), (77353, 45073), (27163, 39728), (78076, 46691), (80302, 39342), (77142, 1319), (87403, 80110), (53805, 27786), (50558, 74264), (83146, 31358), (11567, 4438), (30041, 54287), (91731, 18496), (57591, 93894), (72534, 59009), (98064, 59148), (69626, 66615), (20951, 43949), (61960, 68060), (48892, 67918), (61321, 56222), (75424, 77260), (4916, 81929), (68892, 81531), (28096, 28548), (62016, 107), (8593, 12030), (66743, 36772), (60174, 15106), (52844, 1923), (34768, 22065), (88988, 62910), (79214, 2998), (25675, 31376), (69959, 3614), (43885, 31708), (12206, 46548), (69924, 19343), (12984, 38980), (58250, 69438), (2580, 48684), (38112, 37124), (21842, 43150), (59384, 21921), (19908, 46678), (73396, 79529), (8274, 1557), (36975, 65519), (81069, 18712), (13692, 9148), (60617, 84762), (75749, 66154), (80375, 24553), (4257, 47056), (76880, 7687), (40714, 43448), (79112, 74791), (33119, 72730), (17670, 89183), (51614, 3921), (21247, 39857), (86756, 67673), (32792, 70035), (5917, 7197), (1762, 23130), (6455, 63664), (32806, 3729), (60469, 20511), (12522, 15149), 
(98106, 79338), (84754, 11162), (52058, 17973), (28789, 1521), (32766, 36325), (78914, 40453), (70297, 71854), (9313, 45190), (54559, 66227), (22342, 43860), (44152, 84294), (36913, 93173), (88523, 36338), (82234, 71140), (8328, 22947), (73250, 88125), (74356, 16820), (94472, 37349), (23126, 87806), (40315, 88729), (19935, 19145), (93312, 65719), (8477, 33108), (86660, 69525), (75557, 66964), (60437, 57494), (94419, 42524), (95372, 72274), (49866, 85685), (96808, 39404), (62961, 72507), (38634, 70815), (91379, 42430), (66359, 98699), (24382, 4186), (4003, 77760), (87840, 75265), (57641, 68871), (9773, 15942), (5664, 51289), (47923, 31308), (58632, 82468), (14097, 71829), (1838, 97710), (70433, 11364), (82363, 97879), (25257, 20615), (18249, 6758), (98581, 13639), (3290, 72449), (74546, 79380), (97254, 44448), (80316, 31760), (40516, 94809), (14444, 88981), (9693, 10259), (83795, 95485), (70201, 81014), (66644, 16761), (35529, 82718), (75774, 73476), (80139, 3957), (34803, 80689), (92085, 46499), (97871, 8004), (67369, 11354), (43578, 81596), (94695, 44963), (93741, 41629), (16005, 96652), (37918, 69012), (89832, 56041), (51798, 32386), (89749, 27647), (76279, 7990), (31746, 1346), (40841, 20480), (99942, 24473), (78495, 99194), (13588, 57088), (22183, 42297), (82707, 34435), (45026, 12747), (8000, 93211), (40453, 13025), (44100, 39880), (83900, 56474), (87691, 42802), (82000, 63867), (76627, 84731), (112, 92774), (34749, 97737), (59262, 57169), (95571, 44144), (74310, 68970), (63232, 92744), (53698, 21610), (39969, 75475), (39942, 28713), (81230, 50140), (97953, 96528), (86144, 48041), (96677, 49677), (22051, 48183), (33436, 54784), (5553, 11920), (67057, 17115), (57736, 72309), (8086, 85329), (72623, 94949), (13096, 48992), (63153, 56337), (30462, 1036), (75000, 24048), (62635, 50128), (91480, 83131), (25929, 79809), (96237, 76974), (59645, 20603), (31850, 29754), (91070, 36568), (30191, 33785), (86776, 67259), (49073, 39179), (16121, 73834), (84217, 52951), (95866, 47244), (63326, 73460), (134, 91953), (48189, 86069), (42971, 3700), (28643, 10479), (80151, 7446), (78798, 2655), (39135, 69364), (80244, 24904), (22818, 74964), (26753, 82419), (16858, 5212), (79891, 11215), (785, 46103), (12559, 24617), (73601, 71490), (70342, 7099), (73330, 6665), (11903, 28194), (16375, 37746), (86132, 51788), (90345, 68366), (5464, 78338), (23569, 83141), (17904, 94046), (35868, 60017), (22591, 93373), (70584, 72116), (49331, 34312), (16180, 91286), (58494, 65441), (9336, 52671), (32523, 26734), (40205, 83549), (28810, 96876), (44703, 38944), (46981, 37157), (8582, 7529), (59718, 71700), (62545, 73716), (6531, 23200), (30528, 59720), (57152, 84660), (16232, 67946), (60446, 45983), (68737, 54959), (57795, 73107), (26930, 35938), (9844, 44760), (3716, 79020), (99126, 8264), (66120, 16151), (50616, 25765), (93340, 95875), (34103, 88003), (14879, 99758), (49188, 6087), (89858, 42861), (36730, 72076), (25069, 26403), (98183, 48108), (3229, 5367), (59306, 80078), (61144, 58598), (72600, 98765), (57701, 23177), (10176, 11553), (82964, 13697), (7788, 28538), (39943, 97491), (56261, 17781), (2458, 1892), (6679, 45554), (42171, 66222), (24420, 44115), (35852, 41965), (50196, 49555), (34718, 60734), (6932, 61638), (69472, 56723), (489, 97620), (41335, 90578), (1333, 92787), (97883, 64754), (14208, 22097), (75776, 5938), (67446, 61518), (58743, 45162), (34749, 81243), (71451, 91991), (27804, 41836), (45274, 8039), (17593, 24498), (8801, 38559), (87460, 7109), (50075, 18284), (84043, 82146), (62932, 25018), (89647, 
56768), (59920, 80801), (56357, 35142), (97376, 58181), (70715, 91103), (90829, 78985), (29776, 13275), (30546, 42320), (99266, 35340), (21234, 61062), (39239, 10745), (45990, 715), (47047, 6619), (4270, 94575), (90009, 72203), (25629, 2691), (67926, 89112), (46990, 61101), (22355, 69536), (1977, 56723), (54681, 34041), (83819, 7024), (81235, 7093), (16659, 87135), (49384, 32135), (42204, 17362), (90585, 70374), (51255, 1), (31600, 70085), (90189, 95778), (57349, 87789), (83384, 93771), (20718, 15529), (10644, 53591), (84103, 62101), (91340, 48382), (82854, 84420), (12561, 53517), (64835, 45362), (54154, 75841), (46498, 31175), (75035, 49552), (9306, 53403), (68851, 49139), (13463, 42107), (2322, 36695), (55953, 12098), (60656, 80482), (78987, 19632), (31228, 18523), (98972, 80489), (32367, 98405), (25139, 5164), (5692, 60610), (36535, 70097), (80542, 74320), (87984, 46750), (98201, 41341), (35217, 46813), (81795, 69057), (83927, 41032), (60149, 26087), (39954, 48361), (64485, 61448), (87185, 14580), (74559, 93251), (88544, 83366), (74015, 15864), (78623, 69719), (16941, 80710), (16315, 58313), (47277, 59107), (16067, 66290), (63906, 59891), (20754, 67817), (44428, 10652), (95960, 99045), (52163, 26221), (65566, 22057), (26836, 38898), (57107, 79274), (39020, 74857), (53540, 84159), (76646, 44324), (27967, 40171), (28710, 56332), (84036, 28711), (68742, 57241), (40535, 34737), (62681, 85386), (30472, 58405), (85086, 33013), (67059, 47481), (30441, 55098), (97892, 71991), (90296, 42905), (22441, 18863), (19606, 77242), (11206, 58380), (23901, 49962), (84094, 33761), (64400, 28093), (64228, 94543), (71874, 20871), (25385, 73117), (63398, 20999), (77547, 51893), (80783, 65858), (39807, 80754), (10336, 90318), (7826, 55346), (30206, 10711), (94411, 67364), (33509, 14329), (65350, 17006), (65999, 55699), (82753, 61081), (38851, 11896), (15155, 48635), (19985, 75204), (37144, 5344), (26173, 39587), (61111, 30966), (16180, 22987), (60707, 43599), (30136, 74118), (7880, 43857), (97445, 30233), (62700, 24828), (90914, 89452), (64131, 56925), (25259, 39132), (47104, 43950), (93891, 21913), (84573, 91029), (8604, 79858), (33141, 25534), (12468, 90413), (97063, 76359), (80826, 26061), (64013, 99099), (82158, 38882), (25799, 7564), (25477, 69847), (73374, 58520), (48230, 9453), (91424, 72273), (64893, 11750), (46753, 48434), (15974, 94633), (14872, 27027), (14527, 21313), (25660, 64644), (54196, 15138), (6313, 10911), (36168, 47170), (45346, 76), (10305, 60286), (65283, 39977), (21804, 37972), (65389, 86954), (90674, 64458), (15838, 22392), (43540, 42503), (49584, 67828), (56711, 87887), (40075, 73696), (23832, 91552), (39002, 65562), (20451, 64664), (70783, 92171), (29319, 57694), (56217, 44247), (52856, 57873), (80560, 90902), (31068, 11280), (46996, 34739), (57527, 4554), (8410, 25816), (12269, 38319), (88054, 49939), (337, 13231), (56432, 68236), (74841, 21476), (96006, 15712), (87145, 91660), (58090, 55111), (10310, 79789), (5734, 79710), (98992, 69026), (77033, 5734), (43338, 42635), (23898, 28669), (62708, 81652), (41279, 51722), (93444, 26355), (62046, 52199), (71492, 58736), (7379, 62581), (8592, 71885), (75026, 40387), (46696, 3939), (9787, 88907), (86356, 363), (97479, 20358), (77363, 65553), (44036, 22178), (98279, 64612), (3615, 411), (77003, 93018), (41605, 88489), (55992, 83614), (19493, 21633), (34639, 97064), (94602, 89289), (45853, 26299), (84170, 73386), (9221, 51439), (41513, 68166), (37170, 17690), (82511, 59246), (96674, 27574), (99301, 45675), (42716, 41520), (56623, 49130), 
(84100, 76804), (73855, 97007), (73303, 26912), (37151, 23837), (49190, 97104), (23487, 45628), (87763, 46550), (65111, 92605), (80481, 8151), (83949, 18930), (81749, 27244), (37449, 3023), (28303, 51545), (96441, 93242), (22082, 43254), (35135, 68407), (37712, 48709), (5111, 26774), (15532, 74246), (93605, 83583), (21491, 66472), (38922, 53076), (55455, 54432), (955, 44063), (311, 91630), (53554, 4522), (29927, 65668), (7525, 16035), (44093, 76745), (21481, 78198), (76875, 5306), (56126, 76437), (96534, 16880), (85600, 68336), (4479, 81002), (80414, 11593), (8186, 61147), (5624, 32879), (79312, 20995), (40407, 41512), (91261, 66022), (93228, 75364), (21136, 40111), (92148, 60681), (42549, 7944), (60157, 15040), (63562, 88365), (69056, 72713), (78263, 89223), (3776, 33039), (30042, 59984), (64567, 20977), (24720, 39157), (63582, 75653), (45363, 20249), (58093, 53833), (27918, 93306), (25791, 92686), (15904, 862), (72093, 19257), (64125, 88986), (41717, 27989), (43165, 98675), (76840, 48170), (64508, 3535), (91964, 33435), (96686, 88673), (66648, 64594), (17927, 30539), (73615, 22800), (18580, 48077), (59803, 48202), (76805, 89886), (2744, 52965), (55596, 22519), (35358, 11629), (83029, 80047), (36120, 91930), (26066, 23035), (48857, 14268), (63020, 26197), (60623, 23252), (34911, 72754), (34808, 21593), (64067, 58963), (34509, 8739), (52686, 96405), (98282, 10463), (6495, 64680), (59016, 86968), (33928, 51222), (39609, 84992), (67603, 89875), (14723, 16144), (30751, 46856), (76874, 75024), (89584, 58806), (51278, 4113), (27187, 93483), (80039, 52159), (6132, 25127), (42358, 77498), (33838, 79064), (74147, 76851), (39752, 27366), (44888, 9809), (10887, 4135), (22303, 36417), (58690, 34613), (53998, 74014), (71567, 32438), (65110, 93406), (77365, 41299), (18044, 70636), (77346, 21236), (78408, 245), (57704, 34662), (75258, 64730), (96992, 15533), (56010, 60769), (69163, 4826), (88709, 20725), (33197, 69743), (97169, 83194), (75277, 53343), (14531, 64740), (19997, 4752), (74016, 55946), (55290, 63626), (32533, 32920), (32946, 74610), (12386, 33853), (34825, 35374), (28772, 32716), (17280, 42683), (54184, 34332), (29964, 16203), (65767, 61448), (29133, 35728), (6861, 14160), (65483, 40224), (78335, 76002), (3061, 40615), (11780, 87517), (46135, 73448), (10920, 72592), (15696, 28810), (44154, 64134), (59365, 27248), (76601, 39862), (68264, 30019), (48572, 54575), (59499, 85796), (35064, 23789), (57028, 83545), (33911, 8463), (21827, 67966), (15983, 69649), (13919, 20584), (82742, 67956), (75457, 45767), (55394, 62309), (6099, 67510), (58078, 9594), (24511, 83149), (24781, 79624), (39745, 777), (92023, 40085), (22889, 37179), (17919, 28607), (79865, 72682), (99829, 38190), (21273, 21278), (88299, 23433), (88887, 48163), (62993, 61567), (82107, 84224), (65049, 61245), (75113, 93564), (81562, 7874), (32314, 32313), (3979, 46996), (40558, 93278), (58758, 68163), (40502, 58941), (76961, 65762), (48032, 36117), (64712, 9137), (12092, 56665), (12315, 66581), (20954, 29083), (57317, 48290), (23534, 86828), (4869, 35950), (26993, 24840), (93007, 45049), (18009, 20350), (43053, 71248), (47320, 66119), (50898, 96627), (669, 40018), (89236, 44039), (47375, 63306), (61906, 6658), (2672, 84546), (59778, 72319), (14497, 71952), (42420, 87023), (96465, 46140), (32857, 22772), (4985, 35125), (61918, 28016), (90275, 24406), (49799, 10811), (74137, 63345), (26135, 86306), (92971, 65541), (40134, 95892), (38554, 46307), (48113, 16343), (63990, 66283), (17793, 49570), (21736, 79819), (13831, 27523), (8939, 93929), 
(96577, 4909), (38583, 32781), (13701, 24436), (43444, 56054), (17166, 32346), (57202, 26264), (82858, 75049), (46317, 95666), (54911, 68161), (3894, 38521), (26456, 30270), (65214, 35331), (41143, 13109), (85441, 48899), (93226, 25027), (77045, 81171), (30345, 79232), (71167, 40854), (58761, 56824), (89047, 85314), (31686, 81947), (74946, 60661), (49903, 13625), (76341, 69067), (46963, 88891), (97223, 5921), (52143, 9828), (17413, 42731), (30236, 93426), (14540, 17652), (52251, 97233), (41581, 30097), (28771, 46426), (36260, 45179), (4068, 16410), (3146, 95055), (5993, 88855), (46103, 30022), (26667, 18756), (54576, 13438), (12800, 11258), (80761, 44979), (59811, 76627), (77917, 87270), (46286, 28657), (30609, 86852), (15200, 28936), (86331, 34195), (98461, 55054), (91760, 62792), (91551, 70192), (96030, 78205), (8254, 27057), (600, 37830), (58635, 65506), (81661, 73708), (11225, 24255), (15830, 9029), (84384, 46190), (31344, 25765), (25670, 30716), (88507, 19484), (28207, 45941), (91874, 15786), (10094, 10934), (38013, 2179), (14558, 36415), (65079, 48850), (65486, 85046), (54958, 60275), (99800, 96623), (68895, 99829), (3708, 75830), (96368, 22631), (99411, 50094), (56888, 3883), (87288, 45604), (64512, 84543), (45565, 14170), (77114, 15132), (31800, 70333), (57775, 40548), (31788, 67511), (51929, 13684), (53736, 81543), (84251, 86303), (63823, 83258), (77539, 61381), (43570, 39418), (79859, 34773), (8595, 64524), (97242, 9283), (15530, 84591), (75535, 65546), (16516, 50162), (58815, 1815), (34897, 82920), (66215, 81262), (81487, 4902), (64039, 25703), (78006, 90468), (3081, 26910), (58159, 4777), (73715, 36375), (69189, 60971), (18169, 39587), (57960, 57668), (6582, 63707), (11155, 47930), (70829, 92266), (6294, 92305), (2188, 6419), (17141, 54972), (60240, 35276), (10788, 29414), (17464, 76377), (3994, 17227), (12039, 24992), (1340, 77467), (1212, 41758), (52186, 80763), (970, 78819), (92897, 68714), (6349, 77016), (22069, 77732), (78209, 72708), (71986, 56770), (8580, 87225), (97505, 63546), (67459, 39771), (50707, 57066), (68226, 54176), (65425, 27407), (57723, 19288), (56974, 90449), (55878, 1264), (46939, 79863), (34868, 4652), (39872, 78482), (92657, 20961), (99690, 28825), (33761, 52922), (73738, 64995), (92092, 3237), (2463, 45045), (43984, 69864), (60146, 5333), (58127, 79082), (84395, 73949), (50818, 68457), (48585, 47420), (60878, 67337), (16573, 30621), (46524, 14168), (87995, 44854), (73143, 77177), (33967, 37276), (95038, 17670), (69022, 16038), (58485, 90526), (1705, 1443), (97969, 40011), (14719, 42770), (8695, 27192), (47546, 51349), (75263, 24419), (25420, 66286), (39198, 41401), (77896, 85583), (28265, 76766), (88836, 48759), (47768, 39582), (65103, 3167), (92171, 85360), (1549, 79296), (71725, 16645), (87349, 29290), (66201, 61712), (43525, 70338), (99025, 63090), (3687, 79963), (63600, 92088), (2480, 1359), (31384, 63603), (29650, 24391), (8552, 82260), (16729, 29139), (26503, 4767), (88945, 19824), (66695, 95696), (84016, 35417), (71521, 22206), (88433, 55606), (66380, 81316), (30573, 36000), (85223, 20494), (99672, 82813), (65500, 78258), (55817, 98414), (43248, 53800), (62787, 21018), (48981, 36258), (41216, 98585), (18576, 18004), (27272, 72860), (76774, 87664), (26737, 11514), (24472, 42538), (5860, 81355), (29066, 10012), (75308, 28561), (23609, 10007), (10007, 19146), (15568, 1487), (80743, 85294), (11207, 90623), (61258, 63879), (34363, 59005), (74884, 2528), (26604, 52738), (33304, 1202), (20381, 18984), (81968, 92425), (4407, 84677), (2112, 79756), 
(46970, 4367), (36854, 23482), (88346, 75107), (10643, 31806), (21351, 5590), (69317, 53292), (76711, 10085), (70333, 90592), (88818, 822), (23927, 48141), (84710, 33870), (96932, 22686), (5783, 87468), (7785, 11585), (49497, 33764), (13506, 55969), (37840, 78455), (21532, 22292), (97306, 42065), (6579, 40749), (2593, 4995), (81985, 23611), (63888, 98317), (44975, 83777), (57688, 42688), (641, 45787), (7316, 1967), (43837, 18274), (89994, 32770), (4285, 50388), (84699, 41841), (19564, 20683), (76027, 62278), (26140, 11288), (39656, 79954), (16718, 17335), (11583, 21283), (55441, 32178), (6810, 87225), (27191, 54323), (53406, 31512), (48003, 80077), (78497, 29570), (39140, 66619), (12651, 44576), (1761, 88410), (47139, 20766), (39183, 69367), (80479, 23285), (1568, 78535), (18476, 35058), (93551, 81063), (12059, 60021), (23356, 26572), (79975, 35434), (82230, 67436), (20243, 92343), (47809, 10634), (69537, 60167), (3873, 77404), (1136, 27956), (17470, 24156), (35849, 19150), (74760, 37961), (36660, 44448), (36009, 96619), (87110, 84921), (16080, 60637), (36046, 17351), (96403, 99978), (11060, 68629), (36081, 23464), (4684, 11817), (50126, 82936), (55262, 54135), (53717, 66293), (58028, 28065), (92791, 99766), (46266, 77711), (61912, 65782), (38677, 41158), (4001, 46340), (70987, 12784), (14819, 42857), (78985, 99956), (79737, 42497), (55305, 7329), (64103, 24170), (49093, 22115), (2465, 97282), (29009, 15663), (80976, 86477), (16439, 56685), (53482, 15293), (5038, 5991), (67060, 84201), (54350, 38095), (67539, 68292), (26464, 64908), (92909, 12867), (83517, 26474), (76081, 85247), (23250, 66616), (20783, 34330), (43074, 10165), (93968, 70375), (83802, 70820), (19871, 63094), (35699, 36506), (23905, 2401), (27847, 31968), (76714, 44112), (62599, 32720), (10362, 81985), (35708, 2090), (13071, 39035), (71851, 59493), (62833, 48082), (77164, 22804), (6469, 43229), (92051, 3719), (51910, 77689), (91470, 63253), (57914, 57836), (98819, 97813), (35975, 488), (51431, 34061), (45414, 85971), (56563, 93517), (40789, 53103), (9242, 20814), (784, 22584), (8740, 56894), (28504, 378), (8287, 96930), (74232, 97496), (61565, 7904), (9779, 45122), (33767, 48471), (16766, 10722), (47764, 478), (14374, 30099), (89134, 19977), (60860, 93201), (71123, 29840), (57159, 34410), (82411, 99537), (11307, 3733), (70264, 43028), (30418, 19372), (46543, 31506), (33043, 98980), (21137, 63374), (85640, 36957), (6790, 60751), (78771, 43700), (33808, 38263), (27232, 35152), (39925, 5062), (3120, 65621), (39319, 6795), (77468, 94964), (10481, 43009), (24237, 2103), (16837, 55667), (43846, 2874), (78786, 66811), (92185, 62395), (26318, 87942), (6208, 80815), (66952, 71885), (51435, 25450), (21443, 69801), (92554, 81977), (58912, 82288), (59681, 46177), (60397, 65866), (72065, 13318), (2848, 73852), (7577, 83238), (209, 40659), (72103, 15266), (23365, 67286), (14600, 29269), (85541, 63289), (25427, 54812), (22967, 54965), (81525, 27126), (20473, 55455), (84067, 25794), (46798, 79332), (93825, 74677), (447, 5904), (65661, 92916), (54428, 76482), (1025, 34415), (63761, 30038), (93025, 15090), (98807, 93426), (57562, 59615), (84884, 30620), (75066, 71824), (51199, 37934), (95530, 15260), (513, 98278), (62995, 28267), (47535, 69776), (39266, 4696), (14742, 13225), (44268, 16548), (45976, 41680), (99638, 30285), (85609, 5578), (28156, 12884), (76188, 86573), (44639, 15480), (86789, 72636), (18702, 13337), (96638, 59398), (90988, 26909), (85069, 95193), (67262, 38337), (51694, 19659), (93341, 80988), (48733, 88460), (55630, 22866), 
(96203, 10316), (30644, 68318), (79292, 63136), (60185, 73681), (60474, 19078), (48721, 82811), (19713, 99527), (17537, 55235), (296, 8353), (58691, 72158), (66734, 92490), (87642, 7174), (78285, 35337), (19503, 14273), (10713, 64116), (85966, 98738), (56561, 99347), (14869, 89963), (95126, 30748), (89272, 79060), (69888, 7193), (32583, 74564), (95542, 52128), (78360, 42675), (86062, 68404), (38732, 21411), (92935, 45415), (99027, 83925), (73232, 37405), From ec933d9d03657edd468c11e2a5f35d4dd219e1b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 12:09:51 +0300 Subject: [PATCH 0468/2229] Better naming --- src/Storages/AlterCommands.cpp | 27 ++++++++++++++-- src/Storages/IStorage.cpp | 8 ++--- src/Storages/IStorage.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 32 ++++++++----------- src/Storages/MergeTree/MergeTreeData.h | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 7 ++++ src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 22 ++++++------- tests/queries/0_stateless/00933_alter_ttl.sql | 3 +- 9 files changed, 65 insertions(+), 40 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 5652d1717ec..4755ad0fd35 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -314,8 +314,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } }); - if (metadata.sorting_key.sample_block.has(column_name)) - metadata.sorting_key = KeyDescription::getKeyFromAST(metadata.sorting_key.definition_ast, metadata.columns, context); } else if (type == MODIFY_ORDER_BY) { @@ -714,6 +712,31 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co if (!command.ignore) command.apply(metadata_copy, context); + /// Changes in columns may lead to changes in key expressions + metadata_copy.sorting_key = KeyDescription::getKeyFromAST(metadata_copy.sorting_key.definition_ast, metadata_copy.columns, context); + + if (metadata_copy.primary_key.definition_ast != nullptr) + { + metadata_copy.primary_key = KeyDescription::getKeyFromAST(metadata_copy.primary_key.definition_ast, metadata_copy.columns, context); + } + else + { + metadata_copy.primary_key = metadata_copy.sorting_key; + metadata_copy.primary_key.definition_ast = nullptr; + } + + /// Changes in columns may lead to changes in TTL expressions + auto column_ttl_asts = metadata_copy.columns.getColumnTTLs(); + for (const auto & [name, ast] : column_ttl_asts) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, metadata_copy.columns, context, metadata_copy.primary_key); + metadata_copy.column_ttls_by_name[name] = new_ttl_entry; + } + + if (metadata_copy.table_ttl.definition_ast != nullptr) + metadata_copy.table_ttl = TTLTableDescription::getTTLForTableFromAST( + metadata_copy.table_ttl.definition_ast, metadata_copy.columns, context, metadata_copy.primary_key); + metadata = std::move(metadata_copy); } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 31dd87a7ce4..83c95d9a20f 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -380,10 +380,10 @@ void IStorage::alter( { lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); - StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - params.apply(old_metadata, context); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, old_metadata); -
setColumns(std::move(old_metadata.columns)); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setColumns(std::move(new_metadata.columns)); } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 82012eacc03..c06b969ebf5 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -154,6 +154,7 @@ public: /// thread-unsafe part. lockStructure must be acquired void setSettingsChanges(const ASTPtr & settings_changes_); bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } + /// Select query for *View storages. const SelectQueryDescription & getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 50971bbc881..4a65d05cc8e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -185,7 +185,7 @@ MergeTreeData::MergeTreeData( min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } - setTTLExpressions(metadata_.columns, metadata_.table_ttl); + setTTLExpressions(metadata_); /// format_file always contained on any data path PathWithDisk version_file; @@ -296,9 +296,9 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, if (new_primary_key.definition_ast == nullptr) { /// We copy sorting key, and restore definition_ast to empty value, - /// because in merge tree code we sometimes chech, that our primary key - /// is fake (copied from sorting key, i.e. isPrimaryKeyDefined() == - /// false, but hasSortingKey() == true) + /// because in merge tree code we check that our primary key is fake + /// (copied from sorting key, i.e.
isPrimaryKeyDefined() == false, but + /// hasSortingKey() == true) new_primary_key = new_metadata.sorting_key; new_primary_key.definition_ast = nullptr; } @@ -522,14 +522,11 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) /// Todo replace columns with TTL for columns -void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns, - const TTLTableDescription & new_table_ttl, bool only_check) +void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata, bool only_check) { - auto new_column_ttls_asts = new_columns.getColumnTTLs(); + auto new_column_ttls = new_metadata.column_ttls_by_name; - TTLColumnsDescription new_column_ttl_by_name = getColumnTTLs(); - - if (!new_column_ttls_asts.empty()) + if (!new_column_ttls.empty()) { NameSet columns_ttl_forbidden; @@ -541,20 +538,18 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns, for (const auto & col : getColumnsRequiredForSortingKey()) columns_ttl_forbidden.insert(col); - for (const auto & [name, ast] : new_column_ttls_asts) + for (const auto & [name, ttl_description] : new_column_ttls) { if (columns_ttl_forbidden.count(name)) throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); - else - { - auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_columns, global_context, getPrimaryKey()); - new_column_ttl_by_name[name] = new_ttl_entry; - } } + if (!only_check) - setColumnTTLs(new_column_ttl_by_name); + setColumnTTLs(new_column_ttls); } + auto new_table_ttl = new_metadata.table_ttl; + if (new_table_ttl.definition_ast) { for (const auto & move_ttl : new_table_ttl.move_ttl) @@ -570,7 +565,6 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns, } } - if (!only_check) { auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); @@ -1367,7 +1361,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S setProperties(new_metadata, /* only_check = */ true); - setTTLExpressions(new_metadata.columns, new_metadata.table_ttl, /* only_check = */ true); + setTTLExpressions(new_metadata, /* only_check = */ true); if (hasSettingsChanges()) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 0e2e9c71324..a239f8cc0d7 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -784,8 +784,7 @@ protected: void initPartitionKey(const KeyDescription & new_partition_key); - void setTTLExpressions(const ColumnsDescription & columns, - const TTLTableDescription & new_table_ttl, bool only_check = false); + void setTTLExpressions(const StorageInMemoryMetadata & new_metadata, bool only_check = false); void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 50775a04255..90eee680d65 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -530,6 +530,13 @@ static StoragePtr create(const StorageFactory::Arguments & args) for (auto & constraint : args.query.columns_list->constraints->children) metadata.constraints.constraints.push_back(constraint); + auto column_ttl_asts = args.columns.getColumnTTLs(); + for (const auto & [name, ast] : column_ttl_asts) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, args.columns, args.context, metadata.primary_key); + 
metadata.column_ttls_by_name[name] = new_ttl_entry; + } + storage_settings->loadFromQuery(*args.storage_def); if (args.storage_def->settings) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 76f752abb68..2a7efa164d4 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -282,7 +282,7 @@ void StorageMergeTree::alter( /// Reinitialize primary key because primary key column types might have changed. setProperties(new_metadata); - setTTLExpressions(new_metadata.columns, new_metadata.table_ttl); + setTTLExpressions(new_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8efe22e03f9..42adf4d2b45 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -472,9 +472,9 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff) { - StorageInMemoryMetadata current_metadata = getInMemoryMetadata(); - if (new_columns != current_metadata.columns) - current_metadata.columns = new_columns; + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + if (new_columns != new_metadata.columns) + new_metadata.columns = new_columns; if (!metadata_diff.empty()) { @@ -492,37 +492,37 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column tuple->arguments->children = new_sorting_key_expr_list->children; order_by_ast = tuple; } - current_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, current_metadata.columns, global_context); + new_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, new_metadata.columns, global_context); if (!isPrimaryKeyDefined()) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. 
- current_metadata.primary_key = getSortingKey(); + new_metadata.primary_key = getSortingKey(); } } if (metadata_diff.skip_indices_changed) - current_metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, global_context); + new_metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, global_context); if (metadata_diff.constraints_changed) - current_metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); + new_metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); if (metadata_diff.ttl_table_changed) { ParserTTLExpressionList parser; auto ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - current_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl_for_table_ast, current_metadata.columns, global_context, current_metadata.primary_key); + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST(ttl_for_table_ast, new_metadata.columns, global_context, new_metadata.primary_key); } } auto table_id = getStorageID(); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, current_metadata); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, new_metadata); /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. - setProperties(current_metadata); - setTTLExpressions(new_columns, current_metadata.table_ttl); + setProperties(new_metadata); + setTTLExpressions(new_metadata); } diff --git a/tests/queries/0_stateless/00933_alter_ttl.sql b/tests/queries/0_stateless/00933_alter_ttl.sql index d3298b3fbe1..4e0fde00952 100644 --- a/tests/queries/0_stateless/00933_alter_ttl.sql +++ b/tests/queries/0_stateless/00933_alter_ttl.sql @@ -21,6 +21,7 @@ drop table if exists ttl; create table ttl (d Date, a Int) engine = MergeTree order by tuple() partition by toDayOfMonth(d); alter table ttl modify column a Int ttl d + interval 1 day; desc table ttl; -alter table ttl modify column d Int ttl d + interval 1 day; -- { serverError 524 } +alter table ttl modify column d Int ttl d + interval 1 day; -- { serverError 43 } +alter table ttl modify column d DateTime ttl d + interval 1 day; -- { serverError 524 } drop table if exists ttl; From 7078355449fb79eee87c4130058e9bcce7f0ccce Mon Sep 17 00:00:00 2001 From: Vxider Date: Wed, 10 Jun 2020 18:06:24 +0800 Subject: [PATCH 0469/2229] remove unused local variables --- src/Parsers/ParserCreateQuery.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 6d8e81edbea..c54033bd27d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -489,15 +489,12 @@ bool ParserCreateLiveViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & e ParserToken s_dot(TokenType::Dot); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); - ParserStorage storage_p; - ParserIdentifier name_p; ParserTablePropertiesDeclarationList table_properties_p; ParserSelectWithUnionQuery select_p; ASTPtr table; ASTPtr to_table; ASTPtr columns_list; - ASTPtr storage; ASTPtr as_database; ASTPtr as_table; ASTPtr select; From 7f09bb82642ad72371ec461d7ba094df94e4dcca Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 10 Jun 2020 12:52:41 +0200 Subject: [PATCH 0470/2229] Replase back/forward quotes and apostrophes by straight --- .../table-engines/special/distributed.md | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index bc2a4dc483d..1c4c6299b51 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot The Distributed engine accepts parameters: -- the cluster name in the server’s config file +- the cluster name in the server's config file - the name of a remote database @@ -31,13 +31,13 @@ Example: Distributed(logs, default, hits[, sharding_key[, policy_name]]) ``` -Data will be read from all servers in the ‘logs’ cluster, from the default.hits table located on every server in the cluster. +Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster. Data is not only read but is partially processed on the remote servers (to the extent that this is possible). For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated. Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase(). -logs – The cluster name in the server’s config file. +logs – The cluster name in the server's config file. Clusters are set like this: @@ -75,15 +75,15 @@ Clusters are set like this: ``` -Here a cluster is defined with the name ‘logs’ that consists of two shards, each of which contains two replicas. +Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas. Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards). Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas). Cluster names must not contain dots. The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server: -- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server. -- `port` – The TCP port for messenger activity (‘tcp\_port’ in the config, usually set to 9000). Do not confuse it with http\_port. +- `host` – The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server. +- `port` – The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port. - `user` – Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. 
For more information, see the section [Access rights](../../../operations/access-rights.md). - `password` – The password for connecting to a remote server (not masked). Default value: empty string. - `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `9440` and have correct certificates. @@ -97,44 +97,44 @@ You can specify just one of the shards (in this case, query processing should be You can specify as many clusters as you wish in the configuration. -To view your clusters, use the ‘system.clusters’ table. +To view your clusters, use the `system.clusters` table. -The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers). +The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers). -The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table – use the ‘remote’ table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md). +The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table – use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md). There are two methods for writing data to a cluster: -First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently. +First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently. -Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn’t mean anything in this case. +Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). 
In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case. Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19. -Each shard can have the ‘internal\_replication’ parameter defined in the config file. +Each shard can have the `internal_replication` parameter defined in the config file. -If this parameter is set to ‘true’, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself. +If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. In other words, if the table where data will be written is going to replicate them itself. -If it is set to ‘false’ (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data. +If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data. -To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from ‘prev\_weight’ to ‘prev\_weights + weight’, where ‘prev\_weights’ is the total weight of the shards with the smallest number, and ‘weight’ is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19). +To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19). -The sharding expression can be any expression from constants and table columns that returns an integer. 
For example, you can use the expression ‘rand()’ for random distribution of data, or ‘UserID’ for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
+The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
 
-A simple reminder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
+A simple remainder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
 
-SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don’t have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
+SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
 
 You should be concerned about the sharding scheme in the following cases:
 
 - Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
 - A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners).
In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries. Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. -If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the ‘broken’ subdirectory and no longer used. +If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used. -When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). +When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas). 
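To tie the routing rules above together, here is a minimal sketch. It assumes a local table `default.hits` with a `UInt64` column `UserID` on every shard of the `logs` cluster from the examples above; the table and column names are illustrative, not prescribed:

``` sql
-- Distributed table over the `logs` cluster, sharded by intHash64(UserID).
-- Assumes default.hits already exists on every shard (hypothetical schema).
CREATE TABLE default.hits_all AS default.hits
ENGINE = Distributed(logs, default, hits, intHash64(UserID));

-- With shard weights 9 and 10 (total weight 19), a row is routed to the
-- first shard when intHash64(UserID) % 19 falls in [0, 9), else the second.
INSERT INTO default.hits_all SELECT * FROM default.hits;

-- Reads fan out to one replica of each shard and are merged on the initiator.
SELECT count() FROM default.hits_all;
```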
## Virtual Columns {#virtual-columns} From 1effc16ae8e6269991177eb33490cf02cce172aa Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 14:05:54 +0300 Subject: [PATCH 0471/2229] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 955f9d1a5d1..c7fbbb31ad1 100644 --- a/README.md +++ b/README.md @@ -15,5 +15,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events +* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020. * [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date. * [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date. From 8be957ecb5d4869b311629d741d5b56ef63510cf Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 14:16:31 +0300 Subject: [PATCH 0472/2229] Better checks around metadata --- programs/copier/Internals.cpp | 5 +- src/Storages/AlterCommands.cpp | 10 +- src/Storages/IStorage.cpp | 2 +- src/Storages/IStorage.h | 2 +- src/Storages/KeyDescription.cpp | 12 ++- src/Storages/KeyDescription.h | 9 +- src/Storages/MergeTree/MergeTreeData.cpp | 100 ++++++------------ src/Storages/MergeTree/MergeTreeData.h | 15 ++- .../MergeTree/registerStorageMergeTree.cpp | 15 ++- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageMerge.cpp | 2 +- src/Storages/StorageMerge.h | 2 +- src/Storages/StorageNull.cpp | 2 +- src/Storages/StorageNull.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- ...213_alter_rename_primary_key_zookeeper.sql | 12 +-- 21 files changed, 94 insertions(+), 110 deletions(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0613381a763..518395e3b7d 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -1,5 +1,6 @@ #include "Internals.h" #include +#include namespace DB { @@ -184,9 +185,9 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) const auto sorting_key_ast = extractOrderBy(storage_ast); const auto primary_key_ast = extractPrimaryKey(storage_ast); - const auto sorting_key_expr_list = MergeTreeData::extractKeyExpressionList(sorting_key_ast); + const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); const auto primary_key_expr_list = primary_key_ast - ? MergeTreeData::extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); + ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless. 
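Before the storage diffs that follow, it helps to see what the `additional_key_column` being threaded through `KeyDescription` means at the SQL level. The sketch below is inferred from the VersionedCollapsing branch removed later in this commit; the table name and schema are hypothetical:

``` sql
-- For VersionedCollapsingMergeTree the engine appends the version column to
-- the sorting key internally: the effective sorting key is (key, version),
-- while the primary key remains just (key). The diffs below encode this by
-- passing `merging_param_key_arg` only when building the sorting key.
CREATE TABLE t_versioned
(
    key UInt64,
    value String,
    sign Int8,
    version UInt32
)
ENGINE = VersionedCollapsingMergeTree(sign, version)
ORDER BY key;
```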
diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 4755ad0fd35..52f34d1fdd5 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -324,7 +324,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con metadata.primary_key = metadata.sorting_key; } - metadata.sorting_key = KeyDescription::getKeyFromAST(order_by, metadata.columns, context); + metadata.sorting_key = KeyDescription::getKeyFromAST(order_by, metadata.columns, context, metadata.sorting_key.additional_key_column); } else if (type == COMMENT_COLUMN) { @@ -713,7 +713,11 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co command.apply(metadata_copy, context); /// Changes in columns may lead to changes in keys expression - metadata_copy.sorting_key = KeyDescription::getKeyFromAST(metadata_copy.sorting_key.definition_ast, metadata_copy.columns, context); + metadata_copy.sorting_key = KeyDescription::getKeyFromAST( + metadata_copy.sorting_key.definition_ast, + metadata_copy.columns, + context, + metadata_copy.sorting_key.additional_key_column); if (metadata_copy.primary_key.definition_ast != nullptr) { @@ -721,7 +725,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co } else { - metadata_copy.primary_key = metadata_copy.sorting_key; + metadata_copy.primary_key = KeyDescription::getKeyFromAST(metadata_copy.sorting_key.definition_ast, metadata_copy.columns, context); metadata_copy.primary_key.definition_ast = nullptr; } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 83c95d9a20f..58cbb2bb7d6 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -387,7 +387,7 @@ void IStorage::alter( } -void IStorage::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) +void IStorage::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const { for (const auto & command : commands) { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index c06b969ebf5..1a5a0a0753d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -359,7 +359,7 @@ public: /** Checks that alter commands can be applied to storage. For example, columns can be modified, * or primary key can be changes, etc. */ - virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings); + virtual void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const; /** ALTER tables with regard to its partitions. * Should handle locks for each command on its own. diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 5d44ced78db..a6e15bcf816 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -17,6 +17,7 @@ KeyDescription::KeyDescription(const KeyDescription & other) , sample_block(other.sample_block) , column_names(other.column_names) , data_types(other.data_types) + , additional_key_column(other.additional_key_column ? 
other.additional_key_column->clone() : nullptr) { } @@ -36,18 +37,23 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) sample_block = other.sample_block; column_names = other.column_names; data_types = other.data_types; + if (other.additional_key_column) + additional_key_column = other.additional_key_column->clone(); + else + additional_key_column.reset(); return *this; } -KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, ASTPtr additional_key_expression) +KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const ASTPtr & additional_key_column) { KeyDescription result; result.definition_ast = definition_ast; + result.additional_key_column = additional_key_column; result.expression_list_ast = extractKeyExpressionList(definition_ast); - if (additional_key_expression) - result.expression_list_ast->children.push_back(additional_key_expression); + if (additional_key_column != nullptr) + result.expression_list_ast->children.push_back(additional_key_column); const auto & children = result.expression_list_ast->children; for (const auto & child : children) diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index 980bd3baf00..c41154dec6f 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -30,9 +30,16 @@ struct KeyDescription /// Types from sample block ordered in columns order. DataTypes data_types; + /// Additional key column added by storage + ASTPtr additional_key_column; + /// Parse key structure from key definition. Requires all columns, available /// in storage. - static KeyDescription getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, ASTPtr additional_key_expression = nullptr); + static KeyDescription getKeyFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context, + const ASTPtr & additional_key_expression = nullptr); KeyDescription() = default; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4a65d05cc8e..9cd260b6748 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -144,7 +144,7 @@ MergeTreeData::MergeTreeData( setSettingsChanges(metadata_.settings_changes); const auto settings = getSettings(); - setProperties(metadata_, /*only_check*/ false, attach); + setProperties(metadata_, attach); /// NOTE: using the same columns list as is read when performing actual merges. 
    merging_params.check(getColumns().getAllPhysical());
 
@@ -275,33 +275,13 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam
     }
 }
 
-void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool only_check, bool attach)
+void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, bool attach) const
 {
-    KeyDescription new_primary_key = new_metadata.primary_key;
-
     if (!new_metadata.sorting_key.definition_ast)
         throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
 
-    KeyDescription new_sorting_key;
-    if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
-        new_sorting_key = KeyDescription::getKeyFromAST(
-            new_metadata.sorting_key.definition_ast,
-            new_metadata.columns,
-            global_context,
-            std::make_shared<ASTIdentifier>(merging_params.version_column));
-    else
-        new_sorting_key = new_metadata.sorting_key;
-
-    /// Primary key not defined at all
-    if (new_primary_key.definition_ast == nullptr)
-    {
-        /// We copy sorting key, and restore definition_ast to empty value,
-        /// because in merge tree code we check, that our primary key is fake
-        /// (copied from sorting key, i.e. isPrimaryKeyDefined() == false, but
-        /// hasSortingKey() == true)
-        new_primary_key = new_metadata.sorting_key;
-        new_primary_key.definition_ast = nullptr;
-    }
+    KeyDescription new_sorting_key = new_metadata.sorting_key;
+    KeyDescription new_primary_key = new_metadata.primary_key;
 
     size_t sorting_key_size = new_sorting_key.column_names.size();
     size_t primary_key_size = new_primary_key.column_names.size();
@@ -333,7 +313,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata,
     auto all_columns = new_metadata.columns.getAllPhysical();
 
     /// Order by check AST
-    if (hasSortingKey() && only_check)
+    if (hasSortingKey())
     {
         /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key
         /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key).
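The check described in the comment above has a direct SQL-level consequence; a sketch with a hypothetical table (the full check also constrains default values of the new column, which is omitted here):

``` sql
CREATE TABLE t (a UInt64, b UInt64) ENGINE = MergeTree() ORDER BY a;

-- Allowed: `c` is created in the same ALTER, so every existing part is
-- trivially sorted by (a, c).
ALTER TABLE t ADD COLUMN c UInt64, MODIFY ORDER BY (a, c);

-- Rejected: `b` already holds data that is not sorted within parts.
ALTER TABLE t MODIFY ORDER BY (a, b);
```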
@@ -400,15 +380,18 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, checkKeyExpression(*new_sorting_key.expression, new_sorting_key.sample_block, "Sorting"); - if (!only_check) - { - /// Other parts of metadata initialized is separate methods - setColumns(std::move(new_metadata.columns)); - setSecondaryIndices(std::move(new_metadata.secondary_indices)); - setConstraints(std::move(new_metadata.constraints)); - setSortingKey(std::move(new_sorting_key)); - setPrimaryKey(std::move(new_primary_key)); - } +} + +void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool attach) +{ + checkProperties(new_metadata, attach); + + /// Other parts of metadata initialized is separate methods + setColumns(std::move(new_metadata.columns)); + setSecondaryIndices(std::move(new_metadata.secondary_indices)); + setConstraints(std::move(new_metadata.constraints)); + setSortingKey(std::move(new_metadata.sorting_key)); + setPrimaryKey(std::move(new_metadata.primary_key)); } namespace @@ -442,27 +425,6 @@ ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression() cons return getCombinedIndicesExpression(getSortingKey(), getSecondaryIndices(), getColumns(), global_context); } -ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node) -{ - if (!node) - return std::make_shared(); - - const auto * expr_func = node->as(); - - if (expr_func && expr_func->name == "tuple") - { - /// Primary key is specified in tuple, extract its arguments. - return expr_func->arguments->clone(); - } - else - { - /// Primary key consists of one column. - auto res = std::make_shared(); - res->children.push_back(node); - return res; - } -} - void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) { @@ -521,8 +483,7 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) } -/// Todo replace columns with TTL for columns -void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata, bool only_check) +void MergeTreeData::checkTTLExpressios(const StorageInMemoryMetadata & new_metadata) const { auto new_column_ttls = new_metadata.column_ttls_by_name; @@ -543,11 +504,7 @@ void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metada if (columns_ttl_forbidden.count(name)) throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); } - - if (!only_check) - setColumnTTLs(new_column_ttls); } - auto new_table_ttl = new_metadata.table_ttl; if (new_table_ttl.definition_ast) @@ -564,15 +521,18 @@ void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metada throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION); } } - - if (!only_check) - { - auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); - setTableTTLs(new_table_ttl); - } } } +/// Todo replace columns with TTL for columns +void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata) +{ + checkTTLExpressios(new_metadata); + setColumnTTLs(new_metadata.column_ttls_by_name); + auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); + setTableTTLs(new_metadata.table_ttl); +} + void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const { @@ -1264,7 +1224,7 @@ bool isMetadataOnlyConversion(const IDataType * from, const IDataType * to) } -void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) +void MergeTreeData::checkAlterIsPossible(const AlterCommands & 
commands, const Settings & settings) const { /// Check that needed transformations can be applied to the list of columns without considering type conversions. StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); @@ -1359,9 +1319,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S } } - setProperties(new_metadata, /* only_check = */ true); + checkProperties(new_metadata); - setTTLExpressions(new_metadata, /* only_check = */ true); + checkTTLExpressios(new_metadata); if (hasSettingsChanges()) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index a239f8cc0d7..521f80bed70 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -492,7 +492,7 @@ public: /// - all type conversions can be done. /// - columns corresponding to primary key, indices, sign, sampling expression and date are not affected. /// If something is wrong, throws an exception. - void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const override; /// Change MergeTreeSettings void changeSettings( @@ -508,12 +508,6 @@ public: broken_part_callback(name); } - /** Get the key expression AST as an ASTExpressionList. It can be specified - * in the tuple: (CounterID, Date), or as one column: CounterID. - */ - static ASTPtr extractKeyExpressionList(const ASTPtr & node); - - /// Check that the part is not broken and calculate the checksums for it if they are not present. MutableDataPartPtr loadPartAndFixMetadata(const VolumePtr & volume, const String & relative_path) const; @@ -780,11 +774,14 @@ protected: /// The same for clearOldTemporaryDirectories. 
    std::mutex clear_old_temporary_directories_mutex;
 
-    void setProperties(const StorageInMemoryMetadata & new_metadata, bool only_check = false, bool attach = false);
+    void checkProperties(const StorageInMemoryMetadata & new_metadata, bool attach = false) const;
+
+    void setProperties(const StorageInMemoryMetadata & new_metadata, bool attach = false);
 
     void initPartitionKey(const KeyDescription & new_partition_key);
 
-    void setTTLExpressions(const StorageInMemoryMetadata & new_metadata, bool only_check = false);
+    void checkTTLExpressios(const StorageInMemoryMetadata & new_metadata) const;
+    void setTTLExpressions(const StorageInMemoryMetadata & new_metadata);
 
     void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const;
 
diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 90eee680d65..050d0790bb5 100644
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -417,6 +417,8 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         ++arg_num;
     }
 
+    ASTPtr merging_param_key_arg = nullptr;
+
     if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
     {
         if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.sign_column))
@@ -480,6 +482,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
                 ErrorCodes::BAD_ARGUMENTS);
 
         --arg_cnt;
+        merging_param_key_arg = std::make_shared<ASTIdentifier>(merging_params.version_column);
     }
 
     String date_column_name;
@@ -502,13 +505,15 @@ static StoragePtr create(const StorageFactory::Arguments & args)
                 "If you don't want this table to be sorted, use ORDER BY tuple()",
                 ErrorCodes::BAD_ARGUMENTS);
 
-        metadata.sorting_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context);
+        metadata.sorting_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg);
 
         if (args.storage_def->primary_key)
+        {
             metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context);
+        }
         else
         {
-            metadata.primary_key = metadata.sorting_key;
+            metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context);
             metadata.primary_key.definition_ast = nullptr;
         }
 
@@ -560,7 +565,11 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         }
 
         /// Now only two parameters remain - primary_key, index_granularity.
- metadata.sorting_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context); + metadata.sorting_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context, merging_param_key_arg); + + metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context); + metadata.primary_key.definition_ast = nullptr; + ++arg_num; const auto * ast = engine_args[arg_num]->as(); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 1f2c05bf7ad..08dc81b4945 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -723,7 +723,7 @@ void StorageBuffer::reschedule() flush_handle->scheduleAfter(std::min(min, max) * 1000); } -void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) +void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const { for (const auto & command : commands) { diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 10a4482c801..02fd35136bf 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -85,7 +85,7 @@ public: bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override; - void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; /// The structure of the subordinate table is not checked and does not change. void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 7e7460a013f..3383c609520 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -538,7 +538,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & c } -void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) +void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const { for (const auto & command : commands) { diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 4067012c449..63021e0a169 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -84,7 +84,7 @@ public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; void renameOnDisk(const String & new_path_to_table_data); - void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; /// in the sub-tables, you need to manually add and delete columns /// the structure of the sub-table is not checked diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 08984a6e1f3..a0c2fa87eb2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -213,7 +213,7 @@ void StorageMaterializedView::alter( } -void StorageMaterializedView::checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) +void StorageMaterializedView::checkAlterIsPossible(const AlterCommands & commands, const Settings & 
settings) const { if (settings.allow_experimental_alter_materialized_view_structure) { diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 06bf659e05f..480c75aa114 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -41,7 +41,7 @@ public: void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; - void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const override; void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 64d0f11f853..8264eaa4cb6 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -372,7 +372,7 @@ DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const Context & cont } -void StorageMerge::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) +void StorageMerge::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const { for (const auto & command : commands) { diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 401c5facf0c..adf4a40e675 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -37,7 +37,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; /// you need to add and remove columns in the sub-tables manually /// the structure of sub-tables is not checked diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index b6e4605530d..182ce09ef96 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -32,7 +32,7 @@ void registerStorageNull(StorageFactory & factory) }); } -void StorageNull::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) +void StorageNull::checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const { for (const auto & command : commands) { diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index d9aeb60c260..5fb4a16a24b 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -40,7 +40,7 @@ public: return std::make_shared(getSampleBlock()); } - void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) override; + void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 42adf4d2b45..7e42b75104d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -492,7 +492,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column tuple->arguments->children = new_sorting_key_expr_list->children; order_by_ast = tuple; } - new_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, new_metadata.columns, global_context); + 
new_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, new_metadata.columns, global_context, new_metadata.sorting_key.additional_key_column); if (!isPrimaryKeyDefined()) { diff --git a/tests/queries/0_stateless/01213_alter_rename_primary_key_zookeeper.sql b/tests/queries/0_stateless/01213_alter_rename_primary_key_zookeeper.sql index 5c62d5d9107..daec7666e2d 100644 --- a/tests/queries/0_stateless/01213_alter_rename_primary_key_zookeeper.sql +++ b/tests/queries/0_stateless/01213_alter_rename_primary_key_zookeeper.sql @@ -17,11 +17,11 @@ INSERT INTO table_for_rename_pk SELECT toDate('2019-10-01') + number % 3, number SELECT key1, value1 FROM table_for_rename_pk WHERE key1 = 1 AND key2 = 1 AND key3 = 1; -ALTER TABLE table_for_rename_pk RENAME COLUMN key1 TO renamed_key1; --{serverError 524} +ALTER TABLE table_for_rename_pk RENAME COLUMN key1 TO renamed_key1; --{serverError 47} -ALTER TABLE table_for_rename_pk RENAME COLUMN key3 TO renamed_key3; --{serverError 524} +ALTER TABLE table_for_rename_pk RENAME COLUMN key3 TO renamed_key3; --{serverError 47} -ALTER TABLE table_for_rename_pk RENAME COLUMN key2 TO renamed_key2; --{serverError 524} +ALTER TABLE table_for_rename_pk RENAME COLUMN key2 TO renamed_key2; --{serverError 47} DROP TABLE IF EXISTS table_for_rename_pk NO DELAY; SELECT sleep(1) FORMAT Null; @@ -45,11 +45,11 @@ PRIMARY KEY (key1, key2); INSERT INTO table_for_rename_with_primary_key SELECT toDate('2019-10-01') + number % 3, number, number, number, toString(number), toString(number) from numbers(9); -ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key1 TO renamed_key1; --{serverError 524} +ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key1 TO renamed_key1; --{serverError 47} -ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key2 TO renamed_key2; --{serverError 524} +ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key2 TO renamed_key2; --{serverError 47} -ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key3 TO renamed_key3; --{serverError 524} +ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN key3 TO renamed_key3; --{serverError 47} ALTER TABLE table_for_rename_with_primary_key RENAME COLUMN value1 TO renamed_value1; --{serverError 524} From 24db834046e0dcb3631e67193853ddceb4206541 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 14:27:16 +0300 Subject: [PATCH 0473/2229] Style --- src/Storages/KeyDescription.cpp | 6 +++++- src/Storages/KeyDescription.h | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index a6e15bcf816..b1e74db2c58 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -45,7 +45,11 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) } -KeyDescription KeyDescription::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const ASTPtr & additional_key_column) +KeyDescription KeyDescription::getKeyFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context, + const ASTPtr & additional_key_column) { KeyDescription result; result.definition_ast = definition_ast; diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index c41154dec6f..97f48d435b2 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -30,7 +30,7 @@ struct KeyDescription /// Types from sample block ordered in columns order. 
DataTypes data_types; - /// Additional key column added by storage + /// Additional key column added by storage type ASTPtr additional_key_column; /// Parse key structure from key definition. Requires all columns, available @@ -39,7 +39,7 @@ struct KeyDescription const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, - const ASTPtr & additional_key_expression = nullptr); + const ASTPtr & additional_key_column = nullptr); KeyDescription() = default; From 93b0d5b29b33825deaf79c462fdd9b7175db8a05 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 10 Jun 2020 15:47:34 +0300 Subject: [PATCH 0474/2229] done --- tests/integration/test_cluster_copier/test.py | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py index 440f0fc016b..9c2bcc22ef7 100644 --- a/tests/integration/test_cluster_copier/test.py +++ b/tests/integration/test_cluster_copier/test.py @@ -1,29 +1,27 @@ import os -import os.path as p import sys import time -import datetime +import kazoo import pytest -from contextlib import contextmanager import docker -from kazoo.client import KazooClient +import random +from contextlib import contextmanager +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV -COPYING_FAIL_PROBABILITY = 0.33 -MOVING_FAIL_PROBABILITY = 0.33 -cluster = None +COPYING_FAIL_PROBABILITY = 0.2 +MOVING_FAIL_PROBABILITY = 0.2 +cluster = ClickHouseCluster(__file__) def check_all_hosts_sucesfully_executed(tsv_content, num_hosts): M = TSV.toMat(tsv_content) hosts = [(l[0], l[1]) for l in M] # (host, port) codes = [l[2] for l in M] - messages = [l[3] for l in M] assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content assert len(set(codes)) == 1, "\n" + tsv_content @@ -36,7 +34,7 @@ def ddl_check_query(instance, query, num_hosts=3): return contents -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def started_cluster(): global cluster try: @@ -51,8 +49,6 @@ def started_cluster(): } } - cluster = ClickHouseCluster(__file__) - for cluster_name, shards in clusters_schema.iteritems(): for shard_name, replicas in shards.iteritems(): for replica_name in replicas: @@ -66,7 +62,6 @@ def started_cluster(): yield cluster finally: - pass cluster.shutdown() @@ -222,6 +217,11 @@ def execute_task(task, cmd_options): zk = cluster.get_kazoo_client('zoo1') print "Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1]) + try: + zk.delete("/clickhouse-copier", recursive=True) + except kazoo.exceptions.NoNodeError: + print "No node /clickhouse-copier. It is Ok in first test." 
+ zk_task_path = task.zk_task_path zk.ensure_path(zk_task_path) zk.create(zk_task_path + "/description", task.copier_task_config) @@ -236,7 +236,10 @@ def execute_task(task, cmd_options): '--base-dir', '/var/log/clickhouse-server/copier'] cmd += cmd_options - for instance_name, instance in cluster.instances.iteritems(): + copiers = random.sample(cluster.instances.keys(), 3) + + for instance_name in copiers: + instance = cluster.instances[instance_name] container = instance.get_docker_handle() exec_id = docker_api.exec_create(container.id, cmd, stderr=True) docker_api.exec_start(exec_id, detach=True) @@ -245,12 +248,13 @@ def execute_task(task, cmd_options): print "Copier for {} ({}) has started".format(instance.name, instance.ip_address) # Wait for copiers stopping and check their return codes - for exec_id, instance in zip(copiers_exec_ids, cluster.instances.itervalues()): + for exec_id, instance_name in zip(copiers_exec_ids, copiers): + instance = cluster.instances[instance_name] while True: res = docker_api.exec_inspect(exec_id) if not res['Running']: break - time.sleep(1) + time.sleep(0.5) assert res['ExitCode'] == 0, "Instance: {} ({}). Info: {}".format(instance.name, instance.ip_address, repr(res)) @@ -307,12 +311,15 @@ def test_copy_with_recovering_after_move_faults(started_cluster, use_sample_offs else: execute_task(Task1(started_cluster), ['--move-fault-probability', str(MOVING_FAIL_PROBABILITY)]) +@pytest.mark.timeout(600) def test_copy_month_to_week_partition(started_cluster): execute_task(Task2(started_cluster), []) +@pytest.mark.timeout(600) def test_copy_month_to_week_partition_with_recovering(started_cluster): execute_task(Task2(started_cluster), ['--copy-fault-probability', str(COPYING_FAIL_PROBABILITY)]) +@pytest.mark.timeout(600) def test_copy_month_to_week_partition_with_recovering_after_move_faults(started_cluster): execute_task(Task2(started_cluster), ['--move-fault-probability', str(MOVING_FAIL_PROBABILITY)]) From 9166ddea41812133d3ff1824c41cd36b6fa5a685 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Jun 2020 15:53:12 +0300 Subject: [PATCH 0475/2229] fix segfault in show create table --- src/Databases/DatabaseMemory.cpp | 2 +- src/Interpreters/InterpreterShowCreateQuery.cpp | 2 +- .../01098_temporary_and_external_tables.sh | 11 ++++++++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 52b1f889943..cd559172197 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -68,7 +68,7 @@ ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, const { std::lock_guard lock{mutex}; auto it = create_queries.find(table_name); - if (it == create_queries.end()) + if (it == create_queries.end() || !it->second) { if (throw_on_error) throw Exception("There is no metadata of table " + table_name + " in database " + database_name, ErrorCodes::UNKNOWN_TABLE); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 30005c7b169..b14baaafbb9 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -69,7 +69,7 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() create_query = DatabaseCatalog::instance().getDatabase(show_query->database)->getCreateDictionaryQuery(show_query->table); } - if (!create_query && show_query && show_query->temporary) + if (!create_query) throw Exception("Unable to show the create 
query of " + show_query->table + ". Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY); if (!context.getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) diff --git a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh index c984f363c31..b671019ca35 100755 --- a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh +++ b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh @@ -3,7 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -url="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/?session_id=test_01098" +url_without_session="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/?" +url="${url_without_session}session_id=test_01098" ${CLICKHOUSE_CURL} -m 30 -sSk "$url" --data "CREATE TEMPORARY TABLE tmp_table AS SELECT number AS n FROM numbers(42)" > /dev/null; @@ -14,3 +15,11 @@ echo "SELECT * FROM $full_tmp_name" | ${CLICKHOUSE_CURL} -m 60 -sSgk $url -d @- echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))"; +echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sleepEachRow(3)+FROM+file" > /dev/null & +sleep 1 +full_tmp_names=`echo "SELECT $name_expr FROM system.tables WHERE database='_temporary_and_external_tables' FORMAT TSV" | ${CLICKHOUSE_CURL} -m 30 -sSgk $url_without_session -d @-` +for name in $full_tmp_names +do + ${CLICKHOUSE_CURL} -m 30 -sSk "${url_without_session}query=SHOW+CREATE+TABLE+$name" 1>/dev/null 2>/dev/null +done; +wait From 6191d33bd9cb67d4dcabb79202bdd91baf467ddd Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 16:30:12 +0300 Subject: [PATCH 0476/2229] Do not cache frames inside StackTrace --- base/daemon/BaseDaemon.cpp | 1 - cmake/find/sentry.cmake | 1 + src/Common/StackTrace.cpp | 30 ++++++++---------------------- src/Common/StackTrace.h | 7 +------ 4 files changed, 10 insertions(+), 29 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 9da8849342d..1467657d31a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -223,7 +223,6 @@ public: DB::readPODBinary(stack_trace, in); DB::readBinary(thread_num, in); DB::readBinary(query_id, in); - stack_trace.resetFrames(); /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. 
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 7fa384cb906..2281d870dec 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -13,6 +13,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE set (SENTRY_BACKEND "none" CACHE STRING "") set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "") + set (SENTRY_PIC OFF CACHE BOOL "") if (OS_LINUX AND NOT_UNBUNDLED) set (BUILD_SHARED_LIBS OFF) endif() diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 819f74f37cb..aacda116bfb 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -199,12 +199,12 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t for (size_t i = 0; i < offset; ++i) { - frames.value()[i].virtual_addr = frame_pointers[i]; + frames[i].virtual_addr = frame_pointers[i]; } for (size_t i = offset; i < size; ++i) { - StackTrace::Frame & current_frame = frames.value()[i]; + StackTrace::Frame & current_frame = frames[i]; current_frame.virtual_addr = frame_pointers[i]; const auto * object = symbol_index.findObject(current_frame.virtual_addr); uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0; @@ -244,7 +244,7 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t #else for (size_t i = 0; i < size; ++i) { - frames.value()[i].virtual_addr = frame_pointers[i]; + frames[i].virtual_addr = frame_pointers[i]; } UNUSED(offset); #endif @@ -309,16 +309,6 @@ const StackTrace::FramePointers & StackTrace::getFramePointers() const return frame_pointers; } -const StackTrace::Frames & StackTrace::getFrames() const -{ - if (!frames.has_value()) - { - frames.emplace(); - symbolize(frame_pointers.data(), offset, size, frames); - } - return frames; -} - static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) { @@ -329,7 +319,7 @@ toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t s for (size_t i = offset; i < size; ++i) { - const StackTrace::Frame & current_frame = frames.value()[i]; + const StackTrace::Frame & current_frame = frames[i]; out << i << ". 
"; if (current_frame.file.has_value() && current_frame.line.has_value()) @@ -356,8 +346,7 @@ toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t s static std::string toStringImpl(const void * const * frame_pointers, size_t offset, size_t size) { std::stringstream out; - StackTrace::Frames frames{}; - frames.emplace(); + StackTrace::Frames frames; symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); @@ -365,12 +354,9 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs void StackTrace::toStringEveryLine(std::function callback) const { - toStringEveryLineImpl(getFrames(), offset, size, std::move(callback)); -} - -void StackTrace::resetFrames() -{ - frames.reset(); + Frames frames; + symbolize(frame_pointers.data(), offset, size, frames); + toStringEveryLineImpl(frames, offset, size, std::move(callback)); } diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 27b2c44dd94..4ec63b3cf86 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -36,7 +36,7 @@ public: }; static constexpr size_t capacity = 32; using FramePointers = std::array; - using Frames = std::optional>; + using Frames = std::array; /// Tries to capture stack trace StackTrace(); @@ -51,22 +51,17 @@ public: size_t getSize() const; size_t getOffset() const; const FramePointers & getFramePointers() const; - const Frames & getFrames() const; std::string toString() const; static std::string toString(void ** frame_pointers, size_t offset, size_t size); void toStringEveryLine(std::function callback) const; - - void resetFrames(); - protected: void tryCapture(); size_t size = 0; size_t offset = 0; /// How many frames to skip while displaying. 
FramePointers frame_pointers{}; - mutable Frames frames{}; }; std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); From ba92f96a74404bc02d5455943ee45c40c0907e7c Mon Sep 17 00:00:00 2001 From: Lewinma Date: Wed, 10 Jun 2020 22:23:45 +0800 Subject: [PATCH 0477/2229] fix some translation errors in Chinese (#11128) * fix some translation erros in Chinese * Update docs/zh/sql-reference/functions/date-time-functions.md Co-authored-by: hcz * Update docs/zh/sql-reference/functions/date-time-functions.md Co-authored-by: hcz * Update docs/zh/sql-reference/functions/date-time-functions.md Co-authored-by: hcz * Update docs/zh/sql-reference/functions/date-time-functions.md Co-authored-by: hcz Co-authored-by: alexey-milovidov Co-authored-by: hcz --- docs/zh/sql-reference/functions/date-time-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 3fbe272ebe8..a418379d4ec 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -22,11 +22,11 @@ SELECT 将Date或DateTime转换为指定的时区。 -## 玩一年 {#toyear} +## toYear {#toyear} 将Date或DateTime转换为包含年份编号(AD)的UInt16类型的数字。 -## 到四分钟 {#toquarter} +## toQuarter {#toquarter} 将Date或DateTime转换为包含季度编号的UInt8类型的数字。 From 08073903ed10aa15d1fbddea6d90929d7f9e4619 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Jun 2020 17:24:42 +0300 Subject: [PATCH 0478/2229] fix query_id of http queries --- src/Interpreters/Context.cpp | 2 ++ .../0_stateless/01194_http_query_id.reference | 1 + tests/queries/0_stateless/01194_http_query_id.sh | 16 ++++++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/01194_http_query_id.reference create mode 100755 tests/queries/0_stateless/01194_http_query_id.sh diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cbf00836103..7b636b84c68 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -166,6 +166,8 @@ public: if (!session.unique()) throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); + session->context.client_info = context.client_info; + return session; } diff --git a/tests/queries/0_stateless/01194_http_query_id.reference b/tests/queries/0_stateless/01194_http_query_id.reference new file mode 100644 index 00000000000..b8626c4cff2 --- /dev/null +++ b/tests/queries/0_stateless/01194_http_query_id.reference @@ -0,0 +1 @@ +4 diff --git a/tests/queries/0_stateless/01194_http_query_id.sh b/tests/queries/0_stateless/01194_http_query_id.sh new file mode 100755 index 00000000000..381ae67f88f --- /dev/null +++ b/tests/queries/0_stateless/01194_http_query_id.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +url="http://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?session_id=test_01194" +rnd=$RANDOM + +${CLICKHOUSE_CURL} -sS "$url&query=SELECT+$rnd,1" > /dev/null +${CLICKHOUSE_CURL} -sS "$url&query=SELECT+$rnd,2" > /dev/null +${CLICKHOUSE_CURL} -sS "$url" --data "SELECT $rnd,3" > /dev/null +${CLICKHOUSE_CURL} -sS "$url" --data "SELECT $rnd,4" > /dev/null + +${CLICKHOUSE_CURL} -sS "$url" --data "SYSTEM FLUSH LOGS" + +${CLICKHOUSE_CURL} -sS "$url&query=SELECT+count(DISTINCT+query_id)+FROM+system.query_log+WHERE+query+LIKE+'SELECT+$rnd%25'" From 60b40f04039702a0d8d55c44cfb81e96c932836e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 17:51:25 +0300 Subject: [PATCH 0479/2229] Lost part of refactoring --- base/daemon/SentryWriter.cpp | 4 +++- src/Common/StackTrace.cpp | 6 +++--- src/Common/StackTrace.h | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index c8197d8a160..eddd5bfa49c 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -146,9 +146,11 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c offset = 1; } char instruction_addr[100]; + StackTrace::Frames frames; + StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, size, frames); for (size_t i = stack_size - 1; i >= offset; --i) { - const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; + const StackTrace::Frame & current_frame = frames[i]; sentry_value_t frame = sentry_value_new_object(); UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr); std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr); diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aacda116bfb..8e390154838 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -190,7 +190,7 @@ static void * getCallerAddress(const ucontext_t & context) #endif } -static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames) +void StackTrace::symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames) { #if defined(__ELF__) && !defined(__FreeBSD__) && !defined(ARCADIA_BUILD) @@ -347,7 +347,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs { std::stringstream out; StackTrace::Frames frames; - symbolize(frame_pointers, offset, size, frames); + StackTrace::symbolize(frame_pointers.data(), offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } @@ -355,7 +355,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs void StackTrace::toStringEveryLine(std::function callback) const { Frames frames; - symbolize(frame_pointers.data(), offset, size, frames); + StackTrace::symbolize(frame_pointers.data(), offset, size, frames); toStringEveryLineImpl(frames, offset, size, std::move(callback)); } diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 4ec63b3cf86..374f0314533 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -54,6 +54,7 @@ public: std::string toString() const; static std::string toString(void ** frame_pointers, size_t offset, size_t size); + static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames); void toStringEveryLine(std::function callback) const; 
protected: From f043ba3c8214e351f8339a339164a2e0541d37e1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 18:02:24 +0300 Subject: [PATCH 0480/2229] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c7fbbb31ad1..2008e5d2750 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ ClickHouse is an open-source column-oriented database management system that all * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events. +* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. From 01b9f73e654f43501a1255badf2ee480745cd1c7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 18:04:33 +0300 Subject: [PATCH 0481/2229] Disallow was replaced with robots html tag --- website/robots.txt | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/website/robots.txt b/website/robots.txt index fa3a68b6d69..2cecc12e311 100644 --- a/website/robots.txt +++ b/website/robots.txt @@ -1,14 +1,4 @@ User-agent: * -Disallow: /docs/en/single/ -Disallow: /docs/zh/single/ -Disallow: /docs/es/single/ -Disallow: /docs/fr/single/ -Disallow: /docs/ru/single/ -Disallow: /docs/ja/single/ -Disallow: /docs/fa/single/ -Disallow: /docs/v1* -Disallow: /docs/v2* -Disallow: /docs/v3* Disallow: /cdn-cgi/ Allow: / Host: https://clickhouse.tech From d4c8adbaffe0fea8adef8585bc91fa3524589b83 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Jun 2020 18:06:37 +0300 Subject: [PATCH 0482/2229] Fix sanitizeBlock --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3010dfcfe12..ecfa011f1c8 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -113,7 +113,7 @@ bool sanitizeBlock(Block & block) return false; col.column = col.type->createColumn(); } - else if (isColumnConst(*col.column) && !col.column->empty()) + else if (!col.column->empty()) col.column = col.column->cloneEmpty(); } return true; From e2c6d090192d870edffc40e40deeb4c14417e5be Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Jun 2020 18:06:55 +0300 Subject: [PATCH 0483/2229] Added test. 
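Regression test for the sanitizeBlock() change above. The previous condition reset only non-empty *constant* columns, so a materialized non-empty column could survive in the sample block and later break the ARRAY JOIN + PREWHERE IN (subquery) pipeline. A minimal sketch of the invariant the fixed function now enforces, assuming the usual Block / ColumnWithTypeAndName interfaces (illustration only, not part of the patch):

```cpp
/// Illustration only: after a successful sanitizeBlock(block), every column
/// of the sample block must be empty, whether constant or materialized.
for (const auto & col : block)
{
    if (col.column)
        assert(col.column->empty());
}
```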
--- .../01305_array_join_prewhere_in_subquery.reference | 1 + .../0_stateless/01305_array_join_prewhere_in_subquery.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.reference create mode 100644 tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.sql diff --git a/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.reference b/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.sql b/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.sql new file mode 100644 index 00000000000..535dee5ebbe --- /dev/null +++ b/tests/queries/0_stateless/01305_array_join_prewhere_in_subquery.sql @@ -0,0 +1,5 @@ +drop table if exists h; +create table h (EventDate Date, CounterID UInt64, WatchID UInt64) engine = MergeTree order by (CounterID, EventDate); +insert into h values ('2020-06-10', 16671268, 1); +SELECT count() from h ARRAY JOIN [1] AS a PREWHERE WatchID IN (SELECT toUInt64(1)) WHERE (EventDate = '2020-06-10') AND (CounterID = 16671268); +drop table if exists h; From b3dad4c3a62f115b67d46f3a641ccaeae171c4c8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 18:16:29 +0300 Subject: [PATCH 0484/2229] Update odbc.md --- docs/en/interfaces/odbc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/odbc.md b/docs/en/interfaces/odbc.md index 4fd7cd23964..42ae4cf5b53 100644 --- a/docs/en/interfaces/odbc.md +++ b/docs/en/interfaces/odbc.md @@ -5,6 +5,6 @@ toc_title: ODBC Driver # ODBC Driver {#odbc-driver} -- [Official driver](https://github.com/ClickHouse/clickhouse-odbc). 
+- [Official driver](https://github.com/ClickHouse/clickhouse-odbc) [Original article](https://clickhouse.tech/docs/en/interfaces/odbc/) From 0316464ed4eb22593bd7fc18b79584cc0f476ce0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 18:30:13 +0300 Subject: [PATCH 0485/2229] fix --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 8e390154838..cb0488b489a 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -347,7 +347,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs { std::stringstream out; StackTrace::Frames frames; - StackTrace::symbolize(frame_pointers.data(), offset, size, frames); + StackTrace::symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } From e144e78b31ffc16ea1626d5471d4748aae1c9454 Mon Sep 17 00:00:00 2001 From: "Fan()" <18501341937@163.com> Date: Wed, 10 Jun 2020 23:30:33 +0800 Subject: [PATCH 0486/2229] Update clickhouse-copier.md (#11558) --- docs/en/operations/utilities/clickhouse-copier.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index ab3b49523d2..d450f5753e4 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -29,7 +29,7 @@ To reduce network traffic, we recommend running `clickhouse-copier` on the same The utility should be run manually: ``` bash -$ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir +$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir ``` Parameters: From 8c1981757504b334f129e3aa1845ecf85d843768 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 10 Jun 2020 18:30:50 +0300 Subject: [PATCH 0487/2229] lock_guard --- src/Interpreters/SystemLog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 1f889d53aec..9f5578614c0 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -435,7 +435,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, template void SystemLog::prepareTable() { - std::unique_lock prepare_lock(prepare_mutex); + std::lock_guard prepare_lock(prepare_mutex); String description = table_id.getNameForLogs(); From 897a592ee60b9c4308aa12bc46a2cc3d933a2f75 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 9 Jun 2020 21:52:06 +0000 Subject: [PATCH 0488/2229] Move subscription --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 2 +- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 14 +-- src/Storages/RabbitMQ/RabbitMQHandler.h | 2 +- .../ReadBufferFromRabbitMQConsumer.cpp | 94 ++++++------------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 23 +++-- 5 files changed, 44 insertions(+), 91 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 1c6eaf6f2e9..245320008f3 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -50,7 +50,7 @@ void RabbitMQBlockInputStream::readPrefixImpl() if (!buffer || finished) return; - buffer->subscribeConsumer(); + buffer->checkSubscription(); } diff --git 
a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index d9dc19afa28..0a432e1b5ca 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -34,23 +34,15 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes } -void RabbitMQHandler::startConsumerLoop(std::atomic & check_param, std::atomic & loop_started) +void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same * event loop). But the loop should not be attempted to start if it is already running. */ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { - /* The callback, which changes this variable, could have already been activated by another thread while we waited - * for the mutex to unlock (as it runs all active events on the connection). This means that there is no need to - * start event loop again. - */ - if (!check_param) - { - loop_started = true; - event_base_loop(evbase, EVLOOP_NONBLOCK); - } - + loop_started = true; + event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index a8692a845f1..911651097bb 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -19,7 +19,7 @@ public: RabbitMQHandler(event_base * evbase_, Poco::Logger * log_); void onError(AMQP::TcpConnection * connection, const char * message) override; - void startConsumerLoop(std::atomic & check_param, std::atomic & loop_started); + void startConsumerLoop(std::atomic & loop_started); void startProducerLoop(); void stopWithTimeout(); void stop(); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5d649ab2084..32dcd30e6f5 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,10 +13,6 @@ namespace DB { -enum -{ - Loop_retries_limit = 500 -}; ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, @@ -109,9 +105,6 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - /* This varibale can be updated from a different thread in case of some error so its better to check - * whether exchange is in a working state and if not - declare it once again. 
- */ if (!exchange_declared) { initExchange(); @@ -144,6 +137,9 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } + /// Must be done here, cannot be done in readPrefix() + subscribe(queues.back()); + LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) @@ -169,26 +165,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ while (!bindings_created && !bindings_error) { - startEventLoop(bindings_created, loop_started); - } -} - - -void ReadBufferFromRabbitMQConsumer::subscribeConsumer() -{ - if (subscribed) - return; - - for (auto & queue : queues) - { - subscribe(queue); - } - - LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - - if (count_subscribed == queues.size()) - { - subscribed = true; + startEventLoop(loop_started); } } @@ -198,25 +175,13 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) if (subscribed_queue[queue_name]) return; - consumer_created = false, consumer_failed = false; - consumer_channel->consume(queue_name, AMQP::noack) .onSuccess([&](const std::string & /* consumer */) { - consumer_created = true; + subscribed_queue[queue_name] = true; ++count_subscribed; LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); - - /* Unblock current thread if it is looping (any consumer could start the loop and only one of them) so that it does not - * continue to execute all active callbacks on the connection (=> one looping consumer will not be blocked for too - * long and events will be distributed between them) - */ - if (loop_started && count_subscribed == queues.size()) - { - stopEventLoop(); - subscribed = true; - } }) .onReceived([&](const AMQP::Message & message, uint64_t /* deliveryTag */, bool /* redelivered */) { @@ -232,7 +197,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) bool stop_loop = false; - /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl() (below). + /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl(). { std::lock_guard lock(mutex); received.push_back(message_received); @@ -255,35 +220,32 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) }) .onError([&](const char * message) { - consumer_failed = true; + consumer_error = true; LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); }); +} - size_t cnt_retries = 0; - /// These variables are updated in a separate thread. - while (!consumer_created && !consumer_failed) +void ReadBufferFromRabbitMQConsumer::checkSubscription() +{ + /// In general this condition will always be true and looping/resubscribing would not happen + if (count_subscribed == num_queues) + return; + + wait_subscribed = num_queues; + + /// These variables are updated in a separate thread + while (count_subscribed != wait_subscribed && !consumer_error) { - startEventLoop(consumer_created, loop_started); - - if (!consumer_created && !consumer_failed) - { - if (cnt_retries >= Loop_retries_limit) - { - /* For unknown reason there is a case when subscribtion may fail and OnError callback is not activated - * for a long time. In this case there should be resubscription. 
- */ - LOG_ERROR(log, "Consumer {} failed to subscride to queue {}", channel_id, queue_name); - break; - } - - ++cnt_retries; - } + startEventLoop(loop_started); } - if (consumer_created) + LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); + + /// A case that would not normally happen + for (auto & queue : queues) { - subscribed_queue[queue_name] = true; + subscribe(queue); } } @@ -300,9 +262,9 @@ void ReadBufferFromRabbitMQConsumer::stopEventLoopWithTimeout() } -void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & check_param, std::atomic & loop_started) +void ReadBufferFromRabbitMQConsumer::startEventLoop(std::atomic & loop_started) { - eventHandler.startConsumerLoop(check_param, loop_started); + eventHandler.startConsumerLoop(loop_started); } @@ -316,7 +278,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() if (received.empty()) { /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread - startEventLoop(false_param, loop_started); + startEventLoop(loop_started); loop_started = false; } @@ -328,7 +290,7 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() messages.clear(); - /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback (above). + /// Needed to avoid data race because this vector can be used at the same time by another thread in onReceived callback. std::lock_guard lock(mutex); messages.swap(received); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 9e0b29307c4..7fbc1024d44 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -37,7 +37,7 @@ public: ~ReadBufferFromRabbitMQConsumer() override; void allowNext() { allowed = true; } // Allow to read next message. - void subscribeConsumer(); + void checkSubscription(); private: using Messages = std::vector; @@ -59,16 +59,9 @@ private: const std::atomic & stopped; String current_exchange_name; - - /* Note: as all concurrent consumers share the same connection => they also share the same - * event loop, which can be started by any consumer and the loop is blocking only to the thread that - * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, - * at most two threads will be present: main thread and the one that executes callbacks (1 thread if - * main thread is the one that started the loop). Both reference these variables. - */ - std::atomic exchange_declared = false, subscribed = false, loop_started = false, false_param = false; - std::atomic consumer_created = false, consumer_failed = false; - std::atomic count_subscribed = 0; + bool exchange_declared = false; + std::atomic loop_started = false, consumer_error = false; + std::atomic count_subscribed = 0, wait_subscribed; std::vector queues; Messages received; @@ -76,6 +69,12 @@ private: Messages::iterator current; std::unordered_map subscribed_queue; + /* Note: as all consumers share the same connection => they also share the same + * event loop, which can be started by any consumer and the loop is blocking only to the thread that + * started it, and the loop executes ALL active callbacks on the connection => in case num_consumers > 1, + * at most two threads will be present: main thread and the one that executes callbacks (1 thread if + * main thread is the one that started the loop). 
+ */ std::mutex mutex; bool nextImpl() override; @@ -83,7 +82,7 @@ private: void initExchange(); void initQueueBindings(const size_t queue_id); void subscribe(const String & queue_name); - void startEventLoop(std::atomic & check_param, std::atomic & loop_started); + void startEventLoop(std::atomic & loop_started); void stopEventLoopWithTimeout(); void stopEventLoop(); From bf262a3b04fae0023394b380c09f52937fe23217 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 10 Jun 2020 18:44:28 +0300 Subject: [PATCH 0489/2229] more lock_guard --- src/Interpreters/SystemLog.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 9f5578614c0..3c07af8c985 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -217,7 +217,7 @@ SystemLog::SystemLog(Context & context_, template void SystemLog::startup() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); }); } @@ -231,7 +231,7 @@ void SystemLog::add(const LogElement & element) /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky. auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (is_shutdown) return; @@ -310,7 +310,7 @@ template void SystemLog::stopFlushThread() { { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (!saving_thread.joinable()) { @@ -423,7 +423,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, } { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); flushed_before = to_flush_end; flush_event.notify_all(); } From e221a64c4b76b35eeb4f37945b4b4960479708b7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 18:56:18 +0300 Subject: [PATCH 0490/2229] Better test --- .../integration/test_always_fetch_merged/test.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_always_fetch_merged/test.py b/tests/integration/test_always_fetch_merged/test.py index 63ab6f6b5ea..79458633081 100644 --- a/tests/integration/test_always_fetch_merged/test.py +++ b/tests/integration/test_always_fetch_merged/test.py @@ -55,9 +55,13 @@ def test_replica_always_download(started_cluster): node1.query("SYSTEM START MERGES") - time.sleep(3) - - node1_parts = node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() - node2_parts = node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() - assert int(node1_parts) < 10 # something merged - assert int(node2_parts) < 10 + for i in range(30): + node1_parts = node1.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() + node2_parts = node2.query("SELECT COUNT() FROM system.parts WHERE table = 'test_table' and active=1").strip() + if int(node1_parts) < 10 and int(node2_parts) < 10: + break + else: + time.sleep(0.5) + else: + assert int(node1_parts) < 10 + assert int(node2_parts) < 10 From 0a74c9373ec46713b3deb51e86c702418fc29a0d Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 19:48:08 +0300 Subject: [PATCH 0491/2229] less confusing --- base/daemon/SentryWriter.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index eddd5bfa49c..003a2816ce0 100644 --- a/base/daemon/SentryWriter.cpp +++ 
b/base/daemon/SentryWriter.cpp @@ -136,7 +136,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c setExtras(); /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/stacktrace/ - sentry_value_t frames = sentry_value_new_list(); + sentry_value_t sentry_frames = sentry_value_new_list(); size_t stack_size = stack_trace.getSize(); if (stack_size > 0) { @@ -151,33 +151,33 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c for (size_t i = stack_size - 1; i >= offset; --i) { const StackTrace::Frame & current_frame = frames[i]; - sentry_value_t frame = sentry_value_new_object(); + sentry_value_t sentry_frame = sentry_value_new_object(); UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr); std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr); - sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr)); if (current_frame.symbol.has_value()) { - sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); + sentry_value_set_by_key(sentry_frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); } if (current_frame.file.has_value()) { - sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); + sentry_value_set_by_key(sentry_frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); } if (current_frame.line.has_value()) { - sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); + sentry_value_set_by_key(sentry_frame, "lineno", sentry_value_new_int32(current_frame.line.value())); } - sentry_value_append(frames, frame); + sentry_value_append(sentry_frames, sentry_frame); } } /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/ sentry_value_t stacktrace = sentry_value_new_object(); - sentry_value_set_by_key(stacktrace, "frames", frames); + sentry_value_set_by_key(stacktrace, "frames", sentry_frames); sentry_value_t thread = sentry_value_new_object(); sentry_value_set_by_key(thread, "stacktrace", stacktrace); From ba6574d428c87ee1c31a2eec314a19f3265028b8 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Wed, 10 Jun 2020 13:50:27 -0300 Subject: [PATCH 0492/2229] fix typo in greatCircleAngle example (#11583) fix typo in greatCircleAngle example --- docs/ru/sql-reference/functions/geo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/geo.md b/docs/ru/sql-reference/functions/geo.md index e747d719938..45c30b3c2cd 100644 --- a/docs/ru/sql-reference/functions/geo.md +++ b/docs/ru/sql-reference/functions/geo.md @@ -40,7 +40,7 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673) Вычисляет угловое расстояние на сфере по [формуле большого круга](https://en.wikipedia.org/wiki/Great-circle_distance). 
``` sql -greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg) +greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg) ``` **Входные параметры** From f53773a083a0b8945f294c9eb20254146f9fb686 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 20:12:34 +0300 Subject: [PATCH 0493/2229] Update quickstart.html --- website/templates/index/quickstart.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/quickstart.html b/website/templates/index/quickstart.html index 74449b4da3a..ca7544f3b51 100644 --- a/website/templates/index/quickstart.html +++ b/website/templates/index/quickstart.html @@ -44,7 +44,7 @@

   Tutorial    -   +    Documentation

From b4a72b17748a5741530b652a7f2f8130fe2b3609 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 20:20:45 +0300 Subject: [PATCH 0494/2229] Update features.html --- website/templates/index/features.html | 3 +++ 1 file changed, 3 insertions(+) diff --git a/website/templates/index/features.html b/website/templates/index/features.html index c659e0d9301..f266bbaabb7 100644 --- a/website/templates/index/features.html +++ b/website/templates/index/features.html @@ -14,6 +14,7 @@
  • Real-time query processing
  • Real-time data ingestion
  • On-disk locality of reference
  • +
  • Secondary data-skipping indexes
  • Data compression
  • @@ -24,12 +25,14 @@
  • High availability
  • Cross-datacenter replication
  • Local and distributed joins
  • +
  • Adaptive join algorithm
  • Pluggable external dimension tables
  • Arrays and nested data types
    • +
    • Focus on OLAP workloads
    • Approximate query processing
    • Probabilistic data structures
    • Features for web and mobile analytics
    • From e07068e3d0851ef3cc974a55a88e3570c3e82dec Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 20:23:12 +0300 Subject: [PATCH 0495/2229] Update performance.html --- website/templates/index/performance.html | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/website/templates/index/performance.html b/website/templates/index/performance.html index 230eb0de974..2a9b766c74b 100644 --- a/website/templates/index/performance.html +++ b/website/templates/index/performance.html @@ -3,7 +3,7 @@

      ClickHouse works 100-1000x faster than traditional approaches

      -

      ClickHouse's performance exceeds comparable column-oriented DBMS currently available +

      ClickHouse's performance exceeds comparable column-oriented database management systems currently available on the market. It processes hundreds of millions to more than a billion rows and tens of gigabytes of data per single server per second.

      Detailed comparison @@ -42,6 +42,5 @@
    - {## TODO: ##} From 2de9ddce532f6471715a1b27310457d795ea16d4 Mon Sep 17 00:00:00 2001 From: Ivan <5627721+abyss7@users.noreply.github.com> Date: Wed, 10 Jun 2020 20:30:34 +0300 Subject: [PATCH 0496/2229] Update CMakeLists.txt (#11560) Update CMakeLists.txt (#11560) --- contrib/hyperscan-cmake/CMakeLists.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index bed774afdbf..1f30bfccbe8 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -220,15 +220,13 @@ if (ENABLE_HYPERSCAN) target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) target_compile_options (hyperscan PRIVATE -g0 -march=corei7 # library has too much debug information - PUBLIC -Wno-documentation ) target_include_directories (hyperscan PRIVATE common ${LIBRARY_DIR}/include - PUBLIC - ${LIBRARY_DIR}/src ) + target_include_directories (hyperscan SYSTEM PUBLIC ${LIBRARY_DIR}/src) if (ARCH_AMD64) target_include_directories (hyperscan PRIVATE x86_64) endif () From f81a3fb803eafa695138b9d6b138bd0fe0c5a6bd Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 20:41:52 +0300 Subject: [PATCH 0497/2229] Update rich.html --- website/templates/index/rich.html | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/website/templates/index/rich.html b/website/templates/index/rich.html index 1f2b4957306..ec2620e0a86 100644 --- a/website/templates/index/rich.html +++ b/website/templates/index/rich.html @@ -3,14 +3,14 @@
    -

    Feature-rich

    +

    Feature-rich SQL database

    1

    User-friendly SQL dialect

    -

    ClickHouse features a user-friendly SQL query dialect with a number of built-in analytics capabilities. In addition to common functions that could be found in most DBMS, ClickHouse comes with a lot of domain-specific functions and features out of the box.

    +

    ClickHouse features a SQL query dialect with a number of built-in analytics capabilities. In addition to common functions that could be found in most DBMS, ClickHouse comes with a lot of domain-specific functions and features for OLAP scenarios out of the box.

    2
    @@ -23,13 +23,11 @@
    3

    Join distributed or co-located data

    ClickHouse provides various options for joining tables. Joins could be either cluster local, they can also access data stored in external systems. There's also an external dictionaries support that provides an alternative more simple syntax for accessing data from an outside source.

    -
    4

    Approximate query processing

    Users can control the trade-off between result accuracy and query execution time, which is handy when dealing with multiple terabytes or petabytes of data. ClickHouse also provides probabilistic data structures for fast and memory-efficient calculation of cardinalities and quantiles

    -
    From 11a4ea5665350680106b390b5e821d9649b3da9d Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 20:42:15 +0300 Subject: [PATCH 0498/2229] Update why.html --- website/templates/index/why.html | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/website/templates/index/why.html b/website/templates/index/why.html index a2917258923..dc9071a2067 100644 --- a/website/templates/index/why.html +++ b/website/templates/index/why.html @@ -11,8 +11,7 @@

    Blazing fast

    -

    ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak - processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.

    +

    ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.

    Fault tolerant @@ -28,7 +27,7 @@

    Easy to use

    -

    ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some DBMS.

    +

    ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems.

    Highly reliable From 8418612e095d611bc5c214a29ab849239d386497 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 Jun 2020 20:56:47 +0300 Subject: [PATCH 0499/2229] Add unbundled boost support Follow-up-for: #11390 Cc: @@abyss7 --- contrib/boost-cmake/CMakeLists.txt | 33 +++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index fb7b236d30d..4360304403f 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -97,5 +97,36 @@ if (USE_INTERNAL_BOOST_LIBRARY) add_library (boost::system ALIAS _boost_system) target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) else () - message (FATAL_ERROR "TODO: external Boost library is not supported!") + # 1.70 like in contrib/boost + set(BOOST_VERSION 1.70) + + find_package(Boost ${BOOST_VERSION} COMPONENTS + system + filesystem + iostreams + program_options + regex + REQUIRED) + + add_library (_boost_headers_only INTERFACE) + add_library (boost::headers_only ALIAS _boost_headers_only) + target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR}) + + add_library (_boost_filesystem INTERFACE) + add_library (_boost_iostreams INTERFACE) + add_library (_boost_program_options INTERFACE) + add_library (_boost_regex INTERFACE) + add_library (_boost_system INTERFACE) + + target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY}) + target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY}) + target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY}) + target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY}) + target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY}) + + add_library (boost::filesystem ALIAS _boost_filesystem) + add_library (boost::iostreams ALIAS _boost_iostreams) + add_library (boost::program_options ALIAS _boost_program_options) + add_library (boost::regex ALIAS _boost_regex) + add_library (boost::system ALIAS _boost_system) endif () From 1f3c640da2c08ea98576e6641a9a53d66902f4fa Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 Jun 2020 20:56:47 +0300 Subject: [PATCH 0500/2229] Use unbundled boost for unbundled builds --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 85dd3cc421c..ccb01a4df92 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -142,7 +142,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ if unbundled: # TODO: fix build with ENABLE_RDKAFKA - cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_BOOST_LIBRARY=1') + cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0') if split_binary: cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') From 3f389c1a193cfcff63f45c3a7a0d1415af6b4531 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 10 Jun 2020 20:58:13 +0300 Subject: [PATCH 0501/2229] try fix flacky test --- .../0_stateless/01108_restart_replicas_rename_deadlock.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01108_restart_replicas_rename_deadlock.sh b/tests/queries/0_stateless/01108_restart_replicas_rename_deadlock.sh index 65b738aed8e..a2fce893672 
100755 --- a/tests/queries/0_stateless/01108_restart_replicas_rename_deadlock.sh +++ b/tests/queries/0_stateless/01108_restart_replicas_rename_deadlock.sh @@ -61,13 +61,16 @@ timeout $TIMEOUT bash -c restart_thread_1 2> /dev/null & timeout $TIMEOUT bash -c restart_thread_2 2> /dev/null & wait -sleep 3 for i in `seq 4`; do $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA replica_01108_$i" >/dev/null 2>&1 $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA replica_01108_${i}_tmp" >/dev/null 2>&1 done +while [[ `$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query LIKE 'RENAME%'"` -gt 0 ]]; do + sleep 1 +done; + $CLICKHOUSE_CLIENT -q "SELECT replaceOne(name, '_tmp', '') FROM system.tables WHERE database = currentDatabase() AND match(name, '^replica_01108_')" $CLICKHOUSE_CLIENT -q "SELECT sum(n), count(n) FROM merge(currentDatabase(), '^replica_01108_') GROUP BY position(_table, 'tmp')" From dd3cf0fe0aaf528a555d51487bc5c74c83d31591 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 10 Jun 2020 21:02:45 +0300 Subject: [PATCH 0502/2229] Don't miss columns TTLs update --- src/Storages/StorageReplicatedMergeTree.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7e42b75104d..1465cef58e1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -474,8 +474,17 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); if (new_columns != new_metadata.columns) + { new_metadata.columns = new_columns; + new_metadata.column_ttls_by_name.clear(); + for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, global_context, new_metadata.primary_key); + new_metadata.column_ttls_by_name[name] = new_ttl_entry; + } + } + if (!metadata_diff.empty()) { if (metadata_diff.sorting_key_changed) From f5b8e610469bc6566b84c970e1721f86def4c5e9 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 21:04:47 +0300 Subject: [PATCH 0503/2229] Update configuration-files.md --- docs/en/operations/configuration-files.md | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index e1f9e427413..f574240ea39 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -7,26 +7,33 @@ toc_title: Configuration Files ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. -!!! note "Note" - All the configuration files should be in XML format. Also, they should have the same root element, usually ``. +All the configuration files should be in XML format. Also, they should have the same root element, usually ``. -Some settings specified in the main configuration file can be overridden in other configuration files. The `replace` or `remove` attributes can be specified for the elements of these configuration files. +## Override -If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. 
+Some settings specified in the main configuration file can be overridden in other configuration files: -If `replace` is specified, it replaces the entire element with the specified one. +- The `replace` or `remove` attributes can be specified for the elements of these configuration files. +- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. +- If `replace` is specified, it replaces the entire element with the specified one. +- If `remove` is specified, it deletes the element. -If `remove` is specified, it deletes the element. +## Substitution The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)). Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. +## User Settings + The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. Users configuration can be splitted into separate files similar to `config.xml` and `config.d/`. Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`. Directory `users.d` is used by default, as `users_config` defaults to `users.xml`. + +## Example + For example, you can have separate config file for each user like this: ``` bash @@ -48,8 +55,10 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ``` +## Implementation Details + For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file. The server tracks changes in config files, as well as files and ZooKeeper nodes that were used when performing substitutions and overrides, and reloads the settings for users and clusters on the fly. This means that you can modify the cluster, users, and their settings without restarting the server. 
-[Original article](https://clickhouse.tech/docs/en/operations/configuration_files/) +[Original article](https://clickhouse.tech/docs/en/operations/configuration-files/) From 9c86c128915bf5ae350273a09e6f114bac4325bd Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 10 Jun 2020 21:11:30 +0300 Subject: [PATCH 0504/2229] Update ParserShowTablesQuery.cpp --- src/Parsers/ParserShowTablesQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserShowTablesQuery.cpp b/src/Parsers/ParserShowTablesQuery.cpp index c60e442542d..ee50d23ffc8 100644 --- a/src/Parsers/ParserShowTablesQuery.cpp +++ b/src/Parsers/ParserShowTablesQuery.cpp @@ -73,7 +73,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec String cluster_str; if (!parseIdentifierOrStringLiteral(pos, expected, cluster_str)) return false; - + query->cluster_str = std::move(cluster_str); } else From 982c85ac6c61264560684b433ed5104fdd99c0c3 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 10 Jun 2020 21:13:16 +0300 Subject: [PATCH 0505/2229] Update 01293_show_clusters.reference --- tests/queries/0_stateless/01293_show_clusters.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index 85a14155529..39e25143131 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,7 +1,7 @@ test_cluster_two_shards +test_cluster_two_shards_different_databases test_cluster_two_shards_localhost test_shard_localhost -test_shard_localhost[1] test_shard_localhost_secure test_unavailable_shard test_cluster_two_shards From 5fa44019918581a4378582bcd72ff5b160e5f659 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 21:18:34 +0300 Subject: [PATCH 0506/2229] fix --- base/daemon/SentryWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 003a2816ce0..2ce43c9f0a2 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -147,7 +147,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c } char instruction_addr[100]; StackTrace::Frames frames; - StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, size, frames); + StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, stack_size, frames); for (size_t i = stack_size - 1; i >= offset; --i) { const StackTrace::Frame & current_frame = frames[i]; From 3d9424fd5a26aabfbc863984a6e2364eb21e1110 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 21:54:55 +0300 Subject: [PATCH 0507/2229] perl -pi -e 's/repo\.yandex\.ru\/clickhouse/repo.clickhouse.tech/g' {} --- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- docs/en/getting-started/install.md | 2 +- docs/es/getting-started/install.md | 2 +- docs/fa/getting-started/install.md | 2 +- docs/fr/getting-started/install.md | 2 +- docs/ja/getting-started/install.md | 2 +- docs/ru/getting-started/install.md | 16 ++++++++-------- docs/tr/getting-started/install.md | 2 +- docs/zh/getting-started/install.md | 6 +++--- 11 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 5ca7e508d56..453c165afad 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG 
repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" +ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* RUN apt-get update \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 93f192c3f3c..45565ec659e 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" +ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* ARG gosu_ver=1.10 diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 1dd756ed7c2..f215e21288e 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" +ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* RUN apt-get update && \ diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index e917b8ef58c..7c8ae631e1a 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -64,7 +64,7 @@ You can also download and install packages manually from [here](https://repo.cli It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. -The required version can be downloaded with `curl` or `wget` from repository https://repo.yandex.ru/clickhouse/tgz/. +The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/. After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version: ``` bash diff --git a/docs/es/getting-started/install.md b/docs/es/getting-started/install.md index 83f5fff8af2..89b5735f192 100644 --- a/docs/es/getting-started/install.md +++ b/docs/es/getting-started/install.md @@ -66,7 +66,7 @@ También puede descargar e instalar paquetes manualmente desde [aqui](https://re Se recomienda utilizar pre-compilado oficial `tgz` para todas las distribuciones de Linux, donde la instalación de `deb` o `rpm` paquetes no es posible. -La versión requerida se puede descargar con `curl` o `wget` desde el repositorio https://repo.yandex.ru/clickhouse/tgz/. +La versión requerida se puede descargar con `curl` o `wget` desde el repositorio https://repo.clickhouse.tech/tgz/. Después de eso, los archivos descargados deben desempaquetarse e instalarse con scripts de instalación. Ejemplo para la última versión: ``` bash diff --git a/docs/fa/getting-started/install.md b/docs/fa/getting-started/install.md index ec0b67944d9..d3e39e97c80 100644 --- a/docs/fa/getting-started/install.md +++ b/docs/fa/getting-started/install.md @@ -67,7 +67,7 @@ sudo yum install clickhouse-server clickhouse-client توصیه می شود به استفاده از رسمی از پیش وارد شده `tgz` بایگانی برای همه توزیع های لینوکس, که نصب و راه اندازی `deb` یا `rpm` بسته امکان پذیر نیست. -نسخه مورد نیاز را می توان با دانلود `curl` یا `wget` از مخزن https://repo.yandex.ru/clickhouse/tgz/. +نسخه مورد نیاز را می توان با دانلود `curl` یا `wget` از مخزن https://repo.clickhouse.tech/tgz/. پس از که دانلود بایگانی باید غیر بستهای و نصب شده با اسکریپت نصب و راه اندازی. 
به عنوان مثال برای جدیدترین نسخه: ``` bash diff --git a/docs/fr/getting-started/install.md b/docs/fr/getting-started/install.md index 770c4cf8e42..3659369fb4a 100644 --- a/docs/fr/getting-started/install.md +++ b/docs/fr/getting-started/install.md @@ -66,7 +66,7 @@ Vous pouvez également télécharger et installer des paquets manuellement à pa Il est recommandé d'utiliser officiel pré-compilé `tgz` archives pour toutes les distributions Linux, où l'installation de `deb` ou `rpm` les emballages n'est pas possible. -La version requise peut être téléchargée avec `curl` ou `wget` depuis le référentiel https://repo.yandex.ru/clickhouse/tgz/. +La version requise peut être téléchargée avec `curl` ou `wget` depuis le référentiel https://repo.clickhouse.tech/tgz/. Après cela, les archives téléchargées doivent être décompressées et installées avec des scripts d'installation. Exemple pour la dernière version: ``` bash diff --git a/docs/ja/getting-started/install.md b/docs/ja/getting-started/install.md index 9710fdae6a7..66867652811 100644 --- a/docs/ja/getting-started/install.md +++ b/docs/ja/getting-started/install.md @@ -66,7 +66,7 @@ sudo yum install clickhouse-server clickhouse-client 公式の事前コンパイルを使用することをお勧めします `tgz` のインストール `deb` または `rpm` パッケージはできません。 -必要なバージョンは次のとおりです `curl` または `wget` リポジトリからhttps://repo.yandex.ru/clickhouse/tgz/. +必要なバージョンは次のとおりです `curl` または `wget` リポジトリからhttps://repo.clickhouse.tech/tgz/. その後、アーカイブをダウンロードは開梱と設置と設置のためのイントロダクションです。 最新バージョンの例: ``` bash diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 04712328844..19943f182d8 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -34,8 +34,8 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not ``` bash sudo yum install yum-utils -sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 +sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64 ``` Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. @@ -46,21 +46,21 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/ sudo yum install clickhouse-server clickhouse-client ``` -Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64. +Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.tech/rpm/stable/x86\_64. ### Из Tgz архивов {#from-tgz-archives} Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов. -Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.yandex.ru/clickhouse/tgz/. +Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.tech/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. 
Пример установки самой свежей версии: ``` bash export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.yandex.ru/clickhouse/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.yandex.ru/clickhouse/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.yandex.ru/clickhouse/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.yandex.ru/clickhouse/tgz/clickhouse-client-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.tech/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.tech/tgz/clickhouse-server-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.tech/tgz/clickhouse-client-$LATEST_VERSION.tgz tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh diff --git a/docs/tr/getting-started/install.md b/docs/tr/getting-started/install.md index 3bf319430bd..361f59b3a10 100644 --- a/docs/tr/getting-started/install.md +++ b/docs/tr/getting-started/install.md @@ -66,7 +66,7 @@ Ayrıca paketleri manuel olarak indirebilir ve yükleyebilirsiniz [burada](https Resmi önceden derlenmiş kullanılması tavsiye edilir `tgz` Arch ,iv ,es for tüm Linux dağıtım installationları, kurulumu `deb` veya `rpm` paketler mümkün değildir. -Gerekli sürümü ile indirilebilir `curl` veya `wget` depo fromdan https://repo.yandex.ru/clickhouse/tgz/. +Gerekli sürümü ile indirilebilir `curl` veya `wget` depo fromdan https://repo.clickhouse.tech/tgz/. Bundan sonra indirilen arşivler açılmalı ve kurulum komut dosyaları ile kurulmalıdır. En son sürüm için örnek: ``` bash diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index 32eb7fa0a82..bbc79c3bf78 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -34,8 +34,8 @@ Yandex ClickHouse团队建议使用官方预编译的`rpm`软件包,用于Cent ``` bash sudo yum install yum-utils -sudo rpm --import https://repo.yandex.ru/clickhouse/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 +sudo rpm --import https://repo.clickhouse.tech/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.tech/rpm/stable/x86_64 ``` 如果您想使用最新版本,请将`stable`替换为`testing`(建议您在测试环境中使用)。 @@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/ sudo yum install clickhouse-server clickhouse-client ``` -您也可以从此处手动下载和安装软件包:https://repo.yandex.ru/clickhouse/rpm/stable/x86_64。 +您也可以从此处手动下载和安装软件包:https://repo.clickhouse.tech/rpm/stable/x86_64。 ### 来自Docker {#from-docker-image} From 5f5f10b48a647849289a29c7932e1fa5b0aad1e6 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Wed, 10 Jun 2020 15:59:17 -0300 Subject: [PATCH 0508/2229] greatCircleAngle en translation (#11584) * greatCircleAngle en translation * Update geo.md * Update geo.md --- docs/en/sql-reference/functions/geo.md | 33 +++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/geo.md b/docs/en/sql-reference/functions/geo.md index 58c501e7e1c..942b951c7c8 100644 --- a/docs/en/sql-reference/functions/geo.md +++ b/docs/en/sql-reference/functions/geo.md @@ -7,7 +7,7 @@ toc_title: Working with geographical coordinates ## greatCircleDistance {#greatcircledistance} -Calculate the distance between 
two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance). +Calculates the distance between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance). ``` sql greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg) @@ -40,6 +40,37 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673) └───────────────────────────────────────────────────────────────────┘ ``` +## greatCircleAngle {#greatcircleangle} + +Calculates the central angle between two points on the Earth’s surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance). + +``` sql +greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg) +``` + +**Input parameters** + +- `lon1Deg` — Longitude of the first point in degrees. +- `lat1Deg` — Latitude of the first point in degrees. +- `lon2Deg` — Longitude of the second point in degrees. +- `lat2Deg` — Latitude of the second point in degrees. + +**Returned value** + +The central angle between two points in degrees. + +**Example** + +``` sql +SELECT greatCircleAngle(0, 0, 45, 0) AS arc +``` + +``` text +┌─arc─┐ +│ 45 │ +└─────┘ +``` + ## pointInEllipses {#pointinellipses} Checks whether the point belongs to at least one of the ellipses. From e4be52f35f8e019c3e9fb37100fae7850858579e Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 10 Jun 2020 22:17:30 +0300 Subject: [PATCH 0509/2229] Add system.asynchronous_metric_log --- docs/en/operations/system-tables.md | 4 ++ programs/server/config.xml | 14 ++++ src/Interpreters/AsynchronousMetrics.cpp | 77 +++++++++++---------- src/Interpreters/AsynchronousMetrics.h | 17 ++--- src/Interpreters/Context.cpp | 11 +++ src/Interpreters/Context.h | 2 + src/Interpreters/InterpreterSystemQuery.cpp | 4 +- src/Interpreters/SystemLog.cpp | 7 ++ src/Interpreters/SystemLog.h | 3 + src/Interpreters/ya.make | 1 + 10 files changed, 93 insertions(+), 47 deletions(-) diff --git a/docs/en/operations/system-tables.md b/docs/en/operations/system-tables.md index 7b76f737824..28f448b632c 100644 --- a/docs/en/operations/system-tables.md +++ b/docs/en/operations/system-tables.md @@ -83,6 +83,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 - [system.events](#system_tables-events) — Contains a number of events that have occurred. - [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +## system.asynchronous_metric_log {#system-tables-async-log} + +Contains the historical values for `system.asynchronous_log` (see [system.asynchronous_metrics](#system_tables-asynchronous_metrics)) + ## system.clusters {#system-clusters} Contains information about clusters available in the config file and the servers in them. diff --git a/programs/server/config.xml b/programs/server/config.xml index ba870d8a8ea..944181ceee4 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -481,6 +481,20 @@ 1000 + + + system + asynchronous_metric_log
    +
+        <flush_interval_milliseconds>300000</flush_interval_milliseconds>
+    </asynchronous_metric_log>
    + diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 09622302893..6cd8fafa2a8 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -37,7 +38,7 @@ AsynchronousMetrics::~AsynchronousMetrics() try { { - std::lock_guard lock{wait_mutex}; + std::lock_guard lock{mutex}; quit = true; } @@ -51,17 +52,10 @@ AsynchronousMetrics::~AsynchronousMetrics() } -AsynchronousMetrics::Container AsynchronousMetrics::getValues() const +AsynchronousMetricValues AsynchronousMetrics::getValues() const { - std::lock_guard lock{container_mutex}; - return container; -} - - -void AsynchronousMetrics::set(const std::string & name, Value value) -{ - std::lock_guard lock{container_mutex}; - container[name] = value; + std::lock_guard lock{mutex}; + return values; } @@ -69,8 +63,6 @@ void AsynchronousMetrics::run() { setThreadName("AsyncMetrics"); - std::unique_lock lock{wait_mutex}; - /// Next minute + 30 seconds. To be distant with moment of transmission of metrics, see MetricsTransmitter. const auto get_next_minute = [] { @@ -89,6 +81,7 @@ void AsynchronousMetrics::run() tryLogCurrentException(__PRETTY_FUNCTION__); } + std::unique_lock lock{mutex}; if (wait_cond.wait_until(lock, get_next_minute(), [this] { return quit; })) break; } @@ -113,41 +106,43 @@ static void calculateMaxAndSum(Max & max, Sum & sum, T x) void AsynchronousMetrics::update() { + AsynchronousMetricValues new_values; + { if (auto mark_cache = context.getMarkCache()) { - set("MarkCacheBytes", mark_cache->weight()); - set("MarkCacheFiles", mark_cache->count()); + new_values["MarkCacheBytes"] = mark_cache->weight(); + new_values["MarkCacheFiles"] = mark_cache->count(); } } { if (auto uncompressed_cache = context.getUncompressedCache()) { - set("UncompressedCacheBytes", uncompressed_cache->weight()); - set("UncompressedCacheCells", uncompressed_cache->count()); + new_values["UncompressedCacheBytes"] = uncompressed_cache->weight(); + new_values["UncompressedCacheCells"] = uncompressed_cache->count(); } } #if USE_EMBEDDED_COMPILER { if (auto compiled_expression_cache = context.getCompiledExpressionCache()) - set("CompiledExpressionCacheCount", compiled_expression_cache->count()); + new_values["CompiledExpressionCacheCount"] = compiled_expression_cache->count(); } #endif - set("Uptime", context.getUptimeSeconds()); + new_values["Uptime"] = context.getUptimeSeconds(); /// Process memory usage according to OS #if defined(OS_LINUX) { MemoryStatisticsOS::Data data = memory_stat.get(); - set("MemoryVirtual", data.virt); - set("MemoryResident", data.resident); - set("MemoryShared", data.shared); - set("MemoryCode", data.code); - set("MemoryDataAndStack", data.data_and_stack); + new_values["MemoryVirtual"] = data.virt; + new_values["MemoryResident"] = data.resident; + new_values["MemoryShared"] = data.shared; + new_values["MemoryCode"] = data.code; + new_values["MemoryDataAndStack"] = data.data_and_stack; /// We must update the value of total_memory_tracker periodically. /// Otherwise it might be calculated incorrectly - it can include a "drift" of memory amount. 
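Once this log is enabled, the recorded history can be inspected like any other system table. A minimal sketch, assuming the default table name; the column names here are an assumption modeled on the other `system.*_log` tables:

``` sql
-- Inspect the most recent values of one asynchronous metric;
-- 'MemoryResident' is one of the metric names set in the code above.
SELECT event_date, event_time, metric, value
FROM system.asynchronous_metric_log
WHERE metric = 'MemoryResident'
ORDER BY event_time DESC
LIMIT 10
```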
@@ -228,21 +223,21 @@ void AsynchronousMetrics::update() } } - set("ReplicasMaxQueueSize", max_queue_size); - set("ReplicasMaxInsertsInQueue", max_inserts_in_queue); - set("ReplicasMaxMergesInQueue", max_merges_in_queue); + new_values["ReplicasMaxQueueSize"] = max_queue_size; + new_values["ReplicasMaxInsertsInQueue"] = max_inserts_in_queue; + new_values["ReplicasMaxMergesInQueue"] = max_merges_in_queue; - set("ReplicasSumQueueSize", sum_queue_size); - set("ReplicasSumInsertsInQueue", sum_inserts_in_queue); - set("ReplicasSumMergesInQueue", sum_merges_in_queue); + new_values["ReplicasSumQueueSize"] = sum_queue_size; + new_values["ReplicasSumInsertsInQueue"] = sum_inserts_in_queue; + new_values["ReplicasSumMergesInQueue"] = sum_merges_in_queue; - set("ReplicasMaxAbsoluteDelay", max_absolute_delay); - set("ReplicasMaxRelativeDelay", max_relative_delay); + new_values["ReplicasMaxAbsoluteDelay"] = max_absolute_delay; + new_values["ReplicasMaxRelativeDelay"] = max_relative_delay; - set("MaxPartCountForPartition", max_part_count_for_partition); + new_values["MaxPartCountForPartition"] = max_part_count_for_partition; - set("NumberOfDatabases", number_of_databases); - set("NumberOfTables", total_number_of_tables); + new_values["NumberOfDatabases"] = number_of_databases; + new_values["NumberOfTables"] = total_number_of_tables; } #if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4 @@ -265,7 +260,7 @@ void AsynchronousMetrics::update() TYPE value{}; \ size_t size = sizeof(value); \ mallctl("stats." NAME, &value, &size, nullptr, 0); \ - set("jemalloc." NAME, value); \ + new_values["jemalloc." NAME] = value; \ } while (false); FOR_EACH_METRIC(GET_METRIC) @@ -276,6 +271,16 @@ void AsynchronousMetrics::update() #endif /// Add more metrics as you wish. + + // Log the new metrics. + if (auto log = context.getAsynchronousMetricLog()) + { + log->addValues(new_values); + } + + // Finally, update the current metrics. + std::lock_guard lock(mutex); + values = new_values; } } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index ce6c0aae552..6817f545c8f 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -14,6 +14,9 @@ namespace DB class Context; +typedef double AsynchronousMetricValue; +typedef std::unordered_map AsynchronousMetricValues; + /** Periodically (each minute, starting at 30 seconds offset) * calculates and updates some metrics, @@ -29,21 +32,17 @@ public: ~AsynchronousMetrics(); - using Value = double; - using Container = std::unordered_map; /// Returns copy of all values. 
- Container getValues() const; + AsynchronousMetricValues getValues() const; private: Context & context; - bool quit {false}; - std::mutex wait_mutex; + mutable std::mutex mutex; std::condition_variable wait_cond; - - Container container; - mutable std::mutex container_mutex; + bool quit {false}; + AsynchronousMetricValues values; #if defined(OS_LINUX) MemoryStatisticsOS memory_stat; @@ -53,8 +52,6 @@ private: void run(); void update(); - - void set(const std::string & name, Value value); }; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cbf00836103..4a2408d64f1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1676,6 +1676,17 @@ std::shared_ptr Context::getMetricLog() } +std::shared_ptr Context::getAsynchronousMetricLog() +{ + auto lock = getLock(); + + if (!shared->system_logs) + return {}; + + return shared->system_logs->asynchronous_metric_log; +} + + CompressionCodecPtr Context::chooseCompressionCodec(size_t part_size, double part_size_ratio) const { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1d46049fb92..5a4e959229f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -80,6 +80,7 @@ class PartLog; class TextLog; class TraceLog; class MetricLog; +class AsynchronousMetricLog; struct MergeTreeSettings; class StorageS3Settings; class IDatabase; @@ -526,6 +527,7 @@ public: std::shared_ptr getTraceLog(); std::shared_ptr getTextLog(); std::shared_ptr getMetricLog(); + std::shared_ptr getAsynchronousMetricLog(); /// Returns an object used to log opertaions with parts if it possible. /// Provide table name to make required cheks. diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 9ebdb155643..f35bafbe25a 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -306,7 +307,8 @@ BlockIO InterpreterSystemQuery::execute() [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(); }, [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(); }, [&] () { if (auto text_log = context.getTextLog()) text_log->flush(); }, - [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(); } + [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(); }, + [&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(); } ); break; case Type::STOP_LISTEN_QUERIES: diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 9ce2f7a4d0e..d79edde7052 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,9 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi trace_log = createSystemLog(global_context, "system", "trace_log", config, "trace_log"); text_log = createSystemLog(global_context, "system", "text_log", config, "text_log"); metric_log = createSystemLog(global_context, "system", "metric_log", config, "metric_log"); + asynchronous_metric_log = createSystemLog( + global_context, "system", "asynchronous_metric_log", config, + "asynchronous_metric_log"); if (query_log) logs.emplace_back(query_log.get()); @@ -88,6 +92,9 @@ SystemLogs::SystemLogs(Context & global_context, const 
Poco::Util::AbstractConfi logs.emplace_back(text_log.get()); if (metric_log) logs.emplace_back(metric_log.get()); + if (asynchronous_metric_log) + logs.emplace_back(asynchronous_metric_log.get()); + try { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index dd2f815ce92..e49ce574478 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -69,6 +69,7 @@ class PartLog; class TextLog; class TraceLog; class MetricLog; +class AsynchronousMetricLog; class ISystemLog @@ -99,6 +100,8 @@ struct SystemLogs std::shared_ptr trace_log; /// Used to log traces from query profiler std::shared_ptr text_log; /// Used to log all text messages. std::shared_ptr metric_log; /// Used to log all metrics. + /// Metrics from system.asynchronous_metrics. + std::shared_ptr asynchronous_metric_log; std::vector logs; }; diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 178c3ee3125..29be5d3c216 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -105,6 +105,7 @@ SRCS( MarkTableIdentifiersVisitor.cpp MergeJoin.cpp MetricLog.cpp + AsynchronousMetricLog.cpp MutationsInterpreter.cpp NullableUtils.cpp OptimizeIfChains.cpp From a07b296ee4afa8c9d29dceb9af3b3203d7f78e7b Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 22:26:25 +0300 Subject: [PATCH 0510/2229] Update Dockerfile --- docker/client/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 453c165afad..493cdaac543 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" +ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* RUN apt-get update \ From f6b5cd7de212fede834389e6f70a24619bf89203 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 22:26:44 +0300 Subject: [PATCH 0511/2229] Update Dockerfile --- docker/server/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 45565ec659e..3a16a1fd158 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" +ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* ARG gosu_ver=1.10 From 7b54ff02f05f2a4ee93e0f467774ea9a2a533229 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Wed, 10 Jun 2020 22:27:05 +0300 Subject: [PATCH 0512/2229] Collect async metric log in perf test --- docker/test/performance-comparison/compare.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index f7986689020..a2760907cb3 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -198,12 +198,14 @@ function get_profiles clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & + 
clickhouse-client --port 9001 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & + clickhouse-client --port 9002 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: & wait From e5cd2716da4fc25cf1200410ea4e64152333eecf Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 22:27:05 +0300 Subject: [PATCH 0513/2229] Update Dockerfile --- docker/test/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index f215e21288e..6673d32c2e2 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,6 +1,6 @@ FROM ubuntu:18.04 -ARG repository="deb http://repo.clickhouse.tech/deb/stable/ main/" +ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG version=20.5.1.* RUN apt-get update && \ From 941887fbfd233a8ac1dfaa4638081a325c050b90 Mon Sep 17 00:00:00 2001 From: Denis Zhuravlev Date: Wed, 10 Jun 2020 16:31:51 -0300 Subject: [PATCH 0514/2229] removed a sentence about global lock during rename (#11577) * Update misc.md removed a sentence about global lock during rename * Update misc.md removed a sentence about global lock during rename * Update docs/en/sql-reference/statements/misc.md Co-authored-by: Ivan Blinkov * Update docs/ru/sql-reference/statements/misc.md Co-authored-by: Ivan Blinkov Co-authored-by: Ivan Blinkov --- docs/en/sql-reference/statements/misc.md | 2 +- docs/ru/sql-reference/statements/misc.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md index 18cbf1a90e8..bd978908588 100644 --- a/docs/en/sql-reference/statements/misc.md +++ b/docs/en/sql-reference/statements/misc.md @@ -271,7 +271,7 @@ Renames one or more tables. RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] ``` -All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). +Renaming tables is a light operation. If you indicated another database after `TO`, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). If you rename multiple tables in one query, this is a non-atomic operation, it may be partially executed, queries in other sessions may receive the error `Table ... doesn't exist ..`. 
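A short illustration of the behavior described above, with hypothetical table names:

``` sql
-- Moves one table into another database on the same filesystem and renames
-- a second table; the two renames together are not atomic.
RENAME TABLE db1.events TO db2.events, db1.users_old TO db1.users
```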
## SET {#query-set} diff --git a/docs/ru/sql-reference/statements/misc.md b/docs/ru/sql-reference/statements/misc.md index 97d2ce5818e..77f9570ae47 100644 --- a/docs/ru/sql-reference/statements/misc.md +++ b/docs/ru/sql-reference/statements/misc.md @@ -256,7 +256,7 @@ OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION I RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] ``` -Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка). +Переименовывание таблицы является лёгкой операцией. Если вы указали после `TO` другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка). В случае переименования нескольких таблиц в одном запросе — это неатомарная операция, может выполнится частично, запросы в других сессиях могут получить ошибку `Table ... doesn't exist...`. ## SET {#query-set} From fd3ff19868c94270a33d6fde51f7429597c7d8bd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Jun 2020 22:34:23 +0300 Subject: [PATCH 0515/2229] Fix trivial error in log message #11399 --- programs/server/Server.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8b58c5664b6..9734bafe30e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -604,7 +604,8 @@ int Server::main(const std::vector & /*args*/) if (uncompressed_cache_size > max_cache_size) { uncompressed_cache_size = max_cache_size; - LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", + formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); } global_context->setUncompressedCache(uncompressed_cache_size); @@ -619,7 +620,8 @@ int Server::main(const std::vector & /*args*/) if (mark_cache_size > max_cache_size) { mark_cache_size = max_cache_size; - LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size)); + LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", + formatReadableSizeWithBinarySuffix(mark_cache_size)); } global_context->setMarkCache(mark_cache_size); From 472b04b69c58adaac2cd1030d7921c7ceb6f09a8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 10 Jun 2020 19:59:37 +0000 Subject: [PATCH 0516/2229] Better producer --- .../RabbitMQ/RabbitMQBlockOutputStream.cpp | 7 +-- .../WriteBufferToRabbitMQProducer.cpp | 50 +++++++------------ .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 +- 3 files changed, 21 insertions(+), 39 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp index 8e867db6de9..5dc2c1f8fc4 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockOutputStream.cpp @@ -37,17 +37,14 @@ void RabbitMQBlockOutputStream::writePrefix() { buffer->countRow(); }); + + buffer->startEventLoop(); } 
void RabbitMQBlockOutputStream::write(const Block & block) { child->write(block); - - if (buffer) - buffer->flush(); - - storage.pingConnection(); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 31c3dea97aa..151d5fc62d4 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -16,7 +16,7 @@ enum { Connection_setup_sleep = 200, Connection_setup_retries_max = 1000, - Buffer_limit_to_flush = 10000 /// It is important to keep it low in order not to kill consumers + Batch = 10000 }; WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( @@ -64,12 +64,13 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( } producer_channel = std::make_shared(&connection); + checkExchange(); } WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - flush(); + checkExchange(); connection.close(); assert(rows == 0 && chunks.empty()); @@ -98,18 +99,29 @@ void WriteBufferToRabbitMQProducer::countRow() chunks.clear(); set(nullptr, 0); - messages.emplace_back(payload); + next_queue = next_queue % num_queues + 1; + + if (bind_by_id || hash_exchange) + { + producer_channel->publish(exchange_name, std::to_string(next_queue), payload); + } + else + { + producer_channel->publish(exchange_name, routing_key, payload); + } + ++message_counter; - if (messages.size() >= Buffer_limit_to_flush) + /// run event loop to actually publish, checking exchange is just a point to stop the event loop + if ((message_counter %= Batch) == 0) { - flush(); + checkExchange(); } } } -void WriteBufferToRabbitMQProducer::flush() +void WriteBufferToRabbitMQProducer::checkExchange() { std::atomic exchange_declared = false, exchange_error = false; @@ -120,32 +132,6 @@ void WriteBufferToRabbitMQProducer::flush() .onSuccess([&]() { exchange_declared = true; - - /* The reason for accumulating payloads and not publishing each of them at once in count_row() is that publishing - * needs to be wrapped inside declareExchange() callback and it is too expensive in terms of time to declare it - * each time we publish. Declaring it once and then publishing without wrapping inside onSuccess callback leads to - * exchange becoming inactive at some point and part of messages is lost as a result. 
- */ - for (auto & payload : messages) - { - if (!message_counter) - break; - - next_queue = next_queue % num_queues + 1; - - if (bind_by_id || hash_exchange) - { - producer_channel->publish(exchange_name, std::to_string(next_queue), payload); - } - else - { - producer_channel->publish(exchange_name, routing_key, payload); - } - - --message_counter; - } - - messages.clear(); }) .onError([&](const char * message) { diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 9ae3893d6ae..3cbcec9ccc2 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -35,12 +35,11 @@ public: ~WriteBufferToRabbitMQProducer() override; void countRow(); - void flush(); + void startEventLoop(); private: void nextImpl() override; void checkExchange(); - void startEventLoop(); std::pair & login_password; const String routing_key; From 6ad0f240cec6cf73cf6bbee7a364e3c8a927d470 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:05:22 +0300 Subject: [PATCH 0517/2229] Update generate.md --- docs/en/engines/table-engines/special/generate.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index 396b039f8df..aa12092367c 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -3,7 +3,7 @@ toc_priority: 46 toc_title: GenerateRandom --- -# Generaterandom {#table_engines-generate} +# GenerateRandom Table Engine {#table_engines-generate} The GenerateRandom table engine produces random data for given table schema. @@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries. It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`. -**Example:** +## Example **1.** Set up the `generate_engine_table` table: From 38347cde2eb558ce5d67a88f0fa21138550d61c1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:07:17 +0300 Subject: [PATCH 0518/2229] Update buffer.md --- docs/en/engines/table-engines/special/buffer.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index 12143044f21..bf6c08f8f6c 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -3,7 +3,7 @@ toc_priority: 45 toc_title: Buffer --- -# Buffer {#buffer} +# Buffer Table Engine {#buffer} Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously. @@ -34,9 +34,9 @@ Example: CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) ``` -Creating a ‘merge.hits\_buffer’ table with the same structure as ‘merge.hits’ and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the ‘merge.hits’ table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. 
For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner. +Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the ‘merge.hits’ table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner. -When the server is stopped, with DROP TABLE or DETACH TABLE, buffer data is also flushed to the destination table. +When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table. You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory. @@ -52,11 +52,11 @@ If you need to run ALTER for a subordinate table and the Buffer table, we recomm If the server is restarted abnormally, the data in the buffer is lost. -FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table. +`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table. When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table. -Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set ‘num\_layers’ to 1. +Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1. If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable ‘exactly once’ write to replicated tables. 
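To make the `num_layers` advice above concrete, a Buffer table in front of a CollapsingMergeTree could be declared with a single buffer layer (a sketch; `merge.visits` is a hypothetical destination table):

``` sql
-- num_layers = 1 keeps a single buffer, so inserted rows reach the
-- destination table in fewer, better-ordered blocks.
CREATE TABLE merge.visits_buffer AS merge.visits
ENGINE = Buffer(merge, visits, 1, 10, 100, 10000, 1000000, 10000000, 100000000)
```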
From d4daa006f90bcf7416bb58f024419295d38b8aeb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:08:20 +0300 Subject: [PATCH 0519/2229] Update memory.md --- docs/en/engines/table-engines/special/memory.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index abdb07658d4..a6c833ebdba 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -3,15 +3,16 @@ toc_priority: 44 toc_title: Memory --- -# Memory {#memory} +# Memory Table Engine {#memory} The Memory engine stores data in RAM, in uncompressed form. Data is stored in exactly the same form as it is received when read. In other words, reading from this table is completely free. Concurrent data access is synchronized. Locks are short: read and write operations don’t block each other. Indexes are not supported. Reading is parallelized. + Maximal productivity (over 10 GB/sec) is reached on simple queries, because there is no reading from the disk, decompressing, or deserializing data. (We should note that in many cases, the productivity of the MergeTree engine is almost as high.) When restarting a server, data disappears from the table and the table becomes empty. Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000). -The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing GLOBAL IN (see the section “IN operators”). +The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”). [Original article](https://clickhouse.tech/docs/en/operations/table_engines/memory/) From 8be01aae1d0f2b1d5aa7e99550a2ec80ce2910d7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:08:56 +0300 Subject: [PATCH 0520/2229] Update materializedview.md --- docs/en/engines/table-engines/special/materializedview.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/materializedview.md b/docs/en/engines/table-engines/special/materializedview.md index b0d99bc67d9..53ebf9641af 100644 --- a/docs/en/engines/table-engines/special/materializedview.md +++ b/docs/en/engines/table-engines/special/materializedview.md @@ -3,8 +3,8 @@ toc_priority: 43 toc_title: MaterializedView --- -# Materializedview {#materializedview} +# MaterializedView Table Engine {#materializedview} -Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine. +Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine. 
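For example, a materialized view whose data is stored by the engine named in its own `CREATE` statement (a sketch with made-up names):

``` sql
-- Reads from daily_hits are served directly by the SummingMergeTree
-- storage created for the view.
CREATE MATERIALIZED VIEW daily_hits
ENGINE = SummingMergeTree() ORDER BY day
AS SELECT toDate(event_time) AS day, count() AS hits
FROM hits
GROUP BY day
```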
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) From 79120387a4eafb11df82aa934dab6bcdbead4cbd Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:09:39 +0300 Subject: [PATCH 0521/2229] Update view.md --- docs/en/engines/table-engines/special/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/special/view.md b/docs/en/engines/table-engines/special/view.md index f5d74795dec..c4f95adfa6d 100644 --- a/docs/en/engines/table-engines/special/view.md +++ b/docs/en/engines/table-engines/special/view.md @@ -3,7 +3,7 @@ toc_priority: 42 toc_title: View --- -# View {#table_engines-view} +# View Table Engine {#table_engines-view} Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). From ceb34f352ebdc7a6b7626dcf7a8141685f60e3eb Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:11:41 +0300 Subject: [PATCH 0522/2229] Update url.md --- docs/en/engines/table-engines/special/url.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index d560487c788..4fa1a50df38 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -3,12 +3,13 @@ toc_priority: 41 toc_title: URL --- -# URL(URL, Format) {#table_engines-url} +# URL Table Engine {#table_engines-url} -Manages data on a remote HTTP/HTTPS server. This engine is similar -to the [File](file.md) engine. +Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine. -## Using the Engine in the ClickHouse Server {#using-the-engine-in-the-clickhouse-server} +Syntax: `URL(URL, Format)` + +## Usage {#using-the-engine-in-the-clickhouse-server} The `format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see @@ -24,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting. -**Example:** +## Example **1.** Create a `url_engine_table` table on the server : From 33d4ed893a47d4bbf971866d433fd1c784c4da7a Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:14:00 +0300 Subject: [PATCH 0523/2229] Update join.md --- docs/en/engines/table-engines/special/join.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index d0e685f9c48..0f88f0a56e7 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -3,9 +3,12 @@ toc_priority: 40 toc_title: Join --- -# Join {#join} +# Join Table Engine {#join} -Prepared data structure for using in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations. +Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations. + +!!! note "Note" + This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself. 
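A minimal end-to-end sketch with illustrative names (the details of the syntax follow in the sections below):

``` sql
-- Keep a small right-hand side in memory and reuse it across queries.
CREATE TABLE id_to_name (id UInt32, name String)
ENGINE = Join(ANY, LEFT, id);

INSERT INTO id_to_name VALUES (1, 'first'), (2, 'second');

-- Direct key lookup into the prepared structure.
SELECT joinGet('id_to_name', 'name', toUInt32(2));
```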
## Creating a Table {#creating-a-table} From 06ba3bc4800f4acca0d937205c67a7ab5b24c0d0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:14:51 +0300 Subject: [PATCH 0524/2229] Update set.md --- docs/en/engines/table-engines/special/set.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/special/set.md b/docs/en/engines/table-engines/special/set.md index fb0c5952ae4..65fd7376532 100644 --- a/docs/en/engines/table-engines/special/set.md +++ b/docs/en/engines/table-engines/special/set.md @@ -3,14 +3,14 @@ toc_priority: 39 toc_title: Set --- -# Set {#set} +# Set Table Engine {#set} -A data set that is always in RAM. It is intended for use on the right side of the IN operator (see the section “IN operators”). +A data set that is always in RAM. It is intended for use on the right side of the `IN` operator (see the section “IN operators”). -You can use INSERT to insert data in the table. New elements will be added to the data set, while duplicates will be ignored. -But you can’t perform SELECT from the table. The only way to retrieve data is by using it in the right half of the IN operator. +You can use `INSERT` to insert data in the table. New elements will be added to the data set, while duplicates will be ignored. +But you can’t perform `SELECT` from the table. The only way to retrieve data is by using it in the right half of the `IN` operator. -Data is always located in RAM. For INSERT, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place. +Data is always located in RAM. For `INSERT`, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place. For a rough server restart, the block of data on the disk might be lost or damaged. In the latter case, you may need to manually delete the file with damaged data. From 7d5a1ea485ed0861297a1c892bff34ea1a177e23 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:16:54 +0300 Subject: [PATCH 0525/2229] Update null.md --- docs/en/engines/table-engines/special/null.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/special/null.md b/docs/en/engines/table-engines/special/null.md index 73c5a2b1ea6..5f9a2ac679b 100644 --- a/docs/en/engines/table-engines/special/null.md +++ b/docs/en/engines/table-engines/special/null.md @@ -3,10 +3,11 @@ toc_priority: 38 toc_title: 'Null' --- -# Null {#null} +# Null Table Engine {#null} -When writing to a Null table, data is ignored. When reading from a Null table, the response is empty. +When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty. -However, you can create a materialized view on a Null table. So the data written to the table will end up in the view. +!!! info "Hint" + However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. 
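A sketch of that hint, with hypothetical names:

``` sql
-- Rows written to raw_events are discarded, but the materialized view
-- still processes every inserted block and keeps the aggregates.
CREATE TABLE raw_events (ts DateTime, user_id UInt64) ENGINE = Null;

CREATE MATERIALIZED VIEW events_per_day
ENGINE = SummingMergeTree() ORDER BY day
AS SELECT toDate(ts) AS day, count() AS events
FROM raw_events
GROUP BY day;
```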
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/null/) From f5b766527147aac770bfd6d7d37570de1809cca2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:18:36 +0300 Subject: [PATCH 0526/2229] Update distributed.md --- docs/en/engines/table-engines/special/distributed.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 1c4c6299b51..11245bbf262 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -3,9 +3,9 @@ toc_priority: 33 toc_title: Distributed --- -# Distributed {#distributed} +# Distributed Table Engine {#distributed} -**Tables with Distributed engine do not store any data by themself**, but allow distributed query processing on multiple servers. +Tables with Distributed engine do not store any data by their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. The Distributed engine accepts parameters: From 139d3578923dc7850c20678d07a93028904cd3a8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:19:58 +0300 Subject: [PATCH 0527/2229] Update external-data.md --- docs/en/engines/table-engines/special/external-data.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/special/external-data.md b/docs/en/engines/table-engines/special/external-data.md index de487aef154..a2f6c076196 100644 --- a/docs/en/engines/table-engines/special/external-data.md +++ b/docs/en/engines/table-engines/special/external-data.md @@ -1,11 +1,11 @@ --- -toc_priority: 34 -toc_title: External data +toc_priority: 45 +toc_title: External Data --- # External Data for Query Processing {#external-data-for-query-processing} -ClickHouse allows sending a server the data that is needed for processing a query, together with a SELECT query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in IN operators). +ClickHouse allows sending a server the data that is needed for processing a query, together with a `SELECT` query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in `IN` operators). For example, if you have a text file with important user identifiers, you can upload it to the server along with a query that uses filtration by this list. @@ -46,7 +46,7 @@ $ cat /etc/passwd | sed 's/:/\t/g' | clickhouse-client --query="SELECT shell, co /bin/sync 1 ``` -When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The ‘query\_string’ is passed the parameters ‘name\_format’, ‘name\_types’, and ‘name\_structure’, where ‘name’ is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client. +When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The `query_string` is passed the parameters `name_format`, `name_types`, and `name_structure`, where `name` is the name of the table that these parameters correspond to. 
The meaning of the parameters is the same as when using the command-line client. Example: From 5084496ca0f5e0f4e392216e6c771e1a209b065f Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:20:54 +0300 Subject: [PATCH 0528/2229] Update dictionary.md --- docs/en/engines/table-engines/special/dictionary.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md index 31806b54ff3..086ad53fab5 100644 --- a/docs/en/engines/table-engines/special/dictionary.md +++ b/docs/en/engines/table-engines/special/dictionary.md @@ -3,15 +3,17 @@ toc_priority: 35 toc_title: Dictionary --- -# Dictionary {#dictionary} +# Dictionary Table Engine {#dictionary} The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table. +## Example + As an example, consider a dictionary of `products` with the following configuration: ``` xml - + products @@ -36,7 +38,7 @@ As an example, consider a dictionary of `products` with the following configurat - + ``` From ba57fbaa20f1f02eda5fdfacd1602a73d4c2ebf6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:21:42 +0300 Subject: [PATCH 0529/2229] Update merge.md --- docs/en/engines/table-engines/special/merge.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index a683fcdbc9e..5dca7f8602d 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -3,13 +3,17 @@ toc_priority: 36 toc_title: Merge --- -# Merge {#merge} +# Merge Table Engine {#merge} The `Merge` engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously. + Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist. + The `Merge` engine accepts parameters: the database name and a regular expression for tables. -Example: +## Examples + +Example 1: ``` sql Merge(hits, '^WatchLog') From 14cda2d1dbc97ddd2b5eb953568daa44df56a2c2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:22:56 +0300 Subject: [PATCH 0530/2229] Update file.md --- docs/en/engines/table-engines/special/file.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 28470e526b2..afccf0a4552 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -3,12 +3,11 @@ toc_priority: 37 toc_title: File --- -# File {#table_engines-file} +# File Table Engine {#table_engines-file} -The File table engine keeps the data in a file in one of the supported [file -formats](../../../interfaces/formats.md#formats) (TabSeparated, Native, etc.). +The File table engine keeps the data in a file in one of the supported [file formats](../../../interfaces/formats.md#formats) (`TabSeparated`, `Native`, etc.). -Usage examples: +Usage scenarios: - Data export from ClickHouse to file. - Convert data from one format to another. @@ -34,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A !!! 
warning "Warning" Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined. -**Example:** +## Example **1.** Set up the `file_engine_table` table: From b5b3cb1c05300cca486c159a5fb034504e862496 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:48:54 +0300 Subject: [PATCH 0531/2229] Update index.md --- docs/en/sql-reference/statements/select/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 8224bf1e798..565b5fbcd5d 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -1,12 +1,12 @@ --- toc_priority: 33 toc_folder_title: SELECT -toc_title: Queries Syntax +toc_title: Query Syntax --- -# SELECT Queries Syntax {#select-queries-syntax} +# SELECT Query {#select-queries-syntax} -`SELECT` performs data retrieval. +`SELECT` queries perform data retrieval. By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../insert-into.md) it can be forwarded to a different table. ``` sql [WITH expr_list|(subquery)] From f433ffaec645bae06ae654dddd65efaf17b6513f Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 10 Jun 2020 23:49:37 +0300 Subject: [PATCH 0532/2229] Update index.md --- docs/en/sql-reference/statements/select/index.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 565b5fbcd5d..87dde4839ff 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -2,6 +2,7 @@ toc_priority: 33 toc_folder_title: SELECT toc_title: Query Syntax +title: SELECT Query --- # SELECT Query {#select-queries-syntax} From 8ec3ab9c9fef665f0159bc7aab206e3e3791f61b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 10 Jun 2020 23:57:54 +0300 Subject: [PATCH 0533/2229] Require less recent boost - 1.67 --- contrib/boost-cmake/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 4360304403f..e92fe4b7159 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -98,7 +98,8 @@ if (USE_INTERNAL_BOOST_LIBRARY) target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) else () # 1.70 like in contrib/boost - set(BOOST_VERSION 1.70) + # 1.67 on CI + set(BOOST_VERSION 1.67) find_package(Boost ${BOOST_VERSION} COMPONENTS system From 67ccd6703ea9de805b65c7fa25a3c43620571b55 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 00:03:13 +0300 Subject: [PATCH 0534/2229] maybe fix the unbundled gcc build --- cmake/find/sentry.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 2281d870dec..2d3aa71248a 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG)) +if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (USE_SENTRY "Use Sentry" ON) set (CURL_LIBRARY 
${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) @@ -14,9 +14,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "") set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "") set (SENTRY_PIC OFF CACHE BOOL "") - if (OS_LINUX AND NOT_UNBUNDLED) - set (BUILD_SHARED_LIBS OFF) - endif() + set (BUILD_SHARED_LIBS OFF) message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") include_directories("${SENTRY_INCLUDE_DIR}") From a6648516f45307949c3fe82ee2fe35aab4f841ef Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 11 Jun 2020 00:16:58 +0300 Subject: [PATCH 0535/2229] Add files; flush more often --- programs/server/config.xml | 4 +- src/Interpreters/AsynchronousMetricLog.cpp | 64 ++++++++++++++++++++++ src/Interpreters/AsynchronousMetricLog.h | 41 ++++++++++++++ 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 src/Interpreters/AsynchronousMetricLog.cpp create mode 100644 src/Interpreters/AsynchronousMetricLog.h diff --git a/programs/server/config.xml b/programs/server/config.xml index 944181ceee4..0ceba85593a 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -490,9 +490,9 @@ asynchronous_metric_log
    - 300000 + 60000 /var/lib/clickhouse/access/ + + + + + users.xml diff --git a/programs/server/users.xml b/programs/server/users.xml index 286c065722e..838b46e6938 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -27,38 +27,6 @@ - - - - - @@ -76,7 +44,7 @@ If you want to specify double SHA1, place it in 'password_double_sha1_hex' element. Example: e395796d6546b1b65db9d665cd43f0e858dd4303 - If you want to specify a previously defined LDAP server (see 'ldap_servers' above) for authentication, place its name in 'server' element inside 'ldap' element. + If you want to specify a previously defined LDAP server (see 'ldap_servers' in main config) for authentication, place its name in 'server' element inside 'ldap' element. Example: my_ldap_server How to generate decent password: diff --git a/src/Access/AccessControlManager.cpp b/src/Access/AccessControlManager.cpp index 940ddc5dcdc..5d02eca2bc8 100644 --- a/src/Access/AccessControlManager.cpp +++ b/src/Access/AccessControlManager.cpp @@ -99,9 +99,14 @@ void AccessControlManager::setLocalDirectory(const String & directory_path) } +void AccessControlManager::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) +{ + external_authenticators = std::make_unique(config, getLogger()); +} + + void AccessControlManager::setUsersConfig(const Poco::Util::AbstractConfiguration & users_config) { - external_authenticators = std::make_unique(users_config, getLogger()); auto & users_config_access_storage = dynamic_cast(getStorageByIndex(USERS_CONFIG_ACCESS_STORAGE_INDEX)); users_config_access_storage.setConfiguration(users_config); } diff --git a/src/Access/AccessControlManager.h b/src/Access/AccessControlManager.h index 4ef971dca65..267f92a2aa4 100644 --- a/src/Access/AccessControlManager.h +++ b/src/Access/AccessControlManager.h @@ -48,6 +48,7 @@ public: ~AccessControlManager(); void setLocalDirectory(const String & directory); + void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); void setUsersConfig(const Poco::Util::AbstractConfiguration & users_config); void setDefaultProfileName(const String & default_profile_name); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5e2f4ecadab..b3e5e348cb9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -618,6 +618,7 @@ void Context::setConfig(const ConfigurationPtr & config) { auto lock = getLock(); shared->config = config; + shared->access_control_manager.setExternalAuthenticatorsConfig(*shared->config); } const Poco::Util::AbstractConfiguration & Context::getConfigRef() const @@ -637,6 +638,11 @@ const AccessControlManager & Context::getAccessControlManager() const return shared->access_control_manager; } +void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) +{ + auto lock = getLock(); + shared->access_control_manager.setExternalAuthenticatorsConfig(config); +} void Context::setUsersConfig(const ConfigurationPtr & config) { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 864468c0663..10f9d688136 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -243,6 +243,9 @@ public: AccessControlManager & getAccessControlManager(); const AccessControlManager & getAccessControlManager() const; + /// Sets external authenticators config (LDAP). 
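+    /// Called from Context::setConfig, which forwards the main server config here; the parsed LDAP server definitions replace any previously loaded ones.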
+ void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); + /** Take the list of users, quotas and configuration profiles from this config. * The list of users is completely replaced. * The accumulated quota values are not reset if the quota is not deleted. From d7b269480641aabbcec3f1702dacfefab123e6c5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 10 Jun 2020 23:01:47 +0000 Subject: [PATCH 0540/2229] Support all exchange types --- src/Storages/RabbitMQ/RabbitMQSettings.h | 3 +- .../ReadBufferFromRabbitMQConsumer.cpp | 132 +++++-- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 12 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 6 +- .../WriteBufferToRabbitMQProducer.cpp | 6 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 4 +- .../integration/test_storage_rabbitmq/test.py | 368 ++++++++++++++---- 8 files changed, 411 insertions(+), 153 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index 509ed68b8d3..a3f133cfed0 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -12,12 +12,13 @@ namespace DB #define LIST_OF_RABBITMQ_SETTINGS(M) \ M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \ - M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exhange name, to which messages are sent. Needed to bind queues to it.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. Needed to bind queues to it.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ + M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 32dcd30e6f5..1321a4fb3b6 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -23,8 +23,9 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( Poco::Logger * log_, char row_delimiter_, const bool bind_by_id_, - const bool hash_exchange_, const size_t num_queues_, + const String & exchange_type_, + const String table_name_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -35,18 +36,22 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , log(log_) , row_delimiter(row_delimiter_) , bind_by_id(bind_by_id_) - , hash_exchange(hash_exchange_) , num_queues(num_queues_) + , exchange_type(exchange_type_) + , table_name(table_name_) , stopped(stopped_) { messages.clear(); current = messages.begin(); + exchange_type_set = exchange_type != "default" ? 1 : 0; + /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. 
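 * (The number of queues is controlled by the rabbitmq_num_queues setting declared above; e.g. a table created with rabbitmq_num_queues = 4 declares four queues per consumer.)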
 * By default there is one queue per consumer. */
    for (size_t queue_id = 0; queue_id < num_queues; ++queue_id)
    {
+        /// Queue bindings must be declared before any publishing => it must be done here and not in readPrefix()
        initQueueBindings(queue_id);
    }
}
@@ -64,54 +69,88 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer()

void ReadBufferFromRabbitMQConsumer::initExchange()
{
-    /* As there are 5 different types of exchanges and the type should be set as a parameter while publishing the message,
-     * then for uniformity this parameter should always be set as fanout-exchange type. In current implementation, the exchange,
-     * to which messages a published, will be bound to the exchange of the needed type, which will distribute messages according to its type.
+    /* If exchange_type is not set, then direct-exchange is used - this type of exchange is the fastest
+     * and it is also used for INSERT queries.
      */
-    consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message)
+    String producer_exchange = exchange_type_set ? exchange_name + "_default" : exchange_name;
+    consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message)
    {
-        exchange_declared = false;
-        LOG_ERROR(log, "Failed to declare fanout exchange: {}", message);
+        internal_exchange_declared = false;
+        LOG_ERROR(log, "Failed to declare exchange: {}", message);
    });

-    if (hash_exchange)
+    internal_exchange_name = producer_exchange + "_direct";
+    consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message)
    {
-        current_exchange_name = exchange_name + "_hash";
-        consumer_channel->declareExchange(current_exchange_name, AMQP::consistent_hash).onError([&](const char * /* message */)
-        {
-            exchange_declared = false;
-        });
+        internal_exchange_declared = false;
+        LOG_ERROR(log, "Failed to declare exchange: {}", message);
+    });

-        consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */)
-        {
-            exchange_declared = false;
-        });
-    }
-    else
+    consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_key).onError([&](const char * message)
    {
-        current_exchange_name = exchange_name + "_direct";
-        consumer_channel->declareExchange(current_exchange_name, AMQP::direct).onError([&](const char * /* message */)
-        {
-            exchange_declared = false;
-        });
+        internal_exchange_declared = false;
+        LOG_ERROR(log, "Failed to bind exchange: {}", message);
+    });

-        consumer_channel->bindExchange(exchange_name, current_exchange_name, routing_key).onError([&](const char * /* message */)
-        {
-            exchange_declared = false;
-        });
-    }
+    if (!exchange_type_set)
+        return;
+
+    /// To use the routing flexibility provided by RabbitMQ, choosing other exchange types is also supported.
+
+    AMQP::ExchangeType type;
+    if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout;
+    else if (exchange_type == "direct") type = AMQP::ExchangeType::direct;
+    else if (exchange_type == "topic") type = AMQP::ExchangeType::topic;
+    else if (exchange_type == "consistent_hash") type = AMQP::ExchangeType::consistent_hash;
+    else return;
+
+    /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages
+     * between all consumers. (This enables better scaling, as without a hash-exchange the only option to avoid getting the same
+     * messages more than once is having only one consumer with one queue, which is not good.)
+ */ + consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + }); + + hash_exchange = true; + + /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash + if (!bind_by_id && exchange_type != "consistent_hash") + return; + + AMQP::Table exchange_arguments; + exchange_arguments["hash-property"] = "message_id"; + + local_exchange_name = exchange_name + "_" + table_name; + consumer_channel->declareExchange(local_exchange_name, AMQP::consistent_hash, exchange_arguments) + .onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + }); + + consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + }); } void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - if (!exchange_declared) + /// These variables might be updated later from a separate thread in onError callbacks + if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) { initExchange(); - exchange_declared = true; + local_exchange_declared = true; + internal_exchange_declared = true; } - std::atomic bindings_created = false, bindings_error = false; + bool internal_bindings_created = false, internal_bindings_error = false; + bool local_bindings_created = false, local_bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -137,25 +176,39 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - /// Must be done here, cannot be done in readPrefix() + /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection subscribe(queues.back()); LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); - consumer_channel->bindQueue(current_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) .onSuccess([&] { - bindings_created = true; + internal_bindings_created = true; }) .onError([&](const char * message) { - bindings_error = true; + internal_bindings_error = true; LOG_ERROR(log, "Failed to create queue binding: {}", message); }); + + if (exchange_type_set) + { + consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } }) .onError([&](const char * message) { - bindings_error = true; + internal_bindings_error = true; LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); @@ -163,7 +216,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) * It is important at this moment to make sure that queue bindings are created before any publishing can happen because * otherwise messages will be routed nowhere. 
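     * (An AMQP exchange with no matching bound queue silently drops published messages unless the 'mandatory' publish flag is set, which is why the loop below keeps driving the event loop until the binding callbacks fire.)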
*/ - while (!bindings_created && !bindings_error) + while (!internal_bindings_created && !internal_bindings_error + || (exchange_type_set && !local_bindings_created && !local_bindings_error)) { startEventLoop(loop_started); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 7fbc1024d44..51eae60cdeb 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -30,8 +30,9 @@ public: Poco::Logger * log_, char row_delimiter_, const bool bind_by_id_, - const bool hash_exchange_, const size_t num_queues_, + const String & exchange_type_, + const String table_name_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -49,8 +50,9 @@ private: const String & routing_key; const size_t channel_id; const bool bind_by_id; - const bool hash_exchange; const size_t num_queues; + const String & exchange_type; + const String table_name; Poco::Logger * log; char row_delimiter; @@ -58,8 +60,10 @@ private: bool allowed = true; const std::atomic & stopped; - String current_exchange_name; - bool exchange_declared = false; + String internal_exchange_name, local_exchange_name; + bool internal_exchange_declared = false, local_exchange_declared = false; + bool exchange_type_set = false, hash_exchange = false; + std::atomic loop_started = false, consumer_error = false; std::atomic count_subscribed = 0, wait_subscribed; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 481314a38c2..895b9ca2bec 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -59,9 +59,9 @@ StorageRabbitMQ::StorageRabbitMQ( const String & exchange_name_, const String & format_name_, char row_delimiter_, + const String & exchange_type_, size_t num_consumers_, - size_t num_queues_, - bool hash_exchange_) + size_t num_queues_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) @@ -71,7 +71,7 @@ StorageRabbitMQ::StorageRabbitMQ( , row_delimiter(row_delimiter_) , num_consumers(num_consumers_) , num_queues(num_queues_) - , hash_exchange(hash_exchange_) + , exchange_type(exchange_type_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( @@ -212,16 +212,20 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); + auto table_id = getStorageID(); + String table_name = table_id.getNameForLogs(); + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, - next_channel_id, log, row_delimiter, bind_by_id, hash_exchange, num_queues, stream_cancelled); + next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - return std::make_shared(parsed_address, login_password, routing_key, exchange_name, - log, num_consumers * num_queues, bind_by_id, hash_exchange, - row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; + + return std::make_shared(parsed_address, login_password, routing_key, producer_exchange, + log, num_consumers * num_queues, bind_by_id, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -436,20 +440,19 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - bool hash_exchange = static_cast(rabbitmq_settings.rabbitmq_hash_exchange); + String exchange_type = rabbitmq_settings.rabbitmq_exchange_type.value; if (args_count >= 6) { + engine_args[5] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[5], args.local_context); + const auto * ast = engine_args[5]->as(); - if (ast && ast->value.getType() == Field::Types::UInt64) + if (ast && ast->value.getType() == Field::Types::String) { - hash_exchange = static_cast(safeGet(ast->value)); - } - else - { - throw Exception("Hash exchange flag must be a boolean", ErrorCodes::BAD_ARGUMENTS); + exchange_type = safeGet(ast->value); } } + UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; if (args_count >= 7) { @@ -480,7 +483,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_key, exchange, format, row_delimiter, num_consumers, num_queues, hash_exchange); + host_port, routing_key, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 111e52768d0..27a9b8834f4 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -66,9 +66,9 @@ protected: const String & exchange_name_, const String & format_name_, char row_delimiter_, + const String & exchange_type_, size_t num_consumers_, - size_t num_queues_, - bool hash_exchange); + size_t num_queues_); private: Context global_context; @@ -83,7 +83,7 @@ private: size_t num_created_consumers = 0; bool bind_by_id; size_t num_queues; - const bool hash_exchange; + const String exchange_type; Poco::Logger * log; std::pair parsed_address; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 151d5fc62d4..8fa241dade5 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -23,11 +23,10 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( std::pair & parsed_address, std::pair & login_password_, const String & routing_key_, - const String & exchange_, + const String exchange_, Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, - const bool hash_exchange_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_) @@ -38,7 +37,6 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) - , hash_exchange(hash_exchange_) , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) @@ -101,7 +99,7 @@ void WriteBufferToRabbitMQProducer::countRow() next_queue = next_queue % num_queues + 1; - if (bind_by_id || hash_exchange) + if (bind_by_id) { producer_channel->publish(exchange_name, std::to_string(next_queue), payload); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 3cbcec9ccc2..90e0d90b356 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -22,11 +22,10 @@ public: std::pair & parsed_address, std::pair & 
login_password_, const String & routing_key_, - const String & exchange_, + const String exchange_, Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, - const bool hash_exchange_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_ @@ -45,7 +44,6 @@ private: const String routing_key; const String exchange_name; const bool bind_by_id; - const bool hash_exchange; const size_t num_queues; event_base * producerEvbase; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index d7e991fe7ae..d9c08ef7b6b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -497,90 +497,6 @@ def test_rabbitmq_big_message(rabbitmq_cluster): assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) -def test_rabbitmq_sharding_between_tables(rabbitmq_cluster): - - NUMBER_OF_CONCURRENT_CONSUMERS = 10 - - instance.query(''' - DROP TABLE IF EXISTS test.destination; - CREATE TABLE test.destination(key UInt64, value UInt64, - _consumed_by LowCardinality(String)) - ENGINE = MergeTree() - ORDER BY key; - ''') - - for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): - table_name = 'rabbitmq_consumer{}'.format(consumer_id) - print("Setting up {}".format(table_name)) - - instance.query(''' - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - CREATE TABLE test.{0} (key UInt64, value UInt64) - ENGINE = RabbitMQ - SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_hash_exchange = 1, - rabbitmq_format = 'JSONEachRow', - rabbitmq_row_delimiter = '\\n'; - CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS - SELECT key, value, '{0}' as _consumed_by FROM test.{0}; - '''.format(table_name)) - - i = [0] - messages_num = 1000 - - credentials = pika.PlainCredentials('root', 'clickhouse') - parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) - - def produce(): - # init connection here because otherwise python rabbitmq client fails sometimes - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - channel.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') - messages = [] - for _ in range(messages_num): - messages.append(json.dumps({'key': i[0], 'value': i[0]})) - i[0] += 1 - key = str(randrange(1, NUMBER_OF_CONCURRENT_CONSUMERS)) - for message in messages: - channel.basic_publish(exchange='clickhouse-exchange', routing_key=key, body=message) - connection.close() - time.sleep(1) - - threads = [] - threads_num = 20 - - for _ in range(threads_num): - threads.append(threading.Thread(target=produce)) - for thread in threads: - time.sleep(random.uniform(0, 1)) - thread.start() - - while True: - result = instance.query('SELECT count() FROM test.destination') - time.sleep(1) - if int(result) == messages_num * threads_num: - break - - for consumer_id in range(NUMBER_OF_CONCURRENT_CONSUMERS): - print("dropping rabbitmq_consumer{}".format(consumer_id)) - table_name = 'rabbitmq_consumer{}'.format(consumer_id) - instance.query(''' - DROP TABLE IF EXISTS test.{0}; - DROP TABLE IF EXISTS test.{0}_mv; - '''.format(table_name)) - - instance.query(''' - DROP TABLE IF EXISTS test.destination; - ''') - - for thread in threads: - thread.join() - - assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) - - @pytest.mark.timeout(320) def 
test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): @@ -1011,6 +927,7 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) + print result if int(result) == messages_num * threads_num: break @@ -1085,6 +1002,289 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_direct_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + CREATE TABLE test.direct_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'direct_exchange_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key = 'direct_{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.direct_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.direct_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='direct_exchange_testing', exchange_type='direct') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "direct_" + str(key_num) + key_num += 1 + for message in messages: + channel.basic_publish(exchange='direct_exchange_testing', routing_key=key, body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_fanout_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS test.fanout_exchange_{0}_mv; + CREATE TABLE test.fanout_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_routing_key = 'key_{0}', + rabbitmq_exchange_name = 'fanout_exchange_testing', + rabbitmq_exchange_type = 'fanout', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.fanout_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM 
test.fanout_exchange_{0}; + '''.format(consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='fanout_exchange_testing', exchange_type='fanout') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for message in messages: + channel.basic_publish(exchange='fanout_exchange_testing', routing_key='', body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(420) +def test_rabbitmq_topic_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 5 + for consumer_id in range(num_tables): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key = '*.{0}', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(consumer_id)) + + for consumer_id in range(num_tables): + print("Setting up table {}".format(num_tables + consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + CREATE TABLE test.topic_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'topic_exchange_testing', + rabbitmq_exchange_type = 'topic', + rabbitmq_routing_key = '*.logs', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.topic_exchange_{0}; + '''.format(num_tables + consumer_id)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='topic_exchange_testing', exchange_type='topic') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + key_num = 0 + for num in range(num_tables): + key = "topic." 
+ str(key_num) + key_num += 1 + for message in messages: + channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + + key = "random.logs" + for message in messages: + channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables + messages_num * num_tables: + break + + assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) + + +@pytest.mark.timeout(320) +def test_rabbitmq_hash_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables = 4 + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + print("Setting up {}".format(table_name)) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + CREATE TABLE test.{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 10, + rabbitmq_exchange_type = 'consistent_hash', + rabbitmq_exchange_name = 'hash_exchange_testing', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.{0}; + '''.format(table_name)) + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + key = str(randrange(10)) + for message in messages: + channel.basic_publish(exchange='hash_exchange_testing', routing_key=key, body=message) + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * threads_num: + break + + for consumer_id in range(num_tables): + table_name = 'rabbitmq_consumer{}'.format(consumer_id) + instance.query(''' + DROP TABLE IF EXISTS test.{0}; + DROP TABLE IF EXISTS test.{0}_mv; + '''.format(table_name)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + for thread in threads: + thread.join() + + assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From b72a095e993076904cf5889ed9cd408450ce1081 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2020 06:24:52 +0300 Subject: [PATCH 0541/2229] Fix race conditions in CREATE/DROP of different replicas of ReplicatedMergeTree --- src/Databases/DatabaseOnDisk.cpp | 8 +- 
src/Storages/MergeTree/MergeTreeData.cpp | 21 +- src/Storages/MergeTree/MergeTreeData.h | 3 + src/Storages/StorageReplicatedMergeTree.cpp | 447 ++++++++++++++------ src/Storages/StorageReplicatedMergeTree.h | 5 +- 5 files changed, 356 insertions(+), 128 deletions(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 1886d0fc555..364c9d50c48 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -122,7 +122,12 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) return statement_stream.str(); } -DatabaseOnDisk::DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context) +DatabaseOnDisk::DatabaseOnDisk( + const String & name, + const String & metadata_path_, + const String & data_path_, + const String & logger, + const Context & context) : DatabaseWithOwnTablesBase(name, logger, context) , metadata_path(metadata_path_) , data_path(data_path_) @@ -154,7 +159,6 @@ void DatabaseOnDisk::createTable( /// A race condition would be possible if a table with the same name is simultaneously created using CREATE and using ATTACH. /// But there is protection from it - see using DDLGuard in InterpreterCreateQuery. - if (isDictionaryExist(table_name)) throw Exception("Dictionary " + backQuote(getDatabaseName()) + "." + backQuote(table_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b399584f4d9..023e67ec3de 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -152,7 +152,8 @@ MergeTreeData::MergeTreeData( if (metadata.sample_by_ast != nullptr) { - StorageMetadataKeyField candidate_sampling_key = StorageMetadataKeyField::getKeyFromAST(metadata.sample_by_ast, getColumns(), global_context); + StorageMetadataKeyField candidate_sampling_key = StorageMetadataKeyField::getKeyFromAST( + metadata.sample_by_ast, getColumns(), global_context); const auto & pk_sample_block = getPrimaryKey().sample_block; if (!pk_sample_block.has(candidate_sampling_key.column_names[0]) && !attach @@ -1304,6 +1305,24 @@ void MergeTreeData::dropAllData() LOG_TRACE(log, "dropAllData: done."); } +void MergeTreeData::dropIfEmpty() +{ + LOG_TRACE(log, "dropIfEmpty"); + + auto lock = lockParts(); + + if (!data_parts_by_info.empty()) + return; + + for (const auto & [path, disk] : getRelativeDataPathsWithDisks()) + { + /// Non recursive, exception is thrown if there are more files. + disk->remove(path + "format_version.txt"); + disk->remove(path + "detached"); + disk->remove(path); + } +} + namespace { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 6df181e3f98..217e5000cf6 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -485,6 +485,9 @@ public: /// Deletes the data directory and flushes the uncompressed blocks cache and the marks cache. void dropAllData(); + /// Drop data directories if they are empty. It is safe to call this method if table creation was unsuccessful. + void dropIfEmpty(); + /// Moves the entire data directory. /// Flushes the uncompressed blocks cache and the marks cache. /// Must be called with locked lockStructureForAlter(). 
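For context before the next diff: `dropIfEmpty` is the rollback half of replicated table creation. A condensed sketch of the pattern the constructor change below applies (simplified, not the verbatim source):

``` cpp
try
{
    bool is_first_replica = createTableIfNotExists();
    checkTableStructure(zookeeper_path);
    if (!is_first_replica)
        createReplica();
}
catch (...)
{
    /// Registration in ZooKeeper failed, so only empty directories exist on disk.
    /// dropIfEmpty() returns without touching anything if data parts were already
    /// loaded, and its non-recursive remove() calls throw on unexpected files,
    /// so this rollback can never wipe real data.
    dropIfEmpty();
    throw;
}
```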
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d109fa464b0..cf36224b070 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -194,15 +194,19 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( zookeeper_path = "/" + zookeeper_path; replica_path = zookeeper_path + "/replicas/" + replica_name; - queue_updating_task = global_context.getSchedulePool().createTask(getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); + queue_updating_task = global_context.getSchedulePool().createTask( + getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); }); - mutations_updating_task = global_context.getSchedulePool().createTask(getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsUpdatingTask)", [this]{ mutationsUpdatingTask(); }); + mutations_updating_task = global_context.getSchedulePool().createTask( + getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsUpdatingTask)", [this]{ mutationsUpdatingTask(); }); - merge_selecting_task = global_context.getSchedulePool().createTask(getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); + merge_selecting_task = global_context.getSchedulePool().createTask( + getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mergeSelectingTask)", [this] { mergeSelectingTask(); }); /// Will be activated if we win leader election. merge_selecting_task->deactivate(); - mutations_finalizing_task = global_context.getSchedulePool().createTask(getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); + mutations_finalizing_task = global_context.getSchedulePool().createTask( + getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::mutationsFinalizingTask)", [this] { mutationsFinalizingTask(); }); if (global_context.hasZooKeeper()) current_zookeeper = global_context.getZooKeeper(); @@ -248,20 +252,30 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (!getDataParts().empty()) throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); - createTableIfNotExists(); + try + { + bool is_first_replica = createTableIfNotExists(); - /// We have to check granularity on other replicas. If it's fixed we - /// must create our new replica with fixed granularity and store this - /// information in /replica/metadata. - other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); + /// We have to check granularity on other replicas. If it's fixed we + /// must create our new replica with fixed granularity and store this + /// information in /replica/metadata. 
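+            /// (If any existing replica reports fixed granularity, this replica adopts it too, keeping the on-disk part format compatible across replicas.)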
+ other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); - checkTableStructure(zookeeper_path); + checkTableStructure(zookeeper_path); - Coordination::Stat metadata_stat; - current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); - metadata_version = metadata_stat.version; + Coordination::Stat metadata_stat; + current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); + metadata_version = metadata_stat.version; - createReplica(); + if (!is_first_replica) + createReplica(); + } + catch (...) + { + /// If replica was not created, rollback creation of data directory. + dropIfEmpty(); + throw; + } } else { @@ -403,46 +417,310 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() } -void StorageReplicatedMergeTree::createTableIfNotExists() +bool StorageReplicatedMergeTree::createTableIfNotExists() +{ + auto zookeeper = getZooKeeper(); + zookeeper->createAncestors(zookeeper_path); + + for (size_t i = 0; i < 1000; ++i) + { + /// Invariant: "replicas" does not exist if there is no table or if there are leftovers from incompletely dropped table. + if (zookeeper->exists(zookeeper_path + "/replicas")) + { + LOG_DEBUG(log, "This table {} is already created, will add new replica", zookeeper_path); + return false; + } + + /// There are leftovers from incompletely dropped table. + if (zookeeper->exists(zookeeper_path + "/dropped")) + { + /// This condition may happen when the previous drop attempt was not completed + /// or when table is dropped by another replica right now. + /// This is Ok because another replica is definitely going to drop the table. + + LOG_WARNING(log, "Removing leftovers from table {} (this might take several minutes)", zookeeper_path); + + Strings children; + int32_t code = zookeeper->tryGetChildren(zookeeper_path, children); + if (code == Coordination::ZNONODE) + { + LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); + } + else + { + for (const auto & child : children) + if (child != "dropped") + zookeeper->tryRemoveRecursive(zookeeper_path + "/" + child); + + Coordination::Requests ops; + Coordination::Responses responses; + ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1)); + ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1)); + code = zookeeper->tryMulti(ops, responses); + + if (code == Coordination::ZNONODE) + { + LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); + } + else if (code == Coordination::ZNOTEMPTY) + { + throw Exception(fmt::format( + "The old table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", zookeeper_path), ErrorCodes::TABLE_WAS_NOT_DROPPED); + } + else if (code != Coordination::ZOK) + { + /// It is still possible that ZooKeeper session is expired or server is killed in the middle of the delete operation. + zkutil::KeeperMultiException::check(code, ops, responses); + } + else + { + LOG_WARNING(log, "The leftovers from table {} was successfully removed from ZooKeeper", zookeeper_path); + } + } + } + + LOG_DEBUG(log, "Creating table {}", zookeeper_path); + + /// We write metadata of table so that the replicas can check table parameters with them. + String metadata = ReplicatedMergeTreeTableMetadata(*this).toString(); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent)); + + /// Check that the table is not being dropped right now. 
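+        /// Creating and immediately removing "/dropped" within the same multi-op transaction makes the whole
+        /// transaction fail with ZNODEEXISTS while another replica is mid-drop, yet leaves no flag behind on success.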
+ ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1)); + + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata, + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(), + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "", + zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility. + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "last added replica: " + replica_name, + zkutil::CreateMode::Persistent)); + + /// And create first replica atomically. See also "createReplica" method that is used to create not the first replicas. + + ops.emplace_back(zkutil::makeCreateRequest(replica_path, "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/host", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_pointer", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/queue", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/parts", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/flags", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", "0", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", metadata, + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", std::to_string(metadata_version), + zkutil::CreateMode::Persistent)); + + Coordination::Responses responses; + auto code = zookeeper->tryMulti(ops, responses); + if (code == Coordination::ZNODEEXISTS) + { + LOG_WARNING(log, "It looks like the table {} was created by another server at the same moment, will retry", zookeeper_path); + continue; + } + else if (code != Coordination::ZOK) + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + + return true; + } + + throw Exception("Cannot create table, because it is created concurrently every time or because of logical error", ErrorCodes::LOGICAL_ERROR); +} + +void StorageReplicatedMergeTree::createReplica() { auto zookeeper = getZooKeeper(); - if (zookeeper->exists(zookeeper_path)) - return; + LOG_DEBUG(log, "Creating replica {}", replica_path); - LOG_DEBUG(log, "Creating table {}", zookeeper_path); + int32_t code; - 
zookeeper->createAncestors(zookeeper_path); + do + { + Coordination::Stat replicas_stat; - /// We write metadata of table so that the replicas can check table parameters with them. - String metadata = ReplicatedMergeTreeTableMetadata(*this).toString(); + try + { + zookeeper->get(zookeeper_path + "/replicas", &replicas_stat); + } + catch (Exception & e) + { + e.addMessage("because the last replica of the table was dropped right now"); + throw; + } - Coordination::Requests ops; - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata, - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(), - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/blocks", "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/block_numbers", "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/nonincrement_block_numbers", "", - zkutil::CreateMode::Persistent)); /// /nonincrement_block_numbers dir is unused, but is created nonetheless for backwards compatibility. - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/leader_election", "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/temp", "", - zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/replicas", "", - zkutil::CreateMode::Persistent)); + /// It is not the first replica, we will mark it as "lost", to immediately repair (clone) from existing replica. + /// By the way, it's possible that the replica will be first, if all previous replicas were removed concurrently. + String is_lost_value = replicas_stat.numChildren ? 
"1" : "0"; - Coordination::Responses responses; - auto code = zookeeper->tryMulti(ops, responses); - if (code && code != Coordination::ZNODEEXISTS) - throw Coordination::Exception(code); + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(replica_path, "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/host", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_pointer", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/queue", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/parts", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/flags", "", + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", is_lost_value, + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this).toString(), + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), + zkutil::CreateMode::Persistent)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", std::to_string(metadata_version), + zkutil::CreateMode::Persistent)); + + /// Check version of /replicas to see if there are any replicas created at the same moment of time. + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/replicas", "last added replica: " + replica_name, replicas_stat.version)); + + Coordination::Responses responses; + code = zookeeper->tryMulti(ops, responses); + if (code == Coordination::ZNODEEXISTS) + { + throw Exception("Replica " + replica_path + " already exists.", ErrorCodes::REPLICA_IS_ALREADY_EXIST); + } + else if (code == Coordination::ZBADVERSION) + { + LOG_ERROR(log, "Retrying createReplica(), because some other replicas were created at the same time"); + } + else if (code == Coordination::ZNONODE) + { + throw Exception("Table " + zookeeper_path + " was suddenly removed.", ErrorCodes::ALL_REPLICAS_LOST); + } + else + { + zkutil::KeeperMultiException::check(code, ops, responses); + } + } while (code == Coordination::ZBADVERSION); +} + +void StorageReplicatedMergeTree::drop() +{ + { + auto zookeeper = tryGetZooKeeper(); + + if (is_readonly || !zookeeper) + throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); + + shutdown(); + + if (zookeeper->expired()) + throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); + + LOG_INFO(log, "Removing replica {}", replica_path); + replica_is_active_node = nullptr; + /// It may left some garbage if replica_path subtree are concurently modified + zookeeper->tryRemoveRecursive(replica_path); + if (zookeeper->exists(replica_path)) + LOG_ERROR(log, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", replica_path); + + /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. 
+        Strings replicas;
+        if (Coordination::ZOK == zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) && replicas.empty())
+        {
+            LOG_INFO(log, "{} is the last replica, will remove table", replica_path);
+
+            /** At this moment, another replica can be created and we cannot remove the table.
+              * Try to remove the /replicas node first. If we successfully removed it,
+              * it guarantees that we are the only replica that proceeds to remove the table
+              * and no new replicas can be created after that moment (it requires the existence of the /replicas node),
+              * and the table cannot be recreated with a new /replicas node on other servers while we are removing data,
+              * because table creation is executed in a single transaction that will conflict with the remaining nodes.
+              */
+
+            Coordination::Requests ops;
+            Coordination::Responses responses;
+            ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/replicas", -1));
+            ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent));
+            int32_t code = zookeeper->tryMulti(ops, responses);
+
+            if (code == Coordination::ZNONODE || code == Coordination::ZNODEEXISTS)
+            {
+                LOG_WARNING(log, "Table {} is already started to be removing by another replica right now", replica_path);
+            }
+            else if (code == Coordination::ZNOTEMPTY)
+            {
+                LOG_WARNING(log, "Another replica was suddenly created, will keep the table {}", replica_path);
+            }
+            else if (code != Coordination::ZOK)
+            {
+                zkutil::KeeperMultiException::check(code, ops, responses);
+            }
+            else
+            {
+                LOG_INFO(log, "Removing table {} (this might take several minutes)", zookeeper_path);
+
+                Strings children;
+                code = zookeeper->tryGetChildren(zookeeper_path, children);
+                if (code == Coordination::ZNONODE)
+                {
+                    LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path);
+                }
+                else
+                {
+                    for (const auto & child : children)
+                        if (child != "dropped")
+                            zookeeper->tryRemoveRecursive(zookeeper_path + "/" + child);
+
+                    ops.clear();
+                    responses.clear();
+                    ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1));
+                    ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1));
+                    code = zookeeper->tryMulti(ops, responses);
+
+                    if (code == Coordination::ZNONODE)
+                    {
+                        LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path);
+                    }
+                    else if (code == Coordination::ZNOTEMPTY)
+                    {
+                        LOG_ERROR(log, "Table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.",
+                            zookeeper_path);
+                    }
+                    else if (code != Coordination::ZOK)
+                    {
+                        /// It is still possible that the ZooKeeper session is expired or the server is killed in the middle of the delete operation.
+                        zkutil::KeeperMultiException::check(code, ops, responses);
+                    }
+                    else
+                    {
+                        LOG_INFO(log, "Table {} was successfully removed from ZooKeeper", zookeeper_path);
+                    }
+                }
+            }
+        }
+    }
+
+    dropAllData();
 }


@@ -542,48 +820,6 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin
 }


-void StorageReplicatedMergeTree::createReplica()
-{
-    auto zookeeper = getZooKeeper();
-
-    LOG_DEBUG(log, "Creating replica {}", replica_path);
-
-    int32_t code;
-
-    do
-    {
-        Coordination::Stat replicas_stat;
-        String last_added_replica = zookeeper->get(zookeeper_path + "/replicas", &replicas_stat);
-
-        /// If it is not the first replica, we will mark it as "lost", to immediately repair (clone) from existing replica.
-        String is_lost_value = last_added_replica.empty() ?
"0" : "1"; - - Coordination::Requests ops; - Coordination::Responses responses; - ops.emplace_back(zkutil::makeCreateRequest(replica_path, "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/host", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/log_pointer", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/queue", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/parts", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/flags", "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", is_lost_value, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this).toString(), zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent)); - /// Check version of /replicas to see if there are any replicas created at the same moment of time. - ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/replicas", "last added replica: " + replica_name, replicas_stat.version)); - - code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::Error::ZNODEEXISTS) - throw Exception("Replica " + replica_path + " already exists.", ErrorCodes::REPLICA_IS_ALREADY_EXIST); - else if (code == Coordination::Error::ZBADVERSION) - LOG_ERROR(log, "Retrying createReplica(), because some other replicas were created at the same time"); - else - zkutil::KeeperMultiException::check(code, ops, responses); - } while (code == Coordination::Error::ZBADVERSION); -} - - void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) { auto zookeeper = getZooKeeper(); @@ -3699,41 +3935,6 @@ void StorageReplicatedMergeTree::checkPartitionCanBeDropped(const ASTPtr & parti } -void StorageReplicatedMergeTree::drop() -{ - { - auto zookeeper = tryGetZooKeeper(); - - if (is_readonly || !zookeeper) - throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); - - shutdown(); - - if (zookeeper->expired()) - throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); - - LOG_INFO(log, "Removing replica {}", replica_path); - replica_is_active_node = nullptr; - /// It may left some garbage if replica_path subtree are concurently modified - zookeeper->tryRemoveRecursive(replica_path); - if (zookeeper->exists(replica_path)) - LOG_ERROR(log, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", replica_path); - - /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. 
-    Strings replicas;
-    if (zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) == Coordination::ZOK && replicas.empty())
-    {
-        LOG_INFO(log, "Removing table {} (this might take several minutes)", zookeeper_path);
-        zookeeper->tryRemoveRecursive(zookeeper_path);
-        if (zookeeper->exists(zookeeper_path))
-            LOG_ERROR(log, "Table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", zookeeper_path);
-    }
-    }
-
-    dropAllData();
-}
-
-
 void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, const StorageID & new_table_id)
 {
     MergeTreeData::rename(new_path_to_table_data, new_table_id);
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index b82b387a623..18c691046d6 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -291,9 +291,10 @@ private:
     template void foreachCommittedParts(const Func & func) const;

-    /** Creates the minimum set of nodes in ZooKeeper.
+    /** Creates the minimum set of nodes in ZooKeeper and creates the first replica.
+      * Returns true if it was created, false if it already exists.
       */
-    void createTableIfNotExists();
+    bool createTableIfNotExists();

     /** Creates a replica in ZooKeeper and adds to the queue all that it takes to catch up with the rest of the replicas.
       */

From c0f8a3b5f269cfbdc3f397406932b15e0d9ae80f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 11 Jun 2020 06:27:20 +0300
Subject: [PATCH 0542/2229] Added a test

---
 ...05_replica_create_drop_zookeeper.reference |  0
 .../01305_replica_create_drop_zookeeper.sh    | 28 +++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 tests/queries/0_stateless/01305_replica_create_drop_zookeeper.reference
 create mode 100755 tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh

diff --git a/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.reference b/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh b/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh
new file mode 100755
index 00000000000..03325d3da13
--- /dev/null
+++ b/tests/queries/0_stateless/01305_replica_create_drop_zookeeper.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. $CURDIR/../shell_config.sh
+
+set -e
+
+function thread()
+{
+    while true; do
+        $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS test_table_$1;
+            CREATE TABLE test_table_$1 (a UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/alter_table', 'r_$1') ORDER BY tuple();" 2>&1 |
+            grep -vP '(^$)|(^Received exception from server)|(^\d+\. 
)|because the last replica of the table was dropped right now|is already started to be removing by another replica right now|is already finished removing by another replica right now|Removing leftovers from table|Another replica was suddenly created|was successfully removed from ZooKeeper|was created by another server at the same moment|was suddenly removed' + done +} + + +# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout +export -f thread; + +TIMEOUT=10 + +timeout $TIMEOUT bash -c 'thread 1' & +timeout $TIMEOUT bash -c 'thread 2' & + +wait + +for i in {1,2}; do $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_table_$i"; done From 790bc4e8e0571b1ea712198b5a6b91f1322cc3df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2020 06:35:59 +0300 Subject: [PATCH 0543/2229] Better code --- src/Storages/StorageReplicatedMergeTree.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cf36224b070..95bc46d4cb4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -562,16 +562,12 @@ void StorageReplicatedMergeTree::createReplica() do { Coordination::Stat replicas_stat; + String replicas_value; - try - { - zookeeper->get(zookeeper_path + "/replicas", &replicas_stat); - } - catch (Exception & e) - { - e.addMessage("because the last replica of the table was dropped right now"); - throw; - } + code = zookeeper->tryGet(zookeeper_path + "/replicas", replicas_value, &replicas_stat); + if (code == Coordination::ZNONODE) + throw Exception(fmt::format("Cannot create a replica of the table {}, because the last replica of the table was dropped right now", + zookeeper_path), ErrorCodes::ALL_REPLICAS_LOST); /// It is not the first replica, we will mark it as "lost", to immediately repair (clone) from existing replica. /// By the way, it's possible that the replica will be first, if all previous replicas were removed concurrently. From e40ee1a173c86af5a7202d29bdde19dcc6c4d668 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2020 06:45:12 +0300 Subject: [PATCH 0544/2229] Return non-Nullable results from COUNT(DISTINCT) --- .../AggregateFunctionCount.h | 6 +++ .../AggregateFunctionFactory.cpp | 10 ++-- .../AggregateFunctionNull.cpp | 47 +++++++++++++------ .../AggregateFunctionNull.h | 32 +++++++------ .../AggregateFunctionUniq.h | 12 +++++ .../AggregateFunctionWindowFunnel.h | 5 +- src/AggregateFunctions/IAggregateFunction.h | 6 +++ 7 files changed, 83 insertions(+), 35 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index 092ffc6b6cf..e54f014f7a4 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -67,6 +67,12 @@ public: { data(place).count = new_count; } + + /// The function returns non-Nullable type even when wrapped with Null combinator. 
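// A standalone sketch (illustration, not part of this patch) of the contract behind
// returnDefaultWhenOnlyNull(), declared just below: over an all-NULL column, count-like
// functions must yield 0 (a non-Nullable result), while a Nullable-wrapped function such
// as sum yields NULL. Modeled with std::optional; all names here are hypothetical.
#include <optional>
#include <vector>
#include <cassert>

int main()
{
    const std::vector<std::optional<int>> column{std::nullopt, std::nullopt, std::nullopt};

    // count-like: the default value of the empty state, never NULL
    size_t count = 0;
    for (const auto & v : column)
        if (v)
            ++count;

    // sum-like: stays NULL until at least one non-NULL value is aggregated
    std::optional<int> sum;
    for (const auto & v : column)
        if (v)
            sum = sum.value_or(0) + *v;

    assert(count == 0);        // what returnDefaultWhenOnlyNull() == true requests
    assert(!sum.has_value());  // what the default Nullable wrapping produces
}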
+ bool returnDefaultWhenOnlyNull() const override + { + return true; + } }; diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index aeb4fb6db96..3982c48700b 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -63,14 +63,15 @@ AggregateFunctionPtr AggregateFunctionFactory::get( { auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); - /// If one of types is Nullable, we apply aggregate function combinator "Null". + /// If one of the types is Nullable, we apply aggregate function combinator "Null". if (std::any_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), [](const auto & type) { return type->isNullable(); })) { AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix("Null"); if (!combinator) - throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", + ErrorCodes::LOGICAL_ERROR); DataTypes nested_types = combinator->transformArguments(type_without_low_cardinality); Array nested_parameters = combinator->transformParameters(parameters); @@ -132,9 +133,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( auto hints = this->getHints(name); if (!hints.empty()) - throw Exception("Unknown aggregate function " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + throw Exception(fmt::format("Unknown aggregate function {}. Maybe you meant: {}", name, toString(hints)), + ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); else - throw Exception("Unknown aggregate function " + name, ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); + throw Exception(fmt::format("Unknown aggregate function {}", name), ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION); } diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 60712636562..77687f9f328 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -49,35 +49,52 @@ public: } if (!has_nullable_types) - throw Exception("Aggregate function combinator 'Null' requires at least one argument to be Nullable", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (nested_function) - if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params)) - return adapter; - - /// Special case for 'count' function. It could be called with Nullable arguments - /// - that means - count number of calls, when all arguments are not NULL. - if (nested_function && nested_function->getName() == "count") - return std::make_shared(arguments[0], params); + throw Exception("Aggregate function combinator 'Null' requires at least one argument to be Nullable", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (has_null_types) return std::make_shared(arguments, params); - bool return_type_is_nullable = nested_function->getReturnType()->canBeInsideNullable(); + assert(nested_function); + + if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params)) + return adapter; + + /// Special case for 'count' function. It could be called with Nullable arguments + /// - that means - count number of calls, when all arguments are not NULL. 
+        if (nested_function->getName() == "count")
+            return std::make_shared(arguments[0], params);
+
+        bool return_type_is_nullable = !nested_function->returnDefaultWhenOnlyNull() && nested_function->getReturnType()->canBeInsideNullable();
+        bool serialize_flag = return_type_is_nullable || nested_function->returnDefaultWhenOnlyNull();

         if (arguments.size() == 1)
         {
             if (return_type_is_nullable)
-                return std::make_shared>(nested_function, arguments, params);
+            {
+                return std::make_shared>(nested_function, arguments, params);
+            }
             else
-                return std::make_shared>(nested_function, arguments, params);
+            {
+                if (serialize_flag)
+                    return std::make_shared>(nested_function, arguments, params);
+                else
+                    return std::make_shared>(nested_function, arguments, params);
+            }
         }
         else
         {
             if (return_type_is_nullable)
-                return std::make_shared>(nested_function, arguments, params);
+            {
+                return std::make_shared>(nested_function, arguments, params);
+            }
             else
-                return std::make_shared>(nested_function, arguments, params);
+            {
+                if (serialize_flag)
+                    return std::make_shared>(nested_function, arguments, params);
+                else
+                    return std::make_shared>(nested_function, arguments, params);
+            }
         }
     }
 };
diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h
index 55d610207f1..d6f0079232c 100644
--- a/src/AggregateFunctions/AggregateFunctionNull.h
+++ b/src/AggregateFunctions/AggregateFunctionNull.h
@@ -28,7 +28,10 @@ namespace ErrorCodes
 /// If all rows had NULL, the behaviour is determined by "result_is_nullable" template parameter.
 /// true - return NULL; false - return value from empty aggregation state of nested function.

-template
+/// When serialize_flag is set to true, the flag about the presence of values is serialized
+/// regardless of "result_is_nullable", even if it's unneeded - for protocol compatibility.
+
+template
 class AggregateFunctionNullBase : public IAggregateFunctionHelper
 {
 protected:
@@ -129,7 +132,7 @@ public:
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
     {
         bool flag = getFlag(place);
-        if constexpr (result_is_nullable)
+        if constexpr (serialize_flag)
             writeBinary(flag, buf);
         if (flag)
             nested_function->serialize(nestedPlace(place), buf);
@@ -138,7 +141,7 @@ public:
     void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
     {
         bool flag = 1;
-        if constexpr (result_is_nullable)
+        if constexpr (serialize_flag)
             readBinary(flag, buf);
         if (flag)
         {
@@ -183,12 +186,15 @@ public:

 /** There are two cases: for single argument and variadic.
   * Code for single argument is much more efficient.
  */
-template
-class AggregateFunctionNullUnary final : public AggregateFunctionNullBase>
+template
+class AggregateFunctionNullUnary final
+    : public AggregateFunctionNullBase>
 {
 public:
     AggregateFunctionNullUnary(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
-        : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params)
+        : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params)
     {
     }

@@ -218,12 +224,15 @@ public:
 };


-template
-class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase>
+template
+class AggregateFunctionNullVariadic final
+    : public AggregateFunctionNullBase>
 {
 public:
     AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
-        : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params),
+        : AggregateFunctionNullBase>(std::move(nested_function_), arguments, params),
         number_of_arguments(arguments.size())
     {
         if (number_of_arguments == 1)
@@ -263,11 +272,6 @@ public:
         this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
     }

-    bool allocatesMemoryInArena() const override
-    {
-        return this->nested_function->allocatesMemoryInArena();
-    }
-
 private:
     enum { MAX_ARGS = 8 };
     size_t number_of_arguments = 0;
diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h
index 334e809ebe7..1588611b8a2 100644
--- a/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/src/AggregateFunctions/AggregateFunctionUniq.h
@@ -244,6 +244,12 @@ public:
     {
         assert_cast(to).getData().push_back(this->data(place).set.size());
     }
+
+    /// The function returns non-Nullable type even when wrapped with Null combinator.
+    bool returnDefaultWhenOnlyNull() const override
+    {
+        return true;
+    }
 };


@@ -298,6 +304,12 @@ public:
     {
         assert_cast(to).getData().push_back(this->data(place).set.size());
     }
+
+    /// The function returns non-Nullable type even when wrapped with Null combinator.
+    bool returnDefaultWhenOnlyNull() const override
+    {
+        return true;
+    }
 };

 }
diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
index 726656d1ca8..b5704203ade 100644
--- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
+++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h
@@ -240,9 +240,10 @@ public:
         return std::make_shared();
     }

-    AggregateFunctionPtr getOwnNullAdapter(const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
+    AggregateFunctionPtr getOwnNullAdapter(
+        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
     {
-        return std::make_shared>(nested_function, arguments, params);
+        return std::make_shared>(nested_function, arguments, params);
     }

     void add(AggregateDataPtr place, const IColumn ** columns, const size_t row_num, Arena *) const override
diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index 0087a41d437..439a5e07c2e 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -171,6 +171,12 @@ public:
         return nullptr;
     }

+    /** When the function is wrapped with Null combinator,
+      * should we return Nullable type with NULL when no values were aggregated,
+      * or should we return non-Nullable type with a default value (example: count, countDistinct).
+ */ + virtual bool returnDefaultWhenOnlyNull() const { return false; } + const DataTypes & getArgumentTypes() const { return argument_types; } const Array & getParameters() const { return parameters; } From 50a184acac24eb0fcd0906d4cd722122da6d4267 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Tue, 9 Jun 2020 12:47:59 +0300 Subject: [PATCH 0545/2229] extractAllGroupsHorizontal and extractAllGroupsVertical Split tests, fixed some error messages Fixed test and error reporting of extractGroups --- .../tests/gtest_data_type_get_common_type.cpp | 5 - src/Functions/extractAllGroups.cpp | 127 ---------- src/Functions/extractAllGroups.h | 238 ++++++++++++++++++ src/Functions/extractAllGroupsHorizontal.cpp | 23 ++ src/Functions/extractAllGroupsVertical.cpp | 24 ++ src/Functions/extractGroups.cpp | 4 +- .../registerFunctionsStringRegexp.cpp | 6 +- tests/performance/extract.xml | 3 +- .../0_stateless/01246_extractAllGroups.sql | 51 ---- ...01246_extractAllGroupsHorizontal.reference | 22 ++ .../01246_extractAllGroupsHorizontal.sql | 51 ++++ ... 01246_extractAllGroupsVertical.reference} | 0 .../01246_extractAllGroupsVertical.sql | 51 ++++ .../01275_extract_groups_check.sql | 4 +- 14 files changed, 418 insertions(+), 191 deletions(-) delete mode 100644 src/Functions/extractAllGroups.cpp create mode 100644 src/Functions/extractAllGroups.h create mode 100644 src/Functions/extractAllGroupsHorizontal.cpp create mode 100644 src/Functions/extractAllGroupsVertical.cpp delete mode 100644 tests/queries/0_stateless/01246_extractAllGroups.sql create mode 100644 tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference create mode 100644 tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql rename tests/queries/0_stateless/{01246_extractAllGroups.reference => 01246_extractAllGroupsVertical.reference} (100%) create mode 100644 tests/queries/0_stateless/01246_extractAllGroupsVertical.sql diff --git a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index fd511bfbbb4..bd13de79ef6 100644 --- a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -14,11 +14,6 @@ static bool operator==(const IDataType & left, const IDataType & right) return left.equals(right); } -std::ostream & operator<<(std::ostream & ostr, const IDataType & dt) -{ - return ostr << dt.getName(); -} - } using namespace DB; diff --git a/src/Functions/extractAllGroups.cpp b/src/Functions/extractAllGroups.cpp deleted file mode 100644 index a79efe86356..00000000000 --- a/src/Functions/extractAllGroups.cpp +++ /dev/null @@ -1,127 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ARGUMENT_OUT_OF_BOUND; - extern const int BAD_ARGUMENTS; -} - - -/** Match all groups of given input string with given re, return array of arrays of matches. 
- * - * SELECT extractAllGroups('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)') - * should produce: - * [['abc', '111'], ['def', '222'], ['ghi', '333']] - */ -class FunctionExtractAllGroups : public IFunction -{ -public: - static constexpr auto name = "extractAllGroups"; - static FunctionPtr create(const Context &) { return std::make_shared(); } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 2; } - - bool useDefaultImplementationForConstants() const override { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - FunctionArgumentDescriptors args{ - {"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"}, - {"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"}, - }; - validateFunctionArgumentTypes(*this, arguments, args); - - /// Two-dimensional array of strings, each `row` of top array represents matching groups. - return std::make_shared(std::make_shared(std::make_shared())); - } - - void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override - { - const ColumnPtr column_haystack = block.getByPosition(arguments[0]).column; - const ColumnPtr column_needle = block.getByPosition(arguments[1]).column; - - const auto needle = typeid_cast(*column_needle).getValue(); - - if (needle.empty()) - throw Exception(getName() + " length of 'needle' argument must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - - const auto regexp = Regexps::get(needle); - const auto & re2 = regexp->getRE2(); - - if (!re2) - throw Exception("There is no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); - - const size_t groups_count = re2->NumberOfCapturingGroups(); - - if (!groups_count) - throw Exception("There is no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); - - // Including 0-group, which is the whole regexp. - PODArrayWithStackMemory matched_groups(groups_count + 1); - - ColumnArray::ColumnOffsets::MutablePtr root_offsets_col = ColumnArray::ColumnOffsets::create(); - ColumnArray::ColumnOffsets::MutablePtr nested_offsets_col = ColumnArray::ColumnOffsets::create(); - ColumnString::MutablePtr data_col = ColumnString::create(); - - auto & root_offsets_data = root_offsets_col->getData(); - auto & nested_offsets_data = nested_offsets_col->getData(); - - root_offsets_data.resize(input_rows_count); - ColumnArray::Offset current_root_offset = 0; - ColumnArray::Offset current_nested_offset = 0; - - for (size_t i = 0; i < input_rows_count; ++i) - { - StringRef current_row = column_haystack->getDataAt(i); - - // Extract all non-intersecting matches from haystack except group #0. - const auto * pos = current_row.data; - const auto * end = pos + current_row.size; - while (pos < end - && re2->Match(re2_st::StringPiece(pos, end - pos), - 0, end - pos, re2_st::RE2::UNANCHORED, matched_groups.data(), matched_groups.size())) - { - // 1 is to exclude group #0 which is whole re match. 
-                for (size_t group = 1; group <= groups_count; ++group)
-                    data_col->insertData(matched_groups[group].data(), matched_groups[group].size());
-
-                pos = matched_groups[0].data() + matched_groups[0].size();
-
-                current_nested_offset += groups_count;
-                nested_offsets_data.push_back(current_nested_offset);
-
-                ++current_root_offset;
-            }
-
-            root_offsets_data[i] = current_root_offset;
-        }
-        ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(data_col), std::move(nested_offsets_col));
-        ColumnArray::MutablePtr root_array_col = ColumnArray::create(std::move(nested_array_col), std::move(root_offsets_col));
-        block.getByPosition(result).column = std::move(root_array_col);
-    }
-};
-
-void registerFunctionExtractAllGroups(FunctionFactory & factory)
-{
-    factory.registerFunction();
-}
-
-}
diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h
new file mode 100644
index 00000000000..a9206e7327e
--- /dev/null
+++ b/src/Functions/extractAllGroups.h
@@ -0,0 +1,238 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+
+enum class ExtractAllGroupsResultKind
+{
+    VERTICAL,
+    HORIZONTAL
+};
+
+
+/** Match all groups of given input string with given re, return array of arrays of matches.
+  *
+  * Depending on `Impl::Kind`, the result is either grouped by group id (Horizontal) or in order of appearance (Vertical):
+  *
+  * SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
+  * =>
+  *   [['abc', '111'], ['def', '222'], ['ghi', '333']]
+  *
+  * SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
+  * =>
+  *   [['abc', 'def', 'ghi'], ['111', '222', '333']]
+*/
+template
+class FunctionExtractAllGroups : public IFunction
+{
+public:
+    static constexpr auto Kind = Impl::Kind;
+    static constexpr auto name = Impl::Name;
+
+    static FunctionPtr create(const Context &) { return std::make_shared(); }
+
+    String getName() const override { return name; }
+
+    size_t getNumberOfArguments() const override { return 2; }
+
+    bool useDefaultImplementationForConstants() const override { return false; }
+    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        FunctionArgumentDescriptors args{
+            {"haystack", isStringOrFixedString, nullptr, "const String or const FixedString"},
+            {"needle", isStringOrFixedString, isColumnConst, "const String or const FixedString"},
+        };
+        validateFunctionArgumentTypes(*this, arguments, args);
+
+        /// Two-dimensional array of strings, each `row` of the top array represents matching groups.
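// A standalone sketch (not part of this patch) of the two result shapes this template
// produces, using std::vector in place of ColumnArray. For haystack "abc=111, def=222"
// and needle (\w+)=(\w+): Vertical keeps one inner array per match, Horizontal keeps
// one inner array per capturing group - the transpose of the former.
#include <vector>
#include <string>
#include <cassert>

using Rows = std::vector<std::vector<std::string>>;

int main()
{
    const Rows vertical{{"abc", "111"}, {"def", "222"}};  // one row per match

    Rows horizontal(vertical[0].size());                  // one row per group
    for (const auto & match : vertical)
        for (size_t group = 0; group < match.size(); ++group)
            horizontal[group].push_back(match[group]);

    assert(horizontal == (Rows{{"abc", "def"}, {"111", "222"}}));
}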
+ return std::make_shared(std::make_shared(std::make_shared())); + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + static const auto MAX_GROUPS_COUNT = 128; + + const ColumnPtr column_haystack = block.getByPosition(arguments[0]).column; + const ColumnPtr column_needle = block.getByPosition(arguments[1]).column; + + const auto needle = typeid_cast(*column_needle).getValue(); + + if (needle.empty()) + throw Exception("Length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS); + + using StringPiece = typename Regexps::Regexp::StringPieceType; + const auto & regexp = Regexps::get(needle)->getRE2(); + + if (!regexp) + throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + + const size_t groups_count = regexp->NumberOfCapturingGroups(); + + if (!groups_count) + throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + + if (groups_count > MAX_GROUPS_COUNT - 1) + throw Exception("Too many groups in regexp: " + std::to_string(groups_count) + + ", max: " + std::to_string(MAX_GROUPS_COUNT - 1), + ErrorCodes::BAD_ARGUMENTS); + + // Including 0-group, which is the whole regexp. + PODArrayWithStackMemory matched_groups(groups_count + 1); + + ColumnArray::ColumnOffsets::MutablePtr root_offsets_col = ColumnArray::ColumnOffsets::create(); + ColumnArray::ColumnOffsets::MutablePtr nested_offsets_col = ColumnArray::ColumnOffsets::create(); + ColumnString::MutablePtr data_col = ColumnString::create(); + + auto & root_offsets_data = root_offsets_col->getData(); + auto & nested_offsets_data = nested_offsets_col->getData(); + + ColumnArray::Offset current_root_offset = 0; + ColumnArray::Offset current_nested_offset = 0; + + if constexpr (Kind == ExtractAllGroupsResultKind::VERTICAL) + { + root_offsets_data.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) + { + StringRef current_row = column_haystack->getDataAt(i); + + // Extract all non-intersecting matches from haystack except group #0. + const auto * pos = current_row.data; + const auto * end = pos + current_row.size; + while (pos < end + && regexp->Match({pos, static_cast(end - pos)}, + 0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size())) + { + // 1 is to exclude group #0 which is whole re match. + for (size_t group = 1; group <= groups_count; ++group) + data_col->insertData(matched_groups[group].data(), matched_groups[group].size()); + + pos = matched_groups[0].data() + matched_groups[0].size(); + + current_nested_offset += groups_count; + nested_offsets_data.push_back(current_nested_offset); + + ++current_root_offset; + } + + root_offsets_data[i] = current_root_offset; + } + } + else + { + std::vector all_matches; + // number of times RE matched on each row of haystack column. + std::vector number_of_matches_per_row; + + // we expect RE to match multiple times on each row, `* 8` is arbitrary to reduce number of re-allocations. + all_matches.reserve(input_rows_count * groups_count * 8); + number_of_matches_per_row.reserve(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + size_t matches_per_row = 0; + + const auto & current_row = column_haystack->getDataAt(i); + + // Extract all non-intersecting matches from haystack except group #0. 
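// A standalone sketch (not part of this patch) of the scan loop used below: after each
// hit the cursor jumps to the end of group #0 (the whole match), which is what makes
// successive matches non-intersecting. std::regex stands in for re2 for self-containment.
#include <regex>
#include <string>
#include <vector>
#include <cassert>

int main()
{
    const std::string haystack = "aaaa";
    const std::regex re("aa");
    std::vector<std::string> found;

    auto pos = haystack.cbegin();
    std::smatch m;
    while (std::regex_search(pos, haystack.cend(), m, re))
    {
        found.push_back(m[0].str());
        pos = m[0].second;  // analogous to: pos = matched_groups[0].data() + matched_groups[0].size()
    }

    assert(found.size() == 2);  // non-intersecting "aa", "aa"; an overlapping scan would find 3
}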
+                const auto * pos = current_row.data;
+                const auto * end = pos + current_row.size;
+                while (pos < end
+                    && regexp->Match({pos, static_cast(end - pos)},
+                        0, end - pos, regexp->UNANCHORED, matched_groups.data(), matched_groups.size()))
+                {
+                    // 1 is to exclude group #0 which is the whole re match.
+                    for (size_t group = 1; group <= groups_count; ++group)
+                        all_matches.push_back(matched_groups[group]);
+
+                    pos = matched_groups[0].data() + matched_groups[0].size();
+
+                    ++matches_per_row;
+                }
+
+                number_of_matches_per_row.push_back(matches_per_row);
+            }
+
+            {
+                size_t total_matched_groups_string_len = 0;
+                for (const auto & m : all_matches)
+                    total_matched_groups_string_len += m.length();
+
+                data_col->reserve(total_matched_groups_string_len);
+            }
+
+            nested_offsets_col->reserve(matched_groups.size());
+            root_offsets_col->reserve(groups_count);
+
+            // Re-arrange `all_matches` from:
+            // [
+            //      "ROW 0: 1st group 1st match",
+            //      "ROW 0: 2nd group 1st match",
+            //      ...,
+            //      "ROW 0: 1st group 2nd match",
+            //      "ROW 0: 2nd group 2nd match",
+            //      ...,
+            //      "ROW 1: 1st group 1st match",
+            //      ...
+            // ]
+            //
+            // into column of 2D arrays:
+            // [
+            //      /* all matching groups from ROW 0 of haystack column */
+            //      ["ROW 0: 1st group 1st match", "ROW 0: 1st group 2nd match", ...],
+            //      ["ROW 0: 2nd group 1st match", "ROW 0: 2nd group 2nd match", ...],
+            //      ...
+            // ],
+            // [
+            //      /* all matching groups from row 1 of haystack column */
+            //      ["ROW 1: 1st group 1st match", ...],
+            //      ...
+            // ]
+
+            size_t row_offset = 0;
+            for (const auto matches_per_row : number_of_matches_per_row)
+            {
+                const size_t next_row_offset = row_offset + matches_per_row * groups_count;
+                for (size_t group_id = 0; group_id < groups_count; ++group_id)
+                {
+                    for (size_t i = row_offset + group_id; i < next_row_offset && i < all_matches.size(); i += groups_count)
+                    {
+                        const auto & match = all_matches[i];
+                        data_col->insertData(match.begin(), match.length());
+                    }
+                    nested_offsets_col->insertValue(data_col->size());
+                }
+                root_offsets_col->insertValue(nested_offsets_col->size());
+                row_offset = next_row_offset;
+            }
+        }
+        DUMP(Kind, needle, column_haystack, root_offsets_col, nested_offsets_col);
+
+        ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(data_col), std::move(nested_offsets_col));
+        ColumnArray::MutablePtr root_array_col = ColumnArray::create(std::move(nested_array_col), std::move(root_offsets_col));
+        block.getByPosition(result).column = std::move(root_array_col);
+    }
+};
+
+}
diff --git a/src/Functions/extractAllGroupsHorizontal.cpp b/src/Functions/extractAllGroupsHorizontal.cpp
new file mode 100644
index 00000000000..fba7483ba03
--- /dev/null
+++ b/src/Functions/extractAllGroupsHorizontal.cpp
@@ -0,0 +1,23 @@
+#include
+#include
+
+namespace
+{
+
+struct HorizontalImpl
+{
+    static constexpr auto Kind = DB::ExtractAllGroupsResultKind::HORIZONTAL;
+    static constexpr auto Name = "extractAllGroupsHorizontal";
+};
+
+}
+
+namespace DB
+{
+
+void registerFunctionExtractAllGroupsHorizontal(FunctionFactory & factory)
+{
+    factory.registerFunction>();
+}
+
+}
diff --git a/src/Functions/extractAllGroupsVertical.cpp b/src/Functions/extractAllGroupsVertical.cpp
new file mode 100644
index 00000000000..9cbd148b016
--- /dev/null
+++ b/src/Functions/extractAllGroupsVertical.cpp
@@ -0,0 +1,24 @@
+#include
+#include
+
+namespace
+{
+
+struct VerticalImpl
+{
+    static constexpr auto Kind = DB::ExtractAllGroupsResultKind::VERTICAL;
+    static constexpr auto Name = "extractAllGroupsVertical";
+};
+
+}
+
+namespace DB
+{
+
+void
registerFunctionExtractAllGroupsVertical(FunctionFactory & factory) +{ + factory.registerFunction>(); + factory.registerAlias("extractAllGroups", VerticalImpl::Name, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index 882147ef664..f24abd2d0ff 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; } @@ -49,7 +48,6 @@ public: }; validateFunctionArgumentTypes(*this, arguments, args); - /// Two-dimensional array of strings, each `row` of top array represents matching groups. return std::make_shared(std::make_shared()); } @@ -61,7 +59,7 @@ public: const auto needle = typeid_cast(*column_needle).getValue(); if (needle.empty()) - throw Exception(getName() + " length of 'needle' argument must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(getName() + " length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS); const auto regexp = Regexps::get(needle); const auto & re2 = regexp->getRE2(); diff --git a/src/Functions/registerFunctionsStringRegexp.cpp b/src/Functions/registerFunctionsStringRegexp.cpp index 350f7bd5d00..2a0a3c0ea1f 100644 --- a/src/Functions/registerFunctionsStringRegexp.cpp +++ b/src/Functions/registerFunctionsStringRegexp.cpp @@ -18,7 +18,8 @@ void registerFunctionMultiFuzzyMatchAny(FunctionFactory &); void registerFunctionMultiFuzzyMatchAnyIndex(FunctionFactory &); void registerFunctionMultiFuzzyMatchAllIndices(FunctionFactory &); void registerFunctionExtractGroups(FunctionFactory &); -void registerFunctionExtractAllGroups(FunctionFactory &); +void registerFunctionExtractAllGroupsVertical(FunctionFactory &); +void registerFunctionExtractAllGroupsHorizontal(FunctionFactory &); void registerFunctionsStringRegexp(FunctionFactory & factory) { @@ -37,7 +38,8 @@ void registerFunctionsStringRegexp(FunctionFactory & factory) registerFunctionMultiFuzzyMatchAnyIndex(factory); registerFunctionMultiFuzzyMatchAllIndices(factory); registerFunctionExtractGroups(factory); - registerFunctionExtractAllGroups(factory); + registerFunctionExtractAllGroupsVertical(factory); + registerFunctionExtractAllGroupsHorizontal(factory); } } diff --git a/tests/performance/extract.xml b/tests/performance/extract.xml index 71dd8ce775d..b370152c7b2 100644 --- a/tests/performance/extract.xml +++ b/tests/performance/extract.xml @@ -6,5 +6,6 @@ SELECT count() FROM test.hits WHERE NOT ignore(extract(URL, '(\\w+=\\w+)')) SELECT count() FROM test.hits WHERE NOT ignore(extractAll(URL, '(\\w+=\\w+)')) SELECT count() FROM test.hits WHERE NOT ignore(extractGroups(URL, '(\\w+)=(\\w+)')) - SELECT count() FROM test.hits WHERE NOT ignore(extractAllGroups(URL, '(\\w+)=(\\w+)')) + SELECT count() FROM test.hits WHERE NOT ignore(extractAllGroupsVertical(URL, '(\\w+)=(\\w+)')) + SELECT count() FROM test.hits WHERE NOT ignore(extractAllGroupsHorizontal(URL, '(\\w+)=(\\w+)')) diff --git a/tests/queries/0_stateless/01246_extractAllGroups.sql b/tests/queries/0_stateless/01246_extractAllGroups.sql deleted file mode 100644 index ade52117d76..00000000000 --- a/tests/queries/0_stateless/01246_extractAllGroups.sql +++ /dev/null @@ -1,51 +0,0 @@ --- error cases -SELECT extractAllGroups(); --{serverError 42} not enough arguments -SELECT extractAllGroups('hello'); --{serverError 42} not enough arguments -SELECT extractAllGroups('hello', 123); --{serverError 43} invalid 
argument type -SELECT extractAllGroups(123, 'world'); --{serverError 43} invalid argument type -SELECT extractAllGroups('hello world', '((('); --{serverError 427} invalid re -SELECT extractAllGroups('hello world', materialize('\\w+')); --{serverError 44} non-const needle - -SELECT '0 groups, zero matches'; -SELECT extractAllGroups('hello world', '\\w+'); -- { serverError 36 } - -SELECT '1 group, multiple matches, String and FixedString'; -SELECT extractAllGroups('hello world', '(\\w+)'); -SELECT extractAllGroups('hello world', CAST('(\\w+)' as FixedString(5))); -SELECT extractAllGroups(CAST('hello world' AS FixedString(12)), '(\\w+)'); -SELECT extractAllGroups(CAST('hello world' AS FixedString(12)), CAST('(\\w+)' as FixedString(5))); -SELECT extractAllGroups(materialize(CAST('hello world' AS FixedString(12))), '(\\w+)'); -SELECT extractAllGroups(materialize(CAST('hello world' AS FixedString(12))), CAST('(\\w+)' as FixedString(5))); - -SELECT 'mutiple groups, multiple matches'; -SELECT extractAllGroups('abc=111, def=222, ghi=333 "jkl mno"="444 foo bar"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)'); - -SELECT 'big match'; -SELECT - length(haystack), length(matches[1]), length(matches), arrayMap((x) -> length(x), arrayMap(x -> x[1], matches)) -FROM ( - SELECT - repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, - extractAllGroups(haystack, '(abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz)') AS matches - FROM numbers(3) -); - -SELECT 'lots of matches'; -SELECT - length(haystack), length(matches[1]), length(matches), arrayReduce('sum', arrayMap((x) -> length(x), arrayMap(x -> x[1], matches))) -FROM ( - SELECT - repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, - extractAllGroups(haystack, '(\\w)') AS matches - FROM numbers(3) -); - -SELECT 'lots of groups'; -SELECT - length(haystack), length(matches[1]), length(matches), arrayMap((x) -> length(x), arrayMap(x -> x[1], matches)) -FROM ( - SELECT - repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, - extractAllGroups(haystack, repeat('(\\w)', 100)) AS matches - FROM numbers(3) -); diff --git a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference new file mode 100644 index 00000000000..13e717485d8 --- /dev/null +++ b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.reference @@ -0,0 +1,22 @@ +0 groups, zero matches +1 group, multiple matches, String and FixedString +[['hello','world']] +[['hello','world']] +[['hello','world']] +[['hello','world']] +[['hello','world']] +[['hello','world']] +mutiple groups, multiple matches +[['abc','def','ghi','"jkl mno"'],['111','222','333','"444 foo bar"']] +big match +0 1 0 [] +260 1 1 [156] +520 1 3 [156,156,156] +lots of matches +0 1 0 0 +260 1 260 260 +520 1 520 520 +lots of groups +0 100 0 [] +260 100 2 [1,1] +520 100 5 [1,1,1,1,1] diff --git a/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql new file mode 100644 index 00000000000..b7a71415a9d --- /dev/null +++ b/tests/queries/0_stateless/01246_extractAllGroupsHorizontal.sql @@ -0,0 +1,51 @@ +-- error cases +SELECT extractAllGroupsHorizontal(); --{serverError 42} not enough arguments +SELECT extractAllGroupsHorizontal('hello'); --{serverError 42} not enough arguments +SELECT extractAllGroupsHorizontal('hello', 123); --{serverError 43} invalid argument 
type
+SELECT extractAllGroupsHorizontal(123, 'world'); --{serverError 43} invalid argument type
+SELECT extractAllGroupsHorizontal('hello world', '((('); --{serverError 427} invalid re
+SELECT extractAllGroupsHorizontal('hello world', materialize('\\w+')); --{serverError 44} non-const needle
+
+SELECT '0 groups, zero matches';
+SELECT extractAllGroupsHorizontal('hello world', '\\w+'); -- { serverError 36 }
+
+SELECT '1 group, multiple matches, String and FixedString';
+SELECT extractAllGroupsHorizontal('hello world', '(\\w+)');
+SELECT extractAllGroupsHorizontal('hello world', CAST('(\\w+)' as FixedString(5)));
+SELECT extractAllGroupsHorizontal(CAST('hello world' AS FixedString(12)), '(\\w+)');
+SELECT extractAllGroupsHorizontal(CAST('hello world' AS FixedString(12)), CAST('(\\w+)' as FixedString(5)));
+SELECT extractAllGroupsHorizontal(materialize(CAST('hello world' AS FixedString(12))), '(\\w+)');
+SELECT extractAllGroupsHorizontal(materialize(CAST('hello world' AS FixedString(12))), CAST('(\\w+)' as FixedString(5)));
+
+SELECT 'mutiple groups, multiple matches';
+SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333 "jkl mno"="444 foo bar"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
+
+SELECT 'big match';
+SELECT
+    length(haystack), length(matches), length(matches[1]), arrayMap((x) -> length(x), matches[1])
+FROM (
+    SELECT
+        repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack,
+        extractAllGroupsHorizontal(haystack, '(abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz)') AS matches
+    FROM numbers(3)
+);
+
+SELECT 'lots of matches';
+SELECT
+    length(haystack), length(matches), length(matches[1]), arrayReduce('sum', arrayMap((x) -> length(x), matches[1]))
+FROM (
+    SELECT
+        repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack,
+        extractAllGroupsHorizontal(haystack, '(\\w)') AS matches
+    FROM numbers(3)
+);
+
+SELECT 'lots of groups';
+SELECT
+    length(haystack), length(matches), length(matches[1]), arrayMap((x) -> length(x), matches[1])
+FROM (
+    SELECT
+        repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack,
+        extractAllGroupsHorizontal(haystack, repeat('(\\w)', 100)) AS matches
+    FROM numbers(3)
+);
diff --git a/tests/queries/0_stateless/01246_extractAllGroups.reference b/tests/queries/0_stateless/01246_extractAllGroupsVertical.reference
similarity index 100%
rename from tests/queries/0_stateless/01246_extractAllGroups.reference
rename to tests/queries/0_stateless/01246_extractAllGroupsVertical.reference
diff --git a/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql b/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql
new file mode 100644
index 00000000000..8edc3f3e741
--- /dev/null
+++ b/tests/queries/0_stateless/01246_extractAllGroupsVertical.sql
@@ -0,0 +1,51 @@
+-- error cases
+SELECT extractAllGroupsVertical(); --{serverError 42} not enough arguments
+SELECT extractAllGroupsVertical('hello'); --{serverError 42} not enough arguments
+SELECT extractAllGroupsVertical('hello', 123); --{serverError 43} invalid argument type
+SELECT extractAllGroupsVertical(123, 'world'); --{serverError 43} invalid argument type
+SELECT extractAllGroupsVertical('hello world', '((('); --{serverError 427} invalid re
+SELECT extractAllGroupsVertical('hello world', materialize('\\w+')); --{serverError 44} non-const needle
+
+SELECT '0 groups, zero matches';
+SELECT extractAllGroupsVertical('hello world', '\\w+'); -- { serverError 36 }
+
+SELECT '1 group, 
multiple matches, String and FixedString'; +SELECT extractAllGroupsVertical('hello world', '(\\w+)'); +SELECT extractAllGroupsVertical('hello world', CAST('(\\w+)' as FixedString(5))); +SELECT extractAllGroupsVertical(CAST('hello world' AS FixedString(12)), '(\\w+)'); +SELECT extractAllGroupsVertical(CAST('hello world' AS FixedString(12)), CAST('(\\w+)' as FixedString(5))); +SELECT extractAllGroupsVertical(materialize(CAST('hello world' AS FixedString(12))), '(\\w+)'); +SELECT extractAllGroupsVertical(materialize(CAST('hello world' AS FixedString(12))), CAST('(\\w+)' as FixedString(5))); + +SELECT 'mutiple groups, multiple matches'; +SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333 "jkl mno"="444 foo bar"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)'); + +SELECT 'big match'; +SELECT + length(haystack), length(matches[1]), length(matches), arrayMap((x) -> length(x), arrayMap(x -> x[1], matches)) +FROM ( + SELECT + repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, + extractAllGroupsVertical(haystack, '(abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz)') AS matches + FROM numbers(3) +); + +SELECT 'lots of matches'; +SELECT + length(haystack), length(matches[1]), length(matches), arrayReduce('sum', arrayMap((x) -> length(x), arrayMap(x -> x[1], matches))) +FROM ( + SELECT + repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, + extractAllGroupsVertical(haystack, '(\\w)') AS matches + FROM numbers(3) +); + +SELECT 'lots of groups'; +SELECT + length(haystack), length(matches[1]), length(matches), arrayMap((x) -> length(x), arrayMap(x -> x[1], matches)) +FROM ( + SELECT + repeat('abcdefghijklmnopqrstuvwxyz', number * 10) AS haystack, + extractAllGroupsVertical(haystack, repeat('(\\w)', 100)) AS matches + FROM numbers(3) +); diff --git a/tests/queries/0_stateless/01275_extract_groups_check.sql b/tests/queries/0_stateless/01275_extract_groups_check.sql index 2dd236f2a3b..f8bc5943a78 100644 --- a/tests/queries/0_stateless/01275_extract_groups_check.sql +++ b/tests/queries/0_stateless/01275_extract_groups_check.sql @@ -1,5 +1,5 @@ -SELECT extractGroups('hello', ''); -- { serverError 69 } -SELECT extractAllGroups('hello', ''); -- { serverError 69 } +SELECT extractGroups('hello', ''); -- { serverError 36 } +SELECT extractAllGroups('hello', ''); -- { serverError 36 } SELECT extractGroups('hello', ' '); -- { serverError 36 } SELECT extractAllGroups('hello', ' '); -- { serverError 36 } From db555b088d231064197bf8c47bc2fecb58807607 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 11 Jun 2020 11:05:08 +0300 Subject: [PATCH 0546/2229] Regenerated ya.make --- src/Functions/ya.make | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 31491dec02c..ab8600802dd 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -144,7 +144,8 @@ SRCS( exp10.cpp exp2.cpp exp.cpp - extractAllGroups.cpp + extractAllGroupsHorizontal.cpp + extractAllGroupsVertical.cpp extract.cpp extractGroups.cpp extractTimeZoneFromFunctionArguments.cpp From 0731f2293dda54afff45b5b68f3b2f5864d36c27 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jun 2020 11:47:30 +0300 Subject: [PATCH 0547/2229] Add missing boost libraries into docker image --- docker/packager/deb/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index b2e4f76c00c..8a67ff24692 100644 
--- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \ libboost-system-dev \ libboost-filesystem-dev \ libboost-thread-dev \ + libboost-iostreams-dev \ + libboost-regex-dev \ zlib1g-dev \ liblz4-dev \ libdouble-conversion-dev \ From 585fee1a639605e77b371d40a0ff032756a64ecd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jun 2020 11:49:47 +0300 Subject: [PATCH 0548/2229] Add missing boost libraries into build scripts --- utils/build/build_debian.sh | 2 +- utils/build/build_debian_unbundled.sh | 2 +- utils/build/build_no_submodules.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/build/build_debian.sh b/utils/build/build_debian.sh index 0c194fe53db..4ae54b0d29f 100755 --- a/utils/build/build_debian.sh +++ b/utils/build/build_debian.sh @@ -8,7 +8,7 @@ # install compiler and libs sudo apt install -y git bash cmake ninja-build gcc-8 g++-8 libicu-dev libreadline-dev gperf # for -DUNBUNDLED=1 mode: -#sudo apt install -y libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzstd-dev libre2-dev librdkafka-dev libcapnp-dev libpoco-dev libgoogle-perftools-dev libunwind-dev googletest libcctz-dev +#sudo apt install -y libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzstd-dev libre2-dev librdkafka-dev libcapnp-dev libpoco-dev libgoogle-perftools-dev libunwind-dev googletest libcctz-dev # install testing only stuff if you want: sudo apt install -y expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd telnet diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index a75239321be..f5f59ce7a13 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev libboost-iostreams-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev rapidjson-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/build/build_no_submodules.sh b/utils/build/build_no_submodules.sh index fae10ab3270..4bcbe0b2a17 100755 --- a/utils/build/build_no_submodules.sh +++ b/utils/build/build_no_submodules.sh @@ -18,5 +18,5 @@ unzip -ou ch.zip # TODO: USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY : cmake test # Shared because 
/usr/bin/ld.gold: error: /usr/lib/x86_64-linux-gnu/libcrypto.a(err.o): multiple definition of 'ERR_remove_thread_state' CMAKE_FLAGS+="-DUSE_STATIC_LIBRARIES=0 -DUSE_INTERNAL_DOUBLE_CONVERSION_LIBRARY=0 $CMAKE_FLAGS" -EXTRAPACKAGES+="libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev liblz4-dev libzstd-dev libpoco-dev libdouble-conversion-dev libcctz-dev libre2-dev libsparsehash-dev $EXTRAPACKAGES" +EXTRAPACKAGES+="libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-iostreams-dev libboost-regex-dev liblz4-dev libzstd-dev libpoco-dev libdouble-conversion-dev libcctz-dev libre2-dev libsparsehash-dev $EXTRAPACKAGES" . $ROOT_DIR/ClickHouse-${BRANCH}/release From 22707508c1ccb85d3dd9e4fd9bbee39b73ade962 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 12:17:33 +0300 Subject: [PATCH 0549/2229] experiment --- tests/queries/0_stateless/00816_long_concurrent_alter_column.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index 965408065cf..d3a26b0ed75 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -59,6 +59,7 @@ wait echo "DROP TABLE concurrent_alter_column" | ${CLICKHOUSE_CLIENT} +sleep 1 # Check for deadlocks echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter%'" | ${CLICKHOUSE_CLIENT} From 706c5452482d633b69b8fa54c0eb6b3ccd248d9e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 12:18:12 +0300 Subject: [PATCH 0550/2229] experiment --- tests/queries/0_stateless/00816_long_concurrent_alter_column.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh index d3a26b0ed75..3ed0c6e1a6a 100755 --- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh +++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh @@ -59,7 +59,7 @@ wait echo "DROP TABLE concurrent_alter_column" | ${CLICKHOUSE_CLIENT} -sleep 1 +sleep 7 # Check for deadlocks echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter%'" | ${CLICKHOUSE_CLIENT} From 3e5d735871c9a995c5450b05d030d3046f2d3051 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 12:21:23 +0300 Subject: [PATCH 0551/2229] back to upstream --- .gitmodules | 2 +- contrib/sentry-native | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 93a0078a051..2fed57a519d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -170,4 +170,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/blinkov/sentry-native.git + url = https://github.com/getsentry/sentry-native.git diff --git a/contrib/sentry-native b/contrib/sentry-native index 78fb54989cd..f91ed3f95b5 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native @@ -1 +1 @@ -Subproject commit 78fb54989cd61cf11dcea142e12d1ecc6940c962 +Subproject commit f91ed3f95b5653f247189d720ab00765b4899d6f From 5a9a63e9a1acfc3c2397fb967a021b82ab04537e Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 11 Jun 2020 13:22:24 +0300 Subject: [PATCH 0552/2229] Revert "Force table creation on SYSTEM FLUSH LOGS" --- src/Interpreters/InterpreterSystemQuery.cpp | 
12 +++--- src/Interpreters/SystemLog.h | 22 ++++------- .../test_SYSTEM_FLUSH_LOGS/test.py | 38 ------------------- 3 files changed, 13 insertions(+), 59 deletions(-) delete mode 100644 tests/integration/test_SYSTEM_FLUSH_LOGS/test.py diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1480651b4b6..9ebdb155643 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -301,12 +301,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: context.checkAccess(AccessType::SYSTEM_FLUSH_LOGS); executeCommandsAndThrowIfError( - [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(true); }, - [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(true); }, - [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(true); }, - [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(true); }, - [&] () { if (auto text_log = context.getTextLog()) text_log->flush(true); }, - [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(true); } + [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(); }, + [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(); }, + [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(); }, + [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(); }, + [&] () { if (auto text_log = context.getTextLog()) text_log->flush(); }, + [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(); } ); break; case Type::STOP_LISTEN_QUERIES: diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 3c07af8c985..dd2f815ce92 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -76,8 +76,7 @@ class ISystemLog public: virtual String getName() = 0; virtual ASTPtr getCreateTableQuery() = 0; - //// force -- force table creation (used for SYSTEM FLUSH LOGS) - virtual void flush(bool force = false) = 0; + virtual void flush() = 0; virtual void prepareTable() = 0; virtual void startup() = 0; virtual void shutdown() = 0; @@ -134,7 +133,7 @@ public: void stopFlushThread(); /// Flush data in the buffer to disk - void flush(bool force = false) override; + void flush() override; /// Start the background thread. void startup() override; @@ -167,8 +166,6 @@ private: /* Data shared between callers of add()/flush()/shutdown(), and the saving thread */ std::mutex mutex; - /* prepareTable() guard */ - std::mutex prepare_mutex; // Queue is bounded. But its size is quite large to not block in all normal cases. std::vector queue; // An always-incrementing index of the first message currently in the queue. @@ -217,7 +214,7 @@ SystemLog::SystemLog(Context & context_, template void SystemLog::startup() { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); }); } @@ -231,7 +228,7 @@ void SystemLog::add(const LogElement & element) /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky. 
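For context on what this revert undoes: the removed force flag made SYSTEM FLUSH LOGS create the log tables before flushing, so they existed even on a freshly started server. A minimal SQL probe of the flush path - a hedged sketch, assuming only the standard query_log configuration and nothing specific to this patch:

-- Sketch only: flush the in-memory log buffers, then check the table.
-- Assumes query_log is enabled in the server config (it is by default).
SYSTEM FLUSH LOGS;
SELECT count() > 0 FROM system.query_log WHERE event_date >= today();

After the revert, the first statement no longer guarantees that a disabled-by-default table such as system.text_log exists - which is exactly what the integration test deleted below used to assert.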
auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); if (is_shutdown) return; @@ -275,16 +272,13 @@ void SystemLog::add(const LogElement & element) template -void SystemLog::flush(bool force) +void SystemLog::flush() { std::unique_lock lock(mutex); if (is_shutdown) return; - if (force) - prepareTable(); - const uint64_t queue_end = queue_front_index + queue.size(); if (requested_flush_before < queue_end) @@ -310,7 +304,7 @@ template void SystemLog::stopFlushThread() { { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); if (!saving_thread.joinable()) { @@ -423,7 +417,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, } { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); flushed_before = to_flush_end; flush_event.notify_all(); } @@ -435,8 +429,6 @@ void SystemLog::flushImpl(const std::vector & to_flush, template void SystemLog::prepareTable() { - std::lock_guard prepare_lock(prepare_mutex); - String description = table_id.getNameForLogs(); table = DatabaseCatalog::instance().tryGetTable(table_id, context); diff --git a/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py b/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py deleted file mode 100644 index 2329094e150..00000000000 --- a/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py +++ /dev/null @@ -1,38 +0,0 @@ -# pylint: disable=line-too-long -# pylint: disable=unused-argument -# pylint: disable=redefined-outer-name - -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node_default') - -system_logs = [ - # disabled by default - ('system.part_log', 0), - ('system.text_log', 0), - - # enabled by default - ('system.query_log', 1), - ('system.query_thread_log', 1), - ('system.trace_log', 1), - ('system.metric_log', 1), -] - -@pytest.fixture(scope='module') -def start_cluster(): - try: - cluster.start() - node.query('SYSTEM FLUSH LOGS') - yield cluster - finally: - cluster.shutdown() - -@pytest.mark.parametrize('table,exists', system_logs) -def test_system_logs(start_cluster, table, exists): - q = 'SELECT * FROM {}'.format(table) - if exists: - node.query(q) - else: - assert "Table {} doesn't exist".format(table) in node.query_and_get_error(q) From 9350472ee456bd0561e263df8b9c4f13bef3aaf6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 09:23:23 +0000 Subject: [PATCH 0553/2229] Support multiple bindings --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 6 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 5 +- .../ReadBufferFromRabbitMQConsumer.cpp | 77 +++++++++----- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 6 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 33 +++--- src/Storages/RabbitMQ/StorageRabbitMQ.h | 7 +- .../integration/test_storage_rabbitmq/test.py | 100 +++++++++++++++--- 7 files changed, 162 insertions(+), 72 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 245320008f3..2d995d97f18 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -20,7 +20,7 @@ RabbitMQBlockInputStream::RabbitMQBlockInputStream( , column_names(columns) , log(log_) , non_virtual_header(storage.getSampleBlockNonMaterialized()) - , virtual_header(storage.getSampleBlockForColumns({"_exchange", "_routingKey"})) + , virtual_header(storage.getSampleBlockForColumns({"_exchange"})) { } @@ -122,13 +122,11 @@ 
Block RabbitMQBlockInputStream::readImpl() auto new_rows = read_rabbitmq_message(); - auto exchange_name = storage.getExchangeName(); - auto routing_key = storage.getRoutingKey(); + auto exchange_name = buffer->getExchange(); for (size_t i = 0; i < new_rows; ++i) { virtual_columns[0]->insert(exchange_name); - virtual_columns[1]->insert(routing_key); } total_rows = total_rows + new_rows; diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index a3f133cfed0..d81a887747b 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -11,13 +11,12 @@ namespace DB #define LIST_OF_RABBITMQ_SETTINGS(M) \ M(SettingString, rabbitmq_host_port, "", "A host-port to connect to RabbitMQ server.", 0) \ - M(SettingString, rabbitmq_routing_key, "5672", "A routing key to connect producer->exchange->queue<->consumer.", 0) \ - M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent. Needed to bind queues to it.", 0) \ + M(SettingString, rabbitmq_routing_key_list, "5672", "A string of routing keys, separated by dots.", 0) \ + M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ - M(SettingUInt64, rabbitmq_hash_exchange, 0, "A flag which indicates whether consistent-hash-exchange should be used.", 0) \ M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 1321a4fb3b6..967da1a75ad 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -18,7 +18,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, const String & exchange_name_, - const String & routing_key_, + const Names & routing_keys_, const size_t channel_id_, Poco::Logger * log_, char row_delimiter_, @@ -31,7 +31,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , consumer_channel(std::move(consumer_channel_)) , eventHandler(eventHandler_) , exchange_name(exchange_name_) - , routing_key(routing_key_) + , routing_keys(routing_keys_) , channel_id(channel_id_) , log(log_) , row_delimiter(row_delimiter_) @@ -44,7 +44,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != "default" ? 1 : 0; + exchange_type_set = exchange_type != "default" ? true : false; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. @@ -69,8 +69,8 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { - /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest - * and it is also used for INSERT query. 
+    /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different
+     * binding algorithm this default behaviour is much faster). It is also used in INSERT query.
      */
     String producer_exchange = exchange_type_set ? exchange_name + "_default" : exchange_name;
     consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message)
@@ -86,7 +86,8 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
         LOG_ERROR(log, "Failed to declare exchange: {}", message);
     });

-    consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_key).onError([&](const char * message)
+    /// With fanout exchange the binding key is ignored - a parameter might be arbitrary
+    consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_keys[0]).onError([&](const char * message)
     {
         internal_exchange_declared = false;
         LOG_ERROR(log, "Failed to bind exchange: {}", message);
@@ -95,7 +96,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
     if (!exchange_type_set)
         return;

-    /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is also supported.
+    /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported.
     AMQP::ExchangeType type;
     if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout;
@@ -131,11 +132,14 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
         LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message);
     });

-    consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message)
+    for (auto & routing_key : routing_keys)
     {
-        local_exchange_declared = false;
-        LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message);
-    });
+        consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message)
+        {
+            local_exchange_declared = false;
+            LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message);
+        });
+    }
 }
@@ -158,7 +162,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
     queues.emplace_back(queue_name_);
     subscribed_queue[queue_name_] = false;

-    String binding_key = routing_key;
+    String binding_key = routing_keys[0];

     /* Every consumer has at least one unique queue. Bind the queues to exchange based on the consumer_channel_id
      * in case there is one queue per consumer and bind by queue_id in case there is more than 1 queue per consumer.
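As a usage sketch of what the routing_keys plumbing above enables - names here are illustrative, patterned on the integration tests added with this patch, and not part of the source change itself:

-- Hedged sketch: one table bound to several routing keys of a direct exchange.
CREATE TABLE test.rabbitmq_direct (key UInt64, value UInt64)
ENGINE = RabbitMQ
    SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
             rabbitmq_exchange_name = 'multiple_bindings_testing',
             rabbitmq_exchange_type = 'direct',
             rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5',
             rabbitmq_format = 'JSONEachRow',
             rabbitmq_row_delimiter = '\n';

Each comma-separated key becomes one bindExchange call in the loop above, so a single table consumes messages published under any of the listed keys.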
@@ -176,11 +180,6 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection - subscribe(queues.back()); - - LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); - consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) .onSuccess([&] { @@ -189,21 +188,47 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { internal_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); + LOG_ERROR(log, "Failed to bind to key {}, the reason is: {}", binding_key, message); }); + /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection + subscribe(queues.back()); + + LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); + if (exchange_type_set) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) - .onSuccess([&] + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + if (hash_exchange) { - local_bindings_created = true; - }) - .onError([&](const char * message) + consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } + else { - local_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); - }); + /// means there is only one queue with one consumer - no even distribution needed - no hash-exchange + for (auto & routing_key : routing_keys) + { + consumer_channel->bindQueue(local_exchange_name, queue_name_, routing_key) + .onSuccess([&] + { + local_bindings_created = true; + }) + .onError([&](const char * message) + { + local_bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding: {}", message); + }); + } + } } }) .onError([&](const char * message) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 51eae60cdeb..3d02eeab761 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -25,7 +25,7 @@ public: ChannelPtr consumer_channel_, RabbitMQHandler & eventHandler_, const String & exchange_name_, - const String & routing_key_, + const Names & routing_keys_, const size_t channel_id_, Poco::Logger * log_, char row_delimiter_, @@ -40,6 +40,8 @@ public: void allowNext() { allowed = true; } // Allow to read next message. 
void checkSubscription(); + auto getExchange() const { return exchange_name; } + private: using Messages = std::vector; @@ -47,7 +49,7 @@ private: RabbitMQHandler & eventHandler; const String & exchange_name; - const String & routing_key; + const Names & routing_keys; const size_t channel_id; const bool bind_by_id; const size_t num_queues; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 895b9ca2bec..e17d541b661 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -55,7 +55,7 @@ StorageRabbitMQ::StorageRabbitMQ( Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, + const Names & routing_keys_, const String & exchange_name_, const String & format_name_, char row_delimiter_, @@ -65,7 +65,7 @@ StorageRabbitMQ::StorageRabbitMQ( : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) - , routing_key(global_context.getMacros()->expand(routing_key_)) + , routing_keys(global_context.getMacros()->expand(routing_keys_)) , exchange_name(exchange_name_) , format_name(global_context.getMacros()->expand(format_name_)) , row_delimiter(row_delimiter_) @@ -215,7 +215,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() auto table_id = getStorageID(); String table_name = table_id.getNameForLogs(); - return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_key, + return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); } @@ -224,7 +224,7 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; - return std::make_shared(parsed_address, login_password, routing_key, producer_exchange, + return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, log, num_consumers * num_queues, bind_by_id, row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -369,18 +369,18 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } - String routing_key = rabbitmq_settings.rabbitmq_routing_key.value; + String routing_key_list = rabbitmq_settings.rabbitmq_routing_key_list.value; if (args_count >= 2) { - const auto * ast = engine_args[1]->as(); - if (ast && ast->value.getType() == Field::Types::String) - { - routing_key = safeGet(ast->value); - } - else - { - throw Exception(String("RabbitMQ routing key must be a string"), ErrorCodes::BAD_ARGUMENTS); - } + engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); + routing_key_list = engine_args[1]->as().value.safeGet(); + } + + Names routing_keys; + boost::split(routing_keys, routing_key_list, [](char c){ return c == ','; }); + for (String & key : routing_keys) + { + boost::trim(key); } String exchange = rabbitmq_settings.rabbitmq_exchange_name.value; @@ -483,7 +483,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_key, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); + host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); @@ -494,8 +494,7 @@ void registerStorageRabbitMQ(StorageFactory & factory) NamesAndTypesList StorageRabbitMQ::getVirtuals() const { return NamesAndTypesList{ - {"_exchange", std::make_shared()}, - {"_routingKey", std::make_shared()} + {"_exchange", std::make_shared()} }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 27a9b8834f4..45ced9d247b 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -48,9 +48,6 @@ public: ProducerBufferPtr createWriteBuffer(); - const String & getExchangeName() const { return exchange_name; } - const String & getRoutingKey() const { return routing_key; } - const String & getFormatName() const { return format_name; } NamesAndTypesList getVirtuals() const override; @@ -62,7 +59,7 @@ protected: Context & context_, const ColumnsDescription & columns_, const String & host_port_, - const String & routing_key_, + const Names & routing_keys_, const String & exchange_name_, const String & format_name_, char row_delimiter_, @@ -74,7 +71,7 @@ private: Context global_context; Context rabbitmq_context; - String routing_key; + Names routing_keys; const String exchange_name; const String format_name; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index d9c08ef7b6b..46b622bde8a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -120,7 +120,7 @@ def test_rabbitmq_select_from_new_syntax_table(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'new', + rabbitmq_routing_key_list = 'new', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; @@ -193,7 +193,7 @@ def test_rabbitmq_select_empty(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'empty', + 
rabbitmq_routing_key_list = 'empty', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; ''') @@ -207,7 +207,7 @@ def test_rabbitmq_json_without_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'json', + rabbitmq_routing_key_list = 'json', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'JSONEachRow' ''') @@ -249,7 +249,7 @@ def test_rabbitmq_csv_with_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'csv', + rabbitmq_routing_key_list = 'csv', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'CSV', rabbitmq_row_delimiter = '\\n'; @@ -285,7 +285,7 @@ def test_rabbitmq_tsv_with_delimiter(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'tsv', + rabbitmq_routing_key_list = 'tsv', rabbitmq_exchange_name = 'clickhouse-exchange', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; @@ -322,7 +322,7 @@ def test_rabbitmq_materialized_view(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mv', + rabbitmq_routing_key_list = 'mv', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -365,7 +365,7 @@ def test_rabbitmq_materialized_view_with_subquery(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mvsq', + rabbitmq_routing_key_list = 'mvsq', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view (key UInt64, value UInt64) @@ -410,7 +410,7 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'mmv', + rabbitmq_routing_key_list = 'mmv', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view1 (key UInt64, value UInt64) @@ -471,7 +471,7 @@ def test_rabbitmq_big_message(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value String) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'big', + rabbitmq_routing_key_list = 'big', rabbitmq_format = 'JSONEachRow'; CREATE TABLE test.view (key UInt64, value String) ENGINE = MergeTree @@ -774,7 +774,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'insert1', + rabbitmq_routing_key_list = 'insert1', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; ''') @@ -829,7 +829,7 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): CREATE TABLE test.rabbitmq_many (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', - rabbitmq_routing_key = 'insert2', + rabbitmq_routing_key_list = 'insert2', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_many (key UInt64, value UInt64) @@ -1024,7 +1024,7 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 5, rabbitmq_exchange_name = 
'direct_exchange_testing', rabbitmq_exchange_type = 'direct', - rabbitmq_routing_key = 'direct_{0}', + rabbitmq_routing_key_list = 'direct_{0}', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.direct_exchange_{0}_mv TO test.destination AS @@ -1083,7 +1083,7 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, - rabbitmq_routing_key = 'key_{0}', + rabbitmq_routing_key_list = 'key_{0}', rabbitmq_exchange_name = 'fanout_exchange_testing', rabbitmq_exchange_type = 'fanout', rabbitmq_format = 'JSONEachRow', @@ -1143,7 +1143,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 5, rabbitmq_exchange_name = 'topic_exchange_testing', rabbitmq_exchange_type = 'topic', - rabbitmq_routing_key = '*.{0}', + rabbitmq_routing_key_list = '*.{0}', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS @@ -1161,7 +1161,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): rabbitmq_num_consumers = 4, rabbitmq_exchange_name = 'topic_exchange_testing', rabbitmq_exchange_type = 'topic', - rabbitmq_routing_key = '*.logs', + rabbitmq_routing_key_list = '*.logs', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; CREATE MATERIALIZED VIEW test.topic_exchange_{0}_mv TO test.destination AS @@ -1285,6 +1285,76 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_multiple_bindings(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.bindings; + DROP TABLE IF EXISTS test.bindings_mv; + CREATE TABLE test.bindings (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE TABLE test.view_bindings (key UInt64, value UInt64) + ENGINE = MergeTree + ORDER BY key; + CREATE MATERIALIZED VIEW test.bindings_mv TO test.view_bindings AS + SELECT * FROM test.bindings; + ''') + + + i = [0] + messages_num = 500 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = pika.ConnectionParameters('localhost', 5672, '/', credentials) + + def produce(): + # init connection here because otherwise python rabbitmq client might fail + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + keys = ['key1', 'key2', 'key3', 'key4', 'key5'] + + for key in keys: + for message in messages: + channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, body=message) + + connection.close() + + threads = [] + threads_num = 10 + + for _ in range(threads_num): + threads.append(threading.Thread(target=produce)) + for thread in threads: + time.sleep(random.uniform(0, 1)) + thread.start() + + while True: + result = instance.query('SELECT count() FROM test.view_bindings') + time.sleep(1) + print result + if int(result) == messages_num 
* threads_num * 5:
+                break
+
+    for thread in threads:
+        thread.join()
+
+    assert int(result) == messages_num * threads_num * 5, 'ClickHouse lost some messages: {}'.format(result)
+
+
 if __name__ == '__main__':
     cluster.start()
     raw_input("Cluster created, press any key to destroy...")

From 626eb53baae96de1bfbd2f736ad444ecee34827f Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 11 Jun 2020 10:56:40 +0000
Subject: [PATCH 0554/2229] Fix multiple bindings for single queue & rm hardcoded strings

---
 .../ReadBufferFromRabbitMQConsumer.cpp        | 48 ++++++++++++-------
 src/Storages/RabbitMQ/StorageRabbitMQ.cpp     |  2 +-
 .../integration/test_storage_rabbitmq/test.py | 46 ++++++++++++------
 3 files changed, 65 insertions(+), 31 deletions(-)

diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
index 967da1a75ad..5d2e3073d41 100644
--- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
+++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp
@@ -14,6 +14,17 @@
 namespace DB
 {

+namespace Exchange
+{
+    /// Note that default here means default by implementation and not by rabbitmq settings
+    static const String DEFAULT = "default";
+    static const String FANOUT = "fanout";
+    static const String DIRECT = "direct";
+    static const String TOPIC = "topic";
+    static const String HASH = "consistent_hash";
+}
+
+
 ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
         ChannelPtr consumer_channel_,
         RabbitMQHandler & eventHandler_,
@@ -44,7 +55,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer(
     messages.clear();
     current = messages.begin();

-    exchange_type_set = exchange_type != "default" ? true : false;
+    exchange_type_set = exchange_type != Exchange::DEFAULT ? true : false;

     /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added.
      * By default there is one queue per consumer.
@@ -72,14 +83,14 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
     /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different
      * binding algorithm this default behaviour is much faster). It is also used in INSERT query.
      */
-    String producer_exchange = exchange_type_set ? exchange_name + "_default" : exchange_name;
+    String producer_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name;
     consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message)
     {
         internal_exchange_declared = false;
         LOG_ERROR(log, "Failed to declare exchange: {}", message);
     });

-    internal_exchange_name = producer_exchange + "_direct";
+    internal_exchange_name = producer_exchange + "_" + Exchange::DIRECT;
     consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message)
     {
         internal_exchange_declared = false;
@@ -99,11 +110,11 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
     /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported.
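The constants above replace the string literals compared inline below; at the SQL level they surface as values of the rabbitmq_exchange_type setting. For instance, the consistent-hash variant - a sketch with illustrative names, patterned on test_rabbitmq_hash_exchange rather than taken from this diff:

-- Hedged sketch: a consistent-hash exchange spreads messages across consumers.
CREATE TABLE test.rabbitmq_hash (key UInt64, value UInt64)
ENGINE = RabbitMQ
    SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
             rabbitmq_num_consumers = 4,
             rabbitmq_exchange_name = 'hash_exchange_testing',
             rabbitmq_exchange_type = 'consistent_hash',
             rabbitmq_format = 'JSONEachRow',
             rabbitmq_row_delimiter = '\n';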
 AMQP::ExchangeType type;
-    if (exchange_type == "fanout") type = AMQP::ExchangeType::fanout;
-    else if (exchange_type == "direct") type = AMQP::ExchangeType::direct;
-    else if (exchange_type == "topic") type = AMQP::ExchangeType::topic;
-    else if (exchange_type == "consistent_hash") type = AMQP::ExchangeType::consistent_hash;
-    else return;
+    if (exchange_type == Exchange::FANOUT) type = AMQP::ExchangeType::fanout;
+    else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct;
+    else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic;
+    else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash;
+    else return;

     /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages
      * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting the same
@@ -115,12 +126,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange()
         LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message);
     });

-    hash_exchange = true;
-
     /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash
-    if (!bind_by_id && exchange_type != "consistent_hash")
+    if (!bind_by_id && exchange_type != Exchange::HASH)
         return;

+    hash_exchange = true;
+
     AMQP::Table exchange_arguments;
     exchange_arguments["hash-property"] = "message_id";
@@ -153,6 +164,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
         internal_exchange_declared = true;
     }

+    /* Internal exchange is a default exchange (by implementation, not by rabbitmq settings) and is used for INSERT query
+     * and if exchange_type is not set - there is no local exchange. If it is set - then local exchange is a distributor
+     * exchange, which is bound to the exchange specified by the client.
+     */
     bool internal_bindings_created = false, internal_bindings_error = false;
     bool local_bindings_created = false, local_bindings_error = false;
@@ -188,7 +203,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
         .onError([&](const char * message)
         {
             internal_bindings_error = true;
-            LOG_ERROR(log, "Failed to bind to key {}, the reason is: {}", binding_key, message);
+            LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message);
         });

     /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection
@@ -209,15 +224,16 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id)
                 local_bindings_error = true;
-                LOG_ERROR(log, "Failed to create queue binding: {}", message);
+                LOG_ERROR(log, "Failed to create queue binding to key {}.
Reason: {}", binding_key, message); }); } else { - /// means there is only one queue with one consumer - no even distribution needed - no hash-exchange + /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange for (auto & routing_key : routing_keys) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, routing_key) + /// Binding directly to exchange, specified by the client + consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { local_bindings_created = true; @@ -225,7 +241,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { local_bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding: {}", message); + LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); }); } } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e17d541b661..212d1fbc783 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -213,7 +213,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); auto table_id = getStorageID(); - String table_name = table_id.getNameForLogs(); + String table_name = table_id.getNameForLogs(); return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 46b622bde8a..1a56395eb29 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -927,7 +927,6 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): while True: result = instance.query('SELECT count() FROM test.view_sharding') time.sleep(1) - print result if int(result) == messages_num * threads_num: break @@ -1288,9 +1287,17 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): @pytest.mark.timeout(420) def test_rabbitmq_multiple_bindings(rabbitmq_cluster): instance.query(''' - DROP TABLE IF EXISTS test.bindings; - DROP TABLE IF EXISTS test.bindings_mv; - CREATE TABLE test.bindings (key UInt64, value UInt64) + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_1_mv; + CREATE TABLE test.bindings_1 (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, @@ -1300,13 +1307,25 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', rabbitmq_format = 'JSONEachRow', rabbitmq_row_delimiter = '\\n'; - CREATE TABLE test.view_bindings (key UInt64, value UInt64) - ENGINE = MergeTree - ORDER BY key; - CREATE MATERIALIZED VIEW test.bindings_mv TO test.view_bindings AS - SELECT * FROM test.bindings; + CREATE MATERIALIZED VIEW test.bindings_1_mv TO test.destination AS + SELECT * FROM test.bindings_1; ''') + # in case num_consumers and num_queues are not set - multiple bindings are implemented differently, so test them too + instance.query(''' + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS 
test.bindings_2_mv; + CREATE TABLE test.bindings_2 (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'multiple_bindings_testing', + rabbitmq_exchange_type = 'direct', + rabbitmq_routing_key_list = 'key1,key2,key3,key4,key5', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.bindings_2_mv TO test.destination AS + SELECT * FROM test.bindings_2; + ''') i = [0] messages_num = 500 @@ -1318,7 +1337,7 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): # init connection here because otherwise python rabbitmq client might fail connection = pika.BlockingConnection(parameters) channel = connection.channel() - channel.exchange_declare(exchange='hash_exchange_testing', exchange_type='x-consistent-hash') + channel.exchange_declare(exchange='multiple_bindings_testing', exchange_type='direct') messages = [] for _ in range(messages_num): @@ -1343,16 +1362,15 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): thread.start() while True: - result = instance.query('SELECT count() FROM test.view_bindings') + result = instance.query('SELECT count() FROM test.destination') time.sleep(1) - print result - if int(result) == messages_num * threads_num * 5: + if int(result) == messages_num * threads_num * 5 * 2: break for thread in threads: thread.join() - assert int(result) == messages_num * threads_num * 5, 'ClickHouse lost some messages: {}'.format(result) + assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) if __name__ == '__main__': From 5f73c87c7142e7d137a29f827d8fab8ebdc10ad2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 15:18:19 +0300 Subject: [PATCH 0555/2229] change used flag --- cmake/find/sentry.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 2d3aa71248a..eadf071141e 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) +if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG)) option (USE_SENTRY "Use Sentry" ON) set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) From ed0826efa7f68523e561edb98f5274f534e45884 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jun 2020 15:18:45 +0300 Subject: [PATCH 0556/2229] Remove extra columns from sample in StorageBuffer. --- src/Storages/StorageBuffer.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 2d8c3fd9a2f..72d77a6481e 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -229,6 +229,17 @@ Pipes StorageBuffer::read( for (auto & buf : buffers) pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, *this)); + /// Convert pipes from table to structure from buffer. 
+ if (!pipes_from_buffers.empty() && !pipes_from_dst.empty() + && !blocksHaveEqualStructure(pipes_from_buffers.front().getHeader(), pipes_from_dst.front().getHeader())) + { + for (auto & pipe : pipes_from_dst) + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), + pipes_from_buffers.front().getHeader(), + ConvertingTransform::MatchColumnsMode::Name)); + } + /** If the sources from the table were processed before some non-initial stage of query execution, * then sources from the buffers must also be wrapped in the processing pipeline before the same stage. */ From fa47fc3f30eb144aa1593ebbfc630fdb4f095c39 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 15:34:02 +0300 Subject: [PATCH 0557/2229] fix address formatting --- base/daemon/SentryWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 2ce43c9f0a2..45f5bd56ca1 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -153,7 +153,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c const StackTrace::Frame & current_frame = frames[i]; sentry_value_t sentry_frame = sentry_value_new_object(); UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr); - std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr); + std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr); sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr)); if (current_frame.symbol.has_value()) From 36ea4abb48b7190eb40157b937bc7e7801a97ca5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 11 Jun 2020 15:54:29 +0300 Subject: [PATCH 0558/2229] Add several boost libraries to deb image --- docker/packager/deb/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index b2e4f76c00c..8a67ff24692 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \ libboost-system-dev \ libboost-filesystem-dev \ libboost-thread-dev \ + libboost-iostreams-dev \ + libboost-regex-dev \ zlib1g-dev \ liblz4-dev \ libdouble-conversion-dev \ From e5edd472d6f4a4ba2de23417b082af43d65e8762 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 16:01:35 +0300 Subject: [PATCH 0559/2229] Avoid logging error as a warning --- docs/tools/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tools/test.py b/docs/tools/test.py index 63b84885d9f..5c0cf4b799d 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -92,9 +92,11 @@ def test_single_page(input_path, lang): logging.warning('Found %d duplicate anchor points' % duplicate_anchor_points) if links_to_nowhere: - logging.warning(f'Found {links_to_nowhere} links to nowhere in {lang}') if lang == 'en': # TODO: check all languages again + logging.error(f'Found {links_to_nowhere} links to nowhere in {lang}') sys.exit(1) + else: + logging.warning(f'Found {links_to_nowhere} links to nowhere in {lang}') if len(anchor_points) <= 10: logging.error('Html parsing is probably broken') From 887a24b73a25e22202df0055bf5cc694d38a6019 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Thu, 11 Jun 2020 15:08:24 +0200 Subject: [PATCH 0560/2229] Fix corner case (only) for exit code overflow --- programs/client/Client.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git 
a/programs/client/Client.cpp b/programs/client/Client.cpp index 917acdc2a83..12db1dc0225 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1920,7 +1920,11 @@ public: std::string text = e.displayText(); std::cerr << "Code: " << e.code() << ". " << text << std::endl; std::cerr << "Table №" << i << std::endl << std::endl; - exit(e.code()); + auto exit_code = e.code() % 256; + if (exit_code == 0) + exit_code = 255; + exit(exit_code); + } } From 3eed1c8c2b5e35b26b24030aa17473d083697525 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jun 2020 16:41:14 +0300 Subject: [PATCH 0561/2229] Added test --- .../0_stateless/01305_buffer_final_bug.reference | 0 tests/queries/0_stateless/01305_buffer_final_bug.sql | 11 +++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/01305_buffer_final_bug.reference create mode 100644 tests/queries/0_stateless/01305_buffer_final_bug.sql diff --git a/tests/queries/0_stateless/01305_buffer_final_bug.reference b/tests/queries/0_stateless/01305_buffer_final_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01305_buffer_final_bug.sql b/tests/queries/0_stateless/01305_buffer_final_bug.sql new file mode 100644 index 00000000000..8d1586932ce --- /dev/null +++ b/tests/queries/0_stateless/01305_buffer_final_bug.sql @@ -0,0 +1,11 @@ +drop table if exists t; +drop table if exists t_buf; + +create table t (x UInt64) engine = MergeTree order by (x, intHash64(x)) sample by intHash64(x); +insert into t select number from numbers(10000); +create table t_buf as t engine = Buffer(currentDatabase(), 't', 16, 20, 100, 100000, 10000000, 50000000, 250000000); +insert into t_buf values (1); +select count() from t_buf sample 1/2 format Null; + +drop table if exists t_buf; +drop table if exists t; From 5f073c3ef47df755c3710be94bb60edb13752698 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 11 Jun 2020 17:11:13 +0300 Subject: [PATCH 0562/2229] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 95bc46d4cb4..bb12bf98481 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -465,7 +465,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() else if (code == Coordination::ZNOTEMPTY) { throw Exception(fmt::format( - "The old table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", zookeeper_path), ErrorCodes::TABLE_WAS_NOT_DROPPED); + "The old table was not completely removed from ZooKeeper, {} still exists and may contain some garbage. 
But it should never happen according to the logic of operations (it's a bug).", zookeeper_path), ErrorCodes::LOGICAL_ERROR); } else if (code != Coordination::ZOK) { From d61da39c10161227b640207ad5f6db780b3743e9 Mon Sep 17 00:00:00 2001 From: giordyb Date: Thu, 11 Jun 2020 17:23:02 +0200 Subject: [PATCH 0563/2229] Update entrypoint.sh without -q option the database does not get created at startup --- docker/server/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 059f3cb631b..8fc9c670b06 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # create default database, if defined if [ -n "$CLICKHOUSE_DB" ]; then echo "$0: create database '$CLICKHOUSE_DB'" - "${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; + "${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; fi for f in /docker-entrypoint-initdb.d/*; do From 47a902a6ce593d9ee55a29fdb0b35bc6f44152a7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 11 Jun 2020 18:55:44 +0300 Subject: [PATCH 0564/2229] Simple github hook --- utils/github-hook/hook.py | 195 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 utils/github-hook/hook.py diff --git a/utils/github-hook/hook.py b/utils/github-hook/hook.py new file mode 100644 index 00000000000..13d62b311f7 --- /dev/null +++ b/utils/github-hook/hook.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- +import json +import requests +import time +import os + +DB = 'gh-data' +RETRIES = 5 + + +def process_issue_event(response): + issue = response['issue'] + return dict( + action=response['action'], + sender=response['sender']['login'], + updated_at=issue['updated_at'], + url=issue['url'], + number=issue['number'], + author=issue['user']['login'], + labels=[label['name'] for label in issue['labels']], + state=issue['state'], + assignees=[assignee['login'] for assignee in issue['assignees']], + created_at=issue['created_at'], + body=issue['body'], + title=issue['title'], + comments=issue['comments'], + raw_json=json.dumps(response),) + + +def process_issue_comment_event(response): + issue = response['issue'] + comment = response['comment'] + + return dict( + action='comment_' + response['action'], + sender=response['sender']['login'], + updated_at=issue['updated_at'], + url=issue['url'], + number=issue['number'], + author=issue['user']['login'], + labels=[label['name'] for label in issue['labels']], + state=issue['state'], + assignees=[assignee['login'] for assignee in issue['assignees']], + created_at=issue['created_at'], + body=issue['body'], + title=issue['title'], + comments=issue['comments'], + comment_body=comment['body'], + comment_author=comment['user']['login'], + comment_url=comment['url'], + comment_created_at=comment['created_at'], + comment_updated_at=comment['updated_at'], + raw_json=json.dumps(response),) + + +def process_pull_request_event(response): + pull_request = response['pull_request'] + result = dict( + updated_at=pull_request['updated_at'], + number=pull_request['number'], + action=response['action'], + sender=response['sender']['login'], + url=pull_request['url'], + author=pull_request['user']['login'], + labels=[label['name'] for label in pull_request['labels']], + state=pull_request['state'], + body=pull_request['body'], + title=pull_request['title'], + 
created_at=pull_request['created_at'], + assignees=[assignee['login'] for assignee in pull_request['assignees']], + requested_reviewers=[reviewer['login'] for reviewer in pull_request['requested_reviewers']], + head_repo=pull_request['head']['repo']['full_name'], + head_ref=pull_request['head']['ref'], + head_clone_url=pull_request['head']['repo']['clone_url'], + head_ssh_url=pull_request['head']['repo']['ssh_url'], + base_repo=pull_request['base']['repo']['full_name'], + base_ref=pull_request['base']['ref'], + base_clone_url=pull_request['base']['repo']['clone_url'], + base_ssh_url=pull_request['base']['repo']['ssh_url'], + raw_json=json.dumps(response), + ) + + if 'mergeable' in pull_request and pull_request['mergeable'] is not None: + result['mergeable'] = 1 if pull_request['mergeable'] else 0 + + if 'merged_by' in pull_request and pull_request['merged_by'] is not None: + result['merged_by'] = pull_request['merged_by']['login'] + + if 'merged_at' in pull_request and pull_request['merged_at'] is not None: + result['merged_at'] = pull_request['merged_at'] + + if 'closed_at' in pull_request and pull_request['closed_at'] is not None: + result['closed_at'] = pull_request['closed_at'] + + if 'merge_commit_sha' in pull_request and pull_request['merge_commit_sha'] is not None: + result['merge_commit_sha'] = pull_request['merge_commit_sha'] + + if 'draft' in pull_request: + result['draft'] = 1 if pull_request['draft'] else 0 + + for field in ['comments', 'review_comments', 'commits', 'additions', 'deletions', 'changed_files']: + if field in pull_request: + result[field] = pull_request[field] + + return result + + +def process_pull_request_review(response): + result = process_pull_request_event(response) + review = response['review'] + result['action'] = 'review_' + result['action'] + result['review_body'] = review['body'] if review['body'] is not None else '' + result['review_id'] = review['id'] + result['review_author'] = review['user']['login'] + result['review_commit_sha'] = review['commit_id'] + result['review_submitted_at'] = review['submitted_at'] + result['review_state'] = review['state'] + return result + + +def process_pull_request_review_comment(response): + result = process_pull_request_event(response) + comment = response['comment'] + result['action'] = 'review_comment_' + result['action'] + result['review_id'] = comment['pull_request_review_id'] + result['review_comment_path'] = comment['path'] + result['review_commit_sha'] = comment['commit_id'] + result['review_comment_body'] = comment['body'] + result['review_comment_author'] = comment['user']['login'] + result['review_comment_created_at'] = comment['created_at'] + result['review_comment_updated_at'] = comment['updated_at'] + return result + + +def event_processor_dispatcher(headers, body, inserter): + if 'X-Github-Event' in headers: + if headers['X-Github-Event'] == 'issues': + result = process_issue_event(body) + inserter.insert_event_into(DB, 'issues', result) + elif headers['X-Github-Event'] == 'issue_comment': + result = process_issue_comment_event(body) + inserter.insert_event_into(DB, 'issues', result) + elif headers['X-Github-Event'] == 'pull_request': + result = process_pull_request_event(body) + inserter.insert_event_into(DB, 'pull_requests', result) + elif headers['X-Github-Event'] == 'pull_request_review': + result = process_pull_request_review(body) + inserter.insert_event_into(DB, 'pull_requests', result) + elif headers['X-Github-Event'] == 'pull_request_review_comment': + result = 
process_pull_request_review_comment(body) + inserter.insert_event_into(DB, 'pull_requests', result) + + +class ClickHouseInserter(object): + def __init__(self, url, user, password): + self.url = url + self.auth = { + 'X-ClickHouse-User': user, + 'X-ClickHouse-Key': password + } + + def insert_event_into(self, db, table, event): + params = { + 'database': db, + 'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table), + 'date_time_input_format': 'best_effort' + } + event_str = json.dumps(event) + for i in range(RETRIES): + try: + response = requests.post(self.url, params=params, data=event_str, headers=self.auth, verify=False) + response.raise_for_status() + break + except Exception as ex: + print("Exception inseting into ClickHouse:", ex) + time.sleep(0.1) + + +def test(event, context): + inserter = ClickHouseInserter( + os.getenv('CLICKHOUSE_URL'), + os.getenv('CLICKHOUSE_USER'), + os.getenv('CLICKHOUSE_PASSWORD')) + + body = json.loads(event['body'], strict=False) + headers = event['headers'] + event_processor_dispatcher(headers, body, inserter) + + return { + 'statusCode': 200, + 'headers': { + 'Content-Type': 'text/plain' + }, + 'isBase64Encoded': False, + } From ebc781a0c84350b82113cd921feee028c7a5f6a7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 11 Jun 2020 19:14:22 +0300 Subject: [PATCH 0565/2229] make polymorphic_parts perf test not blazing fast --- ...phic_parts.xml => polymorphic_parts_l.xml} | 22 +++++------- tests/performance/polymorphic_parts_m.xml | 35 +++++++++++++++++++ tests/performance/polymorphic_parts_s.xml | 35 +++++++++++++++++++ 3 files changed, 79 insertions(+), 13 deletions(-) rename tests/performance/{polymorphic_parts.xml => polymorphic_parts_l.xml} (60%) create mode 100644 tests/performance/polymorphic_parts_m.xml create mode 100644 tests/performance/polymorphic_parts_s.xml diff --git a/tests/performance/polymorphic_parts.xml b/tests/performance/polymorphic_parts_l.xml similarity index 60% rename from tests/performance/polymorphic_parts.xml rename to tests/performance/polymorphic_parts_l.xml index a8e305953d0..75ad857c9a8 100644 --- a/tests/performance/polymorphic_parts.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -18,20 +18,16 @@ ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) - INSERT INTO hits_wide(UserID) VALUES (rand()) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + + + 1000 + 1000 + - INSERT INTO hits_compact(UserID) VALUES (rand()) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) - - INSERT INTO hits_buffer(UserID) VALUES (rand()) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(1000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml new file mode 100644 index 00000000000..fbe0c18d07e --- /dev/null +++ 
b/tests/performance/polymorphic_parts_m.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + + + + + 100 + 100 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(5000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(5000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(5000) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml new file mode 100644 index 00000000000..085295af842 --- /dev/null +++ b/tests/performance/polymorphic_parts_s.xml @@ -0,0 +1,35 @@ + + + CREATE TABLE hits_wide AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 + + + CREATE TABLE hits_compact AS hits_10m_single ENGINE = MergeTree() + PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) + SAMPLE BY intHash32(UserID) + SETTINGS min_bytes_for_wide_part = '10M' + + + CREATE TABLE hits_buffer AS hits_10m_single + ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) + + + + + 1 + 1 + + + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50) + + DROP TABLE IF EXISTS hits_wide + DROP TABLE IF EXISTS hits_compact + DROP TABLE IF EXISTS hits_buffer + From 7b8c1657a5397350f911bffa37da4cbb4c13910d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jun 2020 19:34:36 +0300 Subject: [PATCH 0566/2229] Fix nullable prewhere column type. --- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index f2b53fbb70a..6cdd0270dea 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -905,7 +905,9 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r if (prewhere->remove_prewhere_column) result.columns.erase(result.columns.begin() + prewhere_column_pos); else - result.columns[prewhere_column_pos] = DataTypeUInt8().createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); + result.columns[prewhere_column_pos] = + result.block_before_prewhere.getByPosition(prewhere_column_pos).type-> + createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } /// Filter in WHERE instead From 8234a430a0221a1ef078f624ae169b2bc81b7277 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jun 2020 19:40:44 +0300 Subject: [PATCH 0567/2229] Added test. 
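The test covers the case fixed in the previous commit: a PREWHERE condition on a Nullable column whose filter column is kept in the result block, so the rebuilt constant column must preserve the original Nullable type instead of being recreated as a plain UInt8.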
--- .../0_stateless/01305_nullable-prewhere_bug.reference | 1 + tests/queries/0_stateless/01305_nullable-prewhere_bug.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/01305_nullable-prewhere_bug.reference create mode 100644 tests/queries/0_stateless/01305_nullable-prewhere_bug.sql diff --git a/tests/queries/0_stateless/01305_nullable-prewhere_bug.reference b/tests/queries/0_stateless/01305_nullable-prewhere_bug.reference new file mode 100644 index 00000000000..bd81ae06cec --- /dev/null +++ b/tests/queries/0_stateless/01305_nullable-prewhere_bug.reference @@ -0,0 +1 @@ +some_field_value 1 diff --git a/tests/queries/0_stateless/01305_nullable-prewhere_bug.sql b/tests/queries/0_stateless/01305_nullable-prewhere_bug.sql new file mode 100644 index 00000000000..35d3762660f --- /dev/null +++ b/tests/queries/0_stateless/01305_nullable-prewhere_bug.sql @@ -0,0 +1,5 @@ +drop table if exists data; +CREATE TABLE data (ts DateTime, field String, num_field Nullable(Float64)) ENGINE = MergeTree() PARTITION BY ts ORDER BY ts; +insert into data values(toDateTime('2020-05-14 02:08:00'),'some_field_value',7.); +SELECT field, countIf(num_field > 6.0) FROM data PREWHERE (num_field>6.0) GROUP BY field; +drop table if exists data; From 11c79e13718f5411bbc6ef996f80a8713eb1b380 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2020 20:53:33 +0300 Subject: [PATCH 0568/2229] Fix JSON in clickhouse-benchmark; add test --- programs/benchmark/Benchmark.cpp | 4 ++-- tests/queries/0_stateless/01306_benchmark_json.reference | 1 + tests/queries/0_stateless/01306_benchmark_json.sh | 9 +++++++++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01306_benchmark_json.reference create mode 100755 tests/queries/0_stateless/01306_benchmark_json.sh diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 1b2867940ea..e17320b39ea 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -469,7 +469,7 @@ private: const auto & info = infos[i]; json_out << double_quote << connections[i]->getDescription() << ": {\n"; - json_out << double_quote << "statistics: {\n"; + json_out << double_quote << "statistics" << ": {\n"; print_key_value("QPS", info->queries / info->work_time); print_key_value("RPS", info->read_rows / info->work_time); @@ -479,7 +479,7 @@ private: print_key_value("num_queries", info->queries.load(), false); json_out << "},\n"; - json_out << double_quote << "query_time_percentiles: {\n"; + json_out << double_quote << "query_time_percentiles" << ": {\n"; for (int percent = 0; percent <= 90; percent += 10) print_percentile(*info, percent); diff --git a/tests/queries/0_stateless/01306_benchmark_json.reference b/tests/queries/0_stateless/01306_benchmark_json.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01306_benchmark_json.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01306_benchmark_json.sh b/tests/queries/0_stateless/01306_benchmark_json.sh new file mode 100755 index 00000000000..3b9a9b93180 --- /dev/null +++ b/tests/queries/0_stateless/01306_benchmark_json.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_BENCHMARK --iterations 10 --json ${CLICKHOUSE_TMP}/out.json <<< "SELECT 1" 2>/dev/null && cat ${CLICKHOUSE_TMP}/out.json | + $CLICKHOUSE_LOCAL --input-format JSONAsString --structure "s String" --query "SELECT isValidJSON(s) FROM table" From 615ab071d39162f6fe0e6a429cf79c42020fcc56 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 11 Jun 2020 20:57:03 +0300 Subject: [PATCH 0569/2229] Update Client.cpp --- programs/client/Client.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 12db1dc0225..73148e3247c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1920,6 +1920,7 @@ public: std::string text = e.displayText(); std::cerr << "Code: " << e.code() << ". " << text << std::endl; std::cerr << "Table №" << i << std::endl << std::endl; + /// Avoid the case when the error exit code can wrap around to the normal (zero) value. auto exit_code = e.code() % 256; if (exit_code == 0) exit_code = 255; From e690d0a3ea3b1811517ffbd2de3c4227c4ace417 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 11 Jun 2020 20:57:23 +0300 Subject: [PATCH 0570/2229] Update Client.cpp --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 73148e3247c..7808120d09e 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1925,7 +1925,6 @@ public: if (exit_code == 0) exit_code = 255; exit(exit_code); - } } From 176ebb26e0db3555efbfbf6554b327be5f352263 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jun 2020 20:51:14 +0300 Subject: [PATCH 0571/2229] Prepend slash to path for system.zookeeper queries The path should start with '/', otherwise ZBADARGUMENTS will be thrown in ZooKeeper::sendThread and the session will fail. --- src/Storages/System/StorageSystemZooKeeper.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c3f1d8a8505..e1e960cd769 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -112,8 +112,13 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Contex zkutil::ZooKeeperPtr zookeeper = context.getZooKeeper(); + String path_corrected; + /// The path should start with '/', otherwise ZBADARGUMENTS will be thrown in + /// ZooKeeper::sendThread and the session will fail. + if (path[0] != '/') + path_corrected = '/'; + path_corrected += path; /// In all cases except the root, path must not end with a slash.
- String path_corrected = path; if (path_corrected != "/" && path_corrected.back() == '/') path_corrected.resize(path_corrected.size() - 1); From bbeb768a1952541a895b51ecb70eee5dd4532224 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Thu, 11 Jun 2020 21:12:48 +0300 Subject: [PATCH 0572/2229] use the sentry logger hook --- base/daemon/SentryWriter.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 45f5bd56ca1..bb176db813c 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -13,6 +13,7 @@ #if USE_SENTRY # include // Y_IGNORE +# include #endif @@ -39,6 +40,33 @@ void setExtras() sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); } + +void sentry_logger(sentry_level_t level, const char * message, va_list args) +{ + auto * logger = &Poco::Logger::get("SentryWriter"); + size_t size = 1024; + char buffer[size]; + if (vsnprintf(buffer, size, message, args) >= 0) { + switch (level) { + case SENTRY_LEVEL_DEBUG: + logger->debug(buffer); + break; + case SENTRY_LEVEL_INFO: + logger->information(buffer); + break; + case SENTRY_LEVEL_WARNING: + logger->warning(buffer); + break; + case SENTRY_LEVEL_ERROR: + logger->error(buffer); + break; + case SENTRY_LEVEL_FATAL: + logger->fatal(buffer); + break; + } + } +} +} } #endif @@ -65,6 +93,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) sentry_options_t * options = sentry_options_new(); sentry_options_set_release(options, VERSION_STRING); + sentry_options_set_logger(options, &sentry_logger); if (debug) { sentry_options_set_debug(options, 1); From 7884c8ef2c9d60add30ed18784f10c34a9d725e6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 11 Jun 2020 21:55:23 +0300 Subject: [PATCH 0573/2229] Use lock_guard over unique_lock in SystemLog --- src/Interpreters/SystemLog.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index e49ce574478..81a28a7edd5 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -217,7 +217,7 @@ SystemLog::SystemLog(Context & context_, template void SystemLog::startup() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); saving_thread = ThreadFromGlobalPool([this] { savingThreadFunction(); }); } @@ -231,7 +231,7 @@ void SystemLog::add(const LogElement & element) /// Otherwise the tests like 01017_uniqCombined_memory_usage.sql will be flacky. 
auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (is_shutdown) return; @@ -307,7 +307,7 @@ template void SystemLog::stopFlushThread() { { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (!saving_thread.joinable()) { @@ -420,7 +420,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, } { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); flushed_before = to_flush_end; flush_event.notify_all(); } From 540f8532cb90b0b99c9168ee569cb26f1e5fd086 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 Jun 2020 22:59:15 +0300 Subject: [PATCH 0574/2229] Fix error --- src/Storages/StorageReplicatedMergeTree.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb12bf98481..809536f7452 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -564,8 +564,7 @@ void StorageReplicatedMergeTree::createReplica() Coordination::Stat replicas_stat; String replicas_value; - code = zookeeper->tryGet(zookeeper_path + "/replicas", replicas_value, &replicas_stat); - if (code == Coordination::ZNONODE) + if (!zookeeper->tryGet(zookeeper_path + "/replicas", replicas_value, &replicas_stat)) throw Exception(fmt::format("Cannot create a replica of the table {}, because the last replica of the table was dropped right now", zookeeper_path), ErrorCodes::ALL_REPLICAS_LOST); From 103eb17107e24704e6100eab4760a38eb3a21286 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 21:11:08 +0300 Subject: [PATCH 0575/2229] Force table creation on SYSTEM FLUSH LOGS (v2) After this patch SYSTEM FLUSH LOGS will force system.*_log table creations (only for enabled tables of course). This avoids hacks like: set log_queries=1; select 1; system flush logs; This is the second version of the patch, since the first one had a deadlock [1] (reported by @alesapin). This version does not use a separate lock and does not issue CREATE TABLE from the query execution thread; instead it activates the flushing thread to do so, hence the behavior is the same as if the table were created during a flush. [1]: https://gist.github.com/alesapin/d915d1deaa27d49aa31223daded02be2 Fixes: #11563 Cc: @alesapin Cc: @nikitamikhaylov --- docs/en/sql-reference/statements/system.md | 1 + docs/ru/sql-reference/statements/system.md | 1 + src/Interpreters/InterpreterSystemQuery.cpp | 14 ++--- src/Interpreters/SystemLog.h | 33 +++++++---- .../test_SYSTEM_FLUSH_LOGS/test.py | 58 +++++++++++++++++++ 5 files changed, 90 insertions(+), 17 deletions(-) create mode 100644 tests/integration/test_SYSTEM_FLUSH_LOGS/test.py diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index e4823686c68..f22436a0cae 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -79,6 +79,7 @@ Compiled expression cache used when query/user/profile enable option [compile](. ## FLUSH LOGS {#query_language-system-flush_logs} Flushes buffers of log messages to system tables (e.g. system.query\_log). Allows you to not wait 7.5 seconds when debugging. +This will also create the system tables even if the message queue is empty.
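+ +For example, this makes `system.query_log` queryable right away: + +``` sql +SET log_queries = 1; +SELECT 1; +SYSTEM FLUSH LOGS; +```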
## RELOAD CONFIG {#query_language-system-reload-config} diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index 1b66fa039d9..a91a7edad82 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -74,6 +74,7 @@ SELECT name, status FROM system.dictionaries; ## FLUSH LOGS {#query_language-system-flush_logs} Записывает буферы логов в системные таблицы (например system.query\_log). Позволяет не ждать 7.5 секунд при отладке. +Если буфер логов пустой, то этот запрос просто создаст системные таблицы. ## RELOAD CONFIG {#query_language-system-reload-config} diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index f35bafbe25a..a11e0b8feb2 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -302,13 +302,13 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_LOGS: context.checkAccess(AccessType::SYSTEM_FLUSH_LOGS); executeCommandsAndThrowIfError( - [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(); }, - [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(); }, - [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(); }, - [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(); }, - [&] () { if (auto text_log = context.getTextLog()) text_log->flush(); }, - [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(); }, - [&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(); } + [&] () { if (auto query_log = context.getQueryLog()) query_log->flush(true); }, + [&] () { if (auto part_log = context.getPartLog("")) part_log->flush(true); }, + [&] () { if (auto query_thread_log = context.getQueryThreadLog()) query_thread_log->flush(true); }, + [&] () { if (auto trace_log = context.getTraceLog()) trace_log->flush(true); }, + [&] () { if (auto text_log = context.getTextLog()) text_log->flush(true); }, + [&] () { if (auto metric_log = context.getMetricLog()) metric_log->flush(true); }, + [&] () { if (auto asynchronous_metric_log = context.getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); } ); break; case Type::STOP_LISTEN_QUERIES: diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 81a28a7edd5..0e1ddc09644 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -77,7 +77,8 @@ class ISystemLog public: virtual String getName() = 0; virtual ASTPtr getCreateTableQuery() = 0; - virtual void flush() = 0; + //// force -- force table creation (used for SYSTEM FLUSH LOGS) + virtual void flush(bool force = false) = 0; virtual void prepareTable() = 0; virtual void startup() = 0; virtual void shutdown() = 0; @@ -136,7 +137,7 @@ public: void stopFlushThread(); /// Flush data in the buffer to disk - void flush() override; + void flush(bool force = false) override; /// Start the background thread. void startup() override; @@ -177,6 +178,7 @@ private: // synchronous log flushing for SYSTEM FLUSH LOGS. 
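+ // The handshake, as implemented in this patch: flush() advances requested_flush_before + // (and raises is_force_prepare_tables when forced), then waits on flush_event until the + // saving thread has either flushed the queue or, for a forced flush with an empty queue, + // just created the table; both paths clear the force flag before notifying.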
uint64_t queue_front_index = 0; bool is_shutdown = false; + bool is_force_prepare_tables = false; std::condition_variable flush_event; // Requested to flush logs up to this index, exclusive uint64_t requested_flush_before = 0; @@ -275,7 +277,7 @@ void SystemLog::add(const LogElement & element) template -void SystemLog::flush() +void SystemLog::flush(bool force) { std::unique_lock lock(mutex); @@ -284,7 +286,8 @@ void SystemLog::flush() const uint64_t queue_end = queue_front_index + queue.size(); - if (requested_flush_before < queue_end) + is_force_prepare_tables = force; + if (requested_flush_before < queue_end || force) { requested_flush_before = queue_end; flush_event.notify_all(); @@ -293,7 +296,7 @@ void SystemLog::flush() // Use an arbitrary timeout to avoid endless waiting. const int timeout_seconds = 60; bool result = flush_event.wait_for(lock, std::chrono::seconds(timeout_seconds), - [&] { return flushed_before >= queue_end; }); + [&] { return flushed_before >= queue_end && !is_force_prepare_tables; }); if (!result) { @@ -350,8 +353,7 @@ void SystemLog::savingThreadFunction() std::chrono::milliseconds(flush_interval_milliseconds), [&] () { - return requested_flush_before > flushed_before - || is_shutdown; + return requested_flush_before > flushed_before || is_shutdown || is_force_prepare_tables; } ); @@ -367,10 +369,20 @@ void SystemLog::savingThreadFunction() if (to_flush.empty()) { - continue; - } + if (is_force_prepare_tables) + { + prepareTable(); + LOG_TRACE(log, "Table created (force)"); - flushImpl(to_flush, to_flush_end); + std::lock_guard lock(mutex); + is_force_prepare_tables = false; + flush_event.notify_all(); + } + } + else + { + flushImpl(to_flush, to_flush_end); + } } catch (...) { @@ -422,6 +434,7 @@ void SystemLog::flushImpl(const std::vector & to_flush, { std::lock_guard lock(mutex); flushed_before = to_flush_end; + is_force_prepare_tables = false; flush_event.notify_all(); } diff --git a/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py b/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py new file mode 100644 index 00000000000..7e8f2000bca --- /dev/null +++ b/tests/integration/test_SYSTEM_FLUSH_LOGS/test.py @@ -0,0 +1,58 @@ +# pylint: disable=line-too-long +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node_default') + +system_logs = [ + # disabled by default + ('system.part_log', 0), + ('system.text_log', 0), + + # enabled by default + ('system.query_log', 1), + ('system.query_thread_log', 1), + ('system.trace_log', 1), + ('system.metric_log', 1), +] + +# Default timeout for flush is 60 +# decrease timeout for the test to show possible issues. +timeout = pytest.mark.timeout(30) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +@pytest.fixture(scope='function') +def flush_logs(): + node.query('SYSTEM FLUSH LOGS') + +@timeout +@pytest.mark.parametrize('table,exists', system_logs) +def test_system_logs(flush_logs, table, exists): + q = 'SELECT * FROM {}'.format(table) + if exists: + node.query(q) + else: + assert "Table {} doesn't exist".format(table) in node.query_and_get_error(q) + +# Logic is tricky, let's check that there is no hang in case of message queue +# is not empty (this is another code path in the code). 
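+# With a non-empty queue the flush goes through flushImpl(); with an empty queue a forced +# flush only calls prepareTable() from the saving thread. Either way is_force_prepare_tables +# must be cleared, otherwise SYSTEM FLUSH LOGS would hang until its 60-second wait expires +# (which is why the test timeout above is lowered to 30 seconds).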
+@timeout +def test_system_logs_non_empty_queue(): + node.query('SELECT 1', settings={ + # right now defaults are the same, + # this is set explicitly to avoid depending on the defaults. + 'log_queries': 1, + 'log_queries_min_type': 'QUERY_START', + }) + node.query('SYSTEM FLUSH LOGS') From d840a9990c3d70588eb1b85dcbe02898379d3b97 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Thu, 11 Jun 2020 23:39:15 +0300 Subject: [PATCH 0576/2229] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 5afaf725c50..06036405447 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -87,7 +87,7 @@ then # tests for use by compare.sh. Compare to merge base, because master might be # far in the future and have unrelated test changes. base=$(git -C ch merge-base "$SHA_TO_TEST" master) - git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt + git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt if grep -vq '^tests/performance' changed-tests.txt then # Have some other changes besides the tests, so truncate the test list, From 4eb8a1cfef543a83b3f8724ed6da6f0186473b34 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jun 2020 00:03:25 +0300 Subject: [PATCH 0577/2229] Fix gcc build --- src/Storages/StorageReplicatedMergeTree.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4b93d31d596..3ed290a69b3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -482,7 +482,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() LOG_DEBUG(log, "Creating table {}", zookeeper_path); /// We write metadata of table so that the replicas can check table parameters with them.
- String metadata = ReplicatedMergeTreeTableMetadata(*this).toString(); + String metadata_str = ReplicatedMergeTreeTableMetadata(*this).toString(); Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent)); @@ -491,7 +491,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/dropped", -1)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata, + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(), zkutil::CreateMode::Persistent)); @@ -526,7 +526,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", "0", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", metadata, + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), zkutil::CreateMode::Persistent)); From 7ba5063b7a6c80ea07ec30b473e329bf11c93879 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 12 Jun 2020 00:24:56 +0300 Subject: [PATCH 0578/2229] Add concurrent benchmark to performance test After the main test, run queries from `website.xml` in parallel using `clickhouse-benchmark`. This can be useful to test the effects of concurrency on performance. Comparison test can miss some effects because it always runs queries sequentially, and many of them are even single-threaded. --- docker/test/performance-comparison/compare.sh | 17 +++ docker/test/performance-comparison/perf.py | 100 ++++++++---------- docker/test/performance-comparison/report.py | 31 ++++++ 3 files changed, 90 insertions(+), 58 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index a2760907cb3..3d49e9e841a 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -161,6 +161,20 @@ function run_tests wait } +# Run some queries concurrently and report the resulting TPS. This additional +# (relatively) short test helps detect concurrency-related effects, because the +# main performance comparison testing is done query-by-query. 
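+# The query list comes from the new perf.py --print mode, and the two JSON files written +# by clickhouse-benchmark expose statistics.QPS, which report.py picks up for the +# 'Concurrent benchmarks' table.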
+function run_benchmark +{ + rm -rf benchmark ||: + mkdir benchmark ||: + + # TODO disable this when there is an explicit list of tests to run + "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv + clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json < benchmark/website-queries.tsv + clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json < benchmark/website-queries.tsv +} + function get_profiles_watchdog { sleep 6000 @@ -716,6 +730,9 @@ case "$stage" in # Ignore the errors to collect the log and build at least some report, anyway time run_tests ||: ;& +"run_benchmark") + time run_benchmark 2> >(tee -a run-errors.tsv 1>&2) ||: + ;& "get_profiles") # Getting profiles inexplicably hangs sometimes, so try to save some logs if # this happens again. Give the servers some time to collect all info, then diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 308d4760b48..74d0300b074 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -14,22 +14,14 @@ import traceback def tsv_escape(s): return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') -stage_start_seconds = time.perf_counter() - -def report_stage_end(stage_name): - global stage_start_seconds - print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds)) - stage_start_seconds = time.perf_counter() - -report_stage_end('start') - parser = argparse.ArgumentParser(description='Run performance test.') # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.") parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.") -parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.') -parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.') +parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 17)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.') +parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') +parser.add_argument('--print', action='store_true', help='Print test queries and exit.') args = parser.parse_args() test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] tree = et.parse(args.file[0]) root = tree.getroot() -# Skip long tests -for tag in root.findall('.//tag'): - if tag.text == 'long': - print('skipped\tTest is tagged as long.') - sys.exit(0) - -# Check main metric -main_metric_element = root.find('main_metric/*') -if main_metric_element is not None and main_metric_element.tag != 'min_time': - raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag)) - -# FIXME another way to detect infinite tests.
They should have an appropriate main_metric but sometimes they don't. -infinite_sign = root.find('.//average_speed_not_changing_for_ms') -if infinite_sign is not None: - raise Exception('Looks like the test is infinite (sign 1)') - -# Print report threshold for the test if it is set. -if 'max_ignored_relative_change' in root.attrib: - print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}') - -# Open connections -servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] -connections = [clickhouse_driver.Client(**server) for server in servers] - -for s in servers: - print('server\t{}\t{}'.format(s['host'], s['port'])) - -report_stage_end('connect') - # Process query parameters subst_elems = root.findall('substitutions/substitution') available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } @@ -84,7 +47,45 @@ def substitute_parameters(query_templates): for values_combo in itertools.product(*values)]) return result -report_stage_end('substitute') +# Build a list of test queries, processing all substitutions +test_query_templates = [q.text for q in root.findall('query')] +test_queries = substitute_parameters(test_query_templates) + +# If we're only asked to print the queries, do that and exit +if args.print: + for q in test_queries: + print(q) + exit(0) + +# Skip long tests +if not args.long: + for tag in root.findall('.//tag'): + if tag.text == 'long': + print('skipped\tTest is tagged as long.') + sys.exit(0) + +# Check main metric to detect infinite tests. We shouldn't have such tests anymore, +# but we did in the past, and it is convenient to be able to process old tests. +main_metric_element = root.find('main_metric/*') +if main_metric_element is not None and main_metric_element.tag != 'min_time': + raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag)) + +# Another way to detect infinite tests. They should have an appropriate main_metric +# but sometimes they don't. +infinite_sign = root.find('.//average_speed_not_changing_for_ms') +if infinite_sign is not None: + raise Exception('Looks like the test is infinite (sign 1)') + +# Print report threshold for the test if it is set. +if 'max_ignored_relative_change' in root.attrib: + print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}') + +# Open connections +servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)] +connections = [clickhouse_driver.Client(**server) for server in servers] + +for s in servers: + print('server\t{}\t{}'.format(s['host'], s['port'])) # Run drop queries, ignoring errors. Do this before all other activity, because # clickhouse_driver disconnects on error (this is not configurable), and the new @@ -98,8 +99,6 @@ for c in connections: except: pass -report_stage_end('drop1') - # Apply settings. # If there are errors, report them and continue -- maybe a new test uses a setting # that is not in master, but the queries can still run. If we have multiple @@ -115,8 +114,6 @@ for c in connections: except: print(traceback.format_exc(), file=sys.stderr) -report_stage_end('settings') - # Check tables that should exist. If they don't exist, just skip this test. 
tables = [e.text for e in root.findall('preconditions/table_exists')] for t in tables: @@ -129,8 +126,6 @@ for t in tables: print(f'skipped\t{tsv_escape(skipped_message)}') sys.exit(0) -report_stage_end('preconditions') - # Run create queries create_query_templates = [q.text for q in root.findall('create_query')] create_queries = substitute_parameters(create_query_templates) @@ -145,14 +140,7 @@ for c in connections: for q in fill_queries: c.execute(q) -report_stage_end('fill') - # Run test queries -test_query_templates = [q.text for q in root.findall('query')] -test_queries = substitute_parameters(test_query_templates) - -report_stage_end('substitute2') - for query_index, q in enumerate(test_queries): query_prefix = f'{test_name}.query{query_index}' @@ -199,13 +187,9 @@ for query_index, q in enumerate(test_queries): client_seconds = time.perf_counter() - start_seconds print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') -report_stage_end('benchmark') - # Run drop queries drop_query_templates = [q.text for q in root.findall('drop_query')] drop_queries = substitute_parameters(drop_query_templates) for c in connections: for q in drop_queries: c.execute(q) - -report_stage_end('drop2') diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 9db37932aea..d7e30190aef 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -5,6 +5,7 @@ import ast import collections import csv import itertools +import json import os import sys import traceback @@ -321,6 +322,36 @@ if args.report == 'main': print_test_times() + def print_benchmark_results(): + left_json = json.load(open('benchmark/website-left.json')); + right_json = json.load(open('benchmark/website-right.json')); + left_qps = left_json["statistics"]["QPS"] + right_qps = right_json["statistics"]["QPS"] + relative_diff = (right_qps - left_qps) / left_qps; + times_diff = max(right_qps, left_qps) / max(0.01, min(right_qps, left_qps)) + print(tableStart('Concurrent benchmarks')) + print(tableHeader(['Benchmark', 'Old, queries/s', 'New, queries/s', 'Relative difference', 'Times difference'])) + row = ['website', f'{left_qps:.3f}', f'{right_qps:.3f}', f'{relative_diff:.3f}', f'x{times_diff:.3f}'] + attrs = ['' for r in row] + if abs(relative_diff) > 0.1: + # More queries per second is better. 
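+ # so color the relative difference cell green when the new server is + # faster and red when it is slower; changes within 10% are left unstyled.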
+ if relative_diff > 0.: + attrs[3] = f'style="background: {color_good}"' + else: + attrs[3] = f'style="background: {color_bad}"' + else: + attrs[3] = '' + print(tableRow(row, attrs)) + print(tableEnd()) + + try: + print_benchmark_results() + except: + report_errors.append( + traceback.format_exception_only( + *sys.exc_info()[:2])[-1]) + pass + print_report_errors() print(""" From 7fb8a985281f9c663817d32d7939f61f3e258217 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 12 Jun 2020 00:33:30 +0300 Subject: [PATCH 0579/2229] increase number of rows --- tests/performance/polymorphic_parts_l.xml | 8 ++++---- tests/performance/polymorphic_parts_m.xml | 8 ++++---- tests/performance/polymorphic_parts_s.xml | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/performance/polymorphic_parts_l.xml b/tests/performance/polymorphic_parts_l.xml index 75ad857c9a8..acda0de281a 100644 --- a/tests/performance/polymorphic_parts_l.xml +++ b/tests/performance/polymorphic_parts_l.xml @@ -24,10 +24,10 @@ 1000 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50000) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_m.xml b/tests/performance/polymorphic_parts_m.xml index fbe0c18d07e..a9842496de0 100644 --- a/tests/performance/polymorphic_parts_m.xml +++ b/tests/performance/polymorphic_parts_m.xml @@ -24,10 +24,10 @@ 100 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(5000) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(5000) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(5000) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(10000) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(10000) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact diff --git a/tests/performance/polymorphic_parts_s.xml b/tests/performance/polymorphic_parts_s.xml index 085295af842..3b9eea91b1d 100644 --- a/tests/performance/polymorphic_parts_s.xml +++ b/tests/performance/polymorphic_parts_s.xml @@ -24,10 +24,10 @@ 1 - - INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(50) - INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(50) - INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(50) + + INSERT INTO hits_wide(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_compact(UserID) SELECT rand() FROM numbers(100) + INSERT INTO hits_buffer(UserID) SELECT rand() FROM numbers(100) DROP TABLE IF EXISTS hits_wide DROP TABLE IF EXISTS hits_compact From 10a8021418a854ee8b04a9e5b2358d30dea26be2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 12 Jun 2020 00:58:25 +0300 Subject: [PATCH 0580/2229] performance comparison --- docker/test/performance-comparison/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 06036405447..fb3c80ba8a1 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -131,5 +131,5 @@ done dmesg -T > dmesg.log 
-7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze +7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze benchmark cp compare.log /output From e92641858e6fdf132c4b1f9ecf401fc985d2693e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 12 Jun 2020 01:00:35 +0300 Subject: [PATCH 0581/2229] fixes --- base/daemon/SentryWriter.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index bb176db813c..d7f08864e96 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -46,8 +46,10 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args) auto * logger = &Poco::Logger::get("SentryWriter"); size_t size = 1024; char buffer[size]; - if (vsnprintf(buffer, size, message, args) >= 0) { - switch (level) { + if (vsnprintf(buffer, size, message, args) >= 0) + { + switch (level) + { case SENTRY_LEVEL_DEBUG: logger->debug(buffer); break; @@ -67,7 +69,6 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args) } } } -} #endif void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) From 395ef1ecafff89a880bd47c2b11aa12b42e52c03 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 12 Jun 2020 09:35:31 +0300 Subject: [PATCH 0582/2229] experiment --- base/daemon/SentryWriter.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index d7f08864e96..88639d8bf94 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -46,8 +46,15 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args) auto * logger = &Poco::Logger::get("SentryWriter"); size_t size = 1024; char buffer[size]; +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wformat-nonliteral" +#endif if (vsnprintf(buffer, size, message, args) >= 0) { +#ifdef __clang__ +#pragma clang diagnostic pop +#endif switch (level) { case SENTRY_LEVEL_DEBUG: From 465c4b65b720ca0262eb42c6fee6c4fe4f48bace Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jun 2020 12:37:52 +0300 Subject: [PATCH 0583/2229] Slightly better interfaces and comments --- src/Storages/AlterCommands.cpp | 23 ++++----- src/Storages/KeyDescription.cpp | 51 +++++++++++++++---- src/Storages/KeyDescription.h | 28 ++++++++-- src/Storages/MergeTree/MergeTreeData.cpp | 6 +-- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 33 +++++++++--- src/Storages/StorageInMemoryMetadata.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 11 ++-- 8 files changed, 118 insertions(+), 37 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 52f34d1fdd5..8e8b308ac3d 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -317,14 +317,18 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con } else if (type == MODIFY_ORDER_BY) { - if (metadata.primary_key.definition_ast == nullptr && metadata.sorting_key.definition_ast != nullptr) + auto & sorting_key = metadata.sorting_key; + auto & primary_key = metadata.primary_key; + if (primary_key.definition_ast == nullptr && sorting_key.definition_ast != nullptr) { - /// Primary and sorting key become independent after this ALTER so we have 
to - /// save the old ORDER BY expression as the new primary key. - metadata.primary_key = metadata.sorting_key; + /// Primary and sorting key become independent after this ALTER so + /// we have to save the old ORDER BY expression as the new primary + /// key. + primary_key = KeyDescription::getKeyFromAST(sorting_key.definition_ast, metadata.columns, context); } - metadata.sorting_key = KeyDescription::getKeyFromAST(order_by, metadata.columns, context, metadata.sorting_key.additional_key_column); + /// Recalculate key with new order_by expression + sorting_key.recalculateWithNewAST(order_by, metadata.columns, context); } else if (type == COMMENT_COLUMN) { @@ -713,15 +717,10 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co command.apply(metadata_copy, context); /// Changes in columns may lead to changes in keys expression - metadata_copy.sorting_key = KeyDescription::getKeyFromAST( - metadata_copy.sorting_key.definition_ast, - metadata_copy.columns, - context, - metadata_copy.sorting_key.additional_key_column); - + metadata_copy.sorting_key.recalculateWithNewColumns(metadata_copy.columns, context); if (metadata_copy.primary_key.definition_ast != nullptr) { - metadata_copy.primary_key = KeyDescription::getKeyFromAST(metadata_copy.primary_key.definition_ast, metadata_copy.columns, context); + metadata_copy.primary_key.recalculateWithNewColumns(metadata_copy.columns, context); } else { diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index b1e74db2c58..4eb18320ad9 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -10,6 +11,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + KeyDescription::KeyDescription(const KeyDescription & other) : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr) @@ -17,7 +23,7 @@ KeyDescription::KeyDescription(const KeyDescription & other) , sample_block(other.sample_block) , column_names(other.column_names) , data_types(other.data_types) - , additional_key_column(other.additional_key_column ? other.additional_key_column->clone() : nullptr) + , additional_column(other.additional_column) { } @@ -37,27 +43,54 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) sample_block = other.sample_block; column_names = other.column_names; data_types = other.data_types; - if (other.additional_key_column) - additional_key_column = other.additional_key_column->clone(); - else - additional_key_column.reset(); + + /// additional_column is constant property It should never be lost. 
+ if (additional_column.has_value() && !other.additional_column.has_value()) + throw Exception("Wrong key assignment, loosing additional_column", ErrorCodes::LOGICAL_ERROR); + additional_column = other.additional_column; return *this; } +void KeyDescription::recalculateWithNewAST( + const ASTPtr & new_ast, + const ColumnsDescription & columns, + const Context & context) +{ + *this = getSortingKeyFromAST(new_ast, columns, context, additional_column); +} + +void KeyDescription::recalculateWithNewColumns( + const ColumnsDescription & new_columns, + const Context & context) +{ + *this = getSortingKeyFromAST(definition_ast, new_columns, context, additional_column); +} + KeyDescription KeyDescription::getKeyFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context) +{ + return getSortingKeyFromAST(definition_ast, columns, context, {}); +} + +KeyDescription KeyDescription::getSortingKeyFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, - const ASTPtr & additional_key_column) + const std::optional & additional_column) { KeyDescription result; result.definition_ast = definition_ast; - result.additional_key_column = additional_key_column; result.expression_list_ast = extractKeyExpressionList(definition_ast); - if (additional_key_column != nullptr) - result.expression_list_ast->children.push_back(additional_key_column); + if (additional_column) + { + result.additional_column = additional_column; + ASTPtr column_identifier = std::make_shared(*additional_column); + result.expression_list_ast->children.push_back(column_identifier); + } const auto & children = result.expression_list_ast->children; for (const auto & child : children) diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index 97f48d435b2..7d1e7efb55f 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -30,16 +30,38 @@ struct KeyDescription /// Types from sample block ordered in columns order. DataTypes data_types; - /// Additional key column added by storage type - ASTPtr additional_key_column; + /// Additional key column added by storage type. Never changes after + /// initialization with non empty value. Doesn't stored in definition_ast, + /// but added to expression_list_ast and all its derivatives. + std::optional additional_column; /// Parse key structure from key definition. Requires all columns, available /// in storage. static KeyDescription getKeyFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context); + + /// Sorting key can contain additional column defined by storage type (like + /// Version column in VersionedCollapsingMergeTree). + static KeyDescription getSortingKeyFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, - const ASTPtr & additional_key_column = nullptr); + const std::optional & additional_column); + + /// Recalculate all expressions and fields for key with new columns without + /// changes in constant fields. Just wrapper for static methods. + void recalculateWithNewColumns( + const ColumnsDescription & new_columns, + const Context & context); + + /// Recalculate all expressions and fields for key with new ast without + /// changes in constant fields. Just wrapper for static methods. 
+ void recalculateWithNewAST( + const ASTPtr & new_ast, + const ColumnsDescription & columns, + const Context & context); KeyDescription() = default; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index f8e14048265..7f8de6f3856 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -483,7 +483,7 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) } -void MergeTreeData::checkTTLExpressios(const StorageInMemoryMetadata & new_metadata) const +void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_metadata) const { auto new_column_ttls = new_metadata.column_ttls_by_name; @@ -527,7 +527,7 @@ void MergeTreeData::checkTTLExpressios(const StorageInMemoryMetad /// Todo replace columns with TTL for columns void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata) { - checkTTLExpressios(new_metadata); + checkTTLExpressions(new_metadata); setColumnTTLs(new_metadata.column_ttls_by_name); auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); setTableTTLs(new_metadata.table_ttl); @@ -1339,7 +1339,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S checkProperties(new_metadata); - checkTTLExpressios(new_metadata); + checkTTLExpressions(new_metadata); if (hasSettingsChanges()) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 39bc4c78efe..a101e0645e3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -783,7 +783,7 @@ protected: void initPartitionKey(const KeyDescription & new_partition_key); - void checkTTLExpressios(const StorageInMemoryMetadata & new_metadata) const; + void checkTTLExpressions(const StorageInMemoryMetadata & new_metadata) const; void setTTLExpressions(const StorageInMemoryMetadata & new_metadata); void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 050d0790bb5..6dc3ad6645a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -417,7 +417,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) ++arg_num; } - ASTPtr merging_param_key_arg = nullptr; + /// This merging param may be used as part of the sorting key + std::optional merging_param_key_arg; if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) { @@ -482,7 +483,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) ErrorCodes::BAD_ARGUMENTS); --arg_cnt; - merging_param_key_arg = std::make_shared(merging_params.version_column); + /// Version collapsing is the only engine which adds an additional column to + /// the sorting key. + merging_param_key_arg = merging_params.version_column; } String date_column_name; @@ -498,6 +501,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (args.storage_def->partition_by) partition_by_key = args.storage_def->partition_by->ptr(); + /// Partition key may be undefined, but despite this we store its empty + /// value in the partition_key structure. MergeTree checks this case and uses a + /// single default partition with the name "all".
metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.context); if (!args.storage_def->order_by) "If you don't want this table to be sorted, use ORDER BY tuple()", ErrorCodes::BAD_ARGUMENTS); - metadata.sorting_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); + /// Get sorting key from engine arguments. + /// + /// NOTE: store merging_param_key_arg as additional key column. We do it + /// before storage creation. After that storage will just copy this + /// column if the sorting key is changed. + metadata.sorting_key = KeyDescription::getSortingKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context, merging_param_key_arg); + /// If the primary key is explicitly defined, get it from the AST if (args.storage_def->primary_key) { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context); } - else + else /// Otherwise copy it from the ORDER BY definition { metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->order_by->ptr(), metadata.columns, args.context); + /// and set its definition_ast to nullptr (so isPrimaryKeyDefined() + /// will return false but hasPrimaryKey() will return true). metadata.primary_key.definition_ast = nullptr; } @@ -564,10 +578,17 @@ static StoragePtr create(const StorageFactory::Arguments & args) ++arg_num; } - /// Now only two parameters remain - primary_key, index_granularity. - metadata.sorting_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context, merging_param_key_arg); + /// Get sorting key from engine arguments. + /// + /// NOTE: store merging_param_key_arg as additional key column. We do it + /// before storage creation. After that storage will just copy this + /// column if the sorting key is changed. + metadata.sorting_key = KeyDescription::getSortingKeyFromAST(engine_args[arg_num], metadata.columns, args.context, merging_param_key_arg); + /// In the old syntax the primary key always equals the sorting key.
         metadata.primary_key = KeyDescription::getKeyFromAST(engine_args[arg_num], metadata.columns, args.context);
+        /// But it is not explicitly defined, so we set its definition_ast
+        /// to nullptr.
         metadata.primary_key.definition_ast = nullptr;

         ++arg_num;
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp
index e83a41a3877..8d23bd7bccf 100644
--- a/src/Storages/StorageInMemoryMetadata.cpp
+++ b/src/Storages/StorageInMemoryMetadata.cpp
@@ -50,4 +50,5 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo

     return *this;
 }
+
 }
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 3ed290a69b3..4c385647f65 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -775,13 +775,18 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
             tuple->arguments->children = new_sorting_key_expr_list->children;
             order_by_ast = tuple;
         }
-        new_metadata.sorting_key = KeyDescription::getKeyFromAST(order_by_ast, new_metadata.columns, global_context, new_metadata.sorting_key.additional_key_column);

+        auto & sorting_key = new_metadata.sorting_key;
+        auto & primary_key = new_metadata.primary_key;

-        if (!isPrimaryKeyDefined())
+        sorting_key.recalculateWithNewAST(order_by_ast, new_metadata.columns, global_context);
+
+        if (primary_key.definition_ast == nullptr)
         {
             /// Primary and sorting key become independent after this ALTER so we have to
             /// save the old ORDER BY expression as the new primary key.
-            new_metadata.primary_key = getSortingKey();
+            auto old_sorting_key_ast = getSortingKey().definition_ast;
+            primary_key = KeyDescription::getKeyFromAST(
+                old_sorting_key_ast, new_metadata.columns, global_context);
         }
     }

From 80b765542ff1b97165e5426235d72fb3bcd042d7 Mon Sep 17 00:00:00 2001
From: zhang2014
Date: Fri, 12 Jun 2020 18:04:42 +0800
Subject: [PATCH 0584/2229] ISSUES-7572 support config default http handler

---
 src/Server/HTTPHandlerFactory.cpp       | 72 ++++++++++---------------
 src/Server/HTTPHandlerFactory.h         | 10 ++++
 src/Server/PrometheusRequestHandler.cpp | 14 +++++
 src/Server/ReplicasStatusHandler.cpp    |  8 +++
 src/Server/StaticRequestHandler.cpp     | 16 ++++++
 5 files changed, 77 insertions(+), 43 deletions(-)

diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp
index 2f00aa0aa72..ec75656a9a8 100644
--- a/src/Server/HTTPHandlerFactory.cpp
+++ b/src/Server/HTTPHandlerFactory.cpp
@@ -1,9 +1,5 @@
 #include "HTTPHandlerFactory.h"

-#include
-#include
-#include
-#include
 #include

 #include "HTTPHandler.h"
@@ -68,7 +64,8 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler
     return this;
 }

-static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix)
+static inline auto createHandlersFactoryFromConfig(
+    IServer & server, const std::string & name, const String & prefix, AsynchronousMetrics & async_metrics)
 {
     auto main_handler_factory = std::make_unique(name);

@@ -82,7 +79,17 @@ static inline auto createHandlersFactoryFromConfig(IServer & server, const std::
             const auto & handler_type = server.config().getString(prefix + "."
+ key + ".handler.type", ""); - if (handler_type == "static") + if (handler_type == "root") + addRootHandlerFactory(*main_handler_factory, server); + else if (handler_type == "ping") + addPingHandlerFactory(*main_handler_factory, server); + else if (handler_type == "defaults") + addDefaultHandlersFactory(*main_handler_factory, server, async_metrics); + else if (handler_type == "prometheus") + addPrometheusHandlerFactory(*main_handler_factory, server, async_metrics); + else if (handler_type == "replicas_status") + addReplicasStatusHandlerFactory(*main_handler_factory, server); + else if (handler_type == "static") main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key)); else if (handler_type == "dynamic_query_handler") main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key)); @@ -99,44 +106,23 @@ static inline auto createHandlersFactoryFromConfig(IServer & server, const std:: return main_handler_factory.release(); } -static const auto ping_response_expression = "Ok.\n"; -static const auto root_response_expression = "config://http_server_default_response"; - static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory( IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) { if (server.config().has("http_handlers")) - return createHandlersFactoryFromConfig(server, name, "http_handlers"); + return createHandlersFactoryFromConfig(server, name, "http_handlers", async_metrics); else { auto factory = std::make_unique(name); - auto root_handler = std::make_unique>(server, root_response_expression); - root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); - factory->addHandler(root_handler.release()); - - auto ping_handler = std::make_unique>(server, ping_response_expression); - ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); - factory->addHandler(ping_handler.release()); - - auto replicas_status_handler = std::make_unique>(server); - replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); - factory->addHandler(replicas_status_handler.release()); + addRootHandlerFactory(*factory, server); + addPingHandlerFactory(*factory, server); + addReplicasStatusHandlerFactory(*factory, server); + addPrometheusHandlerFactory(*factory, server, async_metrics); auto query_handler = std::make_unique>(server, "query"); query_handler->allowPostAndGetParamsRequest(); factory->addHandler(query_handler.release()); - - /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see below. 
- if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) - { - auto prometheus_handler = std::make_unique>( - server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); - prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); - factory->addHandler(prometheus_handler.release()); - } - return factory.release(); } } @@ -145,17 +131,9 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandle { auto factory = std::make_unique(name); - auto root_handler = std::make_unique>(server, root_response_expression); - root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); - factory->addHandler(root_handler.release()); - - auto ping_handler = std::make_unique>(server, ping_response_expression); - ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); - factory->addHandler(ping_handler.release()); - - auto replicas_status_handler = std::make_unique>(server); - replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); - factory->addHandler(replicas_status_handler.release()); + addRootHandlerFactory(*factory, server); + addPingHandlerFactory(*factory, server); + addReplicasStatusHandlerFactory(*factory, server); auto main_handler = std::make_unique>(server); main_handler->allowPostAndGetParamsRequest(); @@ -183,4 +161,12 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); } +void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) +{ + addRootHandlerFactory(factory, server); + addPingHandlerFactory(factory, server); + addReplicasStatusHandlerFactory(factory, server); + addPrometheusHandlerFactory(factory, server, async_metrics); +} + } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 273e337813e..ac3a7451338 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -103,6 +103,16 @@ private: std::function creator; }; +void addRootHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); + +void addPingHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); + +void addReplicasStatusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); + +void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics); + +void addPrometheusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics); + Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix); Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix); diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 43f39e36de8..0f5df54b002 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -12,6 +12,7 @@ #include #include +#include namespace DB @@ -40,4 +41,17 @@ void PrometheusRequestHandler::handleRequest( } } +void addPrometheusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) +{ + /// We check that prometheus handler will be served on current (default) port. 
+    /// Otherwise it will be created separately.
+    if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
+    {
+        auto prometheus_handler = std::make_unique>(
+            server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
+        prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
+        factory.addHandler(prometheus_handler.release());
+    }
+}
+
 }
diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp
index 57c97b0e4e0..9b3e00cc069 100644
--- a/src/Server/ReplicasStatusHandler.cpp
+++ b/src/Server/ReplicasStatusHandler.cpp
@@ -7,8 +7,10 @@
 #include
 #include

+#include
 #include
 #include
+#include

 namespace DB
@@ -104,5 +106,11 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
     }
 }

+void addReplicasStatusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server)
+{
+    auto replicas_status_handler = std::make_unique>(server);
+    replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
+    factory.addHandler(replicas_status_handler.release());
+}

 }
diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp
index 22f32e6a0e7..255e3cab5af 100644
--- a/src/Server/StaticRequestHandler.cpp
+++ b/src/Server/StaticRequestHandler.cpp
@@ -155,6 +155,22 @@ StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & exp
 {
 }

+void addRootHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server)
+{
+    static const auto root_response_expression = "config://http_server_default_response";
+
+    auto root_handler = std::make_unique>(server, root_response_expression);
+    root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
+    factory.addHandler(root_handler.release());
+}
+
+void addPingHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server)
+{
+    auto ping_handler = std::make_unique>(server, "Ok.\n");
+    ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
+    factory.addHandler(ping_handler.release());
+}
+
 Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix)
 {
     int status = server.config().getInt(config_prefix + ".handler.status", 200);

From 1c55aa03334f4c011c5c587468641b873ffcd83d Mon Sep 17 00:00:00 2001
From: zhang2014
Date: Fri, 12 Jun 2020 18:19:03 +0800
Subject: [PATCH 0585/2229] ISSUES-7572 add integration test

---
 .../test_http_handlers_config/test.py         | 20 +++++++++++++++++++
 .../test_custom_defaults_handlers/config.xml  | 10 ++++++++++
 .../test_defaults_handlers/config.xml         |  9 +++++++++
 3 files changed, 39 insertions(+)
 create mode 100644 tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml
 create mode 100644 tests/integration/test_http_handlers_config/test_defaults_handlers/config.xml

diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py
index 31d40bd8a1d..a38bd3ff343 100644
--- a/tests/integration/test_http_handlers_config/test.py
+++ b/tests/integration/test_http_handlers_config/test.py
@@ -113,3 +113,23 @@ def test_relative_path_static_handler():
         assert 'text/html; charset=UTF-8' == cluster.instance.http_request('test_get_relative_path_static_handler', method='GET', headers={'XXX': 'xxx'}).headers['Content-Type']
         assert 'Relative Path File\n' == cluster.instance.http_request('test_get_relative_path_static_handler', method='GET', headers={'XXX': 'xxx'}).content
+
+def test_defaults_http_handlers():
+    with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "defaults_handlers", "test_defaults_handlers")) as cluster:
+        assert 200 == cluster.instance.http_request('', method='GET').status_code
+        assert 'Default server response' == cluster.instance.http_request('', method='GET').content
+
+        assert 200 == cluster.instance.http_request('ping', method='GET').status_code
+        assert 'Ok.\n' == cluster.instance.http_request('ping', method='GET').content
+
+        assert 200 == cluster.instance.http_request('replicas_status', method='GET').status_code
+        assert 'Ok.\n' == cluster.instance.http_request('replicas_status', method='GET').content
+
+def test_custom_defaults_http_handlers():
+    with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "custom_defaults_handlers", "test_custom_defaults_handlers")) as cluster:
+        assert 200 == cluster.instance.http_request('', method='GET').status_code
+        assert 'Default server response' == cluster.instance.http_request('', method='GET').content
+
+        assert 200 == cluster.instance.http_request('ping', method='GET').status_code
+        assert 'Ok.\n' == cluster.instance.http_request('ping', method='GET').content
+
+        assert 404 == cluster.instance.http_request('replicas_status', method='GET').status_code
diff --git a/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml b/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml
new file mode 100644
index 00000000000..54008c2c4b8
--- /dev/null
+++ b/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml
@@ -0,0 +1,10 @@
+
+
+    Default server response
+
+
+
+
+
+
+
diff --git a/tests/integration/test_http_handlers_config/test_defaults_handlers/config.xml b/tests/integration/test_http_handlers_config/test_defaults_handlers/config.xml
new file mode 100644
index 00000000000..fd280e05cf4
--- /dev/null
+++ b/tests/integration/test_http_handlers_config/test_defaults_handlers/config.xml
@@ -0,0 +1,9 @@
+
+
+    Default server response
+
+
+
+
+
+
-The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and fird are true, etc. +The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and third are true, etc. **Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference.md b/docs/en/sql-reference/aggregate-functions/reference.md index 4c505a46fd1..baba1ce904e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference.md +++ b/docs/en/sql-reference/aggregate-functions/reference.md @@ -1805,7 +1805,7 @@ For more information see [parameters](#agg_functions-stochasticlinearregression- stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') ``` -1. Fitting +**1.** Fitting @@ -1813,7 +1813,7 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') Predicted labels have to be in \[-1, 1\]. -1. Predicting +**2.** Predicting diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index f214db20ea7..ce810f9b2a9 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -28,7 +28,7 @@ CREATE TABLE t ) ENGINE = ... ``` -[uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference.md) are the aggregate functions supported in ClickHouse. +[uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference.md#quantiles) are the aggregate functions supported in ClickHouse. ## Usage {#usage} diff --git a/docs/zh/sql-reference/aggregate-functions/combinators.md b/docs/zh/sql-reference/aggregate-functions/combinators.md index c5c3e8a9577..c458097a5fb 100644 --- a/docs/zh/sql-reference/aggregate-functions/combinators.md +++ b/docs/zh/sql-reference/aggregate-functions/combinators.md @@ -1,51 +1,49 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 37 -toc_title: "\u7EC4\u5408\u5668" +toc_title: 聚合函数组合器 --- # 聚合函数组合器 {#aggregate_functions_combinators} 聚合函数的名称可以附加一个后缀。 这改变了聚合函数的工作方式。 -## -如果 {#agg-functions-combinator-if} +## -If {#agg-functions-combinator-if} -The suffix -If can be appended to the name of any aggregate function. In this case, the aggregate function accepts an extra argument – a condition (Uint8 type). The aggregate function processes only the rows that trigger the condition. If the condition was not triggered even once, it returns a default value (usually zeros or empty strings). 
+-If可以加到任何聚合函数之后。加了-If之后,聚合函数需要接受一个额外的参数,即一个条件(UInt8类型):如果条件满足,那么聚合函数处理当前的行数据;如果不满足,那么返回默认值(通常是0或者空字符串)。

-例: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` 等等。
+例: `sumIf(column, cond)`, `countIf(cond)`, `avgIf(x, cond)`, `quantilesTimingIf(level1, level2)(x, cond)`, `argMinIf(arg, val, cond)` 等等。

-使用条件聚合函数,您可以一次计算多个条件的聚合,而无需使用子查询和 `JOIN`例如,在Yandex的。Metrica,条件聚合函数用于实现段比较功能。
+使用条件聚合函数,您可以一次计算多个条件的聚合,而无需使用子查询和 `JOIN`。例如,在Yandex.Metrica中,条件聚合函数用于实现段比较功能。

-## -阵列 {#agg-functions-combinator-array}
+## -Array {#agg-functions-combinator-array}

-Array后缀可以附加到任何聚合函数。 在这种情况下,聚合函数采用的参数 ‘Array(T)’ 类型(数组)而不是 ‘T’ 类型参数。 如果聚合函数接受多个参数,则它必须是长度相等的数组。 在处理数组时,聚合函数的工作方式与所有数组元素的原始聚合函数类似。

-示例1: `sumArray(arr)` -总计所有的所有元素 ‘arr’ 阵列。 在这个例子中,它可以更简单地编写: `sum(arraySum(arr))`.
+示例1: `sumArray(arr)` - 对所有 ‘arr’ 数组的全部元素求和。在这个例子中,它可以更简单地写为: `sum(arraySum(arr))`.

-示例2: `uniqArray(arr)` – Counts the number of unique elements in all ‘arr’ 阵列。 这可以做一个更简单的方法: `uniq(arrayJoin(arr))`,但它并不总是可以添加 ‘arrayJoin’ 到查询。
+示例2: `uniqArray(arr)` – 计算所有 ‘arr’ 数组中唯一元素的个数。也可以用更简单的方法: `uniq(arrayJoin(arr))`,但并不总是能把 ‘arrayJoin’ 加到查询中。

--如果和-阵列可以组合。 然而, ‘Array’ 必须先来,然后 ‘If’. 例: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`. 由于这个顺序,该 ‘cond’ 参数不会是数组。
+和 -If 组合使用时,‘Array’ 必须在前,‘If’ 在后。例: `uniqArrayIf(arr, cond)`, `quantilesTimingArrayIf(level1, level2)(arr, cond)`。由于这个顺序,‘cond’ 参数不会是数组。

-## -州 {#agg-functions-combinator-state}
+## -State {#agg-functions-combinator-state}

-如果应用此combinator,则聚合函数不会返回结果值(例如唯一值的数量 [uniq](reference.md#agg_function-uniq) 函数),但聚合的中间状态(用于 `uniq`,这是用于计算唯一值的数量的散列表)。 这是一个 `AggregateFunction(...)` 可用于进一步处理或存储在表中以完成聚合。
+如果应用此组合器,则聚合函数不会返回结果值(例如 [uniq](reference.md#agg_function-uniq) 函数的唯一值数量),而是返回聚合的中间状态(对于 `uniq`,返回的是计算唯一值数量的哈希表)。 它是一个 `AggregateFunction(...)`,可用于进一步处理,或存储在表中以便稍后完成聚合。

要使用这些状态,请使用:

- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) 表引擎。
-- [最后聚会](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) 功能。
-- [跑累积](../../sql-reference/functions/other-functions.md#function-runningaccumulate) 功能。
-- [-合并](#aggregate_functions_combinators-merge) combinator
+- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) 功能。
+- [runningAccumulate](../../sql-reference/functions/other-functions.md#function-runningaccumulate) 功能。
+- [-Merge](#aggregate_functions_combinators-merge) combinator
- [-MergeState](#aggregate_functions_combinators-mergestate) combinator

-## -合并 {#aggregate_functions_combinators-merge}
+## -Merge {#aggregate_functions_combinators-merge}

如果应用此组合器,则聚合函数将中间聚合状态作为参数,组合状态以完成聚合,并返回结果值。

## -MergeState {#aggregate_functions_combinators-mergestate}

-以与-Merge combinator相同的方式合并中间聚合状态。 但是,它不会返回结果值,而是返回中间聚合状态,类似于-State combinator。
+以与 -Merge 相同的方式合并中间聚合状态。 但是,它不会返回结果值,而是返回中间聚合状态,类似于 -State。

## -ForEach {#agg-functions-combinator-foreach}

@@ -55,7 +53,7 @@ The suffix -If can be appended to the name of any aggregate function. In this ca

 更改聚合函数的行为。

-如果聚合函数没有输入值,则使用此combinator,它返回其返回数据类型的默认值。 适用于可以采用空输入数据的聚合函数。
+如果聚合函数没有输入值,则使用此组合器时,它返回其返回数据类型的默认值。 适用于可以接受空输入数据的聚合函数。

`-OrDefault` 可与其他组合器一起使用。

**语法**

``` sql
OrDefault(x)
```

**参数**

-- `x` — Aggregate function parameters.
+- `x` — 聚合函数参数。 **返回值** @@ -174,7 +172,7 @@ FROM └────────────────────────────────┘ ``` -## -重新采样 {#agg-functions-combinator-resample} +## -Resample {#agg-functions-combinator-resample} 允许您将数据划分为组,然后单独聚合这些组中的数据。 通过将一列中的值拆分为间隔来创建组。 @@ -184,19 +182,19 @@ FROM **参数** -- `start` — Starting value of the whole required interval for `resampling_key` 值。 -- `stop` — Ending value of the whole required interval for `resampling_key` 值。 整个时间间隔不包括 `stop` 价值 `[start, stop)`. -- `step` — Step for separating the whole interval into subintervals. The `aggFunction` 在每个子区间上独立执行。 -- `resampling_key` — Column whose values are used for separating data into intervals. +- `start` — `resampling_key` 开始值。 +- `stop` — `resampling_key` 结束边界。 区间内部不包含 `stop` 值,即 `[start, stop)`. +- `step` — 分组的步长。 The `aggFunction` 在每个子区间上独立执行。 +- `resampling_key` — 取样列,被用来分组. - `aggFunction_params` — `aggFunction` 参数。 **返回值** -- 阵列 `aggFunction` 每个子区间的结果。 +- `aggFunction` 每个子区间的结果,结果为数组。 **示例** -考虑一下 `people` 具有以下数据的表: +考虑一下 `people` 表具有以下数据的表结构: ``` text ┌─name───┬─age─┬─wage─┐ @@ -209,9 +207,9 @@ FROM └────────┴─────┴──────┘ ``` -让我们得到的人的名字,他们的年龄在于的时间间隔 `[30,60)` 和 `[60,75)`. 由于我们使用整数表示的年龄,我们得到的年龄 `[30, 59]` 和 `[60,74]` 间隔。 +让我们得到的人的名字,他们的年龄在于的时间间隔 `[30,60)` 和 `[60,75)`。 由于我们使用整数表示的年龄,我们得到的年龄 `[30, 59]` 和 `[60,74]` 间隔。 -要在数组中聚合名称,我们使用 [groupArray](reference.md#agg_function-grouparray) 聚合函数。 这需要一个参数。 在我们的例子中,它是 `name` 列。 该 `groupArrayResample` 函数应该使用 `age` 按年龄聚合名称的列。 要定义所需的时间间隔,我们通过 `30, 75, 30` 参数到 `groupArrayResample` 功能。 +要在数组中聚合名称,我们使用 [groupArray](reference.md#agg_function-grouparray) 聚合函数。 这需要一个参数。 在我们的例子中,它是 `name` 列。 `groupArrayResample` 函数应该使用 `age` 按年龄聚合名称, 要定义所需的时间间隔,我们传入 `30, 75, 30` 参数给 `groupArrayResample` 函数。 ``` sql SELECT groupArrayResample(30, 75, 30)(name, age) FROM people @@ -225,7 +223,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people 考虑结果。 -`Jonh` 是因为他太年轻了 其他人按照指定的年龄间隔进行分配。 +`Jonh` 没有被选中,因为他太年轻了。 其他人按照指定的年龄间隔进行分配。 现在让我们计算指定年龄间隔内的总人数和平均工资。 diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md index 06666c49d03..57d8e362d99 100644 --- a/docs/zh/sql-reference/aggregate-functions/index.md +++ b/docs/zh/sql-reference/aggregate-functions/index.md @@ -1,9 +1,6 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u805A\u5408\u51FD\u6570" toc_priority: 33 -toc_title: "\u5BFC\u8A00" +toc_title: 简介 --- # 聚合函数 {#aggregate-functions} diff --git a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md index 830581beba7..69572086549 100644 --- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 38 -toc_title: "\u53C2\u6570" +toc_title: 参数聚合函数 --- # 参数聚合函数 {#aggregate_functions_parametric} -Some aggregate functions can accept not only argument columns (used for compression), but a set of parameters – constants for initialization. The syntax is two pairs of brackets instead of one. The first is for parameters, and the second is for arguments. 
+一些聚合函数不仅可以接受参数列(用于压缩),也可以接收常量的初始化参数。这种语法是接受两个括号的参数,第一个数初始化参数,第二个是入参。 -## 直方图 {#histogram} +## histogram {#histogram} 计算自适应直方图。 它不能保证精确的结果。 @@ -21,20 +19,21 @@ histogram(number_of_bins)(values) **参数** -`number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [表达式](../syntax.md#syntax-expressions) 导致输入值。 +`number_of_bins` — 直方图bin个数,这个函数会自动计算bin的数量,而且会尽量使用指定值,如果无法做到,那就使用更小的bin个数。 + +`values` — [表达式](../syntax.md#syntax-expressions) 输入值。 **返回值** -- [阵列](../../sql-reference/data-types/array.md) 的 [元组](../../sql-reference/data-types/tuple.md) 下面的格式: +- [Array](../../sql-reference/data-types/array.md) 的 [Tuples](../../sql-reference/data-types/tuple.md) 如下: ``` [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] ``` - - `lower` — Lower bound of the bin. - - `upper` — Upper bound of the bin. - - `height` — Calculated height of the bin. + - `lower` — bin的下边界。 + - `upper` — bin的上边界。 + - `height` — bin的计算权重。 **示例** @@ -53,7 +52,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -您可以使用 [酒吧](../../sql-reference/functions/other-functions.md#function-bar) 功能,例如: +您可以使用 [bar](../../sql-reference/functions/other-functions.md#function-bar) 功能,例如: ``` sql WITH histogram(5)(rand() % 100) AS hist @@ -93,11 +92,11 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) **参数** -- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). +- `pattern` — 模式字符串。 参考 [模式语法](#sequence-function-pattern-syntax). -- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 +- `timestamp` — 包含时间的列。典型的时间类型是: `Date` 和 `DateTime`。您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 +- `cond1`, `cond2` — 事件链的约束条件。 数据类型是: `UInt8`。 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 **返回值** @@ -109,11 +108,11 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) **模式语法** -- `(?N)` — Matches the condition argument at position `N`. 条件在编号 `[1, 32]` 范围。 例如, `(?1)` 匹配传递给 `cond1` 参数。 +- `(?N)` — 在位置`N`匹配条件参数。 条件在编号 `[1, 32]` 范围。 例如, `(?1)` 匹配传递给 `cond1` 参数。 -- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern. +- `.*` — 匹配任何事件的数字。 不需要条件参数来匹配这个模式。 -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运营商。 +- `(?t operator value)` — 分开两个事件的时间。 例如: `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运算符。 **例** @@ -169,7 +168,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM ## sequenceCount(pattern)(time, cond1, cond2, …) {#function-sequencecount} -计数与模式匹配的事件链的数量。 该函数搜索不重叠的事件链。 当前链匹配后,它开始搜索下一个链。 +计算与模式匹配的事件链的数量。该函数搜索不重叠的事件链。当前链匹配后,它开始搜索下一个链。 !!! warning "警告" 在同一秒钟发生的事件可能以未定义的顺序排列在序列中,影响结果。 @@ -180,11 +179,11 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) **参数** -- `pattern` — Pattern string. See [模式语法](#sequence-function-pattern-syntax). +- `pattern` — 模式字符串。 参考:[模式语法](#sequence-function-pattern-syntax). 
-- `timestamp` — Column considered to contain time data. Typical data types are `Date` 和 `DateTime`. 您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 +- `timestamp` — 包含时间的列。典型的时间类型是: `Date` 和 `DateTime`。您还可以使用任何支持的 [UInt](../../sql-reference/data-types/int-uint.md) 数据类型。 -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. 最多可以传递32个条件参数。 该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 +- `cond1`, `cond2` — 事件链的约束条件。 数据类型是: `UInt8`。 最多可以传递32个条件参数。该函数只考虑这些条件中描述的事件。 如果序列包含未在条件中描述的数据,则函数将跳过这些数据。 **返回值** @@ -227,9 +226,9 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t 搜索滑动时间窗中的事件链,并计算从链中发生的最大事件数。 -该函数根据算法工作: +该函数采用如下算法: -- 该函数搜索触发链中的第一个条件并将事件计数器设置为1的数据。 这是滑动窗口启动的时刻。 +- 该函数搜索触发链中的第一个条件并将事件计数器设置为1。 这是滑动窗口启动的时刻。 - 如果来自链的事件在窗口内顺序发生,则计数器将递增。 如果事件序列中断,则计数器不会增加。 @@ -243,11 +242,11 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) **参数** -- `window` — Length of the sliding window in seconds. -- `mode` -这是一个可选的参数。 - - `'strict'` -当 `'strict'` 设置时,windowFunnel()仅对唯一值应用条件。 -- `timestamp` — Name of the column containing the timestamp. Data types supported: [日期](../../sql-reference/data-types/date.md), [日期时间](../../sql-reference/data-types/datetime.md#data_type-datetime) 和其他无符号整数类型(请注意,即使时间戳支持 `UInt64` 类型,它的值不能超过Int64最大值,即2^63-1)。 -- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +- `window` — 滑动窗户的大小,单位是秒。 +- `mode` - 这是一个可选的参数。 + - `'strict'` - 当 `'strict'` 设置时,windowFunnel()仅对唯一值应用匹配条件。 +- `timestamp` — 包含时间的列。 数据类型支持: [日期](../../sql-reference/data-types/date.md), [日期时间](../../sql-reference/data-types/datetime.md#data_type-datetime) 和其他无符号整数类型(请注意,即使时间戳支持 `UInt64` 类型,它的值不能超过Int64最大值,即2^63-1)。 +- `cond` — 事件链的约束条件。 [UInt8](../../sql-reference/data-types/int-uint.md) 类型。 **返回值** @@ -284,7 +283,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) └────────────┴─────────┴─────────────────────┴─────────┴─────────┘ ``` -了解用户有多远 `user_id` 可以在2019的1-2月期间通过链条。 +了解用户`user_id` 可以在2019的1-2月期间通过链条多远。 查询: @@ -315,10 +314,10 @@ ORDER BY level ASC ## Retention {#retention} -该函数将一组条件作为参数,类型为1到32个参数 `UInt8` 表示事件是否满足特定条件。 +该函数将一组条件作为参数,类型为1到32个 `UInt8` 类型的参数,用来表示事件是否满足特定条件。 任何条件都可以指定为参数(如 [WHERE](../../sql-reference/statements/select/where.md#select-where)). -除了第一个以外,条件成对适用:如果第一个和第二个是真的,第二个结果将是真的,如果第一个和fird是真的,第三个结果将是真的,等等。 +除了第一个以外,条件成对适用:如果第一个和第二个是真的,第二个结果将是真的,如果第一个和第三个是真的,第三个结果将是真的,等等。 **语法** @@ -328,22 +327,22 @@ retention(cond1, cond2, ..., cond32); **参数** -- `cond` — an expression that returns a `UInt8` 结果(1或0)。 +- `cond` — 返回 `UInt8` 结果(1或0)的表达式。 **返回值** 数组为1或0。 -- 1 — condition was met for the event. -- 0 — condition wasn't met for the event. +- 1 — 条件满足。 +- 0 — 条件不满足。 类型: `UInt8`. **示例** -让我们考虑计算的一个例子 `retention` 功能,以确定网站流量。 +让我们考虑使用 `retention` 功能的一个例子 ,以确定网站流量。 -**1.** Сreate a table to illustrate an example. +**1.** 举例说明,先创建一张表。 ``` sql CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory; @@ -402,7 +401,7 @@ SELECT * FROM retention_test └────────────┴─────┘ ``` -**2.** 按唯一ID对用户进行分组 `uid` 使用 `retention` 功能。 +**2.** 按唯一ID `uid` 对用户进行分组,使用 `retention` 功能。 查询: @@ -466,7 +465,7 @@ FROM └────┴────┴────┘ ``` -哪里: +条件: - `r1`-2020-01-01期间访问该网站的独立访问者数量( `cond1` 条件)。 - `r2`-在2020-01-01和2020-01-02之间的特定时间段内访问该网站的唯一访问者的数量 (`cond1` 和 `cond2` 条件)。 @@ -474,9 +473,9 @@ FROM ## uniqUpTo(N)(x) {#uniquptonx} -Calculates the number of different argument values ​​if it is less than or equal to N. 
If the number of different argument values is greater than N, it returns N + 1. +计算小于或者等于N的不同参数的个数。如果结果大于N,那返回N+1。 -建议使用小Ns,高达10。 N的最大值为100。 +建议使用较小的Ns,比如:10。N的最大值为100。 对于聚合函数的状态,它使用的内存量等于1+N\*一个字节值的大小。 对于字符串,它存储8个字节的非加密哈希。 也就是说,计算是近似的字符串。 @@ -488,12 +487,12 @@ Calculates the number of different argument values ​​if it is less than or e 用法示例: ``` text -Problem: Generate a report that shows only keywords that produced at least 5 unique users. -Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5 +问题:产出一个不少于五个唯一用户的关键字报告 +解决方案: 写group by查询语句 HAVING uniqUpTo(4)(UserID) >= 5 ``` +## sumMapFiltered(keys\_to\_keep)(keys, values) {#summapfilteredkeys-to-keepkeys-values} + +和 [sumMap](reference.md#agg_functions-summap) 基本一致, 除了一个键数组作为参数传递。这在使用高基数key时尤其有用。 + [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/parametric_functions/) - -## sumMapFiltered(keys\_to\_keep)(键值) {#summapfilteredkeys-to-keepkeys-values} - -同样的行为 [sumMap](reference.md#agg_functions-summap) 除了一个键数组作为参数传递。 这在使用高基数密钥时尤其有用。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index 53510900536..7d5ecda7bb4 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -1,13 +1,11 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 36 -toc_title: "\u53C2\u8003\u8D44\u6599" +toc_title: 聚合函数 --- # 聚合函数引用 {#aggregate-functions-reference} -## 计数 {#agg_function-count} +## count {#agg_function-count} 计数行数或非空值。 @@ -73,7 +71,7 @@ SELECT count(DISTINCT num) FROM t 这个例子表明 `count(DISTINCT num)` 由执行 `uniqExact` 根据功能 `count_distinct_implementation` 设定值。 -## 任何(x) {#agg_function-any} +## any(x) {#agg_function-any} 选择第一个遇到的值。 查询可以以任何顺序执行,甚至每次都以不同的顺序执行,因此此函数的结果是不确定的。 @@ -115,7 +113,7 @@ FROM ontime 选择遇到的最后一个值。 其结果是一样不确定的 `any` 功能。 -## 集团比特 {#groupbitand} +## groupBitAnd {#groupbitand} 按位应用 `AND` 对于一系列的数字。 @@ -337,7 +335,7 @@ SELECT argMin(user, salary) FROM salary 总计 ‘value’ 数组根据在指定的键 ‘key’ 阵列。 传递键和值数组的元组与传递两个键和值数组是同义的。 元素的数量 ‘key’ 和 ‘value’ 总计的每一行必须相同。 -Returns a tuple of two arrays: keys in sorted order, and values ​​summed for the corresponding keys. +返回两个数组的一个二元组: key是排好序的,value是对应key的求和。 示例: @@ -374,7 +372,7 @@ GROUP BY timeslot ## skewPop {#skewpop} -计算 [歪斜](https://en.wikipedia.org/wiki/Skewness) 的序列。 +计算的序列[偏度](https://en.wikipedia.org/wiki/Skewness)。 ``` sql skewPop(expr) @@ -386,7 +384,7 @@ skewPop(expr) **返回值** -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) +给定序列的偏度。类型 — [Float64](../../sql-reference/data-types/float.md) **示例** @@ -410,7 +408,7 @@ skewSamp(expr) **返回值** -The skewness of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. +给定序列的偏度。 类型 — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. **示例** @@ -432,7 +430,7 @@ kurtPop(expr) **返回值** -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md) +给定序列的峰度。 类型 — [Float64](../../sql-reference/data-types/float.md) **示例** @@ -456,7 +454,7 @@ kurtSamp(expr) **返回值** -The kurtosis of the given distribution. Type — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. +给定序列的峰度。类型 — [Float64](../../sql-reference/data-types/float.md). 如果 `n <= 1` (`n` 是样本的大小),则该函数返回 `nan`. 
**示例** @@ -533,7 +531,7 @@ FROM ( 只适用于数字。 结果总是Float64。 -## 平均加权 {#avgweighted} +## avgWeighted {#avgweighted} 计算 [加权算术平均值](https://en.wikipedia.org/wiki/Weighted_arithmetic_mean). @@ -545,10 +543,10 @@ avgWeighted(x, weight) **参数** -- `x` — Values. [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). -- `weight` — Weights of the values. [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). +- `x` — 值。 [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). +- `weight` — 值的加权。 [整数](../data-types/int-uint.md) 或 [浮点](../data-types/float.md). -类型 `x` 和 `weight` 一定是一样的 +`x` 和 `weight` 的类型一定是一样的 **返回值** @@ -590,7 +588,7 @@ uniq(x[, ...]) - A [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 -**实施细节** +**实现细节** 功能: @@ -598,7 +596,7 @@ uniq(x[, ...]) - 使用自适应采样算法。 对于计算状态,该函数使用最多65536个元素哈希值的样本。 - This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. + 这个算法是非常精确的,并且对于CPU来说非常高效。如果查询包含一些这样的函数,那和其他聚合函数相比 `uniq` 将是几乎一样快。 - 确定性地提供结果(它不依赖于查询处理顺序)。 @@ -629,17 +627,17 @@ uniqCombined(HLL_precision)(x[, ...]) **返回值** -- 一个数字 [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 +- 一个[UInt64](../../sql-reference/data-types/int-uint.md)类型的数字。 -**实施细节** +**实现细节** 功能: - 计算散列(64位散列 `String` 否则32位)对于聚合中的所有参数,然后在计算中使用它。 -- 使用三种算法的组合:数组、哈希表和HyperLogLog与error错表。 +- 使用三种算法的组合:数组、哈希表和包含错误修正表的HyperLogLog。 - For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. + 少量的不同的值,使用数组。 值再多一些,使用哈希表。对于大量的数据来说,使用HyperLogLog,HyperLogLog占用一个固定的内存空间。 - 确定性地提供结果(它不依赖于查询处理顺序)。 @@ -650,7 +648,7 @@ uniqCombined(HLL_precision)(x[, ...]) - 消耗少几倍的内存。 - 计算精度高出几倍。 -- 通常具有略低的性能。 在某些情况下, `uniqCombined` 可以表现得比 `uniq`,例如,使用通过网络传输大量聚合状态的分布式查询。 +- 通常具有略低的性能。 在某些情况下, `uniqCombined` 可以表现得比 `uniq` 好,例如,使用通过网络传输大量聚合状态的分布式查询。 **另请参阅** @@ -679,7 +677,7 @@ uniqHLL12(x[, ...]) - A [UInt64](../../sql-reference/data-types/int-uint.md)-键入号码。 -**实施细节** +**实现细节** 功能: @@ -707,9 +705,9 @@ uniqHLL12(x[, ...]) uniqExact(x[, ...]) ``` -使用 `uniqExact` 功能,如果你绝对需要一个确切的结果。 否则使用 [uniq](#agg_function-uniq) 功能。 +如果你绝对需要一个确切的结果,使用 `uniqExact` 功能。 否则使用 [uniq](#agg_function-uniq) 功能。 -该 `uniqExact` 功能使用更多的内存比 `uniq`,因为状态的大小随着不同值的数量的增加而无界增长。 +`uniqExact` 比 `uniq` 使用更多的内存,因为状态的大小随着不同值的数量的增加而无界增长。 **参数** @@ -721,7 +719,7 @@ uniqExact(x[, ...]) - [uniqCombined](#agg_function-uniqcombined) - [uniqHLL12](#agg_function-uniqhll12) -## 群交(x),群交(max\_size)(x) {#agg_function-grouparray} +## groupArray(x), groupArray(max\_size)(x) {#agg_function-grouparray} 创建参数值的数组。 值可以按任何(不确定)顺序添加到数组中。 @@ -748,10 +746,10 @@ groupArrayInsertAt(default_x, size)(x, pos); **参数** -- `x` — Value to be inserted. [表达式](../syntax.md#syntax-expressions) 导致的一个 [支持的数据类型](../../sql-reference/data-types/index.md). -- `pos` — Position at which the specified element `x` 将被插入。 数组中的索引编号从零开始。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). -- `default_x`— Default value for substituting in empty positions. Optional parameter. [表达式](../syntax.md#syntax-expressions) 导致为配置的数据类型 `x` 参数。 如果 `default_x` 未定义,则 [默认值](../../sql-reference/statements/create.md#create-default-values) 被使用。 -- `size`— Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` 必须指定。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). 
+- `x` — 被插入的值。[表达式](../syntax.md#syntax-expressions) 导致的一个 [支持的数据类型](../../sql-reference/data-types/index.md). +- `pos` — `x` 将被插入的位置。 数组中的索引编号从零开始。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `default_x`— 如果代入值为空,则使用默认值。可选参数。[表达式](../syntax.md#syntax-expressions) 为 `x` 数据类型的数据。 如果 `default_x` 未定义,则 [默认值](../../sql-reference/statements/create.md#create-default-values) 被使用。 +- `size`— 结果数组的长度。可选参数。如果使用该参数,`default_x` 必须指定。 [UInt32](../../sql-reference/data-types/int-uint.md#uint-ranges). **返回值** @@ -803,7 +801,7 @@ SELECT groupArrayInsertAt('-', 5)(toString(number), number * 2) FROM numbers(5); └───────────────────────────────────────────────────────────────────┘ ``` -元件的多线程插入到一个位置。 +在一个位置多线程插入数据。 查询: @@ -832,8 +830,8 @@ groupArrayMovingSum(window_size)(numbers_for_summing) **参数** -- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。 -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 为数值数据类型值。 +- `window_size` — 窗口大小。 **返回值** @@ -906,13 +904,13 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) **参数** - `numbers_for_summing` — [表达式](../syntax.md#syntax-expressions) 生成数值数据类型值。 -- `window_size` — Size of the calculation window. +- `window_size` — 窗口大小。 **返回值** - 与输入数据大小和类型相同的数组。 -该函数使用 [四舍五入到零](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). 它截断结果数据类型的小数位数。 +该函数使用 [四舍五入到零](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). 它截断无意义的小数位来保证结果的数据类型。 **示例** @@ -967,20 +965,20 @@ FROM t └───────────┴──────────────────────────────────┴───────────────────────┘ ``` -## 禄,赂麓ta脌麓,):脡,,拢脢,group媒group)galaxy s8碌胫脢)禄煤)酶脱脩) {#groupuniqarrayx-groupuniqarraymax-sizex} +## groupUniqArray(x), groupUniqArray(max\_size)(x) {#groupuniqarrayx-groupuniqarraymax-sizex} 从不同的参数值创建一个数组。 内存消耗是一样的 `uniqExact` 功能。 -第二个版本(与 `max_size` 参数)将结果数组的大小限制为 `max_size` 元素。 +第二个版本(`max_size` 参数)将结果数组的大小限制为 `max_size` 元素。 例如, `groupUniqArray(1)(x)` 相当于 `[any(x)]`. -## 分位数 {#quantile} +## quantile {#quantile} -计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 -此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 随着储存器大小高达8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 +此功能适用 [水塘抽样(](https://en.wikipedia.org/wiki/Reservoir_sampling),使用储存器最大到8192和随机数发生器进行采样。 结果是非确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -992,12 +990,12 @@ quantile(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
+- `expr` — 求值表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。

**返回值**

-- 指定电平的近似分位数。
+- 指定层次的近似分位数。

类型:

- [Float64](../../sql-reference/data-types/float.md) 对于数字数据类型输入。
- [日期](../../sql-reference/data-types/date.md) 如果输入值具有 `Date` 类型。
- [日期时间](../../sql-reference/data-types/datetime.md) 如果输入值具有 `DateTime` 类型。

**示例**

输入表:

``` text
┌─val─┐
│   1 │
│   1 │
│   2 │
│   3 │
└─────┘
```

查询:

``` sql
SELECT quantile(val) FROM t
```

结果:

``` text
┌─quantile(val)─┐
│           1.5 │
└───────────────┘
```

**另请参阅**

- [中位数](#median)
- [分位数](#quantiles)

-## 量化确定 {#quantiledeterministic}
+## quantileDeterministic {#quantiledeterministic}

-计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。
+计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。

-此功能适用 [油藏采样](https://en.wikipedia.org/wiki/Reservoir_sampling) 与储层大小高达8192和采样的确定性算法。 结果是确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。
+此函数使用 [水塘抽样](https://en.wikipedia.org/wiki/Reservoir_sampling),储存器大小最大为8192,并采用确定性的采样算法。结果是确定性的。 要获得精确的分位数,请使用 [quantileExact](#quantileexact) 功能。

-当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。

**语法**

``` sql
quantileDeterministic(level)(expr, determinator)
```

**参数**

-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md).
-- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly.
+- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
+- `expr` — 求值表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。
+- `determinator` — 一个数字,其hash被用来代替在水塘抽样中随机生成的数字,这样可以保证取样的确定性。你可以使用用户ID或者事件ID等任何确定性的正数,但是如果相同的 `determinator` 出现多次,那结果很可能不正确。

**返回值**

-- 指定电平的近似分位数。
+- 指定层次的近似分位数。

类型:

- [Float64](../../sql-reference/data-types/float.md) 对于数字数据类型输入。
- [日期](../../sql-reference/data-types/date.md) 如果输入值具有 `Date` 类型。
- [日期时间](../../sql-reference/data-types/datetime.md) 如果输入值具有 `DateTime` 类型。

**示例**

输入表:

``` text
┌─val─┐
│   1 │
│   1 │
│   2 │
│   3 │
└─────┘
```

查询:

``` sql
SELECT quantileDeterministic(val, 1) FROM t
```

结果:

``` text
┌─quantileDeterministic(val, 1)─┐
│                           1.5 │
└───────────────────────────────┘
```

**另请参阅**

- [中位数](#median)
- [分位数](#quantiles)

## quantileExact {#quantileexact}

-正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。
+准确计算数字序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。

-To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Therefore, the function consumes `O(n)` 内存,其中 `n` 是传递的多个值。 然而,对于少量的值,该函数是非常有效的。
+为了准确计算,所有输入的数据被合并为一个数组,并且部分地排序。因此该函数需要 `O(n)` 的内存,n为输入数据的个数。但是对于少量数据来说,该函数还是非常有效的。

-当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。

**语法**

``` sql
quantileExact(level)(expr)
```

**参数**

-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: @@ -1153,13 +1151,13 @@ SELECT quantileExact(number) FROM numbers(10) - [中位数](#median) - [分位数](#quantiles) -## 分位数加权 {#quantileexactweighted} +## quantileExactWeighted {#quantileexactweighted} -正是计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 数值数据序列,考虑到每个元素的权重。 +考虑到每个元素的权重,然后准确计算数值序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 -To get exact value, all the passed values ​​are combined into an array, which is then partially sorted. Each value is counted with its weight, as if it is present `weight` times. A hash table is used in the algorithm. Because of this, if the passed values ​​are frequently repeated, the function consumes less RAM than [quantileExact](#quantileexact). 您可以使用此功能,而不是 `quantileExact` 并指定重量1。 +为了准确计算,所有输入的数据被合并为一个数组,并且部分的排序。每个输入值需要根据 `weight` 计算求和。该算法使用哈希表。正因为如此,在数据重复较多的时候使用的内存是少于[quantileExact](#quantileexact)的。 您可以使用此函数代替 `quantileExact` 并指定重量1。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1171,13 +1169,13 @@ quantileExactWeighted(level)(expr, weight) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `expr` — 求职表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。 +- `weight` — 权重序列。 权重是一个数据出现的数值。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: @@ -1217,13 +1215,13 @@ SELECT quantileExactWeighted(n, val) FROM t - [中位数](#median) - [分位数](#quantiles) -## 分位定时 {#quantiletiming} +## quantileTiming {#quantiletiming} -随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 的数字数据序列。 +使用确定的精度计算数字数据序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1235,12 +1233,12 @@ quantileTiming(level)(expr) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). 
+- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql-reference/data-types/float.md)-键入号码。 +- `expr` — [表达式](../syntax.md#syntax-expressions),返回 [浮动\*](../../sql-reference/data-types/float.md)类型数据。 - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - 如果输入负值,那结果是不可预期的。 + - 如果输入值大于30000(页面加载时间大于30s),那我们假设为30000。 **精度** @@ -1252,16 +1250,16 @@ quantileTiming(level)(expr) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 !!! note "注" - 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile)更有效和准确。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: `Float32`. !!! note "注" - 如果没有值传递给函数(当使用 `quantileTimingIf`), [阿南](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [按条款订购](../statements/select/order-by.md#select-order-by) 对于排序注意事项 `NaN` 值。 + 如果没有值传递给函数(当使用 `quantileTimingIf`), [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf) 被返回。 这样做的目的是将这些案例与导致零的案例区分开来。 看 [ORDER BY clause](../statements/select/order-by.md#select-order-by) 对于 `NaN` 值排序注意事项。 **示例** @@ -1300,13 +1298,13 @@ SELECT quantileTiming(response_time) FROM t - [中位数](#median) - [分位数](#quantiles) -## 分位时间加权 {#quantiletimingweighted} +## quantileTimingWeighted {#quantiletimingweighted} -随着确定的精度计算 [分位数](https://en.wikipedia.org/wiki/Quantile) 根据每个序列成员的权重对数字数据序列进行处理。 +根据每个序列成员的权重,使用确定的精度计算数字序列的[分位数](https://en.wikipedia.org/wiki/Quantile)。 结果是确定性的(它不依赖于查询处理顺序)。 该函数针对描述加载网页时间或后端响应时间等分布的序列进行了优化。 -当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。 +当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。 **语法** @@ -1318,14 +1316,14 @@ quantileTimingWeighted(level)(expr, weight) **参数** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). +- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median). -- `expr` — [表达式](../syntax.md#syntax-expressions) 在一个列值返回 [浮动\*](../../sql-reference/data-types/float.md)-键入号码。 +- `expr` — [表达式](../syntax.md#syntax-expressions),返回 [浮动\*](../../sql-reference/data-types/float.md)类型数据。 - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - 如果输入负值,那结果是不可预期的。 + - 如果输入值大于30000(页面加载时间大于30s),那我们假设为30000。 -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `weight` — 权重序列。 权重是一个数据出现的数值。 **精度** @@ -1337,16 +1335,16 @@ quantileTimingWeighted(level)(expr, weight) 否则,计算结果将四舍五入到16毫秒的最接近倍数。 !!! note "注" - 对于计算页面加载时间分位数,此函数比 [分位数](#quantile). + 对于计算页面加载时间分位数,此函数比 [分位数](#quantile)更高效和准确。 **返回值** -- 指定电平的分位数。 +- 指定层次的分位数。 类型: `Float32`. !!! 
note "注"
    如果没有值传递给函数(当使用 `quantileTimingIf` 时),将返回 [NaN](../../sql-reference/data-types/float.md#data_type-float-nan-inf)。 这样做的目的是将这些案例与导致零的案例区分开来。关于 `NaN` 值的排序注意事项,参见 [ORDER BY clause](../statements/select/order-by.md#select-order-by)。

**示例**

输入表:

``` text
┌─response_time─┬─weight─┐
│            68 │      1 │
│           104 │      2 │
│           112 │      3 │
│           126 │      2 │
│           138 │      1 │
│           162 │      1 │
└───────────────┴────────┘
```

查询:

``` sql
SELECT quantileTimingWeighted(response_time, weight) FROM t
```

结果:

``` text
┌─quantileTimingWeighted(response_time, weight)─┐
│                                           112 │
└───────────────────────────────────────────────┘
```

**另请参阅**

- [中位数](#median)
- [分位数](#quantiles)

## quantileTDigest {#quantiletdigest}

-计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。
+使用[t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。

-最大误差为1%。 内存消耗 `log(n)`,哪里 `n` 是多个值。 结果取决于运行查询的顺序,并且是不确定的。
+最大误差为1%。 内存消耗为 `log(n)`,这里 `n` 是值的个数。 结果取决于运行查询的顺序,并且是不确定的。

-该功能的性能低于性能 [分位数](#quantile) 或 [分位定时](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`.
+该函数的性能低于 [分位数](#quantile) 或 [时间分位](#quantiletiming) 的性能。 但就状态大小与精度的比值而言,这个函数比 `quantile` 更优秀。

-当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。

**语法**

``` sql
quantileTDigest(level)(expr)
```

**参数**

-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md).
+- `level` — 分位数层次。可选参数。 从0到1的一个float类型的常量。 我们推荐 `level` 值的范围为 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
+- `expr` — 求值表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types),[日期](../../sql-reference/data-types/date.md)数据类型或[时间](../../sql-reference/data-types/datetime.md)数据类型。

**返回值**

-- 指定电平的近似分位数。
+- 指定层次的近似分位数。

类型:

- [Float64](../../sql-reference/data-types/float.md) 对于数字数据类型输入。
- [日期](../../sql-reference/data-types/date.md) 如果输入值具有 `Date` 类型。
- [日期时间](../../sql-reference/data-types/datetime.md) 如果输入值具有 `DateTime` 类型。

**示例**

查询:

``` sql
SELECT quantileTDigest(number) FROM numbers(10)
```

结果:

``` text
┌─quantileTDigest(number)─┐
│                     4.5 │
└─────────────────────────┘
```

**另请参阅**

- [中位数](#median)
- [分位数](#quantiles)

## quantileTDigestWeighted {#quantiletdigestweighted}

-计算近似值 [分位数](https://en.wikipedia.org/wiki/Quantile) 使用的数字数据序列 [t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法。 该函数考虑了每个序列成员的权重。 最大误差为1%。 内存消耗 `log(n)`,哪里 `n` 是多个值。
+使用[t-digest](https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf) 算法计算数字序列的近似[分位数](https://en.wikipedia.org/wiki/Quantile)。 该函数考虑了每个序列成员的权重。最大误差为1%。 内存消耗为 `log(n)`,这里 `n` 是值的个数。

-该功能的性能低于性能 [分位数](#quantile) 或 [分位定时](#quantiletiming). 在状态大小与精度的比率方面,这个函数比 `quantile`.
+该函数的性能低于 [分位数](#quantile) 或 [时间分位](#quantiletiming) 的性能。 但就状态大小与精度的比值而言,这个函数比 `quantile` 更优秀。

结果取决于运行查询的顺序,并且是不确定的。

-当使用多个 `quantile*` 在查询中具有不同级别的函数,内部状态不会被组合(即查询的工作效率低于它可以)。 在这种情况下,使用 [分位数](#quantiles) 功能。
+当在一个查询中使用多个不同层次的 `quantile*` 时,内部状态不会被组合(即查询的工作效率低于组合情况)。在这种情况下,使用[分位数](#quantiles)功能。

**语法**

``` sql
quantileTDigestWeighted(level)(expr, weight)
```

**参数**

-- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` 值的范围 `[0.01, 0.99]`. 默认值:0.5。 在 `level=0.5` 该函数计算 [中位数](https://en.wikipedia.org/wiki/Median).
-- `expr` — Expression over the column values resulting in numeric [数据类型](../../sql-reference/data-types/index.md#data_types), [日期](../../sql-reference/data-types/date.md) 或 [日期时间](../../sql-reference/data-types/datetime.md).
-- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences.
+- `level` — 分位数层次。可选参数。从 0 到 1 的一个 float 类型常量。我们推荐 `level` 值的范围为 `[0.01, 0.99]`。默认值:0.5。当 `level=0.5` 时,该函数计算 [中位数](https://en.wikipedia.org/wiki/Median)。
+- `expr` — 求值表达式,类型为:数值[数据类型](../../sql-reference/data-types/index.md#data_types)、[日期](../../sql-reference/data-types/date.md)数据类型或[日期时间](../../sql-reference/data-types/datetime.md)数据类型。
+- `weight` — 权重序列。权重是数值出现的次数。

 **返回值**

-- 指定电平的近似分位数。
+- 指定层次的近似分位数。

 类型:

@@ -1491,20 +1489,20 @@ SELECT quantileTDigestWeighted(number, 1) FROM numbers(10)

 - [中位数](#median)
 - [分位数](#quantiles)

-## 中位数 {#median}
+## median {#median}

-该 `median*` 函数是相应的别名 `quantile*` 功能。 它们计算数字数据样本的中位数。
+`median*` 函数是对应 `quantile*` 函数的别名。它们计算数字数据样本的中位数。

-功能:
+函数:

-- `median` — Alias for [分位数](#quantile).
-- `medianDeterministic` — Alias for [量化确定](#quantiledeterministic).
-- `medianExact` — Alias for [quantileExact](#quantileexact).
-- `medianExactWeighted` — Alias for [分位数加权](#quantileexactweighted).
-- `medianTiming` — Alias for [分位定时](#quantiletiming).
-- `medianTimingWeighted` — Alias for [分位时间加权](#quantiletimingweighted).
-- `medianTDigest` — Alias for [quantileTDigest](#quantiletdigest).
-- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](#quantiletdigestweighted).
+- `median` — [quantile](#quantile) 的别名。
+- `medianDeterministic` — [quantileDeterministic](#quantiledeterministic) 的别名。
+- `medianExact` — [quantileExact](#quantileexact) 的别名。
+- `medianExactWeighted` — [quantileExactWeighted](#quantileexactweighted) 的别名。
+- `medianTiming` — [quantileTiming](#quantiletiming) 的别名。
+- `medianTimingWeighted` — [quantileTimingWeighted](#quantiletimingweighted) 的别名。
+- `medianTDigest` — [quantileTDigest](#quantiletdigest) 的别名。
+- `medianTDigestWeighted` — [quantileTDigestWeighted](#quantiletdigestweighted) 的别名。

 **示例**

@@ -1535,11 +1533,11 @@ SELECT medianDeterministic(val, 1) FROM t

 ## quantiles(level1, level2, …)(x) {#quantiles}

-所有分位数函数也具有相应的分位数函数: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantilesTDigest`. 这些函数在一遍中计算所列电平的所有分位数,并返回结果值的数组。
+所有分位数函数都有对应的多层次版本: `quantiles`、`quantilesDeterministic`、`quantilesTiming`、`quantilesTimingWeighted`、`quantilesExact`、`quantilesExactWeighted`、`quantilesTDigest`。这些函数一次遍历即可计算所列层次的所有分位数,并返回结果值的数组。

 ## varSamp(x) {#varsampx}

-计算金额 `Σ((x - x̅)^2) / (n - 1)`,哪里 `n` 是样本大小和 `x̅`是平均值 `x`.
+计算 `Σ((x - x̅)^2) / (n - 1)`,这里 `n` 是样本大小,`x̅` 是 `x` 的平均值。

 它表示随机变量的方差的无偏估计,如果传递的值形成其样本。

@@ -1550,23 +1548,23 @@ SELECT medianDeterministic(val, 1) FROM t

 ## varPop(x) {#varpopx}

-计算金额 `Σ((x - x̅)^2) / n`,哪里 `n` 是样本大小和 `x̅`是平均值 `x`.
+计算 `Σ((x - x̅)^2) / n`,这里 `n` 是样本大小,`x̅` 是 `x` 的平均值。

-换句话说,分散为一组值。 返回 `Float64`.
+换句话说,它计算一组数据的离散程度。返回 `Float64`。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `varPopStable` 功能。 它的工作速度较慢,但提供较低的计算错误。

 ## stddevSamp(x) {#stddevsampx}

-结果等于平方根 `varSamp(x)`.
+结果等于 `varSamp(x)` 的平方根。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 功能。 它的工作速度较慢,但提供较低的计算错误。

 ## stddevPop(x) {#stddevpopx}

-结果等于平方根 `varPop(x)`.
+结果等于 `varPop(x)` 的平方根。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevPopStable` 功能。 它的工作速度较慢,但提供较低的计算错误。
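上述方差与标准差函数在本页没有配套示例;下面补充一个简单的演示查询(补充示例,假定使用内置的 `numbers` 表,非原文内容):

``` sql
SELECT
    varSamp(number) AS var_samp,
    varPop(number) AS var_pop,
    stddevSamp(number) AS stddev_samp,
    stddevPop(number) AS stddev_pop
FROM numbers(10)
```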
@@ -1575,15 +1573,15 @@ SELECT medianDeterministic(val, 1) FROM t

 返回指定列中近似最常见值的数组。 生成的数组按值的近似频率降序排序(而不是值本身)。

-实现了 [过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) 基于reduce-and-combine算法的TopK分析算法 [并行节省空间](https://arxiv.org/pdf/1401.0702.pdf).
+实现了[过滤节省空间](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf)算法,使用了基于 reduce-and-combine 的方法,该方法借鉴自[并行节省空间](https://arxiv.org/pdf/1401.0702.pdf)。

``` sql
topK(N)(column)
```

-此函数不提供保证的结果。 在某些情况下,可能会发生错误,并且可能会返回不是最常见值的常见值。
+此函数不保证结果准确。 在某些情况下可能产生误差,返回的值不一定是出现频率最高的值。

-我们建议使用 `N < 10` 值;性能降低了大 `N` 值。 的最大值 `N = 65536`.
+我们建议使用 `N < 10`,`N` 值越大,性能越低。最大值 `N = 65536`。

 **参数**

@@ -1593,11 +1591,11 @@ topK(N)(column)

 **参数**

-- ' x ' – The value to calculate frequency.
+- ' x ' – 需要计算频次的值。

 **示例**

-就拿 [时间](../../getting-started/example-datasets/ontime.md) 数据集,并选择在三个最频繁出现的值 `AirlineID` 列。
+以 [OnTime](../../getting-started/example-datasets/ontime.md) 数据集为例,选择 `AirlineID` 列中出现最频繁的三个值。

``` sql
SELECT topK(3)(AirlineID) AS res
FROM ontime
```

@@ -1612,7 +1610,7 @@ FROM ontime

 ## topKWeighted {#topkweighted}

-类似于 `topK` 但需要一个整数类型的附加参数 - `weight`. 每个价值都被记入 `weight` 次频率计算。
+类似于 `topK`,但需要一个整数类型的附加参数 `weight`。每个输入值都按 `weight` 次计入频次统计。

 **语法**

``` sql
topKWeighted(N)(x, weight)
```

 **参数**

-- `N` — The number of elements to return.
+- `N` — 返回值个数。

 **参数**

-- `x` – The value.
-- `weight` — The weight. [UInt8](../../sql-reference/data-types/int-uint.md).
+- `x` – 输入值。
+- `weight` — 权重。 [UInt8](../../sql-reference/data-types/int-uint.md)类型。

 **返回值**

@@ -1651,36 +1649,36 @@ SELECT topKWeighted(10)(number, number) FROM numbers(1000)

 ## covarSamp(x,y) {#covarsampx-y}

-计算的值 `Σ((x - x̅)(y - y̅)) / (n - 1)`.
+计算 `Σ((x - x̅)(y - y̅)) / (n - 1)`。

-返回Float64。 当 `n <= 1`, returns +∞.
+返回 Float64。当 `n <= 1` 时,返回 +∞。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarSampStable` 功能。 它的工作速度较慢,但提供较低的计算错误。

 ## covarPop(x,y) {#covarpopx-y}

-计算的值 `Σ((x - x̅)(y - y̅)) / n`.
+计算 `Σ((x - x̅)(y - y̅)) / n`。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `covarPopStable` 功能。 它的工作速度较慢,但提供了较低的计算错误。

 ## corr(x,y) {#corrx-y}

-计算Pearson相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
+计算 Pearson 相关系数: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`。

 !!! note "注"
     该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `corrStable` 功能。 它的工作速度较慢,但提供较低的计算错误。

 ## categoricalInformationValue {#categoricalinformationvalue}

-计算的值 `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` 对于每个类别。
+对每个类别计算 `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))`。

``` sql
categoricalInformationValue(category1, category2, ..., tag)
```

-结果指示离散(分类)要素如何使用 `[category1, category2, ...]` 有助于预测的价值的学习模型 `tag`.
+结果表明离散(分类)特征 `[category1, category2, ...]` 对预测 `tag` 值的学习模型的贡献程度。

 ## simpleLinearRegression {#simplelinearregression}

@@ -1692,12 +1690,12 @@ simpleLinearRegression(x, y)

 参数:

-- `x` — Column with dependent variable values.
-- `y` — Column with explanatory variable values.
+- `x` — 自变量值的列。
+- `y` — 因变量值的列。

 返回值:

-常量 `(a, b)` 结果行的 `y = a*x + b`.
+符合 `y = a*x + b` 的常量 `(a, b)`。

 **例**

@@ -1721,9 +1719,9 @@ SELECT arrayReduce('simpleLinearRegression', [0, 1, 2, 3], [3, 4, 5, 6])
 └───────────────────────────────────────────────────────────────────┘
 ```

-## 随机指标线上回归 {#agg_functions-stochasticlinearregression}
+## stochasticLinearRegression {#agg_functions-stochasticlinearregression}

-该函数实现随机线性回归。 它支持自定义参数的学习率,L2正则化系数,迷你批量大小,并具有更新权重的方法很少 ([亚当](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (默认使用), [简单SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [动量](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
+该函数实现随机线性回归。它支持自定义参数:学习率、L2 正则化系数、mini-batch 大小,并提供几种更新权重的方法([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam)(默认)、[simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)、[Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum)、[Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf))。

 ### 参数 {#agg_functions-stochasticlinearregression-parameters}

@@ -1738,14 +1736,14 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD')
 3. `mini-batch size` 设置元素的数量,这些元素将被计算和求和以执行梯度下降的一个步骤。 纯随机下降使用一个元素,但是具有小批量(约10个元素)使梯度步骤更稳定。 默认值为 `15`.
 4. `method for updating weights` 他们是: `Adam` (默认情况下), `SGD`, `Momentum`, `Nesterov`. `Momentum` 和 `Nesterov` 需要更多的计算和内存,但是它们恰好在收敛速度和随机梯度方法的稳定性方面是有用的。

-### 用途 {#agg_functions-stochasticlinearregression-usage}
+### 用法 {#agg_functions-stochasticlinearregression-usage}

 `stochasticLinearRegression` 用于两个步骤:拟合模型和预测新数据。 为了拟合模型并保存其状态以供以后使用,我们使用 `-State` combinator,它基本上保存了状态(模型权重等)。
 为了预测我们使用函数 [evalMLMethod](../functions/machine-learning-functions.md#machine_learning_methods-evalmlmethod),这需要一个状态作为参数以及特征来预测。

-**1.** 适合
+**1.** 拟合

 可以使用这种查询。

@@ -1807,28 +1805,28 @@ evalMLMethod(model, param1, param2) FROM test_data
 stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
 ```

-1. 适合

-    See the `Fitting` section in the [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) description.

-    Predicted labels have to be in \[-1, 1\].

+**1.** 拟合

+    参见 [stochasticLinearRegression](#stochasticlinearregression-usage-fitting) 描述中的 `拟合` 部分。

+    预测标签的取值范围为 \[-1, 1\]。

-1. 预测

+**2.** 预测

-    Using saved state we can predict probability of object having label `1`.

+    使用已保存的 state,我们可以预测对象标签为 `1` 的概率。

``` sql
WITH (SELECT state FROM your_model) AS model SELECT
evalMLMethod(model, param1, param2) FROM test_data
```

-    The query will return a column of probabilities. Note that first argument of `evalMLMethod` is `AggregateFunctionState` object, next are columns of features.

+    查询会返回一列概率值。注意 `evalMLMethod` 的第一个参数是 `AggregateFunctionState` 对象,后面的参数是特征列。

-    We can also set a bound of probability, which assigns elements to different labels.

+    我们也可以设置概率的界限,用来把元素划分到不同的标签。

``` sql
SELECT ans < 1.1 AND ans > 0.5 FROM
(WITH (SELECT state FROM your_model) AS model SELECT
evalMLMethod(model, param1, param2) AS ans FROM test_data)
```

-    Then the result will be labels.

+    结果是标签。

-    `test_data` is a table like `train_data` but may not contain target value.
+ `test_data` 是一个像 `train_data` 一样的表,但是不包含目标值。 **另请参阅** - [随机指标线上回归](#agg_functions-stochasticlinearregression) -- [线性回归和逻辑回归之间的差异。](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [线性回归和逻辑回归之间的差异](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) ## groupBitmapAnd {#groupbitmapand} From 2c439afc015b15d9babb632b423991aeea0f397f Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 12 Jun 2020 19:17:34 +0800 Subject: [PATCH 0587/2229] ISSUES-7572 fix build failure --- src/Server/ReplicasStatusHandler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 9b3e00cc069..d6bbfdbd090 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -110,7 +110,7 @@ void addReplicasStatusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IS { auto replicas_status_handler = std::make_unique>(server); replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); - factory->addHandler(replicas_status_handler.release()); + factory.addHandler(replicas_status_handler.release()); } } From 0da6e1c9de6d5cdd7b24e08de815589c6d2d36cc Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 12 Jun 2020 15:12:12 +0300 Subject: [PATCH 0588/2229] typo --- docker/test/performance-comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 3d49e9e841a..241fdaec70d 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -167,7 +167,7 @@ function run_tests function run_benchmark { rm -rf benchmark ||: - mkdir bencmhark ||: + mkdir benchmark ||: # TODO disable this when there is an explicit list of tests to run "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv From 04f222f85b0e240c3511df68beab63f423379a89 Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 12 Jun 2020 16:48:00 +0400 Subject: [PATCH 0589/2229] Tell OpenLDAP too create a new SSL/TLS context for each connection --- src/Access/LDAPClient.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index fc6ee697468..b538c06c9ea 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -182,6 +182,13 @@ int LDAPClient::openConnection(const bool graceful_bind_failure) if (!params.ca_cert_file.empty()) diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.ca_cert_file.c_str())); +#ifdef LDAP_OPT_X_TLS_NEWCTX + { + const int i_am_a_server = 0; + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server)); + } +#endif + if (params.enable_tls == LDAPServerParams::TLSEnable::YES_STARTTLS) diag(ldap_start_tls_s(handle, nullptr, nullptr)); From a9514d725768d0025ed848e2a5ea016fb8ac5001 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 12 Jun 2020 16:52:41 +0300 Subject: [PATCH 0590/2229] trigger ci From 787163d0b489c464d33b3ba9ee003dcdaa4bf6a9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 17:03:00 +0300 Subject: [PATCH 0591/2229] Minor modifications after merging #11554 --- src/Functions/extractAllGroups.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/extractAllGroups.h 
b/src/Functions/extractAllGroups.h index a9206e7327e..b75e54b490e 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -54,7 +54,7 @@ public: size_t getNumberOfArguments() const override { return 2; } - bool useDefaultImplementationForConstants() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override From 0edf5ff7a25fea2bc9028a02ec0a147b0ca47f4d Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 12 Jun 2020 17:32:47 +0300 Subject: [PATCH 0592/2229] Fix race condition --- src/Interpreters/ExpressionActions.h | 2 + src/Storages/IStorage.cpp | 85 ++++++++++++++++++------ src/Storages/IStorage.h | 37 ++++++----- src/Storages/IndicesDescription.cpp | 10 ++- src/Storages/KeyDescription.cpp | 13 +++- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/TTLDescription.cpp | 47 +++++++++++-- src/Storages/TTLDescription.h | 4 ++ 8 files changed, 153 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index 080e8f8a10f..26493f857b0 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -163,6 +163,8 @@ public: ~ExpressionActions(); + ExpressionActions(const ExpressionActions & other) = default; + /// Add the input column. /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression. /// The expression must not have any PROJECT actions. diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 58cbb2bb7d6..a09bb45f9d0 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -32,24 +32,28 @@ namespace ErrorCodes extern const int DEADLOCK_AVOIDED; } -const ColumnsDescription & IStorage::getColumns() const +ColumnsDescription IStorage::getColumns() const { + std::lock_guard lock(metadata_mutex); return metadata.columns; } -const IndicesDescription & IStorage::getSecondaryIndices() const +IndicesDescription IStorage::getSecondaryIndices() const { + std::lock_guard lock(metadata_mutex); return metadata.secondary_indices; } bool IStorage::hasSecondaryIndices() const { + std::lock_guard lock(metadata_mutex); return !metadata.secondary_indices.empty(); } -const ConstraintsDescription & IStorage::getConstraints() const +ConstraintsDescription IStorage::getConstraints() const { + std::lock_guard lock(metadata_mutex); return metadata.constraints; } @@ -290,6 +294,7 @@ void IStorage::check(const Block & block, bool need_all) const void IStorage::setColumns(ColumnsDescription columns_) { + std::lock_guard lock(metadata_mutex); if (columns_.getOrdinary().empty()) throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); metadata.columns = std::move(columns_); @@ -297,11 +302,13 @@ void IStorage::setColumns(ColumnsDescription columns_) void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_) { + std::lock_guard lock(metadata_mutex); metadata.secondary_indices = std::move(secondary_indices_); } void IStorage::setConstraints(ConstraintsDescription constraints_) { + std::lock_guard lock(metadata_mutex); metadata.constraints = std::move(constraints_); } @@ -416,136 +423,160 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const KeyDescription & IStorage::getPartitionKey() const +KeyDescription 
IStorage::getPartitionKey() const { + std::lock_guard lock(metadata_mutex); return metadata.partition_key; } void IStorage::setPartitionKey(const KeyDescription & partition_key_) { + std::lock_guard lock(metadata_mutex); metadata.partition_key = partition_key_; } bool IStorage::isPartitionKeyDefined() const { + std::lock_guard lock(metadata_mutex); return metadata.partition_key.definition_ast != nullptr; } bool IStorage::hasPartitionKey() const { + std::lock_guard lock(metadata_mutex); return !metadata.partition_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPartitionKey() const { - if (hasPartitionKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.partition_key.column_names.empty()) return metadata.partition_key.expression->getRequiredColumns(); return {}; } -const KeyDescription & IStorage::getSortingKey() const +KeyDescription IStorage::getSortingKey() const { + std::lock_guard lock(metadata_mutex); return metadata.sorting_key; } void IStorage::setSortingKey(const KeyDescription & sorting_key_) { + std::lock_guard lock(metadata_mutex); metadata.sorting_key = sorting_key_; } bool IStorage::isSortingKeyDefined() const { + std::lock_guard lock(metadata_mutex); return metadata.sorting_key.definition_ast != nullptr; } bool IStorage::hasSortingKey() const { + std::lock_guard lock(metadata_mutex); return !metadata.sorting_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSortingKey() const { - if (hasSortingKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.sorting_key.column_names.empty()) return metadata.sorting_key.expression->getRequiredColumns(); return {}; } Names IStorage::getSortingKeyColumns() const { - if (hasSortingKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.sorting_key.column_names.empty()) return metadata.sorting_key.column_names; return {}; } -const KeyDescription & IStorage::getPrimaryKey() const +KeyDescription IStorage::getPrimaryKey() const { + std::lock_guard lock(metadata_mutex); return metadata.primary_key; } void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { + std::lock_guard lock(metadata_mutex); metadata.primary_key = primary_key_; } bool IStorage::isPrimaryKeyDefined() const { + std::lock_guard lock(metadata_mutex); return metadata.primary_key.definition_ast != nullptr; } bool IStorage::hasPrimaryKey() const { + std::lock_guard lock(metadata_mutex); return !metadata.primary_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPrimaryKey() const { - if (hasPrimaryKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.primary_key.column_names.empty()) return metadata.primary_key.expression->getRequiredColumns(); return {}; } Names IStorage::getPrimaryKeyColumns() const { - if (hasSortingKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.primary_key.column_names.empty()) return metadata.primary_key.column_names; return {}; } -const KeyDescription & IStorage::getSamplingKey() const +KeyDescription IStorage::getSamplingKey() const { + std::lock_guard lock(metadata_mutex); return metadata.sampling_key; } void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { + std::lock_guard lock(metadata_mutex); metadata.sampling_key = sampling_key_; } bool IStorage::isSamplingKeyDefined() const { + std::lock_guard lock(metadata_mutex); return metadata.sampling_key.definition_ast != nullptr; } bool IStorage::hasSamplingKey() const { + std::lock_guard lock(metadata_mutex); return !metadata.sampling_key.column_names.empty(); } Names 
IStorage::getColumnsRequiredForSampling() const { - if (hasSamplingKey()) + std::lock_guard lock(metadata_mutex); + if (!metadata.sampling_key.column_names.empty()) return metadata.sampling_key.expression->getRequiredColumns(); return {}; } -const TTLTableDescription & IStorage::getTableTTLs() const +TTLTableDescription IStorage::getTableTTLs() const { + std::lock_guard lock(metadata_mutex); return metadata.table_ttl; } void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) { + std::lock_guard lock(metadata_mutex); metadata.table_ttl = table_ttl_; } @@ -554,38 +585,45 @@ bool IStorage::hasAnyTableTTL() const return hasAnyMoveTTL() || hasRowsTTL(); } -const TTLColumnsDescription & IStorage::getColumnTTLs() const +TTLColumnsDescription IStorage::getColumnTTLs() const { + std::lock_guard lock(metadata_mutex); return metadata.column_ttls_by_name; } void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) { + std::lock_guard lock(metadata_mutex); metadata.column_ttls_by_name = column_ttls_by_name_; } bool IStorage::hasAnyColumnTTL() const { + std::lock_guard lock(metadata_mutex); return !metadata.column_ttls_by_name.empty(); } -const TTLDescription & IStorage::getRowsTTL() const +TTLDescription IStorage::getRowsTTL() const { + std::lock_guard lock(metadata_mutex); return metadata.table_ttl.rows_ttl; } bool IStorage::hasRowsTTL() const { + std::lock_guard lock(metadata_mutex); return metadata.table_ttl.rows_ttl.expression != nullptr; } -const TTLDescriptions & IStorage::getMoveTTLs() const +TTLDescriptions IStorage::getMoveTTLs() const { + std::lock_guard lock(metadata_mutex); return metadata.table_ttl.move_ttl; } bool IStorage::hasAnyMoveTTL() const { + std::lock_guard lock(metadata_mutex); return !metadata.table_ttl.move_ttl.empty(); } @@ -649,31 +687,38 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum } -const ASTPtr & IStorage::getSettingsChanges() const +ASTPtr IStorage::getSettingsChanges() const { - return metadata.settings_changes; + std::lock_guard lock(metadata_mutex); + if (metadata.settings_changes) + return metadata.settings_changes->clone(); + return nullptr; } void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { + std::lock_guard lock(metadata_mutex); if (settings_changes_) metadata.settings_changes = settings_changes_->clone(); else metadata.settings_changes = nullptr; } -const SelectQueryDescription & IStorage::getSelectQuery() const +SelectQueryDescription IStorage::getSelectQuery() const { + std::lock_guard lock(metadata_mutex); return metadata.select; } void IStorage::setSelectQuery(const SelectQueryDescription & select_) { + std::lock_guard lock(metadata_mutex); metadata.select = select_; } bool IStorage::hasSelectQuery() const { + std::lock_guard lock(metadata_mutex); return metadata.select.select_query != nullptr; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1a5a0a0753d..40ca901640b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -137,29 +137,33 @@ public: using ColumnSizeByName = std::unordered_map; virtual ColumnSizeByName getColumnSizes() const { return {}; } -public: /// thread-unsafe part. lockStructure must be acquired - const ColumnsDescription & getColumns() const; /// returns combined set of columns +public: + /// NOTE: These methods are thread-safe now, but require additional + /// structure lock to get consistent metadata snapshot. This will be fixed + /// soon. 
TODO(alesap) + + ColumnsDescription getColumns() const; /// returns combined set of columns void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. void setSecondaryIndices(IndicesDescription secondary_indices_); - const IndicesDescription & getSecondaryIndices() const; + IndicesDescription getSecondaryIndices() const; /// Has at least one non primary index bool hasSecondaryIndices() const; - const ConstraintsDescription & getConstraints() const; + ConstraintsDescription getConstraints() const; void setConstraints(ConstraintsDescription constraints_); /// Storage settings - const ASTPtr & getSettingsChanges() const; + ASTPtr getSettingsChanges() const; void setSettingsChanges(const ASTPtr & settings_changes_); bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } /// Select query for *View storages. - const SelectQueryDescription & getSelectQuery() const; + SelectQueryDescription getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; - const StorageInMemoryMetadata & getInMemoryMetadata() const { return metadata; } + StorageInMemoryMetadata getInMemoryMetadata() const { return metadata; } Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. @@ -204,7 +208,8 @@ private: StorageID storage_id; mutable std::mutex id_mutex; - + /// TODO (alesap) just use multiversion for atomic metadata + mutable std::mutex metadata_mutex; StorageInMemoryMetadata metadata; private: RWLockImpl::LockHolder tryLockTimed( @@ -438,7 +443,7 @@ public: virtual Strings getDataPaths() const { return {}; } /// Returns structure with partition key. - const KeyDescription & getPartitionKey() const; + KeyDescription getPartitionKey() const; /// Set partition key for storage (methods bellow, are just wrappers for this /// struct). void setPartitionKey(const KeyDescription & partition_key_); @@ -453,7 +458,7 @@ public: /// Returns structure with sorting key. - const KeyDescription & getSortingKey() const; + KeyDescription getSortingKey() const; /// Set sorting key for storage (methods bellow, are just wrappers for this /// struct). void setSortingKey(const KeyDescription & sorting_key_); @@ -470,7 +475,7 @@ public: Names getSortingKeyColumns() const; /// Returns structure with primary key. - const KeyDescription & getPrimaryKey() const; + KeyDescription getPrimaryKey() const; /// Set primary key for storage (methods bellow, are just wrappers for this /// struct). void setPrimaryKey(const KeyDescription & primary_key_); @@ -488,7 +493,7 @@ public: Names getPrimaryKeyColumns() const; /// Returns structure with sampling key. - const KeyDescription & getSamplingKey() const; + KeyDescription getSamplingKey() const; /// Set sampling key for storage (methods bellow, are just wrappers for this /// struct). void setSamplingKey(const KeyDescription & sampling_key_); @@ -512,22 +517,22 @@ public: virtual StoragePolicyPtr getStoragePolicy() const { return {}; } /// Common tables TTLs (for rows and moves). - const TTLTableDescription & getTableTTLs() const; + TTLTableDescription getTableTTLs() const; void setTableTTLs(const TTLTableDescription & table_ttl_); bool hasAnyTableTTL() const; /// Separate TTLs for columns. 
- const TTLColumnsDescription & getColumnTTLs() const; + TTLColumnsDescription getColumnTTLs() const; void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); bool hasAnyColumnTTL() const; /// Just wrapper for table TTLs, return rows part of table TTLs. - const TTLDescription & getRowsTTL() const; + TTLDescription getRowsTTL() const; bool hasRowsTTL() const; /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of /// table TTL. - const TTLDescriptions & getMoveTTLs() const; + TTLDescriptions getMoveTTLs() const; bool hasAnyMoveTTL() const; /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index d59aef2ecaa..ee9a9681e61 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -24,13 +24,14 @@ IndexDescription::IndexDescription(const IndexDescription & other) , expression_list_ast(other.expression_list_ast ? other.expression_list_ast->clone() : nullptr) , name(other.name) , type(other.type) - , expression(other.expression) /// actions never changed , arguments(other.arguments) , column_names(other.column_names) , data_types(other.data_types) , sample_block(other.sample_block) , granularity(other.granularity) { + if (other.expression) + expression = std::make_shared(*other.expression); } @@ -51,7 +52,12 @@ IndexDescription & IndexDescription::operator=(const IndexDescription & other) name = other.name; type = other.type; - expression = other.expression; + + if (other.expression) + expression = std::make_shared(*other.expression); + else + expression.reset(); + arguments = other.arguments; column_names = other.column_names; data_types = other.data_types; diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 4eb18320ad9..7d5b0d56008 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -19,12 +19,13 @@ namespace ErrorCodes KeyDescription::KeyDescription(const KeyDescription & other) : definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr) , expression_list_ast(other.expression_list_ast ? 
other.expression_list_ast->clone() : nullptr) - , expression(other.expression) , sample_block(other.sample_block) , column_names(other.column_names) , data_types(other.data_types) , additional_column(other.additional_column) { + if (other.expression) + expression = std::make_shared(*other.expression); } KeyDescription & KeyDescription::operator=(const KeyDescription & other) @@ -39,7 +40,15 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) if (other.expression_list_ast) expression_list_ast = other.expression_list_ast->clone(); - expression = other.expression; + else + expression_list_ast.reset(); + + + if (other.expression) + expression = std::make_shared(*other.expression); + else + expression.reset(); + sample_block = other.sample_block; column_names = other.column_names; data_types = other.data_types; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7f8de6f3856..fdd62b03046 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1344,7 +1344,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S if (hasSettingsChanges()) { - const auto & current_changes = getSettingsChanges()->as().changes; + const auto current_changes = getSettingsChanges()->as().changes; const auto & new_changes = new_metadata.settings_changes->as().changes; for (const auto & changed_setting : new_changes) { diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index e241b7676a0..898df5006fd 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -23,6 +23,26 @@ extern const int BAD_ARGUMENTS; extern const int BAD_TTL_EXPRESSION; } + +TTLAggregateDescription::TTLAggregateDescription(const TTLAggregateDescription & other) + : column_name(other.column_name) + , expression_result_column_name(other.expression_result_column_name) +{ + if (other.expression) + expression = std::make_shared(*other.expression); +} + +TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateDescription & other) +{ + column_name = other.column_name; + expression_result_column_name = other.expression_result_column_name; + if (other.expression) + expression = std::make_shared(*other.expression); + else + expression.reset(); + return *this; +} + namespace { @@ -58,9 +78,7 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin TTLDescription::TTLDescription(const TTLDescription & other) : mode(other.mode) , expression_ast(other.expression_ast ? 
other.expression_ast->clone() : nullptr) - , expression(other.expression) , result_column(other.result_column) - , where_expression(other.where_expression) , where_result_column(other.where_result_column) , group_by_keys(other.group_by_keys) , set_parts(other.set_parts) @@ -68,6 +86,11 @@ TTLDescription::TTLDescription(const TTLDescription & other) , destination_type(other.destination_type) , destination_name(other.destination_name) { + if (other.expression) + expression = std::make_shared(*other.expression); + + if (other.where_expression) + where_expression = std::make_shared(*other.where_expression); } TTLDescription & TTLDescription::operator=(const TTLDescription & other) @@ -81,9 +104,17 @@ TTLDescription & TTLDescription::operator=(const TTLDescription & other) else expression_ast.reset(); - expression = other.expression; + if (other.expression) + expression = std::make_shared(*other.expression); + else + expression.reset(); + result_column = other.result_column; - where_expression = other.where_expression; + if (other.where_expression) + where_expression = std::make_shared(*other.where_expression); + else + where_expression.reset(); + where_result_column = other.where_result_column; group_by_keys = other.group_by_keys; set_parts = other.set_parts; @@ -218,8 +249,12 @@ TTLDescription TTLDescription::getTTLFromAST( auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, true); auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context); - result.set_parts.emplace_back(TTLAggregateDescription{ - name, value->getColumnName(), expr_analyzer.getActions(false)}); + TTLAggregateDescription set_part; + set_part.column_name = name; + set_part.expression_result_column_name = value->getColumnName(); + set_part.expression = expr_analyzer.getActions(false); + + result.set_parts.emplace_back(set_part); for (const auto & descr : expr_analyzer.getAnalyzedData().aggregate_descriptions) result.aggregate_descriptions.push_back(descr); diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 86e82e14c73..906cfb0e675 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -25,6 +25,10 @@ struct TTLAggregateDescription /// Expressions to calculate the value of assignment expression ExpressionActionsPtr expression; + + TTLAggregateDescription() = default; + TTLAggregateDescription(const TTLAggregateDescription & other); + TTLAggregateDescription & operator=(const TTLAggregateDescription & other); }; using TTLAggregateDescriptions = std::vector; From 83155e139c681836adc4a843d88a8f6839981f88 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 12 Jun 2020 17:59:14 +0300 Subject: [PATCH 0593/2229] Try fix tests. 
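The copy constructors added in patch 0592 above clone the shared ExpressionActions state instead of copying the shared_ptr, so two metadata snapshots can never mutate one shared object concurrently. A minimal standalone sketch of that pattern, using a hypothetical Expression type rather than the real ClickHouse classes:

#include <cassert>
#include <memory>

// Hypothetical stand-ins for ExpressionActions / TTLDescription.
struct Expression { int state = 0; };
using ExpressionPtr = std::shared_ptr<Expression>;

struct Description
{
    ExpressionPtr expression;

    Description() = default;

    // Copy the pointee, not the pointer: each copy owns independent state.
    Description(const Description & other)
    {
        if (other.expression)
            expression = std::make_shared<Expression>(*other.expression);
    }

    Description & operator=(const Description & other)
    {
        if (this != &other)
        {
            if (other.expression)
                expression = std::make_shared<Expression>(*other.expression);
            else
                expression.reset();
        }
        return *this;
    }
};

int main()
{
    Description a;
    a.expression = std::make_shared<Expression>();

    Description b = a;        // deep copy
    b.expression->state = 1;

    assert(a.expression->state == 0);  // the original is unaffected
}

Cloning the pointee costs an allocation per copy, but it lets a snapshot taken under `metadata_mutex` be read afterwards without any further locking.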
--- src/Storages/MergeTree/MergeTreeRangeReader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 6cdd0270dea..667b0b2da96 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -906,7 +906,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r result.columns.erase(result.columns.begin() + prewhere_column_pos); else result.columns[prewhere_column_pos] = - result.block_before_prewhere.getByPosition(prewhere_column_pos).type-> + getSampleBlock().getByName(prewhere->prewhere_column_name).type-> createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } From 72257061d5678606f75faa33b0ca72d54ae1fec8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 18:09:12 +0300 Subject: [PATCH 0594/2229] Avoid errors due to implicit int<->bool conversions when using ZK API --- programs/copier/ClusterCopier.cpp | 32 +-- programs/copier/ZooKeeperStaff.h | 2 +- src/Common/ZooKeeper/IKeeper.cpp | 94 ++++---- src/Common/ZooKeeper/IKeeper.h | 113 +++++----- src/Common/ZooKeeper/Increment.h | 4 +- src/Common/ZooKeeper/KeeperException.h | 6 +- src/Common/ZooKeeper/LeaderElection.h | 2 +- src/Common/ZooKeeper/Lock.cpp | 6 +- src/Common/ZooKeeper/TestKeeper.cpp | 14 +- src/Common/ZooKeeper/ZooKeeper.cpp | 207 +++++++++--------- src/Common/ZooKeeper/ZooKeeper.h | 40 ++-- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 103 +++++---- .../gtest_zkutil_test_multi_exception.cpp | 8 +- .../tests/zkutil_expiration_test.cpp | 6 +- .../tests/zkutil_test_commands_new_lib.cpp | 44 ++-- src/Common/ZooKeeper/tests/zookeeper_impl.cpp | 4 +- src/Interpreters/DDLWorker.cpp | 19 +- .../MergeTree/EphemeralLockInZooKeeper.cpp | 6 +- .../ReplicatedMergeTreeBlockOutputStream.cpp | 22 +- .../ReplicatedMergeTreeCleanupThread.cpp | 14 +- .../ReplicatedMergeTreePartCheckThread.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 18 +- .../ReplicatedMergeTreeRestartingThread.cpp | 8 +- src/Storages/StorageReplicatedMergeTree.cpp | 122 +++++------ .../System/StorageSystemZooKeeper.cpp | 2 +- .../get_current_inserts_in_replicated.cpp | 2 +- .../tests/transform_part_zk_nodes.cpp | 2 +- utils/zookeeper-dump-tree/main.cpp | 2 +- 28 files changed, 462 insertions(+), 442 deletions(-) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 5254d2a97ac..7fa0f663295 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -25,7 +25,7 @@ void ClusterCopier::init() task_description_watch_callback = [this] (const Coordination::WatchResponse & response) { - if (response.error != Coordination::ZOK) + if (response.error != Coordination::Error::ZOK) return; UInt64 version = ++task_description_version; LOG_DEBUG(log, "Task description should be updated, local version {}", version); @@ -206,11 +206,11 @@ void ClusterCopier::uploadTaskDescription(const std::string & task_path, const s zookeeper->createAncestors(local_task_description_path); auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - if (code && force) + if (code != Coordination::Error::ZOK && force) zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); LOG_DEBUG(log, "Task description {} uploaded to {} with result {} ({})", - ((code && !force) ? 
"not " : ""), local_task_description_path, code, zookeeper->error2string(code)); + ((code != Coordination::Error::ZOK && !force) ? "not " : ""), local_task_description_path, code, Coordination::errorMessage(code)); } void ClusterCopier::reloadTaskDescription() @@ -220,10 +220,10 @@ void ClusterCopier::reloadTaskDescription() String task_config_str; Coordination::Stat stat{}; - int code; + Coordination::Error code; zookeeper->tryGetWatch(task_description_path, task_config_str, &stat, task_description_watch_callback, &code); - if (code) + if (code != Coordination::Error::ZOK) throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS); LOG_DEBUG(log, "Loading description, zxid={}", task_description_current_stat.czxid); @@ -376,10 +376,10 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZOK || code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) return std::make_shared(current_worker_path, *zookeeper, false, false, description); - if (code == Coordination::ZBADVERSION) + if (code == Coordination::Error::ZBADVERSION) { ++num_bad_version_errors; @@ -545,7 +545,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Someone is already moving pieces {}", current_partition_attach_is_active); return TaskStatus::Active; @@ -745,7 +745,7 @@ bool ClusterCopier::tryDropPartitionPiece( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number)); std::this_thread::sleep_for(default_sleep_time); @@ -778,7 +778,7 @@ bool ClusterCopier::tryDropPartitionPiece( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Partition {} is being filled now by somebody, sleep", task_partition.name); return false; @@ -795,7 +795,7 @@ bool ClusterCopier::tryDropPartitionPiece( /// Remove all status nodes { Strings children; - if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::ZOK) + if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::Error::ZOK) for (const auto & child : children) { zookeeper->removeRecursive(current_shards_path + "/" + child); @@ -845,7 +845,7 @@ bool ClusterCopier::tryDropPartitionPiece( } LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name); - if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::ZNODEEXISTS) + if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::Error::ZNODEEXISTS) zookeeper->set(current_shards_path, host_id); } @@ -1233,7 +1233,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) { LOG_DEBUG(log, "Someone is already processing {}", 
current_task_piece_is_active_path); return TaskStatus::Active; @@ -1271,9 +1271,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( { String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent); - if (res == Coordination::ZNODEEXISTS) + if (res == Coordination::Error::ZNODEEXISTS) LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number); - if (res == Coordination::ZOK) + if (res == Coordination::Error::ZOK) LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number); return TaskStatus::Finished; } @@ -1429,7 +1429,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( { Coordination::ExistsResponse status = future_is_dirty_checker.get(); - if (status.error != Coordination::ZNONODE) + if (status.error != Coordination::Error::ZNONODE) { LogicalClock dirt_discovery_epoch (status.stat.mzxid); if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h index edd0d9e43d2..66036ae2f27 100644 --- a/programs/copier/ZooKeeperStaff.h +++ b/programs/copier/ZooKeeperStaff.h @@ -178,7 +178,7 @@ public: [stale = stale] (const Coordination::WatchResponse & rsp) { auto logger = &Poco::Logger::get("ClusterCopier"); - if (rsp.error == Coordination::ZOK) + if (rsp.error == Coordination::Error::ZOK) { switch (rsp.type) { diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 5c27971038f..cb378ba1e13 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -23,7 +23,7 @@ namespace ProfileEvents namespace Coordination { -Exception::Exception(const std::string & msg, const int32_t code_, int) +Exception::Exception(const std::string & msg, const Error code_, int) : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION), code(code_) { if (Coordination::isUserError(code)) @@ -34,17 +34,17 @@ Exception::Exception(const std::string & msg, const int32_t code_, int) ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions); } -Exception::Exception(const std::string & msg, const int32_t code_) +Exception::Exception(const std::string & msg, const Error code_) : Exception(msg + " (" + errorMessage(code_) + ")", code_, 0) { } -Exception::Exception(const int32_t code_) +Exception::Exception(const Error code_) : Exception(errorMessage(code_), code_, 0) { } -Exception::Exception(const int32_t code_, const std::string & path) +Exception::Exception(const Error code_, const std::string & path) : Exception(std::string{errorMessage(code_)} + ", path: " + path, code_, 0) { } @@ -58,10 +58,10 @@ using namespace DB; static void addRootPath(String & path, const String & root_path) { if (path.empty()) - throw Exception("Path cannot be empty", ZBADARGUMENTS); + throw Exception("Path cannot be empty", Error::ZBADARGUMENTS); if (path[0] != '/') - throw Exception("Path must begin with /", ZBADARGUMENTS); + throw Exception("Path must begin with /", Error::ZBADARGUMENTS); if (root_path.empty()) return; @@ -78,64 +78,62 @@ static void removeRootPath(String & path, const String & root_path) return; if (path.size() <= root_path.size()) - throw Exception("Received path is not longer than root_path", 
ZDATAINCONSISTENCY); + throw Exception("Received path is not longer than root_path", Error::ZDATAINCONSISTENCY); path = path.substr(root_path.size()); } -const char * errorMessage(int32_t code) +const char * errorMessage(Error code) { switch (code) { - case ZOK: return "Ok"; - case ZSYSTEMERROR: return "System error"; - case ZRUNTIMEINCONSISTENCY: return "Run time inconsistency"; - case ZDATAINCONSISTENCY: return "Data inconsistency"; - case ZCONNECTIONLOSS: return "Connection loss"; - case ZMARSHALLINGERROR: return "Marshalling error"; - case ZUNIMPLEMENTED: return "Unimplemented"; - case ZOPERATIONTIMEOUT: return "Operation timeout"; - case ZBADARGUMENTS: return "Bad arguments"; - case ZINVALIDSTATE: return "Invalid zhandle state"; - case ZAPIERROR: return "API error"; - case ZNONODE: return "No node"; - case ZNOAUTH: return "Not authenticated"; - case ZBADVERSION: return "Bad version"; - case ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals"; - case ZNODEEXISTS: return "Node exists"; - case ZNOTEMPTY: return "Not empty"; - case ZSESSIONEXPIRED: return "Session expired"; - case ZINVALIDCALLBACK: return "Invalid callback"; - case ZINVALIDACL: return "Invalid ACL"; - case ZAUTHFAILED: return "Authentication failed"; - case ZCLOSING: return "ZooKeeper is closing"; - case ZNOTHING: return "(not error) no server responses to process"; - case ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; + case Error::ZOK: return "Ok"; + case Error::ZSYSTEMERROR: return "System error"; + case Error::ZRUNTIMEINCONSISTENCY: return "Run time inconsistency"; + case Error::ZDATAINCONSISTENCY: return "Data inconsistency"; + case Error::ZCONNECTIONLOSS: return "Connection loss"; + case Error::ZMARSHALLINGERROR: return "Marshalling error"; + case Error::ZUNIMPLEMENTED: return "Unimplemented"; + case Error::ZOPERATIONTIMEOUT: return "Operation timeout"; + case Error::ZBADARGUMENTS: return "Bad arguments"; + case Error::ZINVALIDSTATE: return "Invalid zhandle state"; + case Error::ZAPIERROR: return "API error"; + case Error::ZNONODE: return "No node"; + case Error::ZNOAUTH: return "Not authenticated"; + case Error::ZBADVERSION: return "Bad version"; + case Error::ZNOCHILDRENFOREPHEMERALS: return "No children for ephemerals"; + case Error::ZNODEEXISTS: return "Node exists"; + case Error::ZNOTEMPTY: return "Not empty"; + case Error::ZSESSIONEXPIRED: return "Session expired"; + case Error::ZINVALIDCALLBACK: return "Invalid callback"; + case Error::ZINVALIDACL: return "Invalid ACL"; + case Error::ZAUTHFAILED: return "Authentication failed"; + case Error::ZCLOSING: return "ZooKeeper is closing"; + case Error::ZNOTHING: return "(not error) no server responses to process"; + case Error::ZSESSIONMOVED: return "Session moved to another server, so operation is ignored"; } - if (code > 0) - return strerror(code); - return "unknown error"; + __builtin_unreachable(); } -bool isHardwareError(int32_t zk_return_code) +bool isHardwareError(Error zk_return_code) { - return zk_return_code == ZINVALIDSTATE - || zk_return_code == ZSESSIONEXPIRED - || zk_return_code == ZSESSIONMOVED - || zk_return_code == ZCONNECTIONLOSS - || zk_return_code == ZMARSHALLINGERROR - || zk_return_code == ZOPERATIONTIMEOUT; + return zk_return_code == Error::ZINVALIDSTATE + || zk_return_code == Error::ZSESSIONEXPIRED + || zk_return_code == Error::ZSESSIONMOVED + || zk_return_code == Error::ZCONNECTIONLOSS + || zk_return_code == Error::ZMARSHALLINGERROR + || zk_return_code == Error::ZOPERATIONTIMEOUT; } -bool 
isUserError(int32_t zk_return_code) +bool isUserError(Error zk_return_code) { - return zk_return_code == ZNONODE - || zk_return_code == ZBADVERSION - || zk_return_code == ZNOCHILDRENFOREPHEMERALS - || zk_return_code == ZNODEEXISTS - || zk_return_code == ZNOTEMPTY; + return zk_return_code == Error::ZNONODE + || zk_return_code == Error::ZBADVERSION + || zk_return_code == Error::ZNOCHILDRENFOREPHEMERALS + || zk_return_code == Error::ZNODEEXISTS + || zk_return_code == Error::ZNOTEMPTY; } diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index f415e0306e8..409c3838147 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -53,6 +53,57 @@ struct Stat int64_t pzxid; }; +enum class Error : int32_t +{ + ZOK = 0, + + /** System and server-side errors. + * This is never thrown by the server, it shouldn't be used other than + * to indicate a range. Specifically error codes greater than this + * value, but lesser than ZAPIERROR, are system errors. + */ + ZSYSTEMERROR = -1, + + ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found + ZDATAINCONSISTENCY = -3, /// A data inconsistency was found + ZCONNECTIONLOSS = -4, /// Connection to the server has been lost + ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data + ZUNIMPLEMENTED = -6, /// Operation is unimplemented + ZOPERATIONTIMEOUT = -7, /// Operation timeout + ZBADARGUMENTS = -8, /// Invalid arguments + ZINVALIDSTATE = -9, /// Invliad zhandle state + + /** API errors. + * This is never thrown by the server, it shouldn't be used other than + * to indicate a range. Specifically error codes greater than this + * value are API errors. + */ + ZAPIERROR = -100, + + ZNONODE = -101, /// Node does not exist + ZNOAUTH = -102, /// Not authenticated + ZBADVERSION = -103, /// Version conflict + ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children + ZNODEEXISTS = -110, /// The node already exists + ZNOTEMPTY = -111, /// The node has children + ZSESSIONEXPIRED = -112, /// The session has been expired by the server + ZINVALIDCALLBACK = -113, /// Invalid callback specified + ZINVALIDACL = -114, /// Invalid ACL specified + ZAUTHFAILED = -115, /// Client authentication failed + ZCLOSING = -116, /// ZooKeeper is closing + ZNOTHING = -117, /// (not error) no server responses to process + ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored +}; + +/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors +bool isHardwareError(Error code); + +/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here. +bool isUserError(Error code); + +const char * errorMessage(Error code); + + struct Request; using RequestPtr = std::shared_ptr; using Requests = std::vector; @@ -74,7 +125,7 @@ using ResponseCallback = std::function; struct Response { - int32_t error = 0; + Error error = Error::ZOK; Response() = default; Response(const Response &) = default; Response & operator=(const Response &) = default; @@ -225,56 +276,6 @@ using CheckCallback = std::function; using MultiCallback = std::function; -enum Error -{ - ZOK = 0, - - /** System and server-side errors. - * This is never thrown by the server, it shouldn't be used other than - * to indicate a range. Specifically error codes greater than this - * value, but lesser than ZAPIERROR, are system errors. 
- */ - ZSYSTEMERROR = -1, - - ZRUNTIMEINCONSISTENCY = -2, /// A runtime inconsistency was found - ZDATAINCONSISTENCY = -3, /// A data inconsistency was found - ZCONNECTIONLOSS = -4, /// Connection to the server has been lost - ZMARSHALLINGERROR = -5, /// Error while marshalling or unmarshalling data - ZUNIMPLEMENTED = -6, /// Operation is unimplemented - ZOPERATIONTIMEOUT = -7, /// Operation timeout - ZBADARGUMENTS = -8, /// Invalid arguments - ZINVALIDSTATE = -9, /// Invliad zhandle state - - /** API errors. - * This is never thrown by the server, it shouldn't be used other than - * to indicate a range. Specifically error codes greater than this - * value are API errors. - */ - ZAPIERROR = -100, - - ZNONODE = -101, /// Node does not exist - ZNOAUTH = -102, /// Not authenticated - ZBADVERSION = -103, /// Version conflict - ZNOCHILDRENFOREPHEMERALS = -108, /// Ephemeral nodes may not have children - ZNODEEXISTS = -110, /// The node already exists - ZNOTEMPTY = -111, /// The node has children - ZSESSIONEXPIRED = -112, /// The session has been expired by the server - ZINVALIDCALLBACK = -113, /// Invalid callback specified - ZINVALIDACL = -114, /// Invalid ACL specified - ZAUTHFAILED = -115, /// Client authentication failed - ZCLOSING = -116, /// ZooKeeper is closing - ZNOTHING = -117, /// (not error) no server responses to process - ZSESSIONMOVED = -118 /// Session moved to another server, so operation is ignored -}; - -/// Network errors and similar. You should reinitialize ZooKeeper session in case of these errors -bool isHardwareError(int32_t code); - -/// Valid errors sent from the server about database state (like "no node"). Logical and authentication errors (like "bad arguments") are not here. -bool isUserError(int32_t code); - -const char * errorMessage(int32_t code); - /// For watches. enum State { @@ -301,19 +302,19 @@ class Exception : public DB::Exception { private: /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution. 
- Exception(const std::string & msg, const int32_t code_, int); + Exception(const std::string & msg, const Error code_, int); public: - explicit Exception(const int32_t code_); - Exception(const std::string & msg, const int32_t code_); - Exception(const int32_t code_, const std::string & path); + explicit Exception(const Error code_); + Exception(const std::string & msg, const Error code_); + Exception(const Error code_, const std::string & path); Exception(const Exception & exc); const char * name() const throw() override { return "Coordination::Exception"; } const char * className() const throw() override { return "Coordination::Exception"; } Exception * clone() const override { return new Exception(*this); } - const int32_t code; + const Error code; }; diff --git a/src/Common/ZooKeeper/Increment.h b/src/Common/ZooKeeper/Increment.h index fa5f550ca9b..883fc00442e 100644 --- a/src/Common/ZooKeeper/Increment.h +++ b/src/Common/ZooKeeper/Increment.h @@ -29,11 +29,11 @@ public: if (zookeeper->tryGet(path, result_str, &stat)) { result = std::stol(result_str) + 1; - success = zookeeper->trySet(path, std::to_string(result), stat.version) == Coordination::ZOK; + success = zookeeper->trySet(path, std::to_string(result), stat.version) == Coordination::Error::ZOK; } else { - success = zookeeper->tryCreate(path, std::to_string(result), zkutil::CreateMode::Persistent) == Coordination::ZOK; + success = zookeeper->tryCreate(path, std::to_string(result), zkutil::CreateMode::Persistent) == Coordination::Error::ZOK; } } while (!success); diff --git a/src/Common/ZooKeeper/KeeperException.h b/src/Common/ZooKeeper/KeeperException.h index 5fcca4cf3d2..6498aca809c 100644 --- a/src/Common/ZooKeeper/KeeperException.h +++ b/src/Common/ZooKeeper/KeeperException.h @@ -21,12 +21,12 @@ public: /// If it is user error throws KeeperMultiException else throws ordinary KeeperException /// If it is ZOK does nothing - static void check(int32_t code, const Coordination::Requests & requests, const Coordination::Responses & responses); + static void check(Coordination::Error code, const Coordination::Requests & requests, const Coordination::Responses & responses); - KeeperMultiException(int32_t code, const Coordination::Requests & requests, const Coordination::Responses & responses); + KeeperMultiException(Coordination::Error code, const Coordination::Requests & requests, const Coordination::Responses & responses); private: - static size_t getFailedOpIndex(int32_t code, const Coordination::Responses & responses); + static size_t getFailedOpIndex(Coordination::Error code, const Coordination::Responses & responses); }; } diff --git a/src/Common/ZooKeeper/LeaderElection.h b/src/Common/ZooKeeper/LeaderElection.h index e3b97e7f8ca..dca87efe7c2 100644 --- a/src/Common/ZooKeeper/LeaderElection.h +++ b/src/Common/ZooKeeper/LeaderElection.h @@ -121,7 +121,7 @@ private: { DB::tryLogCurrentException(log); - if (e.code == Coordination::ZSESSIONEXPIRED) + if (e.code == Coordination::Error::ZSESSIONEXPIRED) return; } catch (...) 
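The mechanical `ZOK` → `Error::ZOK` rewrites in this patch are what make the scoped enum pay off: `enum class` has no implicit conversion to `int` or `bool`, so the accidental truthiness checks named in the commit subject stop compiling. A minimal standalone sketch, with hypothetical functions rather than the real ZooKeeper client API:

#include <cstdint>
#include <iostream>

// Before: plain int error codes, 0 (ZOK) means success.
int tryCreateLegacy() { return -110; }  // nonzero code means failure (e.g. ZNODEEXISTS)

// After: a scoped enum with the same underlying values.
enum class Error : int32_t { ZOK = 0, ZNODEEXISTS = -110 };
Error tryCreateScoped() { return Error::ZNODEEXISTS; }

int main()
{
    // Compiles silently, but the implicit int -> bool conversion inverts the
    // meaning: "created" is true exactly when creation failed.
    bool created_legacy = tryCreateLegacy();
    std::cout << created_legacy << '\n';   // prints 1 -- wrong

    // bool broken = tryCreateScoped();    // would not compile: no implicit conversion

    // The caller is forced to compare against an explicit enumerator.
    bool created_scoped = (tryCreateScoped() == Error::ZOK);
    std::cout << created_scoped << '\n';   // prints 0 -- correct
}

Keeping `int32_t` as the explicit underlying type preserves the wire-protocol values, so serialization code can still `static_cast` where it genuinely needs the integer.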
diff --git a/src/Common/ZooKeeper/Lock.cpp b/src/Common/ZooKeeper/Lock.cpp index c781d8ba2bf..9c966cc576d 100644 --- a/src/Common/ZooKeeper/Lock.cpp +++ b/src/Common/ZooKeeper/Lock.cpp @@ -16,13 +16,13 @@ bool Lock::tryLock() else { std::string dummy; - int32_t code = zookeeper->tryCreate(lock_path, lock_message, zkutil::CreateMode::Ephemeral, dummy); + Coordination::Error code = zookeeper->tryCreate(lock_path, lock_message, zkutil::CreateMode::Ephemeral, dummy); - if (code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZNODEEXISTS) { locked.reset(); } - else if (code == Coordination::ZOK) + else if (code == Coordination::Error::ZOK) { locked = std::make_unique(zookeeper); } diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 4f736e66aab..a734d218ff6 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -158,7 +158,7 @@ struct TestKeeperMultiRequest final : MultiRequest, TestKeeperRequest requests.push_back(std::make_shared(*concrete_request_check)); } else - throw Exception("Illegal command as part of multi ZooKeeper request", ZBADARGUMENTS); + throw Exception("Illegal command as part of multi ZooKeeper request", Error::ZBADARGUMENTS); } } @@ -338,7 +338,7 @@ ResponsePtr TestKeeperListRequest::process(TestKeeper::Container & container, in { auto path_prefix = path; if (path_prefix.empty()) - throw Exception("Logical error: path cannot be empty", ZSESSIONEXPIRED); + throw Exception("Logical error: path cannot be empty", Error::ZSESSIONEXPIRED); if (path_prefix.back() != '/') path_prefix += '/'; @@ -514,7 +514,7 @@ void TestKeeper::finalize() WatchResponse response; response.type = SESSION; response.state = EXPIRED_SESSION; - response.error = ZSESSIONEXPIRED; + response.error = Error::ZSESSIONEXPIRED; for (auto & callback : path_watch.second) { @@ -541,7 +541,7 @@ void TestKeeper::finalize() if (info.callback) { ResponsePtr response = info.request->createResponse(); - response->error = ZSESSIONEXPIRED; + response->error = Error::ZSESSIONEXPIRED; try { info.callback(*response); @@ -556,7 +556,7 @@ void TestKeeper::finalize() WatchResponse response; response.type = SESSION; response.state = EXPIRED_SESSION; - response.error = ZSESSIONEXPIRED; + response.error = Error::ZSESSIONEXPIRED; try { info.watch(response); @@ -587,10 +587,10 @@ void TestKeeper::pushRequest(RequestInfo && request) std::lock_guard lock(push_request_mutex); if (expired) - throw Exception("Session expired", ZSESSIONEXPIRED); + throw Exception("Session expired", Error::ZSESSIONEXPIRED); if (!requests_queue.tryPush(std::move(request), operation_timeout.totalMilliseconds())) - throw Exception("Cannot push request to queue within operation timeout", ZOPERATIONTIMEOUT); + throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); } catch (...) 
{ diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 115518e2bf9..169299483ce 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -38,9 +38,9 @@ const int CreateMode::PersistentSequential = 2; const int CreateMode::EphemeralSequential = 3; -static void check(int32_t code, const std::string & path) +static void check(Coordination::Error code, const std::string & path) { - if (code) + if (code != Coordination::Error::ZOK) throw KeeperException(code, path); } @@ -59,7 +59,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho if (implementation == "zookeeper") { if (hosts.empty()) - throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::ZBADARGUMENTS); + throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS); std::vector hosts_strings; splitInto<','>(hosts_strings, hosts); @@ -84,7 +84,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho } if (nodes.empty()) - throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::ZBADARGUMENTS); + throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS); impl = std::make_unique( nodes, @@ -112,7 +112,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho } if (!chroot.empty() && !exists("/")) - throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::ZNONODE); + throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); } ZooKeeper::ZooKeeper(const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_, @@ -164,7 +164,7 @@ struct ZooKeeperArgs implementation = config.getString(config_name + "." + key); } else - throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::ZBADARGUMENTS); + throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } /// Shuffle the hosts to distribute the load among ZooKeeper nodes. 
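The next hunk re-types the chroot validation in ZooKeeperArgs. As a stand-alone restatement of the rule it preserves (the helper name is hypothetical; only the logic is taken from the patch): the configured root must start with '/', and a single trailing slash is tolerated and stripped.

#include <stdexcept>
#include <string>

void validateChroot(std::string & chroot) /// hypothetical helper, not part of the patch
{
    if (chroot.empty())
        return;                           /// no chroot configured
    if (chroot.front() != '/')
        throw std::invalid_argument("Root path in config file should start with '/'");
    if (chroot.back() == '/')
        chroot.pop_back();                /// normalize away the trailing slash
}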
@@ -182,7 +182,7 @@ struct ZooKeeperArgs if (!chroot.empty()) { if (chroot.front() != '/') - throw KeeperException(std::string("Root path in config file should start with '/', but got ") + chroot, Coordination::ZBADARGUMENTS); + throw KeeperException(std::string("Root path in config file should start with '/', but got ") + chroot, Coordination::Error::ZBADARGUMENTS); if (chroot.back() == '/') chroot.pop_back(); } @@ -211,17 +211,17 @@ static Coordination::WatchCallback callbackForEvent(const EventPtr & watch) } -int32_t ZooKeeper::getChildrenImpl(const std::string & path, Strings & res, +Coordination::Error ZooKeeper::getChildrenImpl(const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::ListResponse & response) { code = response.error; - if (!code) + if (code == Coordination::Error::ZOK) { res = response.names; if (stat) @@ -251,37 +251,37 @@ Strings ZooKeeper::getChildrenWatch( return res; } -int32_t ZooKeeper::tryGetChildren(const std::string & path, Strings & res, +Coordination::Error ZooKeeper::tryGetChildren(const std::string & path, Strings & res, Coordination::Stat * stat, const EventPtr & watch) { - int32_t code = getChildrenImpl(path, res, stat, callbackForEvent(watch)); + Coordination::Error code = getChildrenImpl(path, res, stat, callbackForEvent(watch)); - if (!(code == Coordination::ZOK || code == Coordination::ZNONODE)) + if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); return code; } -int32_t ZooKeeper::tryGetChildrenWatch(const std::string & path, Strings & res, +Coordination::Error ZooKeeper::tryGetChildrenWatch(const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = getChildrenImpl(path, res, stat, watch_callback); + Coordination::Error code = getChildrenImpl(path, res, stat, watch_callback); - if (!(code == Coordination::ZOK || code == Coordination::ZNONODE)) + if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); return code; } -int32_t ZooKeeper::createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created) +Coordination::Error ZooKeeper::createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::CreateResponse & response) { code = response.error; - if (!code) + if (code == Coordination::Error::ZOK) path_created = response.path_created; event.set(); }; @@ -298,20 +298,20 @@ std::string ZooKeeper::create(const std::string & path, const std::string & data return path_created; } -int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created) +Coordination::Error ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created) { - int32_t code = createImpl(path, data, mode, path_created); + Coordination::Error code = createImpl(path, data, mode, path_created); - if (!(code == Coordination::ZOK || - code == Coordination::ZNONODE || - code == Coordination::ZNODEEXISTS || - code == Coordination::ZNOCHILDRENFOREPHEMERALS)) + if (!(code == 
Coordination::Error::ZOK || + code == Coordination::Error::ZNONODE || + code == Coordination::Error::ZNODEEXISTS || + code == Coordination::Error::ZNOCHILDRENFOREPHEMERALS)) throw KeeperException(code, path); return code; } -int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode) +Coordination::Error ZooKeeper::tryCreate(const std::string & path, const std::string & data, int32_t mode) { std::string path_created; return tryCreate(path, data, mode, path_created); @@ -320,9 +320,9 @@ int32_t ZooKeeper::tryCreate(const std::string & path, const std::string & data, void ZooKeeper::createIfNotExists(const std::string & path, const std::string & data) { std::string path_created; - int32_t code = createImpl(path, data, CreateMode::Persistent, path_created); + Coordination::Error code = createImpl(path, data, CreateMode::Persistent, path_created); - if (code == Coordination::ZOK || code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) return; else throw KeeperException(code, path); @@ -341,14 +341,14 @@ void ZooKeeper::createAncestors(const std::string & path) } } -int32_t ZooKeeper::removeImpl(const std::string & path, int32_t version) +Coordination::Error ZooKeeper::removeImpl(const std::string & path, int32_t version) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::RemoveResponse & response) { - if (response.error) + if (response.error != Coordination::Error::ZOK) code = response.error; event.set(); }; @@ -363,26 +363,26 @@ void ZooKeeper::remove(const std::string & path, int32_t version) check(tryRemove(path, version), path); } -int32_t ZooKeeper::tryRemove(const std::string & path, int32_t version) +Coordination::Error ZooKeeper::tryRemove(const std::string & path, int32_t version) { - int32_t code = removeImpl(path, version); - if (!(code == Coordination::ZOK || - code == Coordination::ZNONODE || - code == Coordination::ZBADVERSION || - code == Coordination::ZNOTEMPTY)) + Coordination::Error code = removeImpl(path, version); + if (!(code == Coordination::Error::ZOK || + code == Coordination::Error::ZNONODE || + code == Coordination::Error::ZBADVERSION || + code == Coordination::Error::ZNOTEMPTY)) throw KeeperException(code, path); return code; } -int32_t ZooKeeper::existsImpl(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) +Coordination::Error ZooKeeper::existsImpl(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::ExistsResponse & response) { code = response.error; - if (!code && stat) + if (code == Coordination::Error::ZOK && stat) *stat = response.stat; event.set(); }; @@ -399,22 +399,22 @@ bool ZooKeeper::exists(const std::string & path, Coordination::Stat * stat, cons bool ZooKeeper::existsWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = existsImpl(path, stat, watch_callback); + Coordination::Error code = existsImpl(path, stat, watch_callback); - if (!(code == Coordination::ZOK || code == Coordination::ZNONODE)) + if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); - return code != Coordination::ZNONODE; + return code != 
Coordination::Error::ZNONODE; } -int32_t ZooKeeper::getImpl(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) +Coordination::Error ZooKeeper::getImpl(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::GetResponse & response) { code = response.error; - if (!code) + if (code == Coordination::Error::ZOK) { res = response.data; if (stat) @@ -431,7 +431,7 @@ int32_t ZooKeeper::getImpl(const std::string & path, std::string & res, Coordina std::string ZooKeeper::get(const std::string & path, Coordination::Stat * stat, const EventPtr & watch) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; std::string res; if (tryGet(path, res, stat, watch, &code)) return res; @@ -441,7 +441,7 @@ std::string ZooKeeper::get(const std::string & path, Coordination::Stat * stat, std::string ZooKeeper::getWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; std::string res; if (tryGetWatch(path, res, stat, watch_callback, &code)) return res; @@ -449,34 +449,44 @@ std::string ZooKeeper::getWatch(const std::string & path, Coordination::Stat * s throw KeeperException("Can't get data for node " + path + ": node doesn't exist", code); } -bool ZooKeeper::tryGet(const std::string & path, std::string & res, Coordination::Stat * stat, const EventPtr & watch, int * return_code) +bool ZooKeeper::tryGet( + const std::string & path, + std::string & res, + Coordination::Stat * stat, + const EventPtr & watch, + Coordination::Error * return_code) { return tryGetWatch(path, res, stat, callbackForEvent(watch), return_code); } -bool ZooKeeper::tryGetWatch(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback, int * return_code) +bool ZooKeeper::tryGetWatch( + const std::string & path, + std::string & res, + Coordination::Stat * stat, + Coordination::WatchCallback watch_callback, + Coordination::Error * return_code) { - int32_t code = getImpl(path, res, stat, watch_callback); + Coordination::Error code = getImpl(path, res, stat, watch_callback); - if (!(code == Coordination::ZOK || code == Coordination::ZNONODE)) + if (!(code == Coordination::Error::ZOK || code == Coordination::Error::ZNONODE)) throw KeeperException(code, path); if (return_code) *return_code = code; - return code == Coordination::ZOK; + return code == Coordination::Error::ZOK; } -int32_t ZooKeeper::setImpl(const std::string & path, const std::string & data, +Coordination::Error ZooKeeper::setImpl(const std::string & path, const std::string & data, int32_t version, Coordination::Stat * stat) { - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::SetResponse & response) { code = response.error; - if (!code && stat) + if (code == Coordination::Error::ZOK && stat) *stat = response.stat; event.set(); }; @@ -493,34 +503,34 @@ void ZooKeeper::set(const std::string & path, const std::string & data, int32_t void ZooKeeper::createOrUpdate(const std::string & path, const std::string & data, int32_t mode) { - int32_t code = trySet(path, data, -1); - if (code == Coordination::ZNONODE) + Coordination::Error code = trySet(path, 
data, -1); + if (code == Coordination::Error::ZNONODE) { create(path, data, mode); } - else if (code != Coordination::ZOK) + else if (code != Coordination::Error::ZOK) throw KeeperException(code, path); } -int32_t ZooKeeper::trySet(const std::string & path, const std::string & data, +Coordination::Error ZooKeeper::trySet(const std::string & path, const std::string & data, int32_t version, Coordination::Stat * stat) { - int32_t code = setImpl(path, data, version, stat); + Coordination::Error code = setImpl(path, data, version, stat); - if (!(code == Coordination::ZOK || - code == Coordination::ZNONODE || - code == Coordination::ZBADVERSION)) + if (!(code == Coordination::Error::ZOK || + code == Coordination::Error::ZNONODE || + code == Coordination::Error::ZBADVERSION)) throw KeeperException(code, path); return code; } -int32_t ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses) +Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses) { if (requests.empty()) - return Coordination::ZOK; + return Coordination::Error::ZOK; - int32_t code = 0; + Coordination::Error code = Coordination::Error::ZOK; Poco::Event event; auto callback = [&](const Coordination::MultiResponse & response) @@ -538,15 +548,15 @@ int32_t ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordinati Coordination::Responses ZooKeeper::multi(const Coordination::Requests & requests) { Coordination::Responses responses; - int32_t code = multiImpl(requests, responses); + Coordination::Error code = multiImpl(requests, responses); KeeperMultiException::check(code, requests, responses); return responses; } -int32_t ZooKeeper::tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses) +Coordination::Error ZooKeeper::tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses) { - int32_t code = multiImpl(requests, responses); - if (code && !Coordination::isUserError(code)) + Coordination::Error code = multiImpl(requests, responses); + if (code != Coordination::Error::ZOK && !Coordination::isUserError(code)) throw KeeperException(code); return code; } @@ -587,7 +597,7 @@ void ZooKeeper::removeChildrenRecursive(const std::string & path) void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path) { Strings children; - if (tryGetChildren(path, children) != Coordination::ZOK) + if (tryGetChildren(path, children) != Coordination::Error::ZOK) return; while (!children.empty()) { @@ -609,7 +619,7 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path) /// this means someone is concurrently removing these children and we will have /// to remove them one by one. 
Coordination::Responses responses; - if (tryMulti(ops, responses) != Coordination::ZOK) + if (tryMulti(ops, responses) != Coordination::Error::ZOK) for (const std::string & child : batch) tryRemove(child); } @@ -645,7 +655,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & auto callback = [state](const Coordination::ExistsResponse & response) { - state->code = response.error; + state->code = int32_t(response.error); if (state->code) state->event.set(); }; @@ -654,7 +664,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & { if (!state->code) { - state->code = response.error; + state->code = int32_t(response.error); if (!state->code) state->event_type = response.type; state->event.set(); @@ -670,11 +680,11 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & else if (!state->event.tryWait(1000)) continue; - if (state->code == Coordination::ZNONODE) + if (state->code == int32_t(Coordination::Error::ZNONODE)) return true; if (state->code) - throw KeeperException(state->code, path); + throw KeeperException(static_cast(state->code.load(std::memory_order_seq_cst)), path); if (state->event_type == Coordination::DELETED) return true; @@ -688,11 +698,6 @@ ZooKeeperPtr ZooKeeper::startNewSession() const } -std::string ZooKeeper::error2string(int32_t code) -{ - return Coordination::errorMessage(code); -} - bool ZooKeeper::expired() { return impl->isExpired(); @@ -712,7 +717,7 @@ std::future ZooKeeper::asyncCreate(const std::stri auto callback = [promise, path](const Coordination::CreateResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -730,7 +735,7 @@ std::future ZooKeeper::asyncGet(const std::string & p auto callback = [promise, path](const Coordination::GetResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -748,7 +753,7 @@ std::future ZooKeeper::asyncTryGet(const std::string auto callback = [promise, path](const Coordination::GetResponse & response) mutable { - if (response.error && response.error != Coordination::ZNONODE) + if (response.error != Coordination::Error::ZOK && response.error != Coordination::Error::ZNONODE) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -765,7 +770,7 @@ std::future ZooKeeper::asyncExists(const std::stri auto callback = [promise, path](const Coordination::ExistsResponse & response) mutable { - if (response.error && response.error != Coordination::ZNONODE) + if (response.error != Coordination::Error::ZOK && response.error != Coordination::Error::ZNONODE) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -782,7 +787,7 @@ std::future ZooKeeper::asyncSet(const std::string & p auto callback = [promise, path](const Coordination::SetResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -799,7 +804,7 @@ std::future ZooKeeper::asyncGetChildren(const std::s auto callback = [promise, path](const 
Coordination::ListResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -816,7 +821,7 @@ std::future ZooKeeper::asyncRemove(const std::stri auto callback = [promise, path](const Coordination::RemoveResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); else promise->set_value(response); @@ -833,8 +838,13 @@ std::future ZooKeeper::asyncTryRemove(const std::s auto callback = [promise, path](const Coordination::RemoveResponse & response) mutable { - if (response.error && response.error != Coordination::ZNONODE && response.error != Coordination::ZBADVERSION && response.error != Coordination::ZNOTEMPTY) + if (response.error != Coordination::Error::ZOK + && response.error != Coordination::Error::ZNONODE + && response.error != Coordination::Error::ZBADVERSION + && response.error != Coordination::Error::ZNOTEMPTY) + { promise->set_exception(std::make_exception_ptr(KeeperException(path, response.error))); + } else promise->set_value(response); }; @@ -864,7 +874,7 @@ std::future ZooKeeper::asyncMulti(const Coordinatio auto callback = [promise](const Coordination::MultiResponse & response) mutable { - if (response.error) + if (response.error != Coordination::Error::ZOK) promise->set_exception(std::make_exception_ptr(KeeperException(response.error))); else promise->set_value(response); @@ -874,7 +884,7 @@ std::future ZooKeeper::asyncMulti(const Coordinatio return future; } -int32_t ZooKeeper::tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses) +Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses) { try { @@ -887,24 +897,24 @@ int32_t ZooKeeper::tryMultiNoThrow(const Coordination::Requests & requests, Coor } -size_t KeeperMultiException::getFailedOpIndex(int32_t exception_code, const Coordination::Responses & responses) +size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) { if (responses.empty()) throw DB::Exception("Responses for multi transaction is empty", DB::ErrorCodes::LOGICAL_ERROR); for (size_t index = 0, size = responses.size(); index < size; ++index) - if (responses[index]->error) + if (responses[index]->error != Coordination::Error::ZOK) return index; if (!Coordination::isUserError(exception_code)) - throw DB::Exception("There are no failed OPs because '" + ZooKeeper::error2string(exception_code) + "' is not valid response code for that", + throw DB::Exception("There are no failed OPs because '" + std::string(Coordination::errorMessage(exception_code)) + "' is not valid response code for that", DB::ErrorCodes::LOGICAL_ERROR); throw DB::Exception("There is no failed OpResult", DB::ErrorCodes::LOGICAL_ERROR); } -KeeperMultiException::KeeperMultiException(int32_t exception_code, const Coordination::Requests & requests_, const Coordination::Responses & responses_) +KeeperMultiException::KeeperMultiException(Coordination::Error exception_code, const Coordination::Requests & requests_, const Coordination::Responses & responses_) : KeeperException("Transaction failed", exception_code), requests(requests_), responses(responses_), failed_op_index(getFailedOpIndex(exception_code, responses)) { @@ -917,9 +927,10 
@@ std::string KeeperMultiException::getPathForFirstFailedOp() const
     return requests[failed_op_index]->getPath();
 }
 
-void KeeperMultiException::check(int32_t exception_code, const Coordination::Requests & requests, const Coordination::Responses & responses)
+void KeeperMultiException::check(
+    Coordination::Error exception_code, const Coordination::Requests & requests, const Coordination::Responses & responses)
 {
-    if (!exception_code)
+    if (exception_code == Coordination::Error::ZOK)
         return;
 
     if (Coordination::isUserError(exception_code))
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 3bf9ad3c100..416e40c2da4 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -99,8 +99,8 @@ public:
     /// * The parent is ephemeral.
     /// * The node already exists.
     /// In case of other errors throws an exception.
-    int32_t tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
-    int32_t tryCreate(const std::string & path, const std::string & data, int32_t mode);
+    Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
+    Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode);
 
     /// Create a Persistent node.
     /// Does nothing if the node already exists.
@@ -117,7 +117,7 @@ public:
     /// * The node doesn't exist
     /// * Versions don't match
     /// * The node has children.
-    int32_t tryRemove(const std::string & path, int32_t version = -1);
+    Coordination::Error tryRemove(const std::string & path, int32_t version = -1);
 
     bool exists(const std::string & path, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr);
     bool existsWatch(const std::string & path, Coordination::Stat * stat, Coordination::WatchCallback watch_callback);
@@ -127,9 +127,11 @@ public:
     /// Doesn't throw in the following cases:
     /// * The node doesn't exist. Returns false in this case.
-    bool tryGet(const std::string & path, std::string & res, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr, int * code = nullptr);
+    bool tryGet(const std::string & path, std::string & res, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr,
+                Coordination::Error * code = nullptr);
 
-    bool tryGetWatch(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback, int * code = nullptr);
+    bool tryGetWatch(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback,
+                     Coordination::Error * code = nullptr);
 
     void set(const std::string & path, const std::string & data, int32_t version = -1, Coordination::Stat * stat = nullptr);
 
@@ -140,7 +142,7 @@ public:
     /// Doesn't throw in the following cases:
     /// * The node doesn't exist.
     /// * Versions do not match.
-    int32_t trySet(const std::string & path, const std::string & data,
+    Coordination::Error trySet(const std::string & path, const std::string & data,
                    int32_t version = -1, Coordination::Stat * stat = nullptr);
 
     Strings getChildren(const std::string & path,
@@ -153,11 +155,11 @@ public:
     /// Doesn't throw in the following cases:
     /// * The node doesn't exist.
- int32_t tryGetChildren(const std::string & path, Strings & res, + Coordination::Error tryGetChildren(const std::string & path, Strings & res, Coordination::Stat * stat = nullptr, const EventPtr & watch = nullptr); - int32_t tryGetChildrenWatch(const std::string & path, Strings & res, + Coordination::Error tryGetChildrenWatch(const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); @@ -166,9 +168,9 @@ public: Coordination::Responses multi(const Coordination::Requests & requests); /// Throws only if some operation has returned an "unexpected" error /// - an error that would cause the corresponding try- method to throw. - int32_t tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses); + Coordination::Error tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses); /// Throws nothing (even session expired errors) - int32_t tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses); + Coordination::Error tryMultiNoThrow(const Coordination::Requests & requests, Coordination::Responses & responses); Int64 getClientID(); @@ -238,8 +240,6 @@ public: /// Like the previous one but don't throw any exceptions on future.get() FutureMulti tryAsyncMulti(const Coordination::Requests & ops); - static std::string error2string(int32_t code); - private: friend class EphemeralNodeHolder; @@ -250,13 +250,15 @@ private: void tryRemoveChildrenRecursive(const std::string & path); /// The following methods don't throw exceptions but return error codes. - int32_t createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); - int32_t removeImpl(const std::string & path, int32_t version); - int32_t getImpl(const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); - int32_t setImpl(const std::string & path, const std::string & data, int32_t version, Coordination::Stat * stat); - int32_t getChildrenImpl(const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); - int32_t multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses); - int32_t existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback); + Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); + Coordination::Error removeImpl(const std::string & path, int32_t version); + Coordination::Error getImpl( + const std::string & path, std::string & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); + Coordination::Error setImpl(const std::string & path, const std::string & data, int32_t version, Coordination::Stat * stat); + Coordination::Error getChildrenImpl( + const std::string & path, Strings & res, Coordination::Stat * stat, Coordination::WatchCallback watch_callback); + Coordination::Error multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses); + Coordination::Error existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback); std::unique_ptr impl; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index e6cab23d2ce..8564b996bda 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -335,6 +335,13 @@ static void 
read(int32_t & x, ReadBuffer & in) x = __builtin_bswap32(x); } +static void read(Error & x, ReadBuffer & in) +{ + int32_t code; + readBinary(code, in); + x = Error(code); +} + static void read(bool & x, ReadBuffer & in) { readBinary(x, in); @@ -353,10 +360,10 @@ static void read(String & s, ReadBuffer & in) } if (size < 0) - throw Exception("Negative size while reading string from ZooKeeper", ZMARSHALLINGERROR); + throw Exception("Negative size while reading string from ZooKeeper", Error::ZMARSHALLINGERROR); if (size > MAX_STRING_OR_ARRAY_SIZE) - throw Exception("Too large string size while reading from ZooKeeper", ZMARSHALLINGERROR); + throw Exception("Too large string size while reading from ZooKeeper", Error::ZMARSHALLINGERROR); s.resize(size); in.read(s.data(), size); @@ -367,7 +374,7 @@ template void read(std::array & s, ReadBuffer & in) int32_t size = 0; read(size, in); if (size != N) - throw Exception("Unexpected array size while reading from ZooKeeper", ZMARSHALLINGERROR); + throw Exception("Unexpected array size while reading from ZooKeeper", Error::ZMARSHALLINGERROR); in.read(s.data(), N); } @@ -391,9 +398,9 @@ template void read(std::vector & arr, ReadBuffer & in) int32_t size = 0; read(size, in); if (size < 0) - throw Exception("Negative size while reading array from ZooKeeper", ZMARSHALLINGERROR); + throw Exception("Negative size while reading array from ZooKeeper", Error::ZMARSHALLINGERROR); if (size > MAX_STRING_OR_ARRAY_SIZE) - throw Exception("Too large array size while reading from ZooKeeper", ZMARSHALLINGERROR); + throw Exception("Too large array size while reading from ZooKeeper", Error::ZMARSHALLINGERROR); arr.resize(size); for (auto & elem : arr) read(elem, in); @@ -489,7 +496,7 @@ struct ZooKeeperCloseResponse final : ZooKeeperResponse { void readImpl(ReadBuffer &) override { - throw Exception("Received response for close request", ZRUNTIMEINCONSISTENCY); + throw Exception("Received response for close request", Error::ZRUNTIMEINCONSISTENCY); } }; @@ -650,12 +657,12 @@ struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse { void readImpl(ReadBuffer & in) override { - int32_t read_error; + Coordination::Error read_error; Coordination::read(read_error, in); if (read_error != error) - throw Exception("Error code in ErrorResponse (" + toString(read_error) + ") doesn't match error code in header (" + toString(error) + ")", - ZMARSHALLINGERROR); + throw Exception(fmt::format("Error code in ErrorResponse ({}) doesn't match error code in header ({})", read_error, error), + Error::ZMARSHALLINGERROR); } }; @@ -691,7 +698,7 @@ struct ZooKeeperMultiRequest final : MultiRequest, ZooKeeperRequest requests.push_back(std::make_shared(*concrete_request_check)); } else - throw Exception("Illegal command as part of multi ZooKeeper request", ZBADARGUMENTS); + throw Exception("Illegal command as part of multi ZooKeeper request", Error::ZBADARGUMENTS); } } @@ -739,14 +746,14 @@ struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse { ZooKeeper::OpNum op_num; bool done; - int32_t op_error; + Error op_error; Coordination::read(op_num, in); Coordination::read(done, in); Coordination::read(op_error, in); if (done) - throw Exception("Not enough results received for multi transaction", ZMARSHALLINGERROR); + throw Exception("Not enough results received for multi transaction", Error::ZMARSHALLINGERROR); /// op_num == -1 is special for multi transaction. /// For unknown reason, error code is duplicated in header and in response body. 
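The hunks above touch two places where ZooKeeper duplicates an error code on the wire: the ErrorResponse body versus the reply header, and the per-operation results of a multi response. A simplified stand-in for the header/body cross-check that readImpl performs (the ReadBuffer plumbing is elided, and the enum is truncated to the two values used here):

#include <cstdint>
#include <stdexcept>

enum class Error : int32_t { ZOK = 0, ZMARSHALLINGERROR = -5 };

void checkDuplicatedError(Error header_error, Error body_error)
{
    /// A mismatch means the stream is corrupt, so the client rejects the reply
    /// (as a marshalling error) rather than trusting either value.
    if (body_error != header_error)
        throw std::runtime_error("Error code in ErrorResponse doesn't match error code in header");
}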
@@ -754,18 +761,18 @@ struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
             if (op_num == -1)
                 response = std::make_shared<ZooKeeperErrorResponse>();
 
-            if (op_error)
+            if (op_error != Error::ZOK)
             {
                 response->error = op_error;
 
                 /// Set error for the whole transaction.
                 /// If some operations fail, ZK sends a global error as zero and then sends details about each operation.
                 /// It sets the error code for the first failed operation and a special "runtime inconsistency" code for the others.
-                if (!error && op_error != ZRUNTIMEINCONSISTENCY)
+                if (error == Error::ZOK && op_error != Error::ZRUNTIMEINCONSISTENCY)
                     error = op_error;
             }
 
-            if (!op_error || op_num == -1)
+            if (op_error == Error::ZOK || op_num == -1)
                 dynamic_cast<ZooKeeperResponse &>(*response).readImpl(in);
         }
 
@@ -780,11 +787,11 @@ struct ZooKeeperMultiResponse final : MultiResponse, ZooKeeperResponse
             Coordination::read(error_read, in);
 
             if (!done)
-                throw Exception("Too many results received for multi transaction", ZMARSHALLINGERROR);
+                throw Exception("Too many results received for multi transaction", Error::ZMARSHALLINGERROR);
             if (op_num != -1)
-                throw Exception("Unexpected op_num received at the end of results for multi transaction", ZMARSHALLINGERROR);
+                throw Exception("Unexpected op_num received at the end of results for multi transaction", Error::ZMARSHALLINGERROR);
             if (error_read != -1)
-                throw Exception("Unexpected error value received at the end of results for multi transaction", ZMARSHALLINGERROR);
+                throw Exception("Unexpected error value received at the end of results for multi transaction", Error::ZMARSHALLINGERROR);
         }
     }
 };
@@ -883,7 +890,7 @@ void ZooKeeper::connect(
     Poco::Timespan connection_timeout)
 {
     if (nodes.empty())
-        throw Exception("No nodes passed to ZooKeeper constructor", ZBADARGUMENTS);
+        throw Exception("No nodes passed to ZooKeeper constructor", Error::ZBADARGUMENTS);
 
     static constexpr size_t num_tries = 3;
     bool connected = false;
@@ -970,7 +977,7 @@ void ZooKeeper::connect(
         }
 
         message << fail_reasons.str() << "\n";
-        throw Exception(message.str(), ZCONNECTIONLOSS);
+        throw Exception(message.str(), Error::ZCONNECTIONLOSS);
     }
 }
 
@@ -1005,11 +1012,11 @@ void ZooKeeper::receiveHandshake()
 
     read(handshake_length);
     if (handshake_length != 36)
-        throw Exception("Unexpected handshake length received: " + toString(handshake_length), ZMARSHALLINGERROR);
+        throw Exception("Unexpected handshake length received: " + toString(handshake_length), Error::ZMARSHALLINGERROR);
 
     read(protocol_version_read);
     if (protocol_version_read != protocol_version)
-        throw Exception("Unexpected protocol version: " + toString(protocol_version_read), ZMARSHALLINGERROR);
+        throw Exception("Unexpected protocol version: " + toString(protocol_version_read), Error::ZMARSHALLINGERROR);
 
     read(timeout);
     if (timeout != session_timeout.totalMilliseconds())
@@ -1032,7 +1039,7 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)
     int32_t length;
     XID read_xid;
     int64_t zxid;
-    int32_t err;
+    Error err;
 
     read(length);
     size_t count_before_event = in->count();
@@ -1042,16 +1049,16 @@ void ZooKeeper::sendAuth(const String & scheme, const String & data)
 
     if (read_xid != auth_xid)
         throw Exception("Unexpected event received in reply to auth request: " + toString(read_xid),
-            ZMARSHALLINGERROR);
+            Error::ZMARSHALLINGERROR);
 
     int32_t actual_length = in->count() - count_before_event;
     if (length != actual_length)
        throw Exception("Response length doesn't match. 
Expected: " + toString(length) + ", actual: " + toString(actual_length), - ZMARSHALLINGERROR); + Error::ZMARSHALLINGERROR); - if (err) - throw Exception("Error received in reply to auth request. Code: " + toString(err) + ". Message: " + String(errorMessage(err)), - ZMARSHALLINGERROR); + if (err != Error::ZOK) + throw Exception("Error received in reply to auth request. Code: " + toString(int32_t(err)) + ". Message: " + String(errorMessage(err)), + Error::ZMARSHALLINGERROR); } @@ -1154,7 +1161,7 @@ void ZooKeeper::receiveThread() earliest_operation = operations.begin()->second; auto earliest_operation_deadline = earliest_operation->time + std::chrono::microseconds(operation_timeout.totalMicroseconds()); if (now > earliest_operation_deadline) - throw Exception("Operation timeout (deadline already expired) for path: " + earliest_operation->request->getPath(), ZOPERATIONTIMEOUT); + throw Exception("Operation timeout (deadline already expired) for path: " + earliest_operation->request->getPath(), Error::ZOPERATIONTIMEOUT); max_wait = std::chrono::duration_cast(earliest_operation_deadline - now).count(); } } @@ -1170,10 +1177,10 @@ void ZooKeeper::receiveThread() else { if (earliest_operation) - throw Exception("Operation timeout (no response) for path: " + earliest_operation->request->getPath(), ZOPERATIONTIMEOUT); + throw Exception("Operation timeout (no response) for path: " + earliest_operation->request->getPath(), Error::ZOPERATIONTIMEOUT); waited += max_wait; if (waited >= session_timeout.totalMicroseconds()) - throw Exception("Nothing is received in session timeout", ZOPERATIONTIMEOUT); + throw Exception("Nothing is received in session timeout", Error::ZOPERATIONTIMEOUT); } @@ -1193,7 +1200,7 @@ void ZooKeeper::receiveEvent() int32_t length; XID xid; int64_t zxid; - int32_t err; + Error err; read(length); size_t count_before_event = in->count(); @@ -1206,8 +1213,8 @@ void ZooKeeper::receiveEvent() if (xid == ping_xid) { - if (err) - throw Exception("Received error in heartbeat response: " + String(errorMessage(err)), ZRUNTIMEINCONSISTENCY); + if (err != Error::ZOK) + throw Exception("Received error in heartbeat response: " + String(errorMessage(err)), Error::ZRUNTIMEINCONSISTENCY); response = std::make_shared(); } @@ -1252,7 +1259,7 @@ void ZooKeeper::receiveEvent() auto it = operations.find(xid); if (it == operations.end()) - throw Exception("Received response for unknown xid", ZRUNTIMEINCONSISTENCY); + throw Exception("Received response for unknown xid", Error::ZRUNTIMEINCONSISTENCY); /// After this point, we must invoke callback, that we've grabbed from 'operations'. /// Invariant: all callbacks are invoked either in case of success or in case of error. @@ -1272,7 +1279,7 @@ void ZooKeeper::receiveEvent() if (!response) response = request_info.request->makeResponse(); - if (err) + if (err != Error::ZOK) response->error = err; else { @@ -1282,7 +1289,7 @@ void ZooKeeper::receiveEvent() int32_t actual_length = in->count() - count_before_event; if (length != actual_length) - throw Exception("Response length doesn't match. Expected: " + toString(length) + ", actual: " + toString(actual_length), ZMARSHALLINGERROR); + throw Exception("Response length doesn't match. Expected: " + toString(length) + ", actual: " + toString(actual_length), Error::ZMARSHALLINGERROR); } catch (...) { @@ -1294,7 +1301,7 @@ void ZooKeeper::receiveEvent() /// In case we cannot read the response, we should indicate it as the error of that type /// when the user cannot assume whether the request was processed or not. 
-            response->error = ZCONNECTIONLOSS;
+            response->error = Error::ZCONNECTIONLOSS;
 
             if (request_info.callback)
                 request_info.callback(*response);
@@ -1361,8 +1368,8 @@ void ZooKeeper::finalize(bool error_send, bool error_receive)
 
                 ResponsePtr response = request_info.request->makeResponse();
                 response->error = request_info.request->probably_sent
-                    ? ZCONNECTIONLOSS
-                    : ZSESSIONEXPIRED;
+                    ? Error::ZCONNECTIONLOSS
+                    : Error::ZSESSIONEXPIRED;
 
                 if (request_info.callback)
                 {
@@ -1390,7 +1397,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive)
                 WatchResponse response;
                 response.type = SESSION;
                 response.state = EXPIRED_SESSION;
-                response.error = ZSESSIONEXPIRED;
+                response.error = Error::ZSESSIONEXPIRED;
 
                 for (auto & callback : path_watches.second)
                 {
@@ -1421,7 +1428,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive)
                 ResponsePtr response = info.request->makeResponse();
                 if (response)
                 {
-                    response->error = ZSESSIONEXPIRED;
+                    response->error = Error::ZSESSIONEXPIRED;
                     try
                     {
                         info.callback(*response);
@@ -1437,7 +1444,7 @@ void ZooKeeper::finalize(bool error_send, bool error_receive)
                 WatchResponse response;
                 response.type = SESSION;
                 response.state = EXPIRED_SESSION;
-                response.error = ZSESSIONEXPIRED;
+                response.error = Error::ZSESSIONEXPIRED;
                 try
                 {
                     info.watch(response);
@@ -1466,9 +1473,9 @@ void ZooKeeper::pushRequest(RequestInfo && info)
         {
             info.request->xid = next_xid.fetch_add(1);
             if (info.request->xid == close_xid)
-                throw Exception("xid equal to close_xid", ZSESSIONEXPIRED);
+                throw Exception("xid equal to close_xid", Error::ZSESSIONEXPIRED);
             if (info.request->xid < 0)
-                throw Exception("XID overflow", ZSESSIONEXPIRED);
+                throw Exception("XID overflow", Error::ZSESSIONEXPIRED);
         }
 
         /// We must serialize 'pushRequest' and 'finalize' (from sendThread, receiveThread) calls
@@ -1478,10 +1485,10 @@ void ZooKeeper::pushRequest(RequestInfo && info)
             std::lock_guard lock(push_request_mutex);
 
             if (expired)
-                throw Exception("Session expired", ZSESSIONEXPIRED);
+                throw Exception("Session expired", Error::ZSESSIONEXPIRED);
 
             if (!requests_queue.tryPush(std::move(info), operation_timeout.totalMilliseconds()))
-                throw Exception("Cannot push request to queue within operation timeout", ZOPERATIONTIMEOUT);
+                throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT);
         }
         catch (...)
         {
@@ -1651,7 +1658,7 @@ void ZooKeeper::close()
     request_info.request = std::make_shared<ZooKeeperCloseRequest>(std::move(request));
 
     if (!requests_queue.tryPush(std::move(request_info), operation_timeout.totalMilliseconds()))
-        throw Exception("Cannot push close request to queue within operation timeout", ZOPERATIONTIMEOUT);
+        throw Exception("Cannot push close request to queue within operation timeout", Error::ZOPERATIONTIMEOUT);
 
     ProfileEvents::increment(ProfileEvents::ZooKeeperClose);
 }
diff --git a/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp b/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp
index cd4c6e0a159..8440b4fe7c9 100644
--- a/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp
+++ b/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp
@@ -86,7 +86,7 @@ TEST(zkutil, MultiAsync)
         ops.clear();
         auto res = fut.get();
 
-        ASSERT_EQ(res.error, Coordination::ZOK);
+        ASSERT_EQ(res.error, Coordination::Error::ZOK);
         ASSERT_EQ(res.responses.size(), 2);
     }
 
@@ -126,15 +126,15 @@ TEST(zkutil, MultiAsync)
             /// The test is quite heavy. It is normal if the session expires during this test.
             /// If we don't check that, the test will be flaky.
- if (res.error != Coordination::ZSESSIONEXPIRED && res.error != Coordination::ZCONNECTIONLOSS) + if (res.error != Coordination::Error::ZSESSIONEXPIRED && res.error != Coordination::Error::ZCONNECTIONLOSS) { - ASSERT_EQ(res.error, Coordination::ZNODEEXISTS); + ASSERT_EQ(res.error, Coordination::Error::ZNODEEXISTS); ASSERT_EQ(res.responses.size(), 2); } } catch (const Coordination::Exception & e) { - if (e.code != Coordination::ZSESSIONEXPIRED && e.code != Coordination::ZCONNECTIONLOSS) + if (e.code != Coordination::Error::ZSESSIONEXPIRED && e.code != Coordination::Error::ZCONNECTIONLOSS) throw; } } diff --git a/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp b/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp index d245428db8e..e09c72a4d6c 100644 --- a/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp +++ b/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp @@ -39,12 +39,12 @@ int main(int argc, char ** argv) ops.emplace_back(zkutil::makeRemoveRequest("/test/zk_expiration_test", -1)); Coordination::Responses responses; - int32_t code = zk.tryMultiNoThrow(ops, responses); + Coordination::Error code = zk.tryMultiNoThrow(ops, responses); - std::cout << time(nullptr) - time0 << "s: " << zkutil::ZooKeeper::error2string(code) << std::endl; + std::cout << time(nullptr) - time0 << "s: " << Coordination::errorMessage(code) << std::endl; try { - if (code) + if (code != Coordination::Error::ZOK) std::cout << "Path: " << zkutil::KeeperMultiException(code, ops, responses).getPathForFirstFailedOp() << std::endl; } catch (...) diff --git a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp index d9d3402fa32..89659fa5e46 100644 --- a/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp +++ b/src/Common/ZooKeeper/tests/zkutil_test_commands_new_lib.cpp @@ -49,8 +49,8 @@ try zk.create("/test", "old", false, false, {}, [&](const CreateResponse & response) { - if (response.error) - std::cerr << "Error (create) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (create): " << errorMessage(response.error) << '\n'; else std::cerr << "Created path: " << response.path_created << '\n'; @@ -64,8 +64,8 @@ try zk.get("/test", [&](const GetResponse & response) { - if (response.error) - std::cerr << "Error (get) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (get): " << errorMessage(response.error) << '\n'; else std::cerr << "Value: " << response.data << '\n'; @@ -73,8 +73,8 @@ try }, [](const WatchResponse & response) { - if (response.error) - std::cerr << "Watch (get) on /test, Error " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Watch (get) on /test, Error: " << errorMessage(response.error) << '\n'; else std::cerr << "Watch (get) on /test, path: " << response.path << ", type: " << response.type << '\n'; }); @@ -86,8 +86,8 @@ try zk.set("/test", "new", -1, [&](const SetResponse & response) { - if (response.error) - std::cerr << "Error (set) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (set): " << errorMessage(response.error) << '\n'; else std::cerr << "Set\n"; @@ -101,8 +101,8 @@ try zk.list("/", [&](const ListResponse & response) { - if (response.error) - 
std::cerr << "Error (list) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (list): " << errorMessage(response.error) << '\n'; else { std::cerr << "Children:\n"; @@ -114,8 +114,8 @@ try }, [](const WatchResponse & response) { - if (response.error) - std::cerr << "Watch (list) on /, Error " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Watch (list) on /, Error: " << errorMessage(response.error) << '\n'; else std::cerr << "Watch (list) on /, path: " << response.path << ", type: " << response.type << '\n'; }); @@ -127,8 +127,8 @@ try zk.exists("/test", [&](const ExistsResponse & response) { - if (response.error) - std::cerr << "Error (exists) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (exists): " << errorMessage(response.error) << '\n'; else std::cerr << "Exists\n"; @@ -136,8 +136,8 @@ try }, [](const WatchResponse & response) { - if (response.error) - std::cerr << "Watch (exists) on /test, Error " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Watch (exists) on /test, Error: " << errorMessage(response.error) << '\n'; else std::cerr << "Watch (exists) on /test, path: " << response.path << ", type: " << response.type << '\n'; }); @@ -148,8 +148,8 @@ try zk.remove("/test", -1, [&](const RemoveResponse & response) { - if (response.error) - std::cerr << "Error (remove) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (remove): " << errorMessage(response.error) << '\n'; else std::cerr << "Removed\n"; @@ -184,13 +184,13 @@ try zk.multi(ops, [&](const MultiResponse & response) { - if (response.error) - std::cerr << "Error (multi) " << response.error << ": " << errorMessage(response.error) << '\n'; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error (multi): " << errorMessage(response.error) << '\n'; else { for (const auto & elem : response.responses) - if (elem->error) - std::cerr << "Error (elem) " << elem->error << ": " << errorMessage(elem->error) << '\n'; + if (elem->error != Coordination::Error::ZOK) + std::cerr << "Error (elem): " << errorMessage(elem->error) << '\n'; std::cerr << "Created path: " << dynamic_cast(*response.responses[0]).path_created << '\n'; } diff --git a/src/Common/ZooKeeper/tests/zookeeper_impl.cpp b/src/Common/ZooKeeper/tests/zookeeper_impl.cpp index 74ba63514f2..3c2e52c93f2 100644 --- a/src/Common/ZooKeeper/tests/zookeeper_impl.cpp +++ b/src/Common/ZooKeeper/tests/zookeeper_impl.cpp @@ -9,8 +9,8 @@ try zookeeper.create("/test", "hello", false, false, {}, [](const Coordination::CreateResponse & response) { - if (response.error) - std::cerr << "Error " << response.error << ": " << Coordination::errorMessage(response.error) << "\n"; + if (response.error != Coordination::Error::ZOK) + std::cerr << "Error: " << Coordination::errorMessage(response.error) << "\n"; else std::cerr << "Path created: " << response.path_created << "\n"; }); diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index dac51b21081..9f89fa9199b 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -422,7 +422,7 @@ void DDLWorker::processTasks() } catch (const Coordination::Exception & 
e) { - if (server_startup && e.code == Coordination::ZNONODE) + if (server_startup && e.code == Coordination::Error::ZNONODE) { LOG_WARNING(log, "ZooKeeper NONODE error during startup. Ignoring entry {} ({}) : {}", task.entry_name, task.entry.query, getCurrentExceptionMessage(true)); } @@ -603,15 +603,15 @@ void DDLWorker::processTask(DDLTask & task, const ZooKeeperPtr & zookeeper) auto code = zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy); - if (code == Coordination::ZOK || code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) { // Ok } - else if (code == Coordination::ZNONODE) + else if (code == Coordination::Error::ZNONODE) { /// There is no parent createStatusDirs(task.entry_path, zookeeper); - if (Coordination::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy)) + if (Coordination::Error::ZOK != zookeeper->tryCreate(active_node_path, "", zkutil::CreateMode::Ephemeral, dummy)) throw Coordination::Exception(code, active_node_path); } else @@ -915,8 +915,9 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP ops.emplace_back(std::make_shared(std::move(request))); } Coordination::Responses responses; - int code = zookeeper->tryMulti(ops, responses); - if (code && code != Coordination::ZNODEEXISTS) + Coordination::Error code = zookeeper->tryMulti(ops, responses); + if (code != Coordination::Error::ZOK + && code != Coordination::Error::ZNODEEXISTS) throw Coordination::Exception(code); } @@ -1013,7 +1014,7 @@ void DDLWorker::runMainThread() } } } - else if (e.code == Coordination::ZNONODE) + else if (e.code == Coordination::Error::ZNONODE) { LOG_ERROR(log, "ZooKeeper error: {}", getCurrentExceptionMessage(true)); } @@ -1201,8 +1202,8 @@ private: static Strings getChildrenAllowNoNode(const std::shared_ptr & zookeeper, const String & node_path) { Strings res; - int code = zookeeper->tryGetChildren(node_path, res); - if (code && code != Coordination::ZNONODE) + Coordination::Error code = zookeeper->tryGetChildren(node_path, res); + if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) throw Coordination::Exception(code, node_path); return res; } diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index 762dbc7d5b6..6b00215fd26 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -104,13 +104,13 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( lock_ops.push_back(zkutil::makeCheckRequest(block_numbers_path, partitions_stat.version)); Coordination::Responses lock_responses; - int rc = zookeeper.tryMulti(lock_ops, lock_responses); - if (rc == Coordination::ZBADVERSION) + Coordination::Error rc = zookeeper.tryMulti(lock_ops, lock_responses); + if (rc == Coordination::Error::ZBADVERSION) { LOG_TRACE(&Poco::Logger::get("EphemeralLocksInAllPartitions"), "Someone has inserted a block in a new partition while we were creating locks. 
Retry."); continue; } - else if (rc != Coordination::ZOK) + else if (rc != Coordination::Error::ZOK) throw Coordination::Exception(rc); for (size_t i = 0; i < partitions.size(); ++i) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index d8c1103809f..c67ea11f56f 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -85,7 +85,7 @@ void ReplicatedMergeTreeBlockOutputStream::checkQuorumPrecondition(zkutil::ZooKe */ auto quorum_status = quorum_status_future.get(); - if (quorum_status.error != Coordination::ZNONODE) + if (quorum_status.error != Coordination::Error::ZNONODE) throw Exception("Quorum for previous write has not been satisfied yet. Status: " + quorum_status.data, ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE); /// Both checks are implicitly made also later (otherwise there would be a race condition). @@ -93,7 +93,7 @@ void ReplicatedMergeTreeBlockOutputStream::checkQuorumPrecondition(zkutil::ZooKe auto is_active = is_active_future.get(); auto host = host_future.get(); - if (is_active.error == Coordination::ZNONODE || host.error == Coordination::ZNONODE) + if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) throw Exception("Replica is not active right now", ErrorCodes::READONLY); quorum_info.is_active_node_value = is_active.data; @@ -299,9 +299,9 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo storage.renameTempPartAndAdd(part, nullptr, &transaction); Coordination::Responses responses; - int32_t multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT + Coordination::Error multi_code = zookeeper->tryMultiNoThrow(ops, responses); /// 1 RTT - if (multi_code == Coordination::ZOK) + if (multi_code == Coordination::Error::ZOK) { transaction.commit(); storage.merge_selecting_task->schedule(); @@ -309,8 +309,8 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo /// Lock nodes have been already deleted, do not delete them in destructor block_number_lock->assumeUnlocked(); } - else if (multi_code == Coordination::ZCONNECTIONLOSS - || multi_code == Coordination::ZOPERATIONTIMEOUT) + else if (multi_code == Coordination::Error::ZCONNECTIONLOSS + || multi_code == Coordination::Error::ZOPERATIONTIMEOUT) { /** If the connection is lost, and we do not know if the changes were applied, we can not delete the local part * if the changes were applied, the inserted block appeared in `/blocks/`, and it can not be inserted again. @@ -326,7 +326,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo { String failed_op_path = zkutil::KeeperMultiException(multi_code, ops, responses).getPathForFirstFailedOp(); - if (multi_code == Coordination::ZNODEEXISTS && deduplicate_block && failed_op_path == block_id_path) + if (multi_code == Coordination::Error::ZNODEEXISTS && deduplicate_block && failed_op_path == block_id_path) { /// Block with the same id have just appeared in table (or other replica), rollback thee insertion. 
LOG_INFO(log, "Block with ID {} already exists; ignoring it (removing part {})", block_id, part->name); @@ -336,7 +336,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo last_block_is_duplicate = true; ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); } - else if (multi_code == Coordination::ZNODEEXISTS && failed_op_path == quorum_info.status_path) + else if (multi_code == Coordination::Error::ZNODEEXISTS && failed_op_path == quorum_info.status_path) { transaction.rollback(); @@ -347,7 +347,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo /// NOTE: We could be here if the node with the quorum existed, but was quickly removed. transaction.rollback(); throw Exception("Unexpected logical error while adding block " + toString(block_number) + " with ID '" + block_id + "': " - + zkutil::ZooKeeper::error2string(multi_code) + ", path " + failed_op_path, + + Coordination::errorMessage(multi_code) + ", path " + failed_op_path, ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); } } @@ -355,13 +355,13 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo { transaction.rollback(); throw Exception("Unrecoverable network error while adding block " + toString(block_number) + " with ID '" + block_id + "': " - + zkutil::ZooKeeper::error2string(multi_code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); + + Coordination::errorMessage(multi_code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); } else { transaction.rollback(); throw Exception("Unexpected ZooKeeper error while adding block " + toString(block_number) + " with ID '" + block_id + "': " - + zkutil::ZooKeeper::error2string(multi_code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); + + Coordination::errorMessage(multi_code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); } if (quorum) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index de91a5d5940..1bc132eaba4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -40,7 +40,7 @@ void ReplicatedMergeTreeCleanupThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::ZSESSIONEXPIRED) + if (e.code == Coordination::Error::ZSESSIONEXPIRED) return; } catch (...) @@ -319,15 +319,15 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() for (auto & pair : try_remove_futures) { const String & path = pair.first; - int32_t rc = pair.second.get().error; - if (rc == Coordination::ZNOTEMPTY) + Coordination::Error rc = pair.second.get().error; + if (rc == Coordination::Error::ZNOTEMPTY) { /// Can happen if there are leftover block nodes with children created by previous server versions. 
zookeeper->removeRecursive(path); cached_block_stats.erase(first_outdated_block->node); } - else if (rc) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, zkutil::ZooKeeper::error2string(rc)); + else if (rc != Coordination::Error::ZOK) + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); else { /// Successfully removed blocks have to be removed from cache @@ -348,7 +348,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & Strings blocks; Coordination::Stat stat; - if (zookeeper.tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat)) + if (Coordination::Error::ZOK != zookeeper.tryGetChildren(storage.zookeeper_path + "/blocks", blocks, &stat)) throw Exception(storage.zookeeper_path + "/blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); /// Seems like this code is obsolete, because we delete blocks from cache @@ -391,7 +391,7 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & for (auto & elem : exists_futures) { auto status = elem.second.get(); - if (status.error != Coordination::ZNONODE) + if (status.error != Coordination::Error::ZNONODE) { cached_block_stats.emplace(elem.first, status.stat.ctime); timed_blocks.emplace_back(elem.first, status.stat.ctime); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 8f99f315620..0d824fa2dd8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -368,7 +368,7 @@ void ReplicatedMergeTreePartCheckThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::ZSESSIONEXPIRED) + if (e.code == Coordination::Error::ZSESSIONEXPIRED) return; task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index a6dec4816bf..09e9cd494ca 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -319,8 +319,8 @@ void ReplicatedMergeTreeQueue::updateTimesInZooKeeper( Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); - if (code) - LOG_ERROR(log, "Couldn't set value of nodes for insert times ({}/min_unprocessed_insert_time, max_processed_insert_time): {}", replica_path, zkutil::ZooKeeper::error2string(code) + ". This shouldn't happen often."); + if (code != Coordination::Error::ZOK) + LOG_ERROR(log, "Couldn't set value of nodes for insert times ({}/min_unprocessed_insert_time, max_processed_insert_time): {}. This shouldn't happen often.", replica_path, Coordination::errorMessage(code)); } } @@ -364,8 +364,8 @@ void ReplicatedMergeTreeQueue::removeProcessedEntry(zkutil::ZooKeeperPtr zookeep notifySubscribers(queue_size); auto code = zookeeper->tryRemove(replica_path + "/queue/" + entry->znode_name); - if (code) - LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. This shouldn't happen often.", replica_path, entry->znode_name, zkutil::ZooKeeper::error2string(code)); + if (code != Coordination::Error::ZOK) + LOG_ERROR(log, "Couldn't remove {}/queue/{}: {}. 
This shouldn't happen often.", replica_path, entry->znode_name, Coordination::errorMessage(code)); updateTimesInZooKeeper(zookeeper, min_unprocessed_insert_time_changed, max_processed_insert_time_changed); } @@ -720,7 +720,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( std::lock_guard lock(update_mutations_mutex); auto rc = zookeeper->tryRemove(zookeeper_path + "/mutations/" + mutation_id); - if (rc == Coordination::ZOK) + if (rc == Coordination::Error::ZOK) LOG_DEBUG(log, "Removed mutation {} from ZooKeeper.", mutation_id); ReplicatedMergeTreeMutationEntryPtr entry; @@ -844,8 +844,8 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( if ((*it)->currently_executing) to_wait.push_back(*it); auto code = zookeeper->tryRemove(replica_path + "/queue/" + (*it)->znode_name); - if (code) - LOG_INFO(log, "Couldn't remove {}: {}", replica_path + "/queue/" + (*it)->znode_name, zkutil::ZooKeeper::error2string(code)); + if (code != Coordination::Error::ZOK) + LOG_INFO(log, "Couldn't remove {}: {}", replica_path + "/queue/" + (*it)->znode_name, Coordination::errorMessage(code)); updateStateOnQueueEntryRemoval( *it, /* is_successful = */ false, @@ -1625,7 +1625,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( for (auto & block : block_infos) { Coordination::GetResponse resp = block.contents_future.get(); - if (!resp.error && lock_holder_paths.count(resp.data)) + if (resp.error == Coordination::Error::ZOK && lock_holder_paths.count(resp.data)) committing_blocks[block.partition].insert(block.number); } } @@ -1633,7 +1633,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( queue_.pullLogsToQueue(zookeeper); Coordination::GetResponse quorum_status_response = quorum_status_future.get(); - if (!quorum_status_response.error) + if (quorum_status_response.error == Coordination::Error::ZOK) { ReplicatedMergeTreeQuorumEntry quorum_status; quorum_status.fromString(quorum_status_response.data); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 93d652f2be0..0ec9b8069ac 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -234,7 +234,7 @@ void ReplicatedMergeTreeRestartingThread::removeFailedQuorumParts() auto zookeeper = storage.getZooKeeper(); Strings failed_parts; - if (zookeeper->tryGetChildren(storage.zookeeper_path + "/quorum/failed_parts", failed_parts) != Coordination::ZOK) + if (zookeeper->tryGetChildren(storage.zookeeper_path + "/quorum/failed_parts", failed_parts) != Coordination::Error::ZOK) return; /// Firstly, remove parts from ZooKeeper @@ -294,12 +294,12 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() { auto code = zookeeper->tryRemove(is_active_path, stat.version); - if (code == Coordination::ZBADVERSION) + if (code == Coordination::Error::ZBADVERSION) throw Exception("Another instance of replica " + storage.replica_path + " was created just now." " You shouldn't run multiple instances of the same replica.
You need to check configuration files.", ErrorCodes::REPLICA_IS_ALREADY_ACTIVE); - if (code && code != Coordination::ZNONODE) + if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) throw Coordination::Exception(code, is_active_path); } @@ -314,7 +314,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNODEEXISTS) + if (e.code == Coordination::Error::ZNODEEXISTS) throw Exception("Replica " + storage.replica_path + " appears to be already active. If you're sure it's not, " "try again in a minute or remove znode " + storage.replica_path + "/is_active manually", ErrorCodes::REPLICA_IS_ALREADY_ACTIVE); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 809536f7452..e7d9e214995 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -441,8 +441,8 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() LOG_WARNING(log, "Removing leftovers from table {} (this might take several minutes)", zookeeper_path); Strings children; - int32_t code = zookeeper->tryGetChildren(zookeeper_path, children); - if (code == Coordination::ZNONODE) + Coordination::Error code = zookeeper->tryGetChildren(zookeeper_path, children); + if (code == Coordination::Error::ZNONODE) { LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); } @@ -458,16 +458,16 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1)); code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZNONODE) + if (code == Coordination::Error::ZNONODE) { LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); } - else if (code == Coordination::ZNOTEMPTY) + else if (code == Coordination::Error::ZNOTEMPTY) { throw Exception(fmt::format( "The old table was not completely removed from ZooKeeper, {} still exists and may contain some garbage. But it should never happen according to the logic of operations (it's a bug).", zookeeper_path), ErrorCodes::LOGICAL_ERROR); } - else if (code != Coordination::ZOK) + else if (code != Coordination::Error::ZOK) { /// It is still possible that ZooKeeper session is expired or server is killed in the middle of the delete operation. 
zkutil::KeeperMultiException::check(code, ops, responses); @@ -535,12 +535,12 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZNODEEXISTS) { LOG_WARNING(log, "It looks like the table {} was created by another server at the same moment, will retry", zookeeper_path); continue; } - else if (code != Coordination::ZOK) + else if (code != Coordination::Error::ZOK) { zkutil::KeeperMultiException::check(code, ops, responses); } @@ -557,7 +557,7 @@ void StorageReplicatedMergeTree::createReplica() LOG_DEBUG(log, "Creating replica {}", replica_path); - int32_t code; + Coordination::Error code; do { @@ -599,15 +599,15 @@ void StorageReplicatedMergeTree::createReplica() Coordination::Responses responses; code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZNODEEXISTS) { throw Exception("Replica " + replica_path + " already exists.", ErrorCodes::REPLICA_IS_ALREADY_EXIST); } - else if (code == Coordination::ZBADVERSION) + else if (code == Coordination::Error::ZBADVERSION) { LOG_ERROR(log, "Retrying createReplica(), because some other replicas were created at the same time"); } - else if (code == Coordination::ZNONODE) + else if (code == Coordination::Error::ZNONODE) { throw Exception("Table " + zookeeper_path + " was suddenly removed.", ErrorCodes::ALL_REPLICAS_LOST); } @@ -615,7 +615,7 @@ void StorageReplicatedMergeTree::createReplica() { zkutil::KeeperMultiException::check(code, ops, responses); } - } while (code == Coordination::ZBADVERSION); + } while (code == Coordination::Error::ZBADVERSION); } void StorageReplicatedMergeTree::drop() @@ -640,7 +640,7 @@ void StorageReplicatedMergeTree::drop() /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. 
Strings replicas; - if (Coordination::ZOK == zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) && replicas.empty()) + if (Coordination::Error::ZOK == zookeeper->tryGetChildren(zookeeper_path + "/replicas", replicas) && replicas.empty()) { LOG_INFO(log, "{} is the last replica, will remove table", replica_path); @@ -656,17 +656,17 @@ void StorageReplicatedMergeTree::drop() Coordination::Responses responses; ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path + "/replicas", -1)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/dropped", "", zkutil::CreateMode::Persistent)); - int32_t code = zookeeper->tryMulti(ops, responses); + Coordination::Error code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZNONODE || code == Coordination::ZNODEEXISTS) + if (code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { LOG_WARNING(log, "Table {} is already started to be removing by another replica right now", replica_path); } - else if (code == Coordination::ZNOTEMPTY) + else if (code == Coordination::Error::ZNOTEMPTY) { LOG_WARNING(log, "Another replica was suddenly created, will keep the table {}", replica_path); } - else if (code != Coordination::ZOK) + else if (code != Coordination::Error::ZOK) { zkutil::KeeperMultiException::check(code, ops, responses); } @@ -676,7 +676,7 @@ void StorageReplicatedMergeTree::drop() Strings children; code = zookeeper->tryGetChildren(zookeeper_path, children); - if (code == Coordination::ZNONODE) + if (code == Coordination::Error::ZNONODE) { LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); } @@ -692,16 +692,16 @@ void StorageReplicatedMergeTree::drop() ops.emplace_back(zkutil::makeRemoveRequest(zookeeper_path, -1)); code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZNONODE) + if (code == Coordination::Error::ZNONODE) { LOG_WARNING(log, "Table {} is already finished removing by another replica right now", replica_path); } - else if (code == Coordination::ZNOTEMPTY) + else if (code == Coordination::Error::ZNOTEMPTY) { LOG_ERROR(log, "Table was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", zookeeper_path); } - else if (code != Coordination::ZOK) + else if (code != Coordination::Error::ZOK) { /// It is still possible that ZooKeeper session is expired or server is killed in the middle of the delete operation. 
zkutil::KeeperMultiException::check(code, ops, responses); @@ -936,7 +936,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) time_t part_create_time = 0; Coordination::ExistsResponse exists_resp = exists_futures[i].get(); - if (!exists_resp.error) + if (exists_resp.error == Coordination::Error::ZOK) { part_create_time = exists_resp.stat.ctime / 1000; removePartFromZooKeeper(part_name, ops, exists_resp.stat.numChildren > 0); @@ -1107,7 +1107,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd size_t num_check_ops = 2 * absent_part_paths_on_replicas.size(); size_t failed_op_index = e.failed_op_index; - if (failed_op_index < num_check_ops && e.code == Coordination::ZNODEEXISTS) + if (failed_op_index < num_check_ops && e.code == Coordination::Error::ZNODEEXISTS) { LOG_INFO(log, "The part {} on a replica suddenly appeared, will recheck checksums", e.getPathForFirstFailedOp()); } @@ -1584,15 +1584,15 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry) Coordination::Responses responses; auto code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZOK) + if (code == Coordination::Error::ZOK) { LOG_DEBUG(log, "Marked quorum for part {} as failed.", entry.new_part_name); queue.removeFromVirtualParts(part_info); return true; } - else if (code == Coordination::ZBADVERSION || code == Coordination::ZNONODE || code == Coordination::ZNODEEXISTS) + else if (code == Coordination::Error::ZBADVERSION || code == Coordination::Error::ZNONODE || code == Coordination::Error::ZNODEEXISTS) { - LOG_DEBUG(log, "State was changed or isn't expected when trying to mark quorum for part {} as failed. Code: {}", entry.new_part_name, zkutil::ZooKeeper::error2string(code)); + LOG_DEBUG(log, "State was changed or isn't expected when trying to mark quorum for part {} as failed. Code: {}", entry.new_part_name, Coordination::errorMessage(code)); } else throw Coordination::Exception(code); @@ -2088,7 +2088,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo auto rc = zookeeper->tryMulti(ops, responses); - if (rc == Coordination::ZOK) + if (rc == Coordination::Error::ZOK) { break; } @@ -2256,7 +2256,7 @@ void StorageReplicatedMergeTree::queueUpdatingTask() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::ZSESSIONEXPIRED) + if (e.code == Coordination::Error::ZSESSIONEXPIRED) { restarting_thread.wakeup(); return; @@ -2282,7 +2282,7 @@ void StorageReplicatedMergeTree::mutationsUpdatingTask() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::ZSESSIONEXPIRED) + if (e.code == Coordination::Error::ZSESSIONEXPIRED) return; mutations_updating_task->scheduleAfter(QUEUE_UPDATE_ERROR_SLEEP_MS); @@ -2525,7 +2525,7 @@ bool StorageReplicatedMergeTree::createLogEntryToMergeParts( for (size_t i = 0; i < parts.size(); ++i) { /// If there is no information about part in ZK, we will not merge it. 
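The updateQuorum and cleanLastPartNode hunks below all follow the same optimistic-concurrency shape: read a node together with its version, modify the value locally, write it back conditioned on that version, and restart from the read on ZBADVERSION. A self-contained sketch of that loop, with a hypothetical FakeStore standing in for ZooKeeper:

    #include <iostream>
    #include <string>

    enum class Error { ZOK, ZNONODE, ZBADVERSION };

    struct FakeStore
    {
        std::string data = "quorum:1";
        int version = 0;

        Error trySet(const std::string & new_data, int expected_version)
        {
            if (expected_version != version)
                return Error::ZBADVERSION;  // someone updated the node meanwhile
            data = new_data;
            ++version;
            return Error::ZOK;
        }
    };

    int main()
    {
        FakeStore store;
        for (;;)
        {
            int seen_version = store.version;               // read the node and its version
            std::string updated = store.data + "+replica";  // modify locally
            Error code = store.trySet(updated, seen_version);  // conditional write

            if (code == Error::ZOK)
                break;      // committed atomically
            if (code == Error::ZNONODE)
                break;      // node vanished: nothing left to update
            if (code == Error::ZBADVERSION)
                continue;   // lost the race: re-read and repeat all the actions
        }
        std::cout << store.data << '\n';
    }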
- if (exists_futures[i].get().error == Coordination::ZNONODE) + if (exists_futures[i].get().error == Coordination::Error::ZNONODE) { all_in_zk = false; @@ -2871,16 +2871,16 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) ops.emplace_back(zkutil::makeSetRequest(quorum_last_part_path, new_added_parts, added_parts_stat.version)); auto code = zookeeper->tryMulti(ops, responses); - if (code == Coordination::ZOK) + if (code == Coordination::Error::ZOK) { break; } - else if (code == Coordination::ZNONODE) + else if (code == Coordination::Error::ZNONODE) { /// The quorum has already been achieved. break; } - else if (code == Coordination::ZBADVERSION) + else if (code == Coordination::Error::ZBADVERSION) { /// Node was updated meanwhile. We must re-read it and repeat all the actions. continue; @@ -2893,16 +2893,16 @@ void StorageReplicatedMergeTree::updateQuorum(const String & part_name) /// We update the node, registering there one more replica. auto code = zookeeper->trySet(quorum_status_path, quorum_entry.toString(), stat.version); - if (code == Coordination::ZOK) + if (code == Coordination::Error::ZOK) { break; } - else if (code == Coordination::ZNONODE) + else if (code == Coordination::Error::ZNONODE) { /// The quorum has already been achieved. break; } - else if (code == Coordination::ZBADVERSION) + else if (code == Coordination::Error::ZBADVERSION) { /// Node was updated meanwhile. We must re-read it and repeat all the actions. continue; @@ -2946,16 +2946,16 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) auto code = zookeeper->trySet(quorum_last_part_path, new_added_parts, added_parts_stat.version); - if (code == Coordination::ZOK) + if (code == Coordination::Error::ZOK) { break; } - else if (code == Coordination::ZNONODE) + else if (code == Coordination::Error::ZNONODE) { /// Node is deleted. It is impossible, but it is Ok. break; } - else if (code == Coordination::ZBADVERSION) + else if (code == Coordination::Error::ZBADVERSION) { /// Node was updated meanwhile. We must re-read it and repeat all the actions. continue; @@ -3643,9 +3643,9 @@ void StorageReplicatedMergeTree::alter( } Coordination::Responses results; - int32_t rc = zookeeper->tryMulti(ops, results); + Coordination::Error rc = zookeeper->tryMulti(ops, results); - if (rc == Coordination::ZOK) + if (rc == Coordination::Error::ZOK) { if (alter_entry->have_mutation) { @@ -3665,9 +3665,9 @@ void StorageReplicatedMergeTree::alter( } break; } - else if (rc == Coordination::ZBADVERSION) + else if (rc == Coordination::Error::ZBADVERSION) { - if (results[0]->error) + if (results[0]->error != Coordination::Error::ZOK) throw Exception("Metadata on replica is not up to date with common metadata in Zookeeper. 
Cannot alter", ErrorCodes::CANNOT_ASSIGN_ALTER); continue; @@ -3987,8 +3987,8 @@ StorageReplicatedMergeTree::allocateBlockNumber( ops.push_back(zkutil::makeSetRequest(block_numbers_path, "", -1)); Coordination::Responses responses; - int code = zookeeper->tryMulti(ops, responses); - if (code && code != Coordination::ZNODEEXISTS) + Coordination::Error code = zookeeper->tryMulti(ops, responses); + if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNODEEXISTS) zkutil::KeeperMultiException::check(code, ops, responses); } @@ -4001,7 +4001,7 @@ StorageReplicatedMergeTree::allocateBlockNumber( } catch (const zkutil::KeeperMultiException & e) { - if (e.code == Coordination::ZNODEEXISTS && e.getPathForFirstFailedOp() == zookeeper_block_id_path) + if (e.code == Coordination::Error::ZNODEEXISTS && e.getPathForFirstFailedOp() == zookeeper_block_id_path) return {}; throw Exception("Cannot allocate block number in ZooKeeper: " + e.displayText(), ErrorCodes::KEEPER_EXCEPTION); @@ -4690,9 +4690,9 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const mutations_path + "/", entry.toString(), zkutil::CreateMode::PersistentSequential)); Coordination::Responses responses; - int32_t rc = zookeeper->tryMulti(requests, responses); + Coordination::Error rc = zookeeper->tryMulti(requests, responses); - if (rc == Coordination::ZOK) + if (rc == Coordination::Error::ZOK) { const String & path_created = dynamic_cast(responses[1].get())->path_created; @@ -4700,7 +4700,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, const LOG_TRACE(log, "Created mutation with ID {}", entry.znode_name); break; } - else if (rc == Coordination::ZBADVERSION) + else if (rc == Coordination::Error::ZBADVERSION) { LOG_TRACE(log, "Version conflict when trying to create a mutation node, retrying..."); continue; @@ -4892,7 +4892,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St for (size_t i = 0; i < part_names.size(); ++i) { Coordination::ExistsResponse exists_resp = exists_futures[i].get(); - if (!exists_resp.error) + if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0); @@ -4904,7 +4904,7 @@ bool StorageReplicatedMergeTree::tryRemovePartsFromZooKeeperWithRetries(const St { auto response = future.get(); - if (response.error == 0 || response.error == Coordination::ZNONODE) + if (response.error == Coordination::Error::ZOK || response.error == Coordination::Error::ZNONODE) continue; if (Coordination::isHardwareError(response.error)) @@ -4953,7 +4953,7 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper( for (size_t i = 0; i < part_names.size(); ++i) { Coordination::ExistsResponse exists_resp = exists_futures[i].get(); - if (!exists_resp.error) + if (exists_resp.error == Coordination::Error::ZOK) { Coordination::Requests ops; removePartFromZooKeeper(part_names[i], ops, exists_resp.stat.numChildren > 0); @@ -4982,9 +4982,9 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper( continue; auto response = future.get(); - if (response.error == Coordination::ZOK) + if (response.error == Coordination::Error::ZOK) continue; - else if (response.error == Coordination::ZNONODE) + else if (response.error == Coordination::Error::ZNONODE) { LOG_DEBUG(log, "There is no part {} in ZooKeeper, it was only in filesystem", part_names[i]); continue; @@ -4996,7 +4996,7 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper( 
continue; } else - LOG_WARNING(log, "Cannot remove part {} from ZooKeeper: {}", part_names[i], zkutil::ZooKeeper::error2string(response.error)); + LOG_WARNING(log, "Cannot remove part {} from ZooKeeper: {}", part_names[i], Coordination::errorMessage(response.error)); } } @@ -5005,7 +5005,7 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( zkutil::ZooKeeper & zookeeper, const String & partition_id, Int64 min_block_num, Int64 max_block_num) { Strings blocks; - if (zookeeper.tryGetChildren(zookeeper_path + "/blocks", blocks)) + if (Coordination::Error::ZOK != zookeeper.tryGetChildren(zookeeper_path + "/blocks", blocks)) throw Exception(zookeeper_path + "/blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); String partition_prefix = partition_id + "_"; @@ -5025,7 +5025,7 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( const String & path = pair.first; auto result = pair.second.get(); - if (result.error == Coordination::ZNONODE) + if (result.error == Coordination::Error::ZNONODE) continue; ReadBufferFromString buf(result.data); @@ -5038,14 +5038,14 @@ void StorageReplicatedMergeTree::clearBlocksInPartition( for (auto & pair : to_delete_futures) { const String & path = pair.first; - int32_t rc = pair.second.get().error; - if (rc == Coordination::ZNOTEMPTY) + Coordination::Error rc = pair.second.get().error; + if (rc == Coordination::Error::ZNOTEMPTY) { /// Can happen if there are leftover block nodes with children created by previous server versions. zookeeper.removeRecursive(path); } - else if (rc) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, zkutil::ZooKeeper::error2string(rc)); + else if (rc != Coordination::Error::ZOK) + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); } LOG_TRACE(log, "Deleted {} deduplication block IDs in partition ID {}", to_delete_futures.size(), partition_id); diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index c3f1d8a8505..d5806667d1f 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -131,7 +131,7 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, const Contex for (size_t i = 0, size = nodes.size(); i < size; ++i) { auto res = futures[i].get(); - if (res.error == Coordination::ZNONODE) + if (res.error == Coordination::Error::ZNONODE) continue; /// Node was deleted meanwhile. 
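Several of these loops (exists_futures, try_remove_futures, and the futures vector here) batch asynchronous ZooKeeper requests first and harvest the responses afterwards, treating ZNONODE as a benign race (the node was deleted meanwhile) rather than an error. A reduced sketch of that shape, using std::async and a stubbed fakeExists in place of the real client:

    #include <cstddef>
    #include <future>
    #include <iostream>
    #include <string>
    #include <vector>

    enum class Error { ZOK, ZNONODE };

    struct ExistsResponse { Error error; int ctime; };

    // Stand-in for an asynchronous zookeeper exists() request.
    ExistsResponse fakeExists(const std::string & path)
    {
        if (path.find("_missing") != std::string::npos)
            return {Error::ZNONODE, 0};
        return {Error::ZOK, 42};
    }

    int main()
    {
        std::vector<std::string> paths = {"/blocks/a", "/blocks/b_missing"};
        std::vector<std::future<ExistsResponse>> futures;
        futures.reserve(paths.size());
        for (const auto & path : paths)               // 1) fire all requests
            futures.push_back(std::async(std::launch::async, fakeExists, path));

        for (std::size_t i = 0; i < paths.size(); ++i)  // 2) harvest responses
        {
            ExistsResponse resp = futures[i].get();
            if (resp.error == Error::ZNONODE)
                continue;                             // deleted meanwhile: not an error
            std::cout << paths[i] << " ctime=" << resp.ctime << '\n';
        }
    }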
const Coordination::Stat & stat = res.stat; diff --git a/src/Storages/tests/get_current_inserts_in_replicated.cpp b/src/Storages/tests/get_current_inserts_in_replicated.cpp index aba69684045..f1a87b5ca05 100644 --- a/src/Storages/tests/get_current_inserts_in_replicated.cpp +++ b/src/Storages/tests/get_current_inserts_in_replicated.cpp @@ -86,7 +86,7 @@ try for (BlockInfo & block : block_infos) { Coordination::GetResponse resp = block.contents_future.get(); - if (!resp.error && lock_holder_paths.count(resp.data)) + if (resp.error == Coordination::Error::ZOK && lock_holder_paths.count(resp.data)) { ++total_count; current_inserts[block.partition].insert(block.number); diff --git a/src/Storages/tests/transform_part_zk_nodes.cpp b/src/Storages/tests/transform_part_zk_nodes.cpp index 2b4f6b7ff5c..35e44bb9446 100644 --- a/src/Storages/tests/transform_part_zk_nodes.cpp +++ b/src/Storages/tests/transform_part_zk_nodes.cpp @@ -76,7 +76,7 @@ try } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNONODE) + if (e.code == Coordination::Error::ZNONODE) continue; throw; } diff --git a/utils/zookeeper-dump-tree/main.cpp b/utils/zookeeper-dump-tree/main.cpp index d848ffdad3c..877a4a996b7 100644 --- a/utils/zookeeper-dump-tree/main.cpp +++ b/utils/zookeeper-dump-tree/main.cpp @@ -45,7 +45,7 @@ try } catch (const Coordination::Exception & e) { - if (e.code == Coordination::ZNONODE) + if (e.code == Coordination::Error::ZNONODE) continue; throw; } From 5101708831d6550ac061b9d8b070c3439aad4968 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 12 Jun 2020 18:11:33 +0300 Subject: [PATCH 0595/2229] fixup --- docker/test/performance-comparison/compare.sh | 7 +++++-- .../config/users.d/perf-comparison-tweaks-users.xml | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 241fdaec70d..5435d37e2e0 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -171,8 +171,11 @@ function run_benchmark # TODO disable this when there is an explicit list of tests to run "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv - clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json < benchmark/website-queries.tsv - clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json < benchmark/website-queries.tsv + # TODO things to fix in clickhouse-benchmark: + # - --max_memory_usage setting does nothing + # - no way to continue on error + clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv + clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv } function get_profiles_watchdog diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 6e3e3df5d39..1bde2a1388b 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ 
b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -6,6 +6,8 @@ 1 1 1 + + 30000000000
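The size_t -> ssize_t commit that follows guards against unsigned underflow: the block-splitting search initializes low = -1, which a size_t silently wraps to SIZE_MAX, breaking the midpoint arithmetic. A minimal demonstration (ssize_t is the POSIX signed counterpart of size_t):

    #include <sys/types.h>   // ssize_t (POSIX)
    #include <cstddef>
    #include <iostream>

    int main()
    {
        size_t low_unsigned = -1;    // wraps: comparisons and midpoint math go wrong
        ssize_t low_signed = -1;     // stays -1, as the search logic expects

        std::cout << low_unsigned << '\n';   // 18446744073709551615 on 64-bit
        std::cout << low_signed << '\n';     // -1
    }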
    From e5897bbfb6073b087488ce58e940683ad9f55133 Mon Sep 17 00:00:00 2001 From: Dmitry Date: Fri, 12 Jun 2020 18:59:11 +0300 Subject: [PATCH 0596/2229] size_t -> ssize_t --- src/Processors/Transforms/AggregatingInOrderTransform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 3cac1c9602c..7edeff65ec8 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -100,9 +100,9 @@ void AggregatingInOrderTransform::consume(Chunk chunk) params->aggregator.createStatesAndFillKeyColumnsWithSingleKey(variants, key_columns, key_begin, res_key_columns); ++cur_block_size; } - size_t mid = 0; - size_t high = 0; - size_t low = -1; + ssize_t mid = 0; + ssize_t high = 0; + ssize_t low = -1; /// Will split block into segments with the same key while (key_end != rows) { From 8d9b770da4a74a16c405f90df669bf1d34135fc5 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 13 Jun 2020 00:15:02 +0800 Subject: [PATCH 0597/2229] ISSUES-7572 fix defaults config level & add replicas_status and prometheus handler --- src/Server/HTTPHandlerFactory.cpp | 93 +++++++++++-------- src/Server/HTTPHandlerFactory.h | 14 +-- src/Server/PrometheusRequestHandler.cpp | 15 +-- src/Server/ReplicasStatusHandler.cpp | 6 +- src/Server/StaticRequestHandler.cpp | 16 ---- .../test_http_handlers_config/test.py | 26 ++++-- .../test_custom_defaults_handlers/config.xml | 10 -- .../test_prometheus_handler/config.xml | 17 ++++ .../test_replicas_status_handler/config.xml | 12 +++ 9 files changed, 112 insertions(+), 97 deletions(-) delete mode 100644 tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml create mode 100644 tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml create mode 100644 tests/integration/test_http_handlers_config/test_replicas_status_handler/config.xml diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index ec75656a9a8..6459b0aab3b 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -74,55 +74,51 @@ static inline auto createHandlersFactoryFromConfig( for (const auto & key : keys) { - if (!startsWith(key, "rule")) - throw Exception("Unknown element in config: " + prefix + "." + key + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + if (key == "defaults") + addDefaultHandlersFactory(*main_handler_factory, server, &async_metrics); + else if (startsWith(key, "rule")) + { + const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", ""); - const auto & handler_type = server.config().getString(prefix + "." + key + ".handler.type", ""); + if (handler_type.empty()) + throw Exception("Handler type in config is not specified here: " + prefix + "." 
+ key + ".handler.type", + ErrorCodes::INVALID_CONFIG_PARAMETER); - if (handler_type == "root") - addRootHandlerFactory(*main_handler_factory, server); - else if (handler_type == "ping") - addPingHandlerFactory(*main_handler_factory, server); - else if (handler_type == "defaults") - addDefaultHandlersFactory(*main_handler_factory, server, async_metrics); - else if (handler_type == "prometheus") - addPrometheusHandlerFactory(*main_handler_factory, server, async_metrics); - else if (handler_type == "replicas_status") - addReplicasStatusHandlerFactory(*main_handler_factory, server); - else if (handler_type == "static") - main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key)); - else if (handler_type == "dynamic_query_handler") - main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key)); - else if (handler_type == "predefined_query_handler") - main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key)); - else if (handler_type.empty()) - throw Exception("Handler type in config is not specified here: " + - prefix + "." + key + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER); + if (handler_type == "static") + main_handler_factory->addHandler(createStaticHandlerFactory(server, prefix + "." + key)); + else if (handler_type == "dynamic_query_handler") + main_handler_factory->addHandler(createDynamicHandlerFactory(server, prefix + "." + key)); + else if (handler_type == "predefined_query_handler") + main_handler_factory->addHandler(createPredefinedHandlerFactory(server, prefix + "." + key)); + else if (handler_type == "prometheus") + main_handler_factory->addHandler(createPrometheusHandlerFactory(server, async_metrics, prefix + "." + key)); + else if (handler_type == "replicas_status") + main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, prefix + "." + key)); + else + throw Exception("Unknown handler type '" + handler_type + "' in config here: " + prefix + "." + key + ".handler.type", + ErrorCodes::INVALID_CONFIG_PARAMETER); + } else - throw Exception("Unknown handler type '" + handler_type +"' in config here: " + - prefix + "." + key + ".handler.type",ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'rule' or 'defaults'", + ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); } return main_handler_factory.release(); } -static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory( - IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) +static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics) { if (server.config().has("http_handlers")) return createHandlersFactoryFromConfig(server, name, "http_handlers", async_metrics); else { auto factory = std::make_unique(name); - - addRootHandlerFactory(*factory, server); - addPingHandlerFactory(*factory, server); - addReplicasStatusHandlerFactory(*factory, server); - addPrometheusHandlerFactory(*factory, server, async_metrics); + addDefaultHandlersFactory(*factory, server, &async_metrics); auto query_handler = std::make_unique>(server, "query"); query_handler->allowPostAndGetParamsRequest(); factory->addHandler(query_handler.release()); + return factory.release(); } } @@ -130,10 +126,7 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory( static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name) { auto factory = std::make_unique(name); - - addRootHandlerFactory(*factory, server); - addPingHandlerFactory(*factory, server); - addReplicasStatusHandlerFactory(*factory, server); + addDefaultHandlersFactory(*factory, server, nullptr); auto main_handler = std::make_unique>(server); main_handler->allowPostAndGetParamsRequest(); @@ -161,12 +154,32 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); } -void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) +static const auto ping_response_expression = "Ok.\n"; +static const auto root_response_expression = "config://http_server_default_response"; + +void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics * async_metrics) { - addRootHandlerFactory(factory, server); - addPingHandlerFactory(factory, server); - addReplicasStatusHandlerFactory(factory, server); - addPrometheusHandlerFactory(factory, server, async_metrics); + auto root_handler = std::make_unique>(server, root_response_expression); + root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); + factory.addHandler(root_handler.release()); + + auto ping_handler = std::make_unique>(server, ping_response_expression); + ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); + factory.addHandler(ping_handler.release()); + + auto replicas_status_handler = std::make_unique>(server); + replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); + factory.addHandler(replicas_status_handler.release()); + + /// We check that prometheus handler will be served on current (default) port. + /// Otherwise it will be created separately, see below. 
+ if (async_metrics && server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) + { + auto prometheus_handler = std::make_unique>( + server, PrometheusMetricsWriter(server.config(), "prometheus", *async_metrics)); + prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); + factory.addHandler(prometheus_handler.release()); + } } } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index ac3a7451338..8e21a13ba18 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -103,15 +103,7 @@ private: std::function creator; }; -void addRootHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); - -void addPingHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); - -void addReplicasStatusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); - -void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics); - -void addPrometheusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics); +void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics * async_metrics); Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix); @@ -119,6 +111,10 @@ Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & ser Poco::Net::HTTPRequestHandlerFactory * createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix); +Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix); + +Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix); + Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name); } diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 0f5df54b002..60deec9b289 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include namespace DB @@ -41,17 +41,10 @@ void PrometheusRequestHandler::handleRequest( } } -void addPrometheusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) +Poco::Net::HTTPRequestHandlerFactory * createPrometheusHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & config_prefix) { - /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see below. 
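The net effect of this refactoring is that the factory recognizes only two kinds of top-level keys under http_handlers, defaults and rule, and dispatches on each rule's handler.type string. Reduced to a standalone sketch (the handler names returned here are placeholders; the real code registers Poco request-handler factories, not strings):

    #include <functional>
    #include <iostream>
    #include <map>
    #include <stdexcept>
    #include <string>

    int main()
    {
        using Creator = std::function<std::string ()>;
        const std::map<std::string, Creator> known_types =
        {
            {"static",          [] { return std::string("StaticRequestHandler"); }},
            {"prometheus",      [] { return std::string("PrometheusRequestHandler"); }},
            {"replicas_status", [] { return std::string("ReplicasStatusHandler"); }},
        };

        // Each <rule> names its handler type; unknown types are a configuration
        // error, mirroring the INVALID_CONFIG_PARAMETER branch in the diff.
        for (const char * type : {"static", "prometheus"})
        {
            auto it = known_types.find(type);
            if (it == known_types.end())
                throw std::runtime_error(std::string("Unknown handler type '") + type + "'");
            std::cout << "registered " << it->second() << '\n';
        }
    }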
- if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) - { - auto prometheus_handler = std::make_unique>( - server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); - prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); - factory.addHandler(prometheus_handler.release()); - } + return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory( + server, PrometheusMetricsWriter(server.config(), config_prefix + ".handler", async_metrics)), server.config(), config_prefix); } } diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index d6bbfdbd090..3606da23ab5 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -106,11 +106,9 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request } } -void addReplicasStatusHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server) +Poco::Net::HTTPRequestHandlerFactory * createReplicasStatusHandlerFactory(IServer & server, const std::string & config_prefix) { - auto replicas_status_handler = std::make_unique>(server); - replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); - factory.addHandler(replicas_status_handler.release()); + return addFiltersFromConfig(new HandlingRuleHTTPHandlerFactory(server), server.config(), config_prefix); } } diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index 255e3cab5af..22f32e6a0e7 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -155,22 +155,6 @@ StaticRequestHandler::StaticRequestHandler(IServer & server_, const String & exp { } -void addRootHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server) -{ - static const auto root_response_expression = "config://http_server_default_response"; - - auto root_handler = std::make_unique>(server, root_response_expression); - root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); - factory.addHandler(root_handler.release()); -} - -void addPingHandlerFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server) -{ - auto ping_handler = std::make_unique>(server, "Ok.\n"); - ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest(); - factory.addHandler(ping_handler.release()); -} - Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix) { int status = server.config().getInt(config_prefix + ".handler.status", 200); diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py index a38bd3ff343..c18c22acbb2 100644 --- a/tests/integration/test_http_handlers_config/test.py +++ b/tests/integration/test_http_handlers_config/test.py @@ -124,12 +124,24 @@ def test_defaults_http_handlers(): assert 200 == cluster.instance.http_request('replicas_status', method='GET').status_code assert 'Ok\n' == cluster.instance.http_request('replicas_status', method='GET').content -def test_custom_defaults_http_handlers(): - with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "custom_defaults_handlers", "test_custom_defaults_handlers")) as cluster: - assert 200 == cluster.instance.http_request('', method='GET').status_code - assert 'Default server response' == cluster.instance.http_request('', method='GET').content +def test_prometheus_handler(): + with 
contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "prometheus_handler", "test_prometheus_handler")) as cluster: + assert 404 == cluster.instance.http_request('', method='GET', headers={'XXX': 'xxx'}).status_code - assert 200 == cluster.instance.http_request('ping', method='GET').status_code - assert 'Ok\n' == cluster.instance.http_request('ping', method='GET').content + assert 404 == cluster.instance.http_request('test_prometheus', method='GET', headers={'XXX': 'bad'}).status_code - assert 404 == cluster.instance.http_request('replicas_status', method='GET').status_code + assert 404 == cluster.instance.http_request('test_prometheus', method='POST', headers={'XXX': 'xxx'}).status_code + + assert 200 == cluster.instance.http_request('test_prometheus', method='GET', headers={'XXX': 'xxx'}).status_code + assert 'ClickHouseProfileEvents_Query' in cluster.instance.http_request('test_prometheus', method='GET', headers={'XXX': 'xxx'}).content + +def test_replicas_status_handler(): + with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "replicas_status_handler", "test_replicas_status_handler")) as cluster: + assert 404 == cluster.instance.http_request('', method='GET', headers={'XXX': 'xxx'}).status_code + + assert 404 == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'bad'}).status_code + + assert 404 == cluster.instance.http_request('test_replicas_status', method='POST', headers={'XXX': 'xxx'}).status_code + + assert 200 == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'xxx'}).status_code + assert 'Ok\n' == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'xxx'}).content diff --git a/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml b/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml deleted file mode 100644 index 54008c2c4b8..00000000000 --- a/tests/integration/test_http_handlers_config/test_custom_defaults_handlers/config.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - - Default server response - - - - - - diff --git a/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml b/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml new file mode 100644 index 00000000000..7c80649cee2 --- /dev/null +++ b/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml @@ -0,0 +1,17 @@ + + + + + + GET + xxx + /test_prometheus + + replicas_status + true + true + true + + + + diff --git a/tests/integration/test_http_handlers_config/test_replicas_status_handler/config.xml b/tests/integration/test_http_handlers_config/test_replicas_status_handler/config.xml new file mode 100644 index 00000000000..21f7d3b0fc8 --- /dev/null +++ b/tests/integration/test_http_handlers_config/test_replicas_status_handler/config.xml @@ -0,0 +1,12 @@ + + + + + + GET + xxx + /test_replicas_status + replicas_status + + + From 276fcd8903ff8ec5bbd394d7221e65fbfa005bfe Mon Sep 17 00:00:00 2001 From: Denis Glazachev Date: Fri, 12 Jun 2020 21:59:47 +0400 Subject: [PATCH 0598/2229] Add/rename parameters that control TLS --- programs/server/config.xml | 25 +++++++--- src/Access/ExternalAuthenticators.cpp | 70 +++++++++++++++++++-------- src/Access/LDAPClient.cpp | 58 ++++++++++++++++++---- src/Access/LDAPParams.h | 21 ++++++-- 4 files changed, 134 insertions(+), 40 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 372f418c812..f43ecad9a78 
100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -209,20 +209,29 @@ Specify 'no' for plain text (ldap://) protocol (not recommended). Specify 'yes' for LDAP over SSL/TLS (ldaps://) protocol (recommended, the default). Specify 'starttls' for legacy StartTLS protocol (plain text (ldap://) protocol, upgraded to TLS). - tls_cert_verify - TLS peer certificate verification behavior. - Accepted values are: never, allow, try, demand. - ca_cert_dir - path to CA certificates directory. - ca_cert_file - path to CA certificate file. - Example: + tls_minimum_protocol_version - the minimum protocol version of SSL/TLS. + Accepted values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2' (the default). + tls_require_cert - SSL/TLS peer certificate verification behavior. + Accepted values are: 'never', 'allow', 'try', 'demand' (the default). + tls_cert_file - path to certificate file. + tls_key_file - path to certificate key file. + tls_ca_cert_file - path to CA certificate file. + tls_ca_cert_dir - path to the directory containing CA certificates. + tls_cipher_suite - allowed cipher suite. + Example: localhost 636 cn= , ou=users, dc=example, dc=com yes - demand - /path/to/ca_cert_dir - /path/to/ca_cert_file + tls1.2 + demand + /path/to/tls_cert_file + /path/to/tls_key_file + /path/to/tls_ca_cert_file + /path/to/tls_ca_cert_dir + SECURE256:+SECURE128:-VERS-TLS-ALL:+VERS-TLS1.2:-RSA:-DHE-DSS:-CAMELLIA-128-CBC:-CAMELLIA-256-CBC --> diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 8d0487bfd31..fcb7317a52d 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -30,9 +30,13 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str const bool has_auth_dn_prefix = config.has(ldap_server_config + ".auth_dn_prefix"); const bool has_auth_dn_suffix = config.has(ldap_server_config + ".auth_dn_suffix"); const bool has_enable_tls = config.has(ldap_server_config + ".enable_tls"); - const bool has_tls_cert_verify = config.has(ldap_server_config + ".tls_cert_verify"); - const bool has_ca_cert_dir = config.has(ldap_server_config + ".ca_cert_dir"); - const bool has_ca_cert_file = config.has(ldap_server_config + ".ca_cert_file"); + const bool has_tls_minimum_protocol_version = config.has(ldap_server_config + ".tls_minimum_protocol_version"); + const bool has_tls_require_cert = config.has(ldap_server_config + ".tls_require_cert"); + const bool has_tls_cert_file = config.has(ldap_server_config + ".tls_cert_file"); + const bool has_tls_key_file = config.has(ldap_server_config + ".tls_key_file"); + const bool has_tls_ca_cert_file = config.has(ldap_server_config + ".tls_ca_cert_file"); + const bool has_tls_ca_cert_dir = config.has(ldap_server_config + ".tls_ca_cert_dir"); + const bool has_tls_cipher_suite = config.has(ldap_server_config + ".tls_cipher_suite"); if (!has_host) throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); @@ -61,28 +65,56 @@ auto parseLDAPServer(const Poco::Util::AbstractConfiguration & config, const Str params.enable_tls = LDAPServerParams::TLSEnable::NO; } - if (has_tls_cert_verify) + if (has_tls_minimum_protocol_version) { - String tls_cert_verify_lc_str = config.getString(ldap_server_config + ".tls_cert_verify"); - boost::to_lower(tls_cert_verify_lc_str); + String tls_minimum_protocol_version_lc_str = config.getString(ldap_server_config + ".tls_minimum_protocol_version"); + boost::to_lower(tls_minimum_protocol_version_lc_str); - if 
(tls_cert_verify_lc_str == "never") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::NEVER; - else if (tls_cert_verify_lc_str == "allow") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::ALLOW; - else if (tls_cert_verify_lc_str == "try") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::TRY; - else if (tls_cert_verify_lc_str == "demand") - params.tls_cert_verify = LDAPServerParams::TLSCertVerify::DEMAND; + if (tls_minimum_protocol_version_lc_str == "ssl2") + params.tls_minimum_protocol_version = LDAPServerParams::TLSProtocolVersion::SSL2; + else if (tls_minimum_protocol_version_lc_str == "ssl3") + params.tls_minimum_protocol_version = LDAPServerParams::TLSProtocolVersion::SSL3; + else if (tls_minimum_protocol_version_lc_str == "tls1.0") + params.tls_minimum_protocol_version = LDAPServerParams::TLSProtocolVersion::TLS1_0; + else if (tls_minimum_protocol_version_lc_str == "tls1.1") + params.tls_minimum_protocol_version = LDAPServerParams::TLSProtocolVersion::TLS1_1; + else if (tls_minimum_protocol_version_lc_str == "tls1.2") + params.tls_minimum_protocol_version = LDAPServerParams::TLSProtocolVersion::TLS1_2; else - throw Exception("Bad value for 'tls_cert_verify' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Bad value for 'tls_minimum_protocol_version' entry, allowed values are: 'ssl2', 'ssl3', 'tls1.0', 'tls1.1', 'tls1.2'", ErrorCodes::BAD_ARGUMENTS); } - if (has_ca_cert_dir) - params.ca_cert_dir = config.getString(ldap_server_config + ".ca_cert_dir"); + if (has_tls_require_cert) + { + String tls_require_cert_lc_str = config.getString(ldap_server_config + ".tls_require_cert"); + boost::to_lower(tls_require_cert_lc_str); - if (has_ca_cert_file) - params.ca_cert_file = config.getString(ldap_server_config + ".ca_cert_file"); + if (tls_require_cert_lc_str == "never") + params.tls_require_cert = LDAPServerParams::TLSRequireCert::NEVER; + else if (tls_require_cert_lc_str == "allow") + params.tls_require_cert = LDAPServerParams::TLSRequireCert::ALLOW; + else if (tls_require_cert_lc_str == "try") + params.tls_require_cert = LDAPServerParams::TLSRequireCert::TRY; + else if (tls_require_cert_lc_str == "demand") + params.tls_require_cert = LDAPServerParams::TLSRequireCert::DEMAND; + else + throw Exception("Bad value for 'tls_require_cert' entry, allowed values are: 'never', 'allow', 'try', 'demand'", ErrorCodes::BAD_ARGUMENTS); + } + + if (has_tls_cert_file) + params.tls_cert_file = config.getString(ldap_server_config + ".tls_cert_file"); + + if (has_tls_key_file) + params.tls_key_file = config.getString(ldap_server_config + ".tls_key_file"); + + if (has_tls_ca_cert_file) + params.tls_ca_cert_file = config.getString(ldap_server_config + ".tls_ca_cert_file"); + + if (has_tls_ca_cert_dir) + params.tls_ca_cert_dir = config.getString(ldap_server_config + ".tls_ca_cert_dir"); + + if (has_tls_cipher_suite) + params.tls_cipher_suite = config.getString(ldap_server_config + ".tls_cipher_suite"); if (has_port) { diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index b538c06c9ea..02c24bc9a9e 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -140,19 +140,23 @@ int LDAPClient::openConnection(const bool graceful_bind_failure) diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); #endif +#ifdef LDAP_OPT_TIMEOUT { ::timeval operation_timeout; operation_timeout.tv_sec = params.operation_timeout.count(); operation_timeout.tv_usec = 0; diag(ldap_set_option(handle, 
LDAP_OPT_TIMEOUT, &operation_timeout)); } +#endif +#ifdef LDAP_OPT_NETWORK_TIMEOUT { ::timeval network_timeout; network_timeout.tv_sec = params.network_timeout.count(); network_timeout.tv_usec = 0; diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); } +#endif { const int search_timeout = params.search_timeout.count(); @@ -164,23 +168,59 @@ int LDAPClient::openConnection(const bool graceful_bind_failure) diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); } +#ifdef LDAP_OPT_X_TLS_PROTOCOL_MIN { int value = 0; - switch (params.tls_cert_verify) + switch (params.tls_minimum_protocol_version) { - case LDAPServerParams::TLSCertVerify::NEVER: value = LDAP_OPT_X_TLS_NEVER; break; - case LDAPServerParams::TLSCertVerify::ALLOW: value = LDAP_OPT_X_TLS_ALLOW; break; - case LDAPServerParams::TLSCertVerify::TRY: value = LDAP_OPT_X_TLS_TRY; break; - case LDAPServerParams::TLSCertVerify::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break; + case LDAPServerParams::TLSProtocolVersion::SSL2: value = LDAP_OPT_X_TLS_PROTOCOL_SSL2; break; + case LDAPServerParams::TLSProtocolVersion::SSL3: value = LDAP_OPT_X_TLS_PROTOCOL_SSL3; break; + case LDAPServerParams::TLSProtocolVersion::TLS1_0: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_0; break; + case LDAPServerParams::TLSProtocolVersion::TLS1_1: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_1; break; + case LDAPServerParams::TLSProtocolVersion::TLS1_2: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_2; break; + } + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value)); + } +#endif + +#ifdef LDAP_OPT_X_TLS_REQUIRE_CERT + { + int value = 0; + switch (params.tls_require_cert) + { + case LDAPServerParams::TLSRequireCert::NEVER: value = LDAP_OPT_X_TLS_NEVER; break; + case LDAPServerParams::TLSRequireCert::ALLOW: value = LDAP_OPT_X_TLS_ALLOW; break; + case LDAPServerParams::TLSRequireCert::TRY: value = LDAP_OPT_X_TLS_TRY; break; + case LDAPServerParams::TLSRequireCert::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break; } diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); } +#endif - if (!params.ca_cert_dir.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.ca_cert_dir.c_str())); +#ifdef LDAP_OPT_X_TLS_CERTFILE + if (!params.tls_cert_file.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str())); +#endif - if (!params.ca_cert_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.ca_cert_file.c_str())); +#ifdef LDAP_OPT_X_TLS_KEYFILE + if (!params.tls_key_file.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str())); +#endif + +#ifdef LDAP_OPT_X_TLS_CACERTFILE + if (!params.tls_ca_cert_file.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str())); +#endif + +#ifdef LDAP_OPT_X_TLS_CACERTDIR + if (!params.tls_ca_cert_dir.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str())); +#endif + +#ifdef LDAP_OPT_X_TLS_CIPHER_SUITE + if (!params.tls_cipher_suite.empty()) + diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str())); +#endif #ifdef LDAP_OPT_X_TLS_NEWCTX { diff --git a/src/Access/LDAPParams.h b/src/Access/LDAPParams.h index ed28526d29d..0d7c7dd17cd 100644 --- a/src/Access/LDAPParams.h +++ b/src/Access/LDAPParams.h @@ -23,7 +23,16 @@ struct LDAPServerParams YES }; - enum class TLSCertVerify + enum class TLSProtocolVersion + { + SSL2, + SSL3, + TLS1_0, + TLS1_1, + TLS1_2 + }; + + enum class TLSRequireCert { 
NEVER, ALLOW, @@ -42,9 +51,13 @@ struct LDAPServerParams std::uint16_t port = 636; TLSEnable enable_tls = TLSEnable::YES; - TLSCertVerify tls_cert_verify = TLSCertVerify::DEMAND; - String ca_cert_dir; - String ca_cert_file; + TLSProtocolVersion tls_minimum_protocol_version = TLSProtocolVersion::TLS1_2; + TLSRequireCert tls_require_cert = TLSRequireCert::DEMAND; + String tls_cert_file; + String tls_key_file; + String tls_ca_cert_file; + String tls_ca_cert_dir; + String tls_cipher_suite; SASLMechanism sasl_mechanism = SASLMechanism::SIMPLE; From 3b0a3e00c0c1a902622aabf277b2ab0bb2a07571 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 20:05:35 +0000 Subject: [PATCH 0599/2229] Some fixes --- src/Storages/RabbitMQ/RabbitMQHandler.cpp | 8 +-- src/Storages/RabbitMQ/RabbitMQHandler.h | 5 +- src/Storages/RabbitMQ/RabbitMQSettings.h | 2 +- .../ReadBufferFromRabbitMQConsumer.cpp | 53 +++++++++++-------- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 9 +++- .../WriteBufferToRabbitMQProducer.cpp | 4 +- .../integration/test_storage_rabbitmq/test.py | 3 +- 7 files changed, 51 insertions(+), 33 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 0a432e1b5ca..8667427ee63 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ -37,11 +37,12 @@ void RabbitMQHandler::onError(AMQP::TcpConnection * connection, const char * mes void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) { /* The object of this class is shared between concurrent consumers (who share the same connection == share the same - * event loop). But the loop should not be attempted to start if it is already running. + * event loop and handler). But the loop should not be attempted to start if it is already running. 
*/ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { loop_started = true; + stop_scheduled.store(false); event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } @@ -56,7 +57,7 @@ void RabbitMQHandler::startProducerLoop() void RabbitMQHandler::stop() { - if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + if (mutex_before_loop_stop.try_lock()) { event_base_loopbreak(evbase); mutex_before_loop_stop.unlock(); @@ -66,8 +67,9 @@ void RabbitMQHandler::stop() void RabbitMQHandler::stopWithTimeout() { - if (mutex_before_loop_stop.try_lock_for(std::chrono::milliseconds(0))) + if (mutex_before_loop_stop.try_lock()) { + stop_scheduled.store(true); event_base_loopexit(evbase, &tv); mutex_before_loop_stop.unlock(); } diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 911651097bb..9b2d273422d 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -23,15 +23,16 @@ public: void startProducerLoop(); void stopWithTimeout(); void stop(); + std::atomic & checkStopIsScheduled() { return stop_scheduled; }; private: event_base * evbase; Poco::Logger * log; timeval tv; - size_t count_passed = 0; + std::atomic stop_scheduled = false; std::timed_mutex mutex_before_event_loop; - std::timed_mutex mutex_before_loop_stop; + std::mutex mutex_before_loop_stop; }; } diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index d81a887747b..c9f09489f77 100644 --- a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -15,9 +15,9 @@ namespace DB M(SettingString, rabbitmq_exchange_name, "clickhouse-exchange", "The exchange name, to which messages are sent.", 0) \ M(SettingString, rabbitmq_format, "", "The message format.", 0) \ M(SettingChar, rabbitmq_row_delimiter, '\0', "The character to be considered as a delimiter.", 0) \ + M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ - M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 5d2e3073d41..6b8763138a4 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -13,6 +13,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} namespace Exchange { @@ -22,6 +26,7 @@ namespace Exchange static const String DIRECT = "direct"; static const String TOPIC = "topic"; static const String HASH = "consistent_hash"; + static const String HEADERS = "headers"; } @@ -55,7 +60,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != Exchange::DEFAULT ? true : false; + exchange_type_set = exchange_type != Exchange::DEFAULT; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. 
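For illustration, the handler above serializes access to the shared libevent loop with a try-lock: whichever consumer acquires mutex_before_event_loop drives the loop, and everyone else simply skips. A minimal self-contained sketch of that idea (class and member names here are hypothetical, not the actual RabbitMQHandler):

    #include <atomic>
    #include <chrono>
    #include <mutex>

    class SharedLoopRunner
    {
    public:
        /// Only one caller at a time may drive the shared event loop;
        /// concurrent callers fail the timed lock and return immediately.
        void runLoopOnce(std::atomic<bool> & loop_started)
        {
            if (loop_mutex.try_lock_for(std::chrono::milliseconds(5000)))
            {
                loop_started = true;
                stop_scheduled.store(false);
                /// event_base_loop(evbase, EVLOOP_NONBLOCK) would run here.
                loop_mutex.unlock();
            }
        }

    private:
        std::timed_mutex loop_mutex;
        std::atomic<bool> stop_scheduled{false};
    };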
@@ -81,7 +86,7 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different - * binding algorithm this default behaviuor is much faster). It is also used in INSERT query. + * binding algorithm this default behaviour is much faster). It is also used in INSERT query (so it is always declared). */ String producer_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) @@ -114,10 +119,12 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else return; + else if (exchange_type == Exchange::HEADERS) + throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. (This enables better scaling as without hash-echange - the only oprion to avoid getting the same + * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting the same * messages more than once - is having only one consumer with one queue, which is not good.) */ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) @@ -156,7 +163,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { - /// These variables might be updated later from a separate thread in onError callbacks + /// These variables might be updated later from a separate thread in onError callbacks. if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) { initExchange(); @@ -206,7 +213,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); - /// Must be done here and not in readPrefix() because library might fail to handle async subscription on the same connection + /* Subscription can probably be moved back to readPrefix(), but it is not clear whether that is better in regard to speed. Also note + * that if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise the consumer might fail + * to subscribe and no resubscription will help. + */ subscribe(queues.back()); LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); @@ -229,7 +239,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } else { - /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange + /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange.
for (auto & routing_key : routing_keys) { /// Binding directly to exchange, specified by the client @@ -274,6 +284,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onSuccess([&](const std::string & /* consumer */) { subscribed_queue[queue_name] = true; + consumer_error = false; ++count_subscribed; LOG_TRACE(log, "Consumer {} is subscribed to queue {}", channel_id, queue_name); @@ -290,24 +301,17 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) message_received += row_delimiter; } - bool stop_loop = false; - /// Needed to avoid data race because this vector can be used at the same time by another thread in nextImpl(). { std::lock_guard lock(mutex); received.push_back(message_received); - - /* As event loop is blocking to the thread that started it and a single thread should not be blocked while - * executing all callbacks on the connection (not only its own), then there should be some point to unblock. - * loop_started == 1 if current consumer is started the loop and not another. - */ - if (!loop_started) - { - stop_loop = true; - } } - if (stop_loop) + /* As the event loop blocks the thread that started it, and a single thread should not be blocked while + * executing all callbacks on the connection (not only its own), there should be some point to unblock it. + * loop_started == 1 if the current consumer has started the loop and not another one. + */ + if (!loop_started.load() && !eventHandler.checkStopIsScheduled().load()) { stopEventLoopWithTimeout(); } @@ -323,7 +327,6 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) void ReadBufferFromRabbitMQConsumer::checkSubscription() { - /// In general this condition will always be true and looping/resubscribing would not happen if (count_subscribed == num_queues) return; @@ -337,7 +340,11 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - /// A case that would not normally happen + /// Updated in callbacks which are run by the loop + if (count_subscribed == num_queues) + return; + + /// A case that should never normally happen for (auto & queue : queues) { subscribe(queue); @@ -372,9 +379,9 @@ bool ReadBufferFromRabbitMQConsumer::nextImpl() { if (received.empty()) { - /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread + /// Run the onReceived callbacks to save the messages that have been received by now, blocks current thread.
startEventLoop(loop_started); - loop_started = false; + loop_started.store(false); } if (received.empty()) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 212d1fbc783..af8ad50e4e1 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -450,8 +450,15 @@ void registerStorageRabbitMQ(StorageFactory & factory) { exchange_type = safeGet(ast->value); } - } + if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" && exchange_type != "consistent_hash") + { + if (exchange_type == "headers") + throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else + throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + } + } UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; if (args_count >= 7) diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 8fa241dade5..6e2b6f21f1d 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -33,7 +33,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( : WriteBuffer(nullptr, 0) , login_password(login_password_) , routing_key(routing_key_) - , exchange_name(exchange_) + , exchange_name(exchange_ + "_direct") , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) @@ -126,7 +126,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name * and makes it visible from current producer_channel. */ - producer_channel->declareExchange(exchange_name + "_direct", AMQP::direct, AMQP::passive) + producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) .onSuccess([&]() { exchange_declared = true; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 1a56395eb29..37163db06f4 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -882,7 +882,7 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): @pytest.mark.timeout(240) -def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): +def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): instance.query(''' DROP TABLE IF EXISTS test.view_sharding; DROP TABLE IF EXISTS test.consumer_sharding; @@ -890,6 +890,7 @@ def test_rabbitmq_sharding_between_channels_insert(rabbitmq_cluster): ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', rabbitmq_num_consumers = 5, + rabbitmq_num_queues = 2, rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; CREATE TABLE test.view_sharding (key UInt64, value UInt64) From 462e8bcdc97dfd094747b682f19cabbc8e4b74bc Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 11 Jun 2020 21:03:53 +0000 Subject: [PATCH 0600/2229] Support transactions for publishing --- src/Storages/RabbitMQ/RabbitMQSettings.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 24 ++++++++-- src/Storages/RabbitMQ/StorageRabbitMQ.h | 4 +- .../WriteBufferToRabbitMQProducer.cpp | 46 +++++++++++++++++-- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 3 ++ 5 files changed, 70 insertions(+), 8 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSettings.h b/src/Storages/RabbitMQ/RabbitMQSettings.h index c9f09489f77..5cd52ed9ef7 100644 --- 
a/src/Storages/RabbitMQ/RabbitMQSettings.h +++ b/src/Storages/RabbitMQ/RabbitMQSettings.h @@ -18,6 +18,7 @@ namespace DB M(SettingString, rabbitmq_exchange_type, "default", "The exchange type.", 0) \ M(SettingUInt64, rabbitmq_num_consumers, 1, "The number of consumer channels per table.", 0) \ M(SettingUInt64, rabbitmq_num_queues, 1, "The number of queues per consumer.", 0) \ + M(SettingBool, rabbitmq_transactional_channel, false, "Use transactional channel for publishing.", 0) \ DECLARE_SETTINGS_COLLECTION(LIST_OF_RABBITMQ_SETTINGS) diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index af8ad50e4e1..669cfe19aa5 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -61,7 +61,8 @@ StorageRabbitMQ::StorageRabbitMQ( char row_delimiter_, const String & exchange_type_, size_t num_consumers_, - size_t num_queues_) + size_t num_queues_, + const bool use_transactional_channel_) : IStorage(table_id_) , global_context(context_.getGlobalContext()) , rabbitmq_context(Context(global_context)) @@ -72,6 +73,7 @@ StorageRabbitMQ::StorageRabbitMQ( , num_consumers(num_consumers_) , num_queues(num_queues_) , exchange_type(exchange_type_) + , use_transactional_channel(use_transactional_channel_) , log(&Poco::Logger::get("StorageRabbitMQ (" + table_id_.table_name + ")")) , semaphore(0, num_consumers_) , login_password(std::make_pair( @@ -225,7 +227,8 @@ ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, - log, num_consumers * num_queues, bind_by_id, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); + log, num_consumers * num_queues, bind_by_id, use_transactional_channel, + row_delimiter ? 
std::optional{row_delimiter} : std::nullopt, 1, 1024); } @@ -488,9 +491,24 @@ void registerStorageRabbitMQ(StorageFactory & factory) } } + bool use_transactional_channel = static_cast(rabbitmq_settings.rabbitmq_transactional_channel); + if (args_count >= 9) + { + const auto * ast = engine_args[8]->as(); + if (ast && ast->value.getType() == Field::Types::UInt64) + { + use_transactional_channel = static_cast(safeGet(ast->value)); + } + else + { + throw Exception("Transactional channel parameter is a bool", ErrorCodes::BAD_ARGUMENTS); + } + } + return StorageRabbitMQ::create( args.table_id, args.context, args.columns, - host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, num_queues); + host_port, routing_keys, exchange, format, row_delimiter, exchange_type, num_consumers, + num_queues, use_transactional_channel); }; factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, }); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 45ced9d247b..e056faa0d65 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -65,7 +65,8 @@ protected: char row_delimiter_, const String & exchange_type_, size_t num_consumers_, - size_t num_queues_); + size_t num_queues_, + const bool use_transactional_channel_); private: Context global_context; @@ -81,6 +82,7 @@ private: bool bind_by_id; size_t num_queues; const String exchange_type; + const bool use_transactional_channel; Poco::Logger * log; std::pair parsed_address; diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 6e2b6f21f1d..09179b95a15 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -15,7 +15,8 @@ namespace DB enum { Connection_setup_sleep = 200, - Connection_setup_retries_max = 1000, + Loop_retries_max = 1000, + Loop_wait = 10, Batch = 10000 }; @@ -27,6 +28,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, + const bool use_transactional_channel_, std::optional delimiter, size_t rows_per_message, size_t chunk_size_) @@ -37,6 +39,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( , log(log_) , num_queues(num_queues_) , bind_by_id(bind_by_id_) + , use_transactional_channel(use_transactional_channel_) , delim(delimiter) , max_rows(rows_per_message) , chunk_size(chunk_size_) @@ -50,7 +53,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( * different threads (as outputStreams are asynchronous) with the same connection leads to internal library errors. 
*/ size_t cnt_retries = 0; - while (!connection.ready() && ++cnt_retries != Connection_setup_retries_max) + while (!connection.ready() && ++cnt_retries != Loop_retries_max) { event_base_loop(producerEvbase, EVLOOP_NONBLOCK | EVLOOP_ONCE); std::this_thread::sleep_for(std::chrono::milliseconds(Connection_setup_sleep)); @@ -63,14 +66,19 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( producer_channel = std::make_shared<AMQP::TcpChannel>(&connection); checkExchange(); + + /// If publishing should be wrapped in transactions + if (use_transactional_channel) + { + producer_channel->startTransaction(); + } } WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - checkExchange(); + finalize(); connection.close(); - assert(rows == 0 && chunks.empty()); } @@ -145,6 +153,36 @@ void WriteBufferToRabbitMQProducer::checkExchange() } +void WriteBufferToRabbitMQProducer::finalize() +{ + checkExchange(); + + if (use_transactional_channel) + { + std::atomic<bool> answer_received = false; + producer_channel->commitTransaction() + .onSuccess([&]() + { + answer_received = true; + LOG_TRACE(log, "All messages were successfully published"); + }) + .onError([&](const char * message) + { + answer_received = true; + LOG_TRACE(log, "None of the messages were published: {}", message); + /// Probably should do something here + }); + + size_t count_retries = 0; + while (!answer_received && ++count_retries != Loop_retries_max) + { + startEventLoop(); + std::this_thread::sleep_for(std::chrono::milliseconds(Loop_wait)); + } + } +} + + void WriteBufferToRabbitMQProducer::nextImpl() { chunks.push_back(std::string()); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 90e0d90b356..9fd36257561 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -26,6 +26,7 @@ public: Poco::Logger * log_, const size_t num_queues_, const bool bind_by_id_, + const bool use_transactional_channel_, std::optional<char> delimiter, size_t rows_per_message, size_t chunk_size_ @@ -39,12 +40,14 @@ public: private: void nextImpl() override; void checkExchange(); + void finalize(); std::pair<String, String> & login_password; const String routing_key; const String exchange_name; const bool bind_by_id; const size_t num_queues; + const bool use_transactional_channel; event_base * producerEvbase; RabbitMQHandler eventHandler; From f608044690fa4f66d6a7b3aefc3889734bec5c0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 21:19:16 +0300 Subject: [PATCH 0601/2229] Fix error --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 8564b996bda..3fbdf2a02f5 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -338,7 +338,7 @@ static void read(int32_t & x, ReadBuffer & in) static void read(Error & x, ReadBuffer & in) { int32_t code; - readBinary(code, in); + read(code, in); x = Error(code); } From 56869228a2db535d27bd2ab7767cf2ed64247ac9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Fri, 12 Jun 2020 21:28:07 +0300 Subject: [PATCH 0602/2229] add flag to continue on errors --- docker/test/performance-comparison/compare.sh | 5 +- programs/benchmark/Benchmark.cpp | 60 ++++++++++++------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh
b/docker/test/performance-comparison/compare.sh index 5435d37e2e0..1dbf712ff50 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -173,9 +173,8 @@ function run_benchmark "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv # TODO things to fix in clickhouse-benchmark: # - --max_memory_usage setting does nothing - # - no way to continue on error - clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv - clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv + clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json --continue_on_errors -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv + clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json --continue_on_errors -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv } function get_profiles_watchdog diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index e17320b39ea..6884f6faed3 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -59,11 +59,14 @@ public: bool cumulative_, bool secure_, const String & default_database_, const String & user_, const String & password_, const String & stage, bool randomize_, size_t max_iterations_, double max_time_, - const String & json_path_, size_t confidence_, const String & query_id_, const Settings & settings_) + const String & json_path_, size_t confidence_, + const String & query_id_, bool continue_on_errors_, + const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), - json_path(json_path_), confidence(confidence_), query_id(query_id_), settings(settings_), + json_path(json_path_), confidence(confidence_), query_id(query_id_), + continue_on_errors(continue_on_errors_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) { @@ -149,6 +152,7 @@ private: String json_path; size_t confidence; std::string query_id; + bool continue_on_errors; Settings settings; SharedContextHolder shared_context; Context global_context; @@ -332,35 +336,45 @@ private: pcg64 generator(randomSeed()); std::uniform_int_distribution distribution(0, connection_entries.size() - 1); - try + /// In these threads we do not accept INT signal. + sigset_t sig_set; + if (sigemptyset(&sig_set) + || sigaddset(&sig_set, SIGINT) + || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) { - /// In these threads we do not accept INT signal. 
- sigset_t sig_set; - if (sigemptyset(&sig_set) - || sigaddset(&sig_set, SIGINT) - || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) - throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); + throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); + } - while (true) + while (true) + { + bool extracted = false; + + while (!extracted) { - bool extracted = false; + extracted = queue.tryPop(query, 100); - while (!extracted) + if (shutdown + || (max_iterations && queries_executed == max_iterations)) { - extracted = queue.tryPop(query, 100); - - if (shutdown || (max_iterations && queries_executed == max_iterations)) - return; + return; } + } + + try + { execute(connection_entries, query, distribution(generator)); ++queries_executed; } - } - catch (...) - { - shutdown = true; - std::cerr << "An error occurred while processing query:\n" << query << "\n"; - throw; + catch (...) + { + std::cerr << "An error occurred while processing query:\n" + << query << "\n"; + if (!continue_on_errors) + { + shutdown = true; + throw; + } + } } } @@ -541,6 +555,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("stacktrace", "print stack traces of exceptions") ("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value<std::string>()->default_value(""), "") + ("continue_on_errors", "continue testing even if a query fails") ; Settings settings; @@ -580,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["json"].as<std::string>(), options["confidence"].as<size_t>(), options["query_id"].as<std::string>(), + options.count("continue_on_errors") > 0, settings); return benchmark.run(); } From 7f52b615e061d6cbd3c493bc599913541875e397 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 12 Jun 2020 21:17:06 +0300 Subject: [PATCH 0603/2229] Fix bloom filters for String (data skipping indices) The bloom filter was broken for the first element if all of the following conditions were satisfied: - they are created on INSERT (in this case bloom filter hashing uses offsets; in case of OPTIMIZE it does not, since it already has granules). - the record is not the first in the block - the record is the first per index_granularity (do not confuse this with data skipping index GRANULARITY). - type of the field for indexing is "String" (not FixedString) Because in this case the length and the *data* for that string were incorrect.
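To make the fixed arithmetic concrete: in ClickHouse's ColumnString layout, offsets[i] points one past string i's terminating zero, so string i begins at offsets[i - 1] rather than at the row number. A standalone sketch of the corrected computation (std::vector stands in for the real column types, which are padded so that offsets[-1] reads as 0 and needs no explicit guard):

    #include <cstdint>
    #include <string_view>
    #include <vector>

    /// pos is the first row of the granule, index is the row inside it.
    std::string_view granuleString(const std::vector<char> & data,
                                   const std::vector<uint64_t> & offsets,
                                   size_t pos, size_t index)
    {
        size_t row = index + pos;
        uint64_t begin = (row == 0) ? 0 : offsets[row - 1];
        uint64_t length = offsets[row] - begin - 1; /// exclude the terminating zero
        return std::string_view(&data[begin], length);
    }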
--- src/Interpreters/BloomFilterHash.h | 7 +++---- .../0_stateless/01307_data_skip_bloom_filter.reference | 4 ++++ .../queries/0_stateless/01307_data_skip_bloom_filter.sql | 8 ++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/01307_data_skip_bloom_filter.reference create mode 100644 tests/queries/0_stateless/01307_data_skip_bloom_filter.sql diff --git a/src/Interpreters/BloomFilterHash.h b/src/Interpreters/BloomFilterHash.h index e7411433781..43f5d7b5e87 100644 --- a/src/Interpreters/BloomFilterHash.h +++ b/src/Interpreters/BloomFilterHash.h @@ -196,18 +196,17 @@ struct BloomFilterHash const ColumnString::Chars & data = index_column->getChars(); const ColumnString::Offsets & offsets = index_column->getOffsets(); - ColumnString::Offset current_offset = pos; for (size_t index = 0, size = vec.size(); index < size; ++index) { + ColumnString::Offset current_offset = offsets[index + pos - 1]; + size_t length = offsets[index + pos] - current_offset - 1 /* terminating zero */; UInt64 city_hash = CityHash_v1_0_2::CityHash64( - reinterpret_cast(&data[current_offset]), offsets[index + pos] - current_offset - 1); + reinterpret_cast(&data[current_offset]), length); if constexpr (is_first) vec[index] = city_hash; else vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash)); - - current_offset = offsets[index + pos]; } } else if (const auto * fixed_string_index_column = typeid_cast(column)) diff --git a/tests/queries/0_stateless/01307_data_skip_bloom_filter.reference b/tests/queries/0_stateless/01307_data_skip_bloom_filter.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/01307_data_skip_bloom_filter.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01307_data_skip_bloom_filter.sql b/tests/queries/0_stateless/01307_data_skip_bloom_filter.sql new file mode 100644 index 00000000000..832f7140af2 --- /dev/null +++ b/tests/queries/0_stateless/01307_data_skip_bloom_filter.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS test_01307; +CREATE TABLE test_01307 (id UInt64, val String, INDEX ind val TYPE bloom_filter() GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 2; +INSERT INTO test_01307 (id, val) select number as id, toString(number) as val from numbers(4); +SELECT count() FROM test_01307 WHERE identity(val) = '2'; +SELECT count() FROM test_01307 WHERE val = '2'; +OPTIMIZE TABLE test_01307 FINAL; +SELECT count() FROM test_01307 WHERE identity(val) = '2'; +SELECT count() FROM test_01307 WHERE val = '2'; From 8985d64237e9b7561c778570683f372c4debc4e5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:39:48 +0300 Subject: [PATCH 0604/2229] Fix "Arcadia" build --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 3fbdf2a02f5..9d42e2f7a48 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -52,13 +52,6 @@ namespace CurrentMetrics extern const Metric ZooKeeperWatch; } -namespace DB -{ - namespace ErrorCodes - { - extern const int SUPPORT_IS_DISABLED; - } -} /** ZooKeeper wire protocol. 
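The block removed above was the only user of ErrorCodes::SUPPORT_IS_DISABLED in this file, and the next hunk swaps the DB::Exception for a plain Poco::Exception, so the translation unit no longer needs DB error codes at all. The same build-flag fallback pattern in a minimal, self-contained form (the macro value is hardcoded only to keep the sketch compilable; in the real code it comes from the build system):

    #include <stdexcept>

    #define USE_SSL 0 /// assumption: normally defined by the build configuration

    void makeZooKeeperSocket()
    {
    #if USE_SSL
        /// construct a TLS-capable socket here
    #else
        throw std::runtime_error(
            "TLS is disabled because the library was built without SSL support.");
    #endif
    }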
@@ -908,7 +901,8 @@ void ZooKeeper::connect( #if USE_SSL socket = Poco::Net::SecureStreamSocket(); #else - throw Exception{"Communication with ZooKeeper over SSL is disabled because poco library was built without NetSSL support.", ErrorCodes::SUPPORT_IS_DISABLED}; + throw Poco::Exception{ + "Communication with ZooKeeper over SSL is disabled because poco library was built without NetSSL support."}; #endif } else From a6908515864f1adab9b09c323a24f91e5dd35c67 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jun 2020 00:20:59 +0300 Subject: [PATCH 0605/2229] Fix style --- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 9d42e2f7a48..61a64f2c51f 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -901,8 +901,8 @@ void ZooKeeper::connect( #if USE_SSL socket = Poco::Net::SecureStreamSocket(); #else - throw Poco::Exception{ - "Communication with ZooKeeper over SSL is disabled because poco library was built without NetSSL support."}; + throw Poco::Exception( + "Communication with ZooKeeper over SSL is disabled because poco library was built without NetSSL support."); #endif } else From 0b1ff4f9cce72ab24e6b9bb909874f81789423ba Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jun 2020 00:48:10 +0300 Subject: [PATCH 0606/2229] Update max_rows_to_read in 00945_bloom_filter_index test The expected values were incorrect, since for strings we have 1 and 10 and there will be at least two index granules, hence 12 rows. --- tests/queries/0_stateless/00945_bloom_filter_index.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index 6f93ae89a42..d509b99229a 100755 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -43,7 +43,7 @@ SELECT COUNT() FROM bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_r SELECT COUNT() FROM bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_types_test WHERE str IN ( SELECT str FROM bloom_filter_types_test); @@ -122,7 +122,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; -SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM
bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_null_types_test WHERE isNull(i8); @@ -150,7 +150,7 @@ CREATE TABLE bloom_filter_lc_null_types_test (order_key UInt64, str LowCardinali INSERT INTO bloom_filter_lc_null_types_test SELECT number AS order_key, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; INSERT INTO bloom_filter_lc_null_types_test SELECT 0 AS order_key, NULL AS str, NULL AS fixed_string; -SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE isNull(str); From 6869c122a811fed91c9fc953df4d4af89a3a30f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 21:24:32 +0300 Subject: [PATCH 0607/2229] Step 1 towards removing leader election: check and modify version of the "log" node --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 10 +- .../MergeTree/ReplicatedMergeTreeQueue.h | 8 +- src/Storages/StorageReplicatedMergeTree.cpp | 249 ++++++++++++------ src/Storages/StorageReplicatedMergeTree.h | 13 +- 4 files changed, 199 insertions(+), 81 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 09e9cd494ca..8a9dbceba04 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -419,7 +419,7 @@ bool ReplicatedMergeTreeQueue::removeFromVirtualParts(const MergeTreePartInfo & return virtual_parts.remove(part_info); } -void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) +int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback) { std::lock_guard lock(pull_logs_to_queue_mutex); if (pull_log_blocker.isCancelled()) @@ -428,6 +428,10 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, C String index_str = zookeeper->get(replica_path + "/log_pointer"); UInt64 index; + /// The version of "/log" is modified when new entries to merge/mutate/drop appear. 
+ Coordination::Stat stat; + zookeeper->get(zookeeper_path + "/log", &stat); + Strings log_entries = zookeeper->getChildrenWatch(zookeeper_path + "/log", nullptr, watch_callback); /// We update mutations after we have loaded the list of log entries, but before we insert them @@ -561,6 +565,8 @@ void ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, C if (storage.queue_task_handle) storage.queue_task_handle->signalReadyToRun(); } + + return stat.version; } @@ -1630,7 +1636,7 @@ ReplicatedMergeTreeMergePredicate::ReplicatedMergeTreeMergePredicate( } } - queue_.pullLogsToQueue(zookeeper); + merges_version = queue_.pullLogsToQueue(zookeeper); Coordination::GetResponse quorum_status_response = quorum_status_future.get(); if (quorum_status_response.error == Coordination::Error::ZOK) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 4cbb86adb7b..e093e193381 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -271,8 +271,9 @@ public: * If watch_callback is not empty, will call it when new entries appear in the log. * If there were new entries, notifies storage.queue_task_handle. * Additionally loads mutations (so that the set of mutations is always more recent than the queue). + * Return the version of "logs" node (that is updated for every merge/mutation/... added to the log) */ - void pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}); + int32_t pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper, Coordination::WatchCallback watch_callback = {}); /// Load new mutation entries. If something new is loaded, schedule storage.merge_selecting_task. /// If watch_callback is not empty, will call it when new mutations appear in ZK. @@ -434,6 +435,9 @@ public: bool isMutationFinished(const ReplicatedMergeTreeMutationEntry & mutation) const; + /// The version of "log" node that is used to check that no new merges have appeared. + int32_t getVersion() const { return merges_version; } + private: const ReplicatedMergeTreeQueue & queue; @@ -445,6 +449,8 @@ private: /// Quorum state taken at some later time than prev_virtual_parts. 
String inprogress_quorum_part; + + int32_t merges_version = -1; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e7d9e214995..9d20aeecac1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2402,7 +2402,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() const bool deduplicate = false; /// TODO: read deduplicate option from table config const bool force_ttl = false; - bool success = false; + CreateMergeEntryResult create_result = CreateMergeEntryResult::Other; try { @@ -2422,7 +2422,11 @@ void StorageReplicatedMergeTree::mergeSelectingTask() size_t merges_and_mutations_sum = merges_and_mutations_queued.first + merges_and_mutations_queued.second; if (merges_and_mutations_sum >= storage_settings_ptr->max_replicated_merges_in_queue) { - LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({}) is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", merges_and_mutations_queued.first, merges_and_mutations_queued.second, storage_settings_ptr->max_replicated_merges_in_queue); + LOG_TRACE(log, "Number of queued merges ({}) and part mutations ({})" + " is greater than max_replicated_merges_in_queue ({}), so won't select new parts to merge or mutate.", + merges_and_mutations_queued.first, + merges_and_mutations_queued.second, + storage_settings_ptr->max_replicated_merges_in_queue); } else { @@ -2434,10 +2438,10 @@ void StorageReplicatedMergeTree::mergeSelectingTask() if (max_source_parts_size_for_merge > 0 && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, merge_pred, nullptr)) { - success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, - future_merged_part.name, future_merged_part.type, deduplicate, force_ttl); + create_result = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, + future_merged_part.name, future_merged_part.type, deduplicate, force_ttl, nullptr, merge_pred.getVersion()); } - /// If there are many mutations in queue it may happen, that we cannot enqueue enough merges to merge all new parts + /// If there are many mutations in queue, it may happen, that we cannot enqueue enough merges to merge all new parts else if (max_source_part_size_for_mutation > 0 && queue.countMutations() > 0 && merges_and_mutations_queued.second < storage_settings_ptr->max_replicated_mutations_in_queue) { @@ -2452,11 +2456,11 @@ void StorageReplicatedMergeTree::mergeSelectingTask() if (!desired_mutation_version) continue; - if (createLogEntryToMutatePart(*part, desired_mutation_version->first, desired_mutation_version->second)) - { - success = true; + create_result = createLogEntryToMutatePart(*part, + desired_mutation_version->first, desired_mutation_version->second, merge_pred.getVersion()); + + if (create_result == CreateMergeEntryResult::Ok) break; - } } } } @@ -2469,11 +2473,15 @@ void StorageReplicatedMergeTree::mergeSelectingTask() if (!is_leader) return; - if (!success) + if (create_result != CreateMergeEntryResult::Ok + && create_result != CreateMergeEntryResult::LogUpdated) + { merge_selecting_task->scheduleAfter(MERGE_SELECTING_SLEEP_MS); + } else + { merge_selecting_task->schedule(); - + } } @@ -2507,14 +2515,15 @@ void StorageReplicatedMergeTree::mutationsFinalizingTask() } -bool StorageReplicatedMergeTree::createLogEntryToMergeParts( +StorageReplicatedMergeTree::CreateMergeEntryResult 
StorageReplicatedMergeTree::createLogEntryToMergeParts( zkutil::ZooKeeperPtr & zookeeper, const DataPartsVector & parts, const String & merged_name, const MergeTreeDataPartType & merged_part_type, bool deduplicate, bool force_ttl, - ReplicatedMergeTreeLogEntryData * out_log_entry) + ReplicatedMergeTreeLogEntryData * out_log_entry, + int32_t log_version) { std::vector> exists_futures; exists_futures.reserve(parts.size()); @@ -2539,7 +2548,7 @@ bool StorageReplicatedMergeTree::createLogEntryToMergeParts( } if (!all_in_zk) - return false; + return CreateMergeEntryResult::MissingPart; ReplicatedMergeTreeLogEntryData entry; entry.type = LogEntry::MERGE_PARTS; @@ -2551,21 +2560,43 @@ bool StorageReplicatedMergeTree::createLogEntryToMergeParts( entry.create_time = time(nullptr); for (const auto & part : parts) - { entry.source_parts.push_back(part->name); - } - String path_created = zookeeper->create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential); - entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1); + Coordination::Requests ops; + Coordination::Responses responses; + + ops.emplace_back(zkutil::makeCreateRequest( + zookeeper_path + "/log/log-", entry.toString(), + zkutil::CreateMode::PersistentSequential)); + + ops.emplace_back(zkutil::makeCheckRequest( + zookeeper_path + "/log", log_version)); + + Coordination::Error code = zookeeper->tryMulti(ops, responses); + + if (code == Coordination::Error::ZOK) + { + String path_created = dynamic_cast(*responses.front()).path_created; + entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1); + } + else if (code == Coordination::Error::ZBADVERSION) + { + return CreateMergeEntryResult::LogUpdated; + } + else + { + zkutil::KeeperMultiException::check(code, ops, responses); + } if (out_log_entry) *out_log_entry = entry; - return true; + return CreateMergeEntryResult::Ok; } -bool StorageReplicatedMergeTree::createLogEntryToMutatePart(const IMergeTreeDataPart & part, Int64 mutation_version, int alter_version) +StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::createLogEntryToMutatePart( + const IMergeTreeDataPart & part, Int64 mutation_version, int32_t alter_version, int32_t log_version) { auto zookeeper = getZooKeeper(); @@ -2578,7 +2609,7 @@ bool StorageReplicatedMergeTree::createLogEntryToMutatePart(const IMergeTreeData enqueuePartForCheck(part.name); } - return false; + return CreateMergeEntryResult::MissingPart; } MergeTreePartInfo new_part_info = part.info; @@ -2594,8 +2625,23 @@ bool StorageReplicatedMergeTree::createLogEntryToMutatePart(const IMergeTreeData entry.create_time = time(nullptr); entry.alter_version = alter_version; - zookeeper->create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential); - return true; + Coordination::Requests ops; + Coordination::Responses responses; + + ops.emplace_back(zkutil::makeCreateRequest( + zookeeper_path + "/log/log-", entry.toString(), + zkutil::CreateMode::PersistentSequential)); + + ops.emplace_back(zkutil::makeCheckRequest( + zookeeper_path + "/log", log_version)); + + Coordination::Error code = zookeeper->tryMulti(ops, responses); + + if (code == Coordination::Error::ZBADVERSION) + return CreateMergeEntryResult::LogUpdated; + + zkutil::KeeperMultiException::check(code, ops, responses); + return CreateMergeEntryResult::Ok; } @@ -3377,7 +3423,8 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, } -bool 
StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) +bool StorageReplicatedMergeTree::optimize( + const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) { assertNotReadonly(); @@ -3387,14 +3434,11 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p return true; } + constexpr size_t max_retries = 10; + std::vector merge_entries; { - /// We must select parts for merge under merge_selecting_mutex because other threads - /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges. - std::lock_guard merge_selecting_lock(merge_selecting_mutex); - auto zookeeper = getZooKeeper(); - ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); auto handle_noop = [&] (const String & message) { @@ -3418,52 +3462,94 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p for (const String & partition_id : partition_ids) { - FutureMergedMutatedPart future_merged_part; - bool selected = merger_mutator.selectAllPartsToMergeWithinPartition( - future_merged_part, disk_space, can_merge, partition_id, true, nullptr); - ReplicatedMergeTreeLogEntryData merge_entry; - if (selected && !createLogEntryToMergeParts(zookeeper, future_merged_part.parts, - future_merged_part.name, future_merged_part.type, deduplicate, force_ttl, &merge_entry)) - return handle_noop("Can't create merge queue node in ZooKeeper"); - if (merge_entry.type != ReplicatedMergeTreeLogEntryData::Type::EMPTY) + size_t try_no = 0; + for (; try_no < max_retries; ++try_no) + { + /// We must select parts for merge under merge_selecting_mutex because other threads + /// (merge_selecting_thread or OPTIMIZE queries) could assign new merges. 
+ std::lock_guard merge_selecting_lock(merge_selecting_mutex); + ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); + + FutureMergedMutatedPart future_merged_part; + bool selected = merger_mutator.selectAllPartsToMergeWithinPartition( + future_merged_part, disk_space, can_merge, partition_id, true, nullptr); + + if (!selected) + break; + + ReplicatedMergeTreeLogEntryData merge_entry; + CreateMergeEntryResult create_result = createLogEntryToMergeParts( + zookeeper, future_merged_part.parts, + future_merged_part.name, future_merged_part.type, deduplicate, force_ttl, + &merge_entry, can_merge.getVersion()); + + if (create_result == CreateMergeEntryResult::MissingPart) + return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); + + if (create_result == CreateMergeEntryResult::LogUpdated) + continue; + merge_entries.push_back(std::move(merge_entry)); + break; + } + if (try_no == max_retries) + return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of " + + toString(max_retries) + " tries"); } } else { - FutureMergedMutatedPart future_merged_part; - String disable_reason; - bool selected = false; - if (!partition) - { - selected = merger_mutator.selectPartsToMerge( - future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); - } - else + size_t try_no = 0; + for (; try_no < max_retries; ++try_no) { + std::lock_guard merge_selecting_lock(merge_selecting_mutex); + ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); - UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); - String partition_id = getPartitionIDFromQuery(partition, query_context); - selected = merger_mutator.selectAllPartsToMergeWithinPartition( - future_merged_part, disk_space, can_merge, partition_id, final, &disable_reason); - } + FutureMergedMutatedPart future_merged_part; + String disable_reason; + bool selected = false; + if (!partition) + { + selected = merger_mutator.selectPartsToMerge( + future_merged_part, true, storage_settings_ptr->max_bytes_to_merge_at_max_space_in_pool, can_merge, &disable_reason); + } + else + { - if (!selected) - { - std::stringstream message; - message << "Cannot select parts for optimization"; - if (!disable_reason.empty()) - message << ": " << disable_reason; - LOG_INFO(log, message.str()); - return handle_noop(message.str()); - } + UInt64 disk_space = getStoragePolicy()->getMaxUnreservedFreeSpace(); + String partition_id = getPartitionIDFromQuery(partition, query_context); + selected = merger_mutator.selectAllPartsToMergeWithinPartition( + future_merged_part, disk_space, can_merge, partition_id, final, &disable_reason); + } + + if (!selected) + { + std::stringstream message; + message << "Cannot select parts for optimization"; + if (!disable_reason.empty()) + message << ": " << disable_reason; + LOG_INFO(log, message.str()); + return handle_noop(message.str()); + } + + ReplicatedMergeTreeLogEntryData merge_entry; + CreateMergeEntryResult create_result = createLogEntryToMergeParts( + zookeeper, future_merged_part.parts, + future_merged_part.name, future_merged_part.type, deduplicate, force_ttl, + &merge_entry, can_merge.getVersion()); + + if (create_result == CreateMergeEntryResult::MissingPart) + return handle_noop("Can't create merge queue node in ZooKeeper, because some parts are missing"); + + if (create_result == CreateMergeEntryResult::LogUpdated) + continue; - 
ReplicatedMergeTreeLogEntryData merge_entry; - if (!createLogEntryToMergeParts(zookeeper, future_merged_part.parts, - future_merged_part.name, future_merged_part.type, deduplicate, force_ttl, &merge_entry)) - return handle_noop("Can't create merge queue node in ZooKeeper"); - if (merge_entry.type != ReplicatedMergeTreeLogEntryData::Type::EMPTY) merge_entries.push_back(std::move(merge_entry)); + break; + } + if (try_no == max_retries) + return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of " + + toString(max_retries) + " tries"); } } @@ -3537,7 +3623,8 @@ void StorageReplicatedMergeTree::alter( return; } - auto ast_to_str = [](ASTPtr query) -> String { + auto ast_to_str = [](ASTPtr query) -> String + { if (!query) return ""; return queryToString(query); @@ -3587,7 +3674,6 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/columns", new_columns_str, -1)); - if (ast_to_str(current_metadata.settings_ast) != ast_to_str(future_metadata.settings_ast)) { lockStructureExclusively( @@ -3609,10 +3695,12 @@ void StorageReplicatedMergeTree::alter( alter_entry->alter_version = new_metadata_version; alter_entry->create_time = time(nullptr); - auto maybe_mutation_commands = params.getMutationCommands(current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify, query_context); + auto maybe_mutation_commands = params.getMutationCommands( + current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify, query_context); alter_entry->have_mutation = !maybe_mutation_commands.empty(); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", alter_entry->toString(), zkutil::CreateMode::PersistentSequential)); + ops.emplace_back(zkutil::makeCreateRequest( + zookeeper_path + "/log/log-", alter_entry->toString(), zkutil::CreateMode::PersistentSequential)); std::optional lock_holder; @@ -4015,7 +4103,8 @@ StorageReplicatedMergeTree::allocateBlockNumber( } -Strings StorageReplicatedMergeTree::waitForAllReplicasToProcessLogEntry(const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) +Strings StorageReplicatedMergeTree::waitForAllReplicasToProcessLogEntry( + const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) { LOG_DEBUG(log, "Waiting for all replicas to process {}", entry.znode_name); @@ -4040,7 +4129,8 @@ Strings StorageReplicatedMergeTree::waitForAllReplicasToProcessLogEntry(const Re } -bool StorageReplicatedMergeTree::waitForReplicaToProcessLogEntry(const String & replica, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) +bool StorageReplicatedMergeTree::waitForReplicaToProcessLogEntry( + const String & replica, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active) { String entry_str = entry.toString(); String log_node_name; @@ -5185,6 +5275,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ } } + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); Transaction transaction(*this); @@ -5370,7 +5461,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta } } - ops.emplace_back(zkutil::makeCreateRequest(dest_table_storage->zookeeper_path + "/log/log-", entry.toString(), 
zkutil::CreateMode::PersistentSequential)); + ops.emplace_back(zkutil::makeCreateRequest(dest_table_storage->zookeeper_path + "/log/log-", + entry.toString(), zkutil::CreateMode::PersistentSequential)); { Transaction transaction(*dest_table_storage); @@ -5417,11 +5509,13 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta Coordination::Requests ops_dest; - ops_dest.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry_delete.toString(), zkutil::CreateMode::PersistentSequential)); + ops_dest.emplace_back(zkutil::makeCreateRequest( + zookeeper_path + "/log/log-", entry_delete.toString(), zkutil::CreateMode::PersistentSequential)); + ops_dest.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version op_results = zookeeper->multi(ops_dest); - log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*op_results.back()).path_created; + log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*op_results.front()).path_created; entry_delete.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); if (query_context.getSettingsRef().replication_alter_partitions_sync > 1) @@ -5577,7 +5671,12 @@ bool StorageReplicatedMergeTree::dropPartsInPartition( entry.detach = detach; entry.create_time = time(nullptr); - String log_znode_path = zookeeper.create(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential); + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); + ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/log", "", -1)); /// Just update version. + Coordination::Responses responses = zookeeper.multi(ops); + + String log_znode_path = dynamic_cast<const Coordination::CreateResponse &>(*responses.front()).path_created; entry.znode_name = log_znode_path.substr(log_znode_path.find_last_of('/') + 1); return true; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 18c691046d6..ec38eb7e842 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -425,16 +425,23 @@ private: * Call when merge_selecting_mutex is locked. * Returns false if any part is not in ZK. */ - bool createLogEntryToMergeParts( + enum class CreateMergeEntryResult { Ok, MissingPart, LogUpdated, Other }; + + CreateMergeEntryResult createLogEntryToMergeParts( zkutil::ZooKeeperPtr & zookeeper, const DataPartsVector & parts, const String & merged_name, const MergeTreeDataPartType & merged_part_type, bool deduplicate, bool force_ttl, - ReplicatedMergeTreeLogEntryData * out_log_entry = nullptr); + ReplicatedMergeTreeLogEntryData * out_log_entry, + int32_t log_version); - bool createLogEntryToMutatePart(const IMergeTreeDataPart & part, Int64 mutation_version, int alter_version); + CreateMergeEntryResult createLogEntryToMutatePart( + const IMergeTreeDataPart & part, + Int64 mutation_version, + int32_t alter_version, + int32_t log_version); /// Exchange parts. 
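The retry loop introduced above is a classic optimistic-concurrency scheme: the new log entry and a version condition on the /log znode go into one ZooKeeper multi-request, and ZBADVERSION means another replica appended to the log first, so part selection must start over. A minimal sketch of the pattern, using only zkutil calls that appear in these patches (log_path, entry_data and max_retries are illustrative placeholders, not names from the patch; in the real code the expected version comes from the merge predicate via can_merge.getVersion()):

    /// Optimistically append an entry to the replication log: the transaction
    /// succeeds only if /log still has the version observed during part selection.
    for (size_t try_no = 0; try_no < max_retries; ++try_no)
    {
        Coordination::Stat log_stat;
        zookeeper->get(log_path, &log_stat);

        Coordination::Requests ops;
        ops.emplace_back(zkutil::makeCreateRequest(
            log_path + "/log-", entry_data, zkutil::CreateMode::PersistentSequential));
        ops.emplace_back(zkutil::makeSetRequest(log_path, "", log_stat.version)); /// Check and bump the version.

        Coordination::Responses responses;
        Coordination::Error code = zookeeper->tryMulti(ops, responses);

        if (code == Coordination::Error::ZOK)
            break; /// responses.front() holds the path of the created entry.

        if (code != Coordination::Error::ZBADVERSION)
            zkutil::KeeperMultiException::check(code, ops, responses); /// Rethrow unexpected errors.

        /// ZBADVERSION: another replica won the race; reselect parts and retry.
    }

A set request with an expected version both checks and advances /log, which is the combination patch 0610 below switches to; the plain makeCheckRequest used at first only verifies the version without bumping it.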
From 833c8178ae51718008b76a1ebd90fb6988088b03 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:01:44 +0300 Subject: [PATCH 0608/2229] Whitespaces --- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9d20aeecac1..0e2d138334e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2605,7 +2605,8 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c { if (part.modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) { - LOG_WARNING(log, "Part {} (that was selected for mutation) with age {} seconds exists locally but not in ZooKeeper. Won't mutate that part and will check it.", part.name, (time(nullptr) - part.modification_time)); + LOG_WARNING(log, "Part {} (that was selected for mutation) with age {} seconds exists locally but not in ZooKeeper." + " Won't mutate that part and will check it.", part.name, (time(nullptr) - part.modification_time)); enqueuePartForCheck(part.name); } From 902774c89cc12671191f82ee4403fd8d5d913d3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:02:00 +0300 Subject: [PATCH 0609/2229] Removed default value of constructor --- src/Common/ZooKeeper/LeaderElection.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Common/ZooKeeper/LeaderElection.h b/src/Common/ZooKeeper/LeaderElection.h index dca87efe7c2..f8a4d56dc76 100644 --- a/src/Common/ZooKeeper/LeaderElection.h +++ b/src/Common/ZooKeeper/LeaderElection.h @@ -36,7 +36,12 @@ public: * It means that different participants of leader election have different identifiers * and existence of more than one ephemeral node with same identifier indicates an error. */ - LeaderElection(DB::BackgroundSchedulePool & pool_, const std::string & path_, ZooKeeper & zookeeper_, LeadershipHandler handler_, const std::string & identifier_ = "") + LeaderElection( + DB::BackgroundSchedulePool & pool_, + const std::string & path_, + ZooKeeper & zookeeper_, + LeadershipHandler handler_, + const std::string & identifier_) : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_) , log_name("LeaderElection (" + path + ")") , log(&Poco::Logger::get(log_name)) From 52ac009754a40d71a4b0e737518d64ed40bd44ad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:22:55 +0300 Subject: [PATCH 0610/2229] Update versions on merge and mutation --- .../MergeTree/MergeTreeDataMergerMutator.h | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 385ada72fdd..7c2ee53fc1d 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -82,7 +82,6 @@ public: const AllowedMergingPredicate & can_merge, String * out_disable_reason = nullptr); - /** Select all the parts in the specified partition for merge, if possible. * final - choose to merge even a single part - that is, allow to merge one part "with itself". 
*/ diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0e2d138334e..57535466558 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2569,8 +2569,8 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - ops.emplace_back(zkutil::makeCheckRequest( - zookeeper_path + "/log", log_version)); + ops.emplace_back(zkutil::makeSetRequest( + zookeeper_path + "/log", "", log_version)); /// Check and update version. Coordination::Error code = zookeeper->tryMulti(ops, responses); @@ -2578,9 +2578,12 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c { String path_created = dynamic_cast<const Coordination::CreateResponse &>(*responses.front()).path_created; entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1); + + LOG_TRACE(log, "Created log entry {} for merge {}", path_created, merged_name); } else if (code == Coordination::Error::ZBADVERSION) { + LOG_TRACE(log, "Log entry is not created for merge {} because log was updated", merged_name); return CreateMergeEntryResult::LogUpdated; } else @@ -2633,15 +2636,20 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c zookeeper_path + "/log/log-", entry.toString(), zkutil::CreateMode::PersistentSequential)); - ops.emplace_back(zkutil::makeCheckRequest( - zookeeper_path + "/log", log_version)); + ops.emplace_back(zkutil::makeSetRequest( + zookeeper_path + "/log", "", log_version)); /// Check and update version. Coordination::Error code = zookeeper->tryMulti(ops, responses); if (code == Coordination::Error::ZBADVERSION) + { + LOG_TRACE(log, "Log entry is not created for mutation {} because log was updated", new_part_name); return CreateMergeEntryResult::LogUpdated; + } zkutil::KeeperMultiException::check(code, ops, responses); + + LOG_TRACE(log, "Created log entry for mutation {}", new_part_name); return CreateMergeEntryResult::Ok; } From 22dd47c13fc97b8adf726fc42ca94847f1c76dd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:24:17 +0300 Subject: [PATCH 0611/2229] Added a test --- .../01307_multiple_leaders.reference | 2 + .../0_stateless/01307_multiple_leaders.sh | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 tests/queries/0_stateless/01307_multiple_leaders.reference create mode 100755 tests/queries/0_stateless/01307_multiple_leaders.sh diff --git a/tests/queries/0_stateless/01307_multiple_leaders.reference b/tests/queries/0_stateless/01307_multiple_leaders.reference new file mode 100644 index 00000000000..576441b288d --- /dev/null +++ b/tests/queries/0_stateless/01307_multiple_leaders.reference @@ -0,0 +1,2 @@ +2000 1999000 +2000 1999000 diff --git a/tests/queries/0_stateless/01307_multiple_leaders.sh b/tests/queries/0_stateless/01307_multiple_leaders.sh new file mode 100755 index 00000000000..0bf5e0b13bf --- /dev/null +++ b/tests/queries/0_stateless/01307_multiple_leaders.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT -n --query " +DROP TABLE IF EXISTS r0; +DROP TABLE IF EXISTS r1; + +CREATE TABLE r0 (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r0') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M'; +CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r1') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M'; +" + +function thread() +{ + REPLICA=$1 + ITERATIONS=$2 + + $CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 0 --min_insert_block_size_bytes 0 --query "INSERT INTO r$REPLICA SELECT number * 2 + $REPLICA FROM numbers($ITERATIONS)" +} + + +thread 0 1000 & +thread 1 1000 & + +wait + +$CLICKHOUSE_CLIENT -n --query " +SYSTEM SYNC REPLICA r0; +SYSTEM SYNC REPLICA r1; + +SELECT count(), sum(x) FROM r0; +SELECT count(), sum(x) FROM r1; + +DROP TABLE r0; +DROP TABLE r1; +" From b8b55a5b9911c2e0b551274d507a78cf4dbe739e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:04:18 +0300 Subject: [PATCH 0612/2229] More LeaderElection to Storage/MergeTree --- src/{Common/ZooKeeper => Storages/MergeTree}/LeaderElection.h | 4 ++-- src/Storages/StorageReplicatedMergeTree.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/{Common/ZooKeeper => Storages/MergeTree}/LeaderElection.h (97%) diff --git a/src/Common/ZooKeeper/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h similarity index 97% rename from src/Common/ZooKeeper/LeaderElection.h rename to src/Storages/MergeTree/LeaderElection.h index f8a4d56dc76..c94e3e27e5a 100644 --- a/src/Common/ZooKeeper/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -1,11 +1,11 @@ #pragma once -#include "ZooKeeper.h" -#include "KeeperException.h" #include <functional> #include <memory> #include <common/logger_useful.h> #include <Core/BackgroundSchedulePool.h> +#include <Common/ZooKeeper/ZooKeeper.h> +#include <Common/ZooKeeper/KeeperException.h> #include <Common/CurrentMetrics.h> diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index ec38eb7e842..382cf7ac469 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -19,12 +19,12 @@ #include #include #include +#include <Storages/MergeTree/LeaderElection.h> #include #include #include #include #include -#include <Common/ZooKeeper/LeaderElection.h> #include #include From 85c0706901de09a6a327d3d953a509a6ef2376b3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:19:01 +0300 Subject: [PATCH 0613/2229] Step 2: allow multiple leaders --- src/Storages/MergeTree/LeaderElection.h | 45 ++++++++++++------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index c94e3e27e5a..680ffe4992f 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -23,7 +23,16 @@ namespace CurrentMetrics namespace zkutil { -/** Implements leader election algorithm described here: http://zookeeper.apache.org/doc/r3.4.5/recipes.html#sc_leaderElection +/** Initially was used to implement leader election algorithm described here: + * http://zookeeper.apache.org/doc/r3.4.5/recipes.html#sc_leaderElection + + * But then we decided to get rid of leader election, so every replica can become leader. + * For now, every replica can become leader if there is no leader among replicas with old version. + * + * Replicas with old versions participate in leader election with ephemeral sequential nodes. + * If the node is first, then replica is leader. + * Replicas with new versions creates persistent sequential nodes. 
+ * If the first node is persistent, then all replicas with new versions become leaders. */ class LeaderElection { @@ -67,16 +76,13 @@ public: private: DB::BackgroundSchedulePool & pool; DB::BackgroundSchedulePool::TaskHolder task; - std::string path; + const std::string path; ZooKeeper & zookeeper; LeadershipHandler handler; std::string identifier; std::string log_name; Poco::Logger * log; - EphemeralNodeHolderPtr node; - std::string node_name; - std::atomic<bool> shutdown_called {false}; CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection}; @@ -84,43 +90,35 @@ private: void createNode() { shutdown_called = false; - node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier); - - std::string node_path = node->getPath(); - node_name = node_path.substr(node_path.find_last_of('/') + 1); - + zookeeper.create(path + "/leader_election-", identifier, CreateMode::PersistentSequential); task->activateAndSchedule(); } void releaseNode() { shutdown(); - node = nullptr; } void threadFunction() { - bool success = false; - try { Strings children = zookeeper.getChildren(path); - std::sort(children.begin(), children.end()); - auto it = std::lower_bound(children.begin(), children.end(), node_name); - if (it == children.end() || *it != node_name) + if (children.empty()) throw Poco::Exception("Assertion failed in LeaderElection"); - if (it == children.begin()) + std::sort(children.begin(), children.end()); + + Coordination::Stat stat; + zookeeper.get(path + "/" + children.front(), &stat); + + if (!stat.ephemeralOwner) { + /// It is sequential node - we can become leader. ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); handler(); return; } - - if (!zookeeper.existsWatch(path + "/" + *(it - 1), nullptr, task->getWatchCallback())) - task->schedule(); - - success = true; } catch (const KeeperException & e) { @@ -134,8 +132,7 @@ private: DB::tryLogCurrentException(log); } - if (!success) - task->scheduleAfter(10 * 1000); + task->scheduleAfter(10 * 1000); } }; From ab00e343054eb1dee01a0c0d87da5bc2875dfc28 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 22:38:36 +0300 Subject: [PATCH 0614/2229] Miscellaneous --- src/Storages/MergeTree/LeaderElection.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 680ffe4992f..725ab61e877 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -70,7 +70,7 @@ public: ~LeaderElection() { - releaseNode(); + shutdown(); } private: @@ -94,11 +94,6 @@ private: task->activateAndSchedule(); } - void releaseNode() - { - shutdown(); - } - void threadFunction() { try From cfef7ba6920560442543ffb40fb5be89693f3a0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:23:15 +0300 Subject: [PATCH 0615/2229] Whitespace --- src/Interpreters/SystemLog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index d79edde7052..b432cd8803b 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -95,7 +95,6 @@ SystemLogs::SystemLogs(Context & global_context, const Poco::Util::AbstractConfi if (asynchronous_metric_log) logs.emplace_back(asynchronous_metric_log.get()); - try { for (auto & log : logs) From 6ff671b092b65b9b016248ebfcc936d4bd5684d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:32:31 +0300 Subject: 
[PATCH 0616/2229] Improvement --- src/Storages/MergeTree/LeaderElection.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 725ab61e877..36209d6c003 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -29,8 +29,12 @@ namespace zkutil * But then we decided to get rid of leader election, so every replica can become leader. * For now, every replica can become leader if there is no leader among replicas with old version. * + * It's tempting to remove this class at all, but we have to maintain it, + * to maintain compatibility when replicas with different versions work on the same cluster + * (this is allowed for short time period during cluster update). + * * Replicas with old versions participate in leader election with ephemeral sequential nodes. - * If the node is first, then replica is leader. + * If the node is first, then replica is the leader. * Replicas with new versions creates persistent sequential nodes. * If the first node is persistent, then all replicas with new versions become leaders. */ @@ -90,6 +94,17 @@ private: void createNode() { shutdown_called = false; + + /// If there is at least one persistent node, we don't have to create another. + Strings children = zookeeper.getChildren(path); + for (const auto & child : children) + { + Coordination::Stat stat; + zookeeper.get(path + "/" + child, &stat); + if (!stat.ephemeralOwner) + return; + } + zookeeper.create(path + "/leader_election-", identifier, CreateMode::PersistentSequential); task->activateAndSchedule(); } From 21897f2abd451e5c80a9e9cf14bf7ce6f0d83bfe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:38:43 +0300 Subject: [PATCH 0617/2229] Instrument --- src/Common/ProfileEvents.cpp | 5 +++++ src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 8393ea85112..a0eb7a5fb48 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -203,6 +203,11 @@ \ M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \ M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \ + \ + M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \ + M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \ + M(CreatedLogEntryForMutation, "Successfully created log entry to mutate parts in ReplicatedMergeTree.") \ + M(NotCreatedLogEntryForMutation, "Log entry to mutate parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \ namespace ProfileEvents { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 57535466558..eb395ff55c0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -71,6 +71,10 @@ namespace ProfileEvents extern const Event ReplicatedPartFetches; extern const Event DataAfterMergeDiffersFromReplica; extern const Event DataAfterMutationDiffersFromReplica; + extern const Event CreatedLogEntryForMerge; + extern const Event 
NotCreatedLogEntryForMerge; + extern const Event CreatedLogEntryForMutation; + extern const Event NotCreatedLogEntryForMutation; } namespace CurrentMetrics @@ -2579,10 +2583,12 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c { String path_created = dynamic_cast<const Coordination::CreateResponse &>(*responses.front()).path_created; entry.znode_name = path_created.substr(path_created.find_last_of('/') + 1); + ProfileEvents::increment(ProfileEvents::CreatedLogEntryForMerge); LOG_TRACE(log, "Created log entry {} for merge {}", path_created, merged_name); } else if (code == Coordination::Error::ZBADVERSION) { + ProfileEvents::increment(ProfileEvents::NotCreatedLogEntryForMerge); LOG_TRACE(log, "Log entry is not created for merge {} because log was updated", merged_name); return CreateMergeEntryResult::LogUpdated; } @@ -2643,12 +2649,14 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c if (code == Coordination::Error::ZBADVERSION) { + ProfileEvents::increment(ProfileEvents::NotCreatedLogEntryForMutation); LOG_TRACE(log, "Log entry is not created for mutation {} because log was updated", new_part_name); return CreateMergeEntryResult::LogUpdated; } zkutil::KeeperMultiException::check(code, ops, responses); + ProfileEvents::increment(ProfileEvents::CreatedLogEntryForMutation); LOG_TRACE(log, "Created log entry for mutation {}", new_part_name); return CreateMergeEntryResult::Ok; } From 6f0db5ef108b1f13351e4792d427043417187728 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 12 Jun 2020 23:42:31 +0300 Subject: [PATCH 0618/2229] Fix error --- src/Storages/MergeTree/LeaderElection.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index 36209d6c003..ef6b68bbe15 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -102,7 +102,11 @@ private: Coordination::Stat stat; zookeeper.get(path + "/" + child, &stat); if (!stat.ephemeralOwner) + { + ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); + handler(); return; + } } zookeeper.create(path + "/leader_election-", identifier, CreateMode::PersistentSequential); task->activateAndSchedule(); } @@ -124,7 +128,7 @@ private: if (!stat.ephemeralOwner) { - /// It is sequential node - we can become leader. 
ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership); handler(); return; From 18f8861fa0df4ac2d1d57c5fc16dca651f4081df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jun 2020 00:19:08 +0300 Subject: [PATCH 0619/2229] Better test --- .../0_stateless/01307_multiple_leaders.sh | 33 ++++++++----------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/01307_multiple_leaders.sh b/tests/queries/0_stateless/01307_multiple_leaders.sh index 0bf5e0b13bf..b16feaeb591 100755 --- a/tests/queries/0_stateless/01307_multiple_leaders.sh +++ b/tests/queries/0_stateless/01307_multiple_leaders.sh @@ -5,35 +5,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -$CLICKHOUSE_CLIENT -n --query " -DROP TABLE IF EXISTS r0; -DROP TABLE IF EXISTS r1; +NUM_REPLICAS=2 +DATA_SIZE=1000 -CREATE TABLE r0 (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r0') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M'; -CREATE TABLE r1 (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r1') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M'; -" +SEQ=$(seq 0 $(($NUM_REPLICAS - 1))) + +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "DROP TABLE IF EXISTS r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "CREATE TABLE r$REPLICA (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r$REPLICA') ORDER BY x SETTINGS min_bytes_for_wide_part = '10M';"; done function thread() { REPLICA=$1 ITERATIONS=$2 - $CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 0 --min_insert_block_size_bytes 0 --query "INSERT INTO r$REPLICA SELECT number * 2 + $REPLICA FROM numbers($ITERATIONS)" + $CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 0 --min_insert_block_size_bytes 0 --query "INSERT INTO r$REPLICA SELECT number * $NUM_REPLICAS + $REPLICA FROM numbers($ITERATIONS)" } - -thread 0 1000 & -thread 1 1000 & +for REPLICA in $SEQ; do + thread $REPLICA $DATA_SIZE & +done wait -$CLICKHOUSE_CLIENT -n --query " -SYSTEM SYNC REPLICA r0; -SYSTEM SYNC REPLICA r1; - -SELECT count(), sum(x) FROM r0; -SELECT count(), sum(x) FROM r1; - -DROP TABLE r0; -DROP TABLE r1; -" +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "SYSTEM SYNC REPLICA r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "SELECT count(), sum(x) FROM r$REPLICA"; done +for REPLICA in $SEQ; do $CLICKHOUSE_CLIENT -n --query "DROP TABLE r$REPLICA"; done From de96296e019887f925c998bef97e42bafaa964e8 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sat, 13 Jun 2020 10:17:02 +0800 Subject: [PATCH 0620/2229] ISSUES-7572 fix build failure --- src/Server/ReplicasStatusHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 3606da23ab5..5ead756ee1e 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB From 62eaeac713a130f73c74e6efc6d831009a582a5e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 13 Jun 2020 05:35:42 +0300 Subject: [PATCH 0621/2229] trigger ci From 558912be7d4e4c8da7b366be3da00e5d2c2c1905 Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Sat, 13 Jun 2020 10:47:23 +0800 Subject: [PATCH 0622/2229] Update lazy.md (#11620) * Update lazy.md Optimize the doc translation for Simplified-Chinese version. 
* Update lazy.md Co-authored-by: Ivan Blinkov --- docs/zh/engines/database-engines/lazy.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/zh/engines/database-engines/lazy.md b/docs/zh/engines/database-engines/lazy.md index c0a08e37559..700eb4b3b25 100644 --- a/docs/zh/engines/database-engines/lazy.md +++ b/docs/zh/engines/database-engines/lazy.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 31 toc_title: "\u61D2\u60F0" --- -# 懒惰 {#lazy} +# 延时引擎Lazy {#lazy} -仅将表保留在RAM中 `expiration_time_in_seconds` 上次访问后几秒钟。 只能与\*日志表一起使用。 +在距最近一次访问间隔`expiration_time_in_seconds`时间段内,将表保存在内存中,仅适用于 \*Log引擎表 -它针对存储许多小\*日志表进行了优化,访问之间存在较长的时间间隔。 +由于针对这类表的访问间隔较长,对保存大量小的 \*Log引擎表进行了优化, ## 创建数据库 {#creating-a-database} From 09a37db2a3e9c12a47f44dc684173a62e06aafb9 Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Sat, 13 Jun 2020 10:48:50 +0800 Subject: [PATCH 0623/2229] Update clickhouse-benchmark.md (#11619) * Update clickhouse-benchmark.md Optimize doc translation for Simplized-Chinese version,author by TomBombadil * Update clickhouse-benchmark.md Co-authored-by: Ivan Blinkov --- .../utilities/clickhouse-benchmark.md | 48 +++++++++---------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/docs/zh/operations/utilities/clickhouse-benchmark.md b/docs/zh/operations/utilities/clickhouse-benchmark.md index d1e83cb9789..1c255f621c0 100644 --- a/docs/zh/operations/utilities/clickhouse-benchmark.md +++ b/docs/zh/operations/utilities/clickhouse-benchmark.md @@ -1,11 +1,9 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 61 -toc_title: "\uFF82\u6697\uFF6A\uFF82\u6C3E\u73AF\u50AC\uFF82\u56E3" +toc_title: "性能测试" --- -# ツ暗ェツ氾环催ツ団 {#clickhouse-benchmark} +# 性能测试 {#clickhouse-benchmark} 连接到ClickHouse服务器并重复发送指定的查询。 @@ -21,7 +19,7 @@ $ echo "single query" | clickhouse-benchmark [keys] $ clickhouse-benchmark [keys] <<< "single query" ``` -如果要发送一组查询,请创建一个文本文件,并将每个查询放在此文件中的单个字符串上。 例如: +如果要发送一组查询,请创建一个文本文件,并将每个查询的字符串放在此文件中。 例如: ``` sql SELECT * FROM system.numbers LIMIT 10000000 @@ -34,15 +32,15 @@ SELECT 1 clickhouse-benchmark [keys] < queries_file ``` -## 键 {#clickhouse-benchmark-keys} +## keys参数 {#clickhouse-benchmark-keys} - `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` 同时发送。 默认值:1。 - `-d N`, `--delay=N` — Interval in seconds between intermediate reports (set 0 to disable reports). Default value: 1. -- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. 为 [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-h` 钥匙 +- `-h WORD`, `--host=WORD` — Server host. Default value: `localhost`. 为 [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-h` 参数 - `-p N`, `--port=N` — Server port. Default value: 9000. For the [比较模式](#clickhouse-benchmark-comparison-mode) 您可以使用多个 `-p` 钥匙 -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0. -- `-r`, `--randomize` — Random order of queries execution if there is more then one input query. -- `-s`, `--secure` — Using TLS connection. +- `-i N`, `--iterations=N` — 查询的总次数. Default value: 0. +- `-r`, `--randomize` — 有多个查询时,以随机顺序执行. +- `-s`, `--secure` — 使用TLS安全连接. - `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` 达到指定的时间限制时停止发送查询。 默认值:0(禁用时间限制)。 - `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. 
In the [比较模式](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` 执行 [独立双样本学生的t测试](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) 测试以确定两个分布是否与所选置信水平没有不同。 - `--cumulative` — Printing cumulative data instead of data per interval. @@ -51,14 +49,14 @@ clickhouse-benchmark [keys] < queries_file - `--user=USERNAME` — ClickHouse user name. Default value: `default`. - `--password=PSWD` — ClickHouse user password. Default value: empty string. - `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` 输出异常的堆栈跟踪。 -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns answer to `clickhouse-benchmark` 在指定的阶段。 可能的值: `complete`, `fetch_columns`, `with_mergeable_state`. 默认值: `complete`. +- `--stage=WORD` — 查询请求的服务端处理状态. 在特定阶段Clickhouse会停止查询处理,并返回结果给`clickhouse-benchmark`。 可能的值: `complete`, `fetch_columns`, `with_mergeable_state`. 默认值: `complete`. - `--help` — Shows the help message. -如果你想申请一些 [设置](../../operations/settings/index.md) 对于查询,请将它们作为键传递 `--= SETTING_VALUE`. 例如, `--max_memory_usage=1048576`. +如果你想在查询时应用上述的部分参数 [设置](../../operations/settings/index.md) ,请将它们作为键传递 `--= SETTING_VALUE`. 例如, `--max_memory_usage=1048576`. ## 输出 {#clickhouse-benchmark-output} -默认情况下, `clickhouse-benchmark` 每个报表 `--delay` 间隔。 +默认情况下, `clickhouse-benchmark` 按照 `--delay` 参数间隔输出结果。 报告示例: @@ -83,27 +81,27 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul 99.990% 0.150 sec. ``` -在报告中,您可以找到: +在结果报告中,您可以找到: -- 在查询的数量 `Queries executed:` 场。 +- 查询数量:参见`Queries executed:`字段。 -- 状态字符串包含(按顺序): +- 状态码(按顺序给出): - - ClickHouse服务器的端点。 + - ClickHouse服务器的连接信息。 - 已处理的查询数。 - - QPS:QPS:在指定的时间段内每秒执行多少个查询服务器 `--delay` 争论。 - - RPS:在指定的时间段内,服务器每秒读取多少行 `--delay` 争论。 - - MiB/s:在指定的时间段内每秒读取多少mebibytes服务器 `--delay` 争论。 - - 结果RPS:在指定的时间段内,服务器每秒放置到查询结果的行数 `--delay` 争论。 - - 结果MiB/s.在指定的时间段内,服务器每秒将多少mebibytes放置到查询结果中 `--delay` 争论。 + - QPS:服务端每秒处理的查询数量 + - RPS:服务器每秒读取多少行 + - MiB/s:服务器每秒读取多少字节的数据 + - 结果RPS:服务端每秒生成多少行的结果集数据 + - 结果MiB/s.服务端每秒生成多少字节的结果集数据 -- 查询执行时间的百分位数。 +- 查询执行时间的百分比。 -## 比较模式 {#clickhouse-benchmark-comparison-mode} +## 对比模式 {#clickhouse-benchmark-comparison-mode} `clickhouse-benchmark` 可以比较两个正在运行的ClickHouse服务器的性能。 -要使用比较模式,请通过以下两对指定两个服务器的端点 `--host`, `--port` 钥匙 键在参数列表中的位置匹配在一起,第一 `--host` 与第一匹配 `--port` 等等。 `clickhouse-benchmark` 建立到两个服务器的连接,然后发送查询。 每个查询寻址到随机选择的服务器。 每个服务器的结果分别显示。 +要使用对比模式,分别为每个服务器配置各自的`--host`, `--port`参数。`clickhouse-benchmark` 会根据设置的参数建立到各个Server的连接并发送请求。每个查询请求会随机发送到某个服务器。输出结果会按服务器分组输出 ## 示例 {#clickhouse-benchmark-example} From 01d903c60d3913584bf4207dbc51563493415349 Mon Sep 17 00:00:00 2001 From: Ildus Kurbangaliev Date: Thu, 11 Jun 2020 10:31:37 +0000 Subject: [PATCH 0624/2229] add minMap and maxMap functions --- .../aggregate-functions/reference.md | 40 +++++++++ .../AggregateFunctionSumMap.cpp | 74 ++++++++++++++++ .../AggregateFunctionSumMap.h | 72 ++++++++++++---- src/Common/FieldVisitors.h | 84 +++++++++++++++++++ .../01280_min_map_max_map.reference | 24 ++++++ .../0_stateless/01280_min_map_max_map.sql | 33 ++++++++ 6 files changed, 310 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/01280_min_map_max_map.reference create mode 100644 tests/queries/0_stateless/01280_min_map_max_map.sql diff --git a/docs/en/sql-reference/aggregate-functions/reference.md b/docs/en/sql-reference/aggregate-functions/reference.md index baba1ce904e..5af70ff136b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference.md +++ 
b/docs/en/sql-reference/aggregate-functions/reference.md @@ -370,6 +370,46 @@ GROUP BY timeslot └─────────────────────┴──────────────────────────────────────────────┴────────────────────────────────┘ ``` +## minMap(key, value), minMap(Tuple(key, value)) {#agg_functions-minmap} + +Calculates the minimum from ‘value’ array according to the keys specified in the ‘key’ array. +Passing a tuple of keys and value arrays is synonymous with passing two arrays of keys and values. +The number of elements in ‘key’ and ‘value’ must be the same for each row that is aggregated. +Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. + +Example: + +```sql +SELECT minMap(a, b) +FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) +``` + +```text +┌─minMap(a, b)──────┐ +│ ([1,2,3],[2,1,1]) │ +└───────────────────┘ +``` + +## maxMap(key, value), maxMap(Tuple(key, value)) {#agg_functions-maxmap} + +Calculates the maximum from ‘value’ array according to the keys specified in the ‘key’ array. +Passing a tuple of keys and value arrays is synonymous with passing two arrays of keys and values. +The number of elements in ‘key’ and ‘value’ must be the same for each row that is aggregated. +Returns a tuple of two arrays: keys in sorted order, and values calculated for the corresponding keys. + +Example: + +```sql +SELECT maxMap(a, b) +FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) +``` + +```text +┌─maxMap(a, b)──────┐ +│ ([1,2,3],[2,2,1]) │ +└───────────────────┘ +``` + ## skewPop {#skewpop} Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/src/AggregateFunctions/AggregateFunctionSumMap.cpp index f4e299fe7c9..0e0d654abf1 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -32,6 +32,20 @@ struct SumMapFiltered using F = AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>; }; +template <bool tuple_argument> +struct MinMap +{ + template <typename T> + using F = AggregateFunctionMinMap<T, tuple_argument>; +}; + +template <bool tuple_argument> +struct MaxMap +{ + template <typename T> + using F = AggregateFunctionMaxMap<T, tuple_argument>; +}; + auto parseArguments(const std::string & name, const DataTypes & arguments) { @@ -154,6 +168,64 @@ AggregateFunctionPtr createAggregateFunctionSumMapFiltered(const std::string & n return res; } +AggregateFunctionPtr createAggregateFunctionMinMap(const std::string & name, const DataTypes & arguments, const Array & params) +{ + assertNoParameters(name, params); + + auto [keys_type, values_types, tuple_argument] = parseArguments(name, arguments); + + AggregateFunctionPtr res; + if (tuple_argument) + { + res.reset(createWithNumericBasedType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithDecimalType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithStringType<MinMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + } + else + { + res.reset(createWithNumericBasedType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithDecimalType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithStringType<MinMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + } + if (!res) + throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return res; +} + +AggregateFunctionPtr 
createAggregateFunctionMaxMap(const std::string & name, const DataTypes & arguments, const Array & params) +{ + assertNoParameters(name, params); + + auto [keys_type, values_types, tuple_argument] = parseArguments(name, arguments); + + AggregateFunctionPtr res; + if (tuple_argument) + { + res.reset(createWithNumericBasedType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithDecimalType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithStringType<MaxMap<true>::template F>(*keys_type, keys_type, values_types, arguments)); + } + else + { + res.reset(createWithNumericBasedType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithDecimalType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + if (!res) + res.reset(createWithStringType<MaxMap<false>::template F>(*keys_type, keys_type, values_types, arguments)); + } + if (!res) + throw Exception("Illegal type of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return res; +} + } void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory) @@ -162,6 +234,8 @@ void registerAggregateFunctionSumMap(AggregateFunctionFactory & factory) factory.registerFunction("sumMapWithOverflow", createAggregateFunctionSumMap<true>); factory.registerFunction("sumMapFiltered", createAggregateFunctionSumMapFiltered<false>); factory.registerFunction("sumMapFilteredWithOverflow", createAggregateFunctionSumMapFiltered<true>); + factory.registerFunction("minMap", createAggregateFunctionMinMap); + factory.registerFunction("maxMap", createAggregateFunctionMaxMap); } } diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index e2aef611955..0c4b407b8a8 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -28,16 +28,16 @@ namespace ErrorCodes } template <typename T> -struct AggregateFunctionSumMapData +struct AggregateFunctionXxxMapData { // Map needs to be ordered to maintain function properties std::map<T, Array> merged_maps; }; /** Aggregate function, that takes at least two arguments: keys and values, and as a result, builds a tuple of at least 2 arrays - - * ordered keys and variable number of argument values summed up by corresponding keys. + * ordered keys and variable number of argument values aggregated by corresponding keys. * - * This function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map". + * sumMap function is the most useful when using SummingMergeTree to sum Nested columns, which name ends in "Map". * * Example: sumMap(k, v...) of: * k v * [1,2,3] [10,10,10] * [3,4,5] [10,10,10] * [4,5,6] [10,10,10] * [6,7,8] [10,10,10] * [7,5,3] [5,15,25] * [8,9,10] [20,20,20] * will return: * ([1,2,3,4,5,6,7,8,9,10],[10,10,45,20,35,20,15,30,20,20]) + * + * minMap and maxMap share the same idea, but calculate min and max respectively. 
*/ -template <typename T, typename Derived, bool overflow, bool tuple_argument> -class AggregateFunctionSumMapBase : public IAggregateFunctionDataHelper< - AggregateFunctionSumMapData<NearestFieldType<T>>, Derived> +template <typename T, typename Derived, typename Visitor, bool overflow, bool tuple_argument> +class AggregateFunctionMapOpBase : public IAggregateFunctionDataHelper< + AggregateFunctionXxxMapData<NearestFieldType<T>>, Derived> { private: DataTypePtr keys_type; DataTypes values_types; public: - AggregateFunctionSumMapBase( + AggregateFunctionMapOpBase( const DataTypePtr & keys_type_, const DataTypes & values_types_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper<AggregateFunctionSumMapData<NearestFieldType<T>>, Derived>(argument_types_, params_) + : IAggregateFunctionDataHelper<AggregateFunctionXxxMapData<NearestFieldType<T>>, Derived>(argument_types_, params_) , keys_type(keys_type_), values_types(values_types_) {} - String getName() const override { return "sumMap"; } - DataTypePtr getReturnType() const override { DataTypes types; @@ -88,7 +88,7 @@ public: // No overflow, meaning we promote the types if necessary. if (!value_type->canBePromoted()) { - throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception{"Values for " + getName() + " are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } result_type = value_type->promoteNumericType(); @@ -161,7 +161,7 @@ public: if (it != merged_maps.end()) { - applyVisitor(FieldVisitorSum(value), it->second[col]); + applyVisitor(Visitor(value), it->second[col]); } else { @@ -198,7 +198,7 @@ public: if (it != merged_maps.end()) { for (size_t col = 0; col < values_types.size(); ++col) - applyVisitor(FieldVisitorSum(elem.second[col]), it->second[col]); + applyVisitor(Visitor(elem.second[col]), it->second[col]); } else merged_maps[elem.first] = elem.second; @@ -300,15 +300,16 @@ public: } bool keepKey(const T & key) const { return static_cast<const Derived &>(*this).keepKey(key); } + String getName() const override { return static_cast<const Derived &>(*this).getName(); } }; template <typename T, bool overflow, bool tuple_argument> class AggregateFunctionSumMap final : - public AggregateFunctionSumMapBase<T, AggregateFunctionSumMap<T, overflow, tuple_argument>, overflow, tuple_argument> + public AggregateFunctionMapOpBase<T, AggregateFunctionSumMap<T, overflow, tuple_argument>, FieldVisitorSum, overflow, tuple_argument> { private: using Self = AggregateFunctionSumMap<T, overflow, tuple_argument>; - using Base = AggregateFunctionSumMapBase<T, Self, overflow, tuple_argument>; + using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorSum, overflow, tuple_argument>; public: AggregateFunctionSumMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) @@ -322,14 +323,15 @@ public: template <typename T, bool overflow, bool tuple_argument> class AggregateFunctionSumMapFiltered final : - public AggregateFunctionSumMapBase<T, AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>, overflow, tuple_argument> + public AggregateFunctionMapOpBase<T, AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>, + FieldVisitorSum, overflow, tuple_argument> { private: using Self = AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>; - using Base = AggregateFunctionSumMapBase<T, Self, overflow, tuple_argument>; + using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorSum, overflow, tuple_argument>; std::unordered_set<T> keys_to_keep; @@ -351,4 +353,40 @@ public: bool keepKey(const T & key) const { return keys_to_keep.count(key); } }; +template <typename T, bool tuple_argument> +class AggregateFunctionMinMap final : + public AggregateFunctionMapOpBase<T, AggregateFunctionMinMap<T, tuple_argument>, FieldVisitorMin, true, tuple_argument> +{ +private: + using Self = AggregateFunctionMinMap<T, tuple_argument>; + using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorMin, true, tuple_argument>; + +public: + AggregateFunctionMinMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) + : Base{keys_type_, values_types_, argument_types_, {}} + {} + + String getName() const override { return "minMap"; } + + bool keepKey(const T &) const { return true; } +}; + +template <typename T, bool tuple_argument> +class AggregateFunctionMaxMap final : + public AggregateFunctionMapOpBase<T, AggregateFunctionMaxMap<T, tuple_argument>, FieldVisitorMax, true, tuple_argument> +{ +private: + using Self = AggregateFunctionMaxMap<T, tuple_argument>; + using Base = AggregateFunctionMapOpBase<T, Self, FieldVisitorMax, true, tuple_argument>; 
+public: + AggregateFunctionMaxMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) + : Base{keys_type_, values_types_, argument_types_, {}} + {} + + String getName() const override { return "maxMap"; } + + bool keepKey(const T &) const { return true; } +}; + } diff --git a/src/Common/FieldVisitors.h b/src/Common/FieldVisitors.h index 90f80974ab1..40a75b162c9 100644 --- a/src/Common/FieldVisitors.h +++ b/src/Common/FieldVisitors.h @@ -449,4 +449,88 @@ public: } }; +/** Implements `Max` operation. + * Returns true if changed + */ +class FieldVisitorMax : public StaticVisitor<bool> +{ +private: + const Field & rhs; +public: + explicit FieldVisitorMax(const Field & rhs_) : rhs(rhs_) {} + + bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Array &) const { throw Exception("Cannot compare Arrays", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Tuple &) const { throw Exception("Cannot compare Tuples", ErrorCodes::LOGICAL_ERROR); } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } + + template <typename T> + bool operator() (DecimalField<T> & x) const + { + auto val = get<DecimalField<T>>(rhs); + if (val > x) + { + x = val; + return true; + } + + return false; + } + + template <typename T> + bool operator() (T & x) const + { + auto val = get<T>(rhs); + if (val > x) + { + x = val; + return true; + } + + return false; + } +}; + +/** Implements `Min` operation. + * Returns true if changed + */ +class FieldVisitorMin : public StaticVisitor<bool> +{ +private: + const Field & rhs; +public: + explicit FieldVisitorMin(const Field & rhs_) : rhs(rhs_) {} + + bool operator() (Null &) const { throw Exception("Cannot compare Nulls", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Array &) const { throw Exception("Cannot compare Arrays", ErrorCodes::LOGICAL_ERROR); } + bool operator() (Tuple &) const { throw Exception("Cannot compare Tuples", ErrorCodes::LOGICAL_ERROR); } + bool operator() (AggregateFunctionStateData &) const { throw Exception("Cannot compare AggregateFunctionStates", ErrorCodes::LOGICAL_ERROR); } + + template <typename T> + bool operator() (DecimalField<T> & x) const + { + auto val = get<DecimalField<T>>(rhs); + if (val < x) + { + x = val; + return true; + } + + return false; + } + + template <typename T> + bool operator() (T & x) const + { + auto val = get<T>(rhs); + if (val < x) + { + x = val; + return true; + } + + return false; + } +}; + } diff --git a/tests/queries/0_stateless/01280_min_map_max_map.reference b/tests/queries/0_stateless/01280_min_map_max_map.reference new file mode 100644 index 00000000000..dd707d602c7 --- /dev/null +++ b/tests/queries/0_stateless/01280_min_map_max_map.reference @@ -0,0 +1,24 @@ +([0,1,2,3,4,5,6,7,8,9,10],[10,1,1,1,1,1,1,1,1,1,1]) Tuple(Array(Int32), Array(UInt64)) +([1],[-49]) +([1.00],[-49.00]) +([0,1,2,3,4,5,6,7,8,9,10],[100,91,92,93,94,95,96,97,98,99,1]) Tuple(Array(Int32), Array(UInt64)) +([1],[50]) +([1.00],[50.00]) +(['01234567-89ab-cdef-0123-456789abcdef'],['01111111-89ab-cdef-0123-456789abcdef']) +(['1'],['1']) +(['1'],['1']) +([1],[1]) +([1],[1]) +(['1970-01-02'],[1]) +(['1970-01-01 03:00:01'],[1]) +([1.01],[1]) +(['a'],[1]) +(['01234567-89ab-cdef-0123-456789abcdef'],['02222222-89ab-cdef-0123-456789abcdef']) +(['1'],['2']) +(['1'],['2']) +([1],[2]) +([1],[2]) +(['1970-01-02'],[2]) +(['1970-01-01 03:00:01'],[2]) +([1.01],[2]) +(['a'],[2]) diff --git a/tests/queries/0_stateless/01280_min_map_max_map.sql 
b/tests/queries/0_stateless/01280_min_map_max_map.sql new file mode 100644 index 00000000000..02731eee601 --- /dev/null +++ b/tests/queries/0_stateless/01280_min_map_max_map.sql @@ -0,0 +1,33 @@ +select minMap([toInt32(number % 10), number % 10 + 1], [number, 1]) as m, toTypeName(m) from numbers(1, 100); +select minMap([1], [toInt32(number) - 50]) from numbers(1, 100); +select minMap([cast(1, 'Decimal(10, 2)')], [cast(toInt32(number) - 50, 'Decimal(10, 2)')]) from numbers(1, 100); + +select maxMap([toInt32(number % 10), number % 10 + 1], [number, 1]) as m, toTypeName(m) from numbers(1, 100); +select maxMap([1], [toInt32(number) - 50]) from numbers(1, 100); +select maxMap([cast(1, 'Decimal(10, 2)')], [cast(toInt32(number) - 50, 'Decimal(10, 2)')]) from numbers(1, 100); + +-- check different types for minMap +select minMap(val, cnt) from values ('val Array(UUID), cnt Array(UUID)', + (['01234567-89ab-cdef-0123-456789abcdef'], ['01111111-89ab-cdef-0123-456789abcdef']), + (['01234567-89ab-cdef-0123-456789abcdef'], ['02222222-89ab-cdef-0123-456789abcdef'])); +select minMap(val, cnt) from values ('val Array(String), cnt Array(String)', (['1'], ['1']), (['1'], ['2'])); +select minMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(FixedString(1))', (['1'], ['1']), (['1'], ['2'])); +select minMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select minMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); +select minMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); + +-- check different types for maxMap +select maxMap(val, cnt) from values ('val Array(UUID), cnt Array(UUID)', + (['01234567-89ab-cdef-0123-456789abcdef'], ['01111111-89ab-cdef-0123-456789abcdef']), + (['01234567-89ab-cdef-0123-456789abcdef'], ['02222222-89ab-cdef-0123-456789abcdef'])); +select maxMap(val, cnt) from values ('val Array(String), cnt Array(String)', (['1'], ['1']), (['1'], ['2'])); +select maxMap(val, cnt) from values ('val Array(FixedString(1)), cnt Array(FixedString(1))', (['1'], ['1']), (['1'], ['2'])); +select maxMap(val, cnt) from values ('val Array(UInt64), cnt Array(UInt64)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(Float64), cnt Array(Int8)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(Date), cnt Array(Int16)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(DateTime(\'Europe/Moscow\')), cnt Array(Int32)', ([1], [1]), ([1], [2])); +select maxMap(val, cnt) from values ('val Array(Decimal(10, 2)), cnt Array(Int16)', (['1.01'], [1]), (['1.01'], [2])); +select maxMap(val, cnt) from values ('val Array(Enum16(\'a\'=1)), cnt Array(Int16)', (['a'], [1]), (['a'], [2])); From fcfb6d3bc2b0f1a914cfedcc162a9ad5232f93bb Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 13 Jun 2020 11:51:07 +0300 Subject: [PATCH 0625/2229] Merge with master --- src/Storages/IStorage.cpp | 6 +++--- src/Storages/IStorage.h | 6 +++--- src/Storages/TTLDescription.cpp | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/IStorage.cpp 
b/src/Storages/IStorage.cpp index a09bb45f9d0..74c422af385 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -32,13 +32,13 @@ namespace ErrorCodes extern const int DEADLOCK_AVOIDED; } -ColumnsDescription IStorage::getColumns() const +const ColumnsDescription & IStorage::getColumns() const { std::lock_guard lock(metadata_mutex); return metadata.columns; } -IndicesDescription IStorage::getSecondaryIndices() const +const IndicesDescription & IStorage::getSecondaryIndices() const { std::lock_guard lock(metadata_mutex); return metadata.secondary_indices; @@ -51,7 +51,7 @@ bool IStorage::hasSecondaryIndices() const return !metadata.secondary_indices.empty(); } -ConstraintsDescription IStorage::getConstraints() const +const ConstraintsDescription & IStorage::getConstraints() const { std::lock_guard lock(metadata_mutex); return metadata.constraints; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 40ca901640b..3bacab6f46f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -142,15 +142,15 @@ public: /// structure lock to get consistent metadata snapshot. This will be fixed /// soon. TODO(alesap) - ColumnsDescription getColumns() const; /// returns combined set of columns + const ColumnsDescription & getColumns() const; /// returns combined set of columns void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. void setSecondaryIndices(IndicesDescription secondary_indices_); - IndicesDescription getSecondaryIndices() const; + const IndicesDescription & getSecondaryIndices() const; /// Has at least one non primary index bool hasSecondaryIndices() const; - ConstraintsDescription getConstraints() const; + const ConstraintsDescription & getConstraints() const; void setConstraints(ConstraintsDescription constraints_); /// Storage settings diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 898df5006fd..ea6b3e64aff 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -34,6 +34,9 @@ TTLAggregateDescription::TTLAggregateDescription(const TTLAggregateDescription & TTLAggregateDescription & TTLAggregateDescription::operator=(const TTLAggregateDescription & other) { + if (&other == this) + return *this; + column_name = other.column_name; expression_result_column_name = other.expression_result_column_name; if (other.expression) From 31b852c46dd517a8cfe1ebab3c8505ee09f4d481 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 13 Jun 2020 11:53:40 +0300 Subject: [PATCH 0626/2229] Remove redundant locks --- src/Storages/IStorage.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 74c422af385..9b1f4963dab 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -34,26 +34,21 @@ namespace ErrorCodes const ColumnsDescription & IStorage::getColumns() const { - std::lock_guard lock(metadata_mutex); return metadata.columns; } const IndicesDescription & IStorage::getSecondaryIndices() const { - std::lock_guard lock(metadata_mutex); return metadata.secondary_indices; } - bool IStorage::hasSecondaryIndices() const { - std::lock_guard lock(metadata_mutex); return !metadata.secondary_indices.empty(); } const ConstraintsDescription & IStorage::getConstraints() const { - std::lock_guard lock(metadata_mutex); return metadata.constraints; } @@ -294,7 +289,6 @@ void IStorage::check(const Block & block, bool need_all) const void IStorage::setColumns(ColumnsDescription 
columns_) { - std::lock_guard lock(metadata_mutex); if (columns_.getOrdinary().empty()) throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); metadata.columns = std::move(columns_); @@ -302,13 +296,11 @@ void IStorage::setColumns(ColumnsDescription columns_) void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_) { - std::lock_guard lock(metadata_mutex); metadata.secondary_indices = std::move(secondary_indices_); } void IStorage::setConstraints(ConstraintsDescription constraints_) { - std::lock_guard lock(metadata_mutex); metadata.constraints = std::move(constraints_); } From d636cdf4b02ae52bc7bddc6eb642401286ba8dc1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 13 Jun 2020 11:55:03 +0300 Subject: [PATCH 0627/2229] Remove outdated comment --- src/Storages/IStorage.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 3bacab6f46f..d48f269e833 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -137,10 +137,7 @@ public: using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>; virtual ColumnSizeByName getColumnSizes() const { return {}; } -public: - /// NOTE: These methods are thread-safe now, but require additional - /// structure lock to get consistent metadata snapshot. This will be fixed - /// soon. TODO(alesap) +public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. From 18b58e8483a5ee1c0c5be77b6e56950a6087f8ee Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 13 Jun 2020 12:12:45 +0300 Subject: [PATCH 0628/2229] Revert too strict lock --- src/Storages/IStorage.cpp | 65 +++++++----------------- src/Storages/IStorage.h | 12 ++--- src/Storages/MergeTree/MergeTreeData.cpp | 2 - src/Storages/MergeTree/MergeTreeData.h | 5 -- 4 files changed, 25 insertions(+), 59 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 9b1f4963dab..b81b314c721 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -415,160 +415,138 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -KeyDescription IStorage::getPartitionKey() const +const KeyDescription & IStorage::getPartitionKey() const { - std::lock_guard lock(metadata_mutex); return metadata.partition_key; } void IStorage::setPartitionKey(const KeyDescription & partition_key_) { - std::lock_guard lock(metadata_mutex); metadata.partition_key = partition_key_; } bool IStorage::isPartitionKeyDefined() const { - std::lock_guard lock(metadata_mutex); return metadata.partition_key.definition_ast != nullptr; } bool IStorage::hasPartitionKey() const { - std::lock_guard lock(metadata_mutex); return !metadata.partition_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPartitionKey() const { - std::lock_guard lock(metadata_mutex); - if (!metadata.partition_key.column_names.empty()) + if (hasPartitionKey()) return metadata.partition_key.expression->getRequiredColumns(); return {}; } -KeyDescription IStorage::getSortingKey() const +const KeyDescription & IStorage::getSortingKey() const { - std::lock_guard lock(metadata_mutex); return metadata.sorting_key; } void IStorage::setSortingKey(const KeyDescription & sorting_key_) { - std::lock_guard lock(metadata_mutex); metadata.sorting_key = sorting_key_; } bool IStorage::isSortingKeyDefined() const { - std::lock_guard 
lock(metadata_mutex); return metadata.sorting_key.definition_ast != nullptr; } bool IStorage::hasSortingKey() const { - std::lock_guard lock(metadata_mutex); return !metadata.sorting_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSortingKey() const { - std::lock_guard lock(metadata_mutex); - if (!metadata.sorting_key.column_names.empty()) + if (hasSortingKey()) return metadata.sorting_key.expression->getRequiredColumns(); return {}; } Names IStorage::getSortingKeyColumns() const { - std::lock_guard lock(metadata_mutex); - if (!metadata.sorting_key.column_names.empty()) + if (hasSortingKey()) return metadata.sorting_key.column_names; return {}; } -KeyDescription IStorage::getPrimaryKey() const +const KeyDescription & IStorage::getPrimaryKey() const { - std::lock_guard lock(metadata_mutex); return metadata.primary_key; } void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { - std::lock_guard lock(metadata_mutex); metadata.primary_key = primary_key_; } bool IStorage::isPrimaryKeyDefined() const { - std::lock_guard lock(metadata_mutex); return metadata.primary_key.definition_ast != nullptr; } bool IStorage::hasPrimaryKey() const { - std::lock_guard lock(metadata_mutex); return !metadata.primary_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPrimaryKey() const { - std::lock_guard lock(metadata_mutex); - if (!metadata.primary_key.column_names.empty()) + if (hasPrimaryKey()) return metadata.primary_key.expression->getRequiredColumns(); return {}; } Names IStorage::getPrimaryKeyColumns() const { - std::lock_guard lock(metadata_mutex); if (!metadata.primary_key.column_names.empty()) return metadata.primary_key.column_names; return {}; } -KeyDescription IStorage::getSamplingKey() const +const KeyDescription & IStorage::getSamplingKey() const { - std::lock_guard lock(metadata_mutex); return metadata.sampling_key; } void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { - std::lock_guard lock(metadata_mutex); metadata.sampling_key = sampling_key_; } bool IStorage::isSamplingKeyDefined() const { - std::lock_guard lock(metadata_mutex); return metadata.sampling_key.definition_ast != nullptr; } bool IStorage::hasSamplingKey() const { - std::lock_guard lock(metadata_mutex); return !metadata.sampling_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSampling() const { - std::lock_guard lock(metadata_mutex); - if (!metadata.sampling_key.column_names.empty()) + if (hasSamplingKey()) return metadata.sampling_key.expression->getRequiredColumns(); return {}; } TTLTableDescription IStorage::getTableTTLs() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return metadata.table_ttl; } void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); metadata.table_ttl = table_ttl_; } @@ -579,43 +557,43 @@ bool IStorage::hasAnyTableTTL() const TTLColumnsDescription IStorage::getColumnTTLs() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return metadata.column_ttls_by_name; } void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); metadata.column_ttls_by_name = column_ttls_by_name_; } bool IStorage::hasAnyColumnTTL() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return !metadata.column_ttls_by_name.empty(); } TTLDescription IStorage::getRowsTTL() const { - 
std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return metadata.table_ttl.rows_ttl; } bool IStorage::hasRowsTTL() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return metadata.table_ttl.rows_ttl.expression != nullptr; } TTLDescriptions IStorage::getMoveTTLs() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return metadata.table_ttl.move_ttl; } bool IStorage::hasAnyMoveTTL() const { - std::lock_guard lock(metadata_mutex); + std::lock_guard lock(ttl_mutex); return !metadata.table_ttl.move_ttl.empty(); } @@ -681,7 +659,6 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum ASTPtr IStorage::getSettingsChanges() const { - std::lock_guard lock(metadata_mutex); if (metadata.settings_changes) return metadata.settings_changes->clone(); return nullptr; @@ -689,28 +666,24 @@ ASTPtr IStorage::getSettingsChanges() const void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { - std::lock_guard lock(metadata_mutex); if (settings_changes_) metadata.settings_changes = settings_changes_->clone(); else metadata.settings_changes = nullptr; } -SelectQueryDescription IStorage::getSelectQuery() const +const SelectQueryDescription & IStorage::getSelectQuery() const { - std::lock_guard lock(metadata_mutex); return metadata.select; } void IStorage::setSelectQuery(const SelectQueryDescription & select_) { - std::lock_guard lock(metadata_mutex); metadata.select = select_; } bool IStorage::hasSelectQuery() const { - std::lock_guard lock(metadata_mutex); return metadata.select.select_query != nullptr; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d48f269e833..c7c8e382a87 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -156,7 +156,7 @@ public: /// thread-unsafe part. lockStructure must be acquired bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } /// Select query for *View storages. - SelectQueryDescription getSelectQuery() const; + const SelectQueryDescription & getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; @@ -206,7 +206,7 @@ private: mutable std::mutex id_mutex; /// TODO (alesap) just use multiversion for atomic metadata - mutable std::mutex metadata_mutex; + mutable std::mutex ttl_mutex; StorageInMemoryMetadata metadata; private: RWLockImpl::LockHolder tryLockTimed( @@ -440,7 +440,7 @@ public: virtual Strings getDataPaths() const { return {}; } /// Returns structure with partition key. - KeyDescription getPartitionKey() const; + const KeyDescription & getPartitionKey() const; /// Set partition key for storage (methods below are just wrappers for this /// struct). void setPartitionKey(const KeyDescription & partition_key_); @@ -455,7 +455,7 @@ public: /// Returns structure with sorting key. - KeyDescription getSortingKey() const; + const KeyDescription & getSortingKey() const; /// Set sorting key for storage (methods below are just wrappers for this /// struct). void setSortingKey(const KeyDescription & sorting_key_); @@ -472,7 +472,7 @@ public: Names getSortingKeyColumns() const; /// Returns structure with primary key. - KeyDescription getPrimaryKey() const; + const KeyDescription & getPrimaryKey() const; /// Set primary key for storage (methods below are just wrappers for this /// struct). 
void setPrimaryKey(const KeyDescription & primary_key_); @@ -490,7 +490,7 @@ public: Names getPrimaryKeyColumns() const; /// Returns structure with sampling key. - KeyDescription getSamplingKey() const; + const KeyDescription & getSamplingKey() const; /// Set sampling key for storage (methods below are just wrappers for this /// struct). void setSamplingKey(const KeyDescription & sampling_key_); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fdd62b03046..84470088ebe 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -529,7 +529,6 @@ void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metada { checkTTLExpressions(new_metadata); setColumnTTLs(new_metadata.column_ttls_by_name); - auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); setTableTTLs(new_metadata.table_ttl); } @@ -2807,7 +2806,6 @@ MergeTreeData::selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & tt time_t max_max_ttl = 0; TTLDescriptions::const_iterator best_entry_it; - auto lock = std::lock_guard(move_ttl_entries_mutex); const auto & move_ttl_entries = getMoveTTLs(); for (auto ttl_entry_it = move_ttl_entries.begin(); ttl_entry_it != move_ttl_entries.end(); ++ttl_entry_it) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index a101e0645e3..007c6898e60 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -640,11 +640,6 @@ public: std::optional<TTLDescription> selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; - /// This mutex is required for background move operations which do not - /// obtain global locks. 
- /// TODO (alesap) It will be removed after metadata became atomic - mutable std::mutex move_ttl_entries_mutex; - /// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; From 3427da1c43e283100424a452aad828975422581c Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 13 Jun 2020 13:39:25 +0300 Subject: [PATCH 0629/2229] Fix incorrect usage of rows TTL --- src/DataStreams/TTLBlockInputStream.cpp | 36 ++++++++++++++----------- src/Storages/IStorage.cpp | 3 ++- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index ca65ae520c6..c79abff98cd 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -70,21 +70,22 @@ TTLBlockInputStream::TTLBlockInputStream( defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true); } - if (storage.hasRowsTTL() && storage.getRowsTTL().mode == TTLMode::GROUP_BY) + auto storage_rows_ttl = storage.getRowsTTL(); + if (storage.hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY) { - current_key_value.resize(storage.getRowsTTL().group_by_keys.size()); + current_key_value.resize(storage_rows_ttl.group_by_keys.size()); ColumnNumbers keys; - for (const auto & key : storage.getRowsTTL().group_by_keys) + for (const auto & key : storage_rows_ttl.group_by_keys) keys.push_back(header.getPositionByName(key)); - agg_key_columns.resize(storage.getRowsTTL().group_by_keys.size()); + agg_key_columns.resize(storage_rows_ttl.group_by_keys.size()); - AggregateDescriptions aggregates = storage.getRowsTTL().aggregate_descriptions; + AggregateDescriptions aggregates = storage_rows_ttl.aggregate_descriptions; for (auto & descr : aggregates) if (descr.arguments.empty()) for (const auto & name : descr.argument_names) descr.arguments.push_back(header.getPositionByName(name)); - agg_aggregate_columns.resize(storage.getRowsTTL().aggregate_descriptions.size()); + agg_aggregate_columns.resize(storage_rows_ttl.aggregate_descriptions.size()); const Settings & settings = storage.global_context.getSettingsRef(); @@ -105,8 +106,9 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) const Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part - if (storage.hasRowsTTL() && !storage.getRowsTTL().where_expression && - storage.getRowsTTL().mode != TTLMode::GROUP_BY && isTTLExpired(old_ttl_infos.table_ttl.max)) + auto storage_rows_ttl = storage.getRowsTTL(); + if (storage.hasRowsTTL() && !storage_rows_ttl.where_expression && + storage_rows_ttl.mode != TTLMode::GROUP_BY && isTTLExpired(old_ttl_infos.table_ttl.max)) { rows_removed = data_part->rows_count; return {}; @@ -151,7 +153,7 @@ void TTLBlockInputStream::readSuffixImpl() void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) { - const auto & rows_ttl = storage.getRowsTTL(); + auto rows_ttl = storage.getRowsTTL(); rows_ttl.expression->execute(block); if (rows_ttl.where_expression) @@ -160,8 +162,8 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) const IColumn * ttl_column = block.getByName(rows_ttl.result_column).column.get(); - const IColumn * where_result_column = storage.getRowsTTL().where_expression ? - block.getByName(storage.getRowsTTL().where_result_column).column.get() : nullptr; + const IColumn * where_result_column = rows_ttl.where_expression ? 
+ block.getByName(rows_ttl.where_result_column).column.get() : nullptr; const auto & column_names = header.getNames(); @@ -199,6 +201,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) size_t rows_aggregated = 0; size_t current_key_start = 0; size_t rows_with_current_key = 0; + auto storage_rows_ttl = storage.getRowsTTL(); for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); @@ -206,9 +209,9 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; bool same_as_current = true; - for (size_t j = 0; j < storage.getRowsTTL().group_by_keys.size(); ++j) + for (size_t j = 0; j < storage_rows_ttl.group_by_keys.size(); ++j) { - const String & key_column = storage.getRowsTTL().group_by_keys[j]; + const String & key_column = storage_rows_ttl.group_by_keys[j]; const IColumn * values_column = block.getByName(key_column).column.get(); if (!same_as_current || (*values_column)[i] != current_key_value[j]) { @@ -275,17 +278,18 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) if (!agg_result.empty()) { auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1); + auto storage_rows_ttl = storage.getRowsTTL(); for (auto & agg_block : aggregated_res) { - for (const auto & it : storage.getRowsTTL().set_parts) + for (const auto & it : storage_rows_ttl.set_parts) it.expression->execute(agg_block); - for (const auto & name : storage.getRowsTTL().group_by_keys) + for (const auto & name : storage_rows_ttl.group_by_keys) { const IColumn * values_column = agg_block.getByName(name).column.get(); auto & result_column = result_columns[header.getPositionByName(name)]; result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); } - for (const auto & it : storage.getRowsTTL().set_parts) + for (const auto & it : storage_rows_ttl.set_parts) { const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get(); auto & result_column = result_columns[header.getPositionByName(it.column_name)]; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index b81b314c721..a244f836f5c 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -629,7 +629,8 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum if (hasRowsTTL()) { - if (add_dependent_columns(getRowsTTL().expression, required_ttl_columns)) + auto rows_expression = getRowsTTL().expression; + if (add_dependent_columns(rows_expression, required_ttl_columns)) { /// Filter all columns, if rows TTL expression have to be recalculated. for (const auto & column : getColumns().getAllPhysical()) From 901a657417beb4d262a86c61b6935210eb9e7893 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jun 2020 14:20:24 +0300 Subject: [PATCH 0630/2229] Rename 01307_data_skip_bloom_filter to 01307_bloom_filter_index_string_multi_granulas This better reflects the covered case. 
--- ...e => 01307_bloom_filter_index_string_multi_granulas.reference} | 0 ...ter.sql => 01307_bloom_filter_index_string_multi_granulas.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01307_data_skip_bloom_filter.reference => 01307_bloom_filter_index_string_multi_granulas.reference} (100%) rename tests/queries/0_stateless/{01307_data_skip_bloom_filter.sql => 01307_bloom_filter_index_string_multi_granulas.sql} (100%) diff --git a/tests/queries/0_stateless/01307_data_skip_bloom_filter.reference b/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.reference similarity index 100% rename from tests/queries/0_stateless/01307_data_skip_bloom_filter.reference rename to tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.reference diff --git a/tests/queries/0_stateless/01307_data_skip_bloom_filter.sql b/tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql similarity index 100% rename from tests/queries/0_stateless/01307_data_skip_bloom_filter.sql rename to tests/queries/0_stateless/01307_bloom_filter_index_string_multi_granulas.sql From 9901e4d528bf0ebb3594150116077a93666ec450 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jun 2020 20:20:42 +0300 Subject: [PATCH 0631/2229] Remove debug output #11554 --- src/Functions/extractAllGroups.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index b75e54b490e..8216a528b2c 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -227,7 +227,6 @@ public: row_offset = next_row_offset; } } - DUMP(Kind, needle, column_haystack, root_offsets_col, nested_offsets_col); ColumnArray::MutablePtr nested_array_col = ColumnArray::create(std::move(data_col), std::move(nested_offsets_col)); ColumnArray::MutablePtr root_array_col = ColumnArray::create(std::move(nested_array_col), std::move(root_offsets_col)); From d8312d0f0051fd4523716924a5bd8951e5276bb9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jun 2020 20:25:13 +0300 Subject: [PATCH 0632/2229] Remove "fail" from test name; fix typo in test name #11151 --- ... 
=> 01281_unsucceeded_insert_select_queries_counter.reference} | 0 ...er.sql => 01281_unsucceeded_insert_select_queries_counter.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{01281_failed_insert_select_queries_couner.reference => 01281_unsucceeded_insert_select_queries_counter.reference} (100%) rename tests/queries/0_stateless/{01281_failed_insert_select_queries_couner.sql => 01281_unsucceeded_insert_select_queries_counter.sql} (100%) diff --git a/tests/queries/0_stateless/01281_failed_insert_select_queries_couner.reference b/tests/queries/0_stateless/01281_unsucceeded_insert_select_queries_counter.reference similarity index 100% rename from tests/queries/0_stateless/01281_failed_insert_select_queries_couner.reference rename to tests/queries/0_stateless/01281_unsucceeded_insert_select_queries_counter.reference diff --git a/tests/queries/0_stateless/01281_failed_insert_select_queries_couner.sql b/tests/queries/0_stateless/01281_unsucceeded_insert_select_queries_counter.sql similarity index 100% rename from tests/queries/0_stateless/01281_failed_insert_select_queries_couner.sql rename to tests/queries/0_stateless/01281_unsucceeded_insert_select_queries_counter.sql From b8a4c7708ac8724dd0aac5ca957a9aa46132af47 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 18:15:59 +0000 Subject: [PATCH 0633/2229] Make local exchanges unique for each table --- .../ReadBufferFromRabbitMQConsumer.cpp | 94 ++++++++++--------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 14 ++- src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 + 4 files changed, 64 insertions(+), 53 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 6b8763138a4..90485b28a96 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -41,7 +41,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String table_name_, + const String & local_exchange_name_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -54,7 +54,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , bind_by_id(bind_by_id_) , num_queues(num_queues_) , exchange_type(exchange_type_) - , table_name(table_name_) + , local_exchange_name(local_exchange_name_) , stopped(stopped_) { messages.clear(); @@ -85,28 +85,31 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { - /* If exchange_type is not set - then direct-exchange is used - this type of exchange is the fastest (also due to different - * binding algorithm this default behaviuor is much faster). It is also used in INSERT query (so it is always declared). + /* This direct-exchange is used for the default implementation and for the INSERT query (so it is always declared). If exchange_type + * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table. + * This strict division into external and local exchanges is needed to avoid too much complexity with defining exchange_name + * for the INSERT query producer and, in general, it is much better to keep them separate. */ - String producer_exchange = exchange_type_set ? 
exchange_name + "_" + Exchange::DEFAULT : exchange_name; - consumer_channel->declareExchange(producer_exchange, AMQP::fanout).onError([&](const char * message) + String default_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; + consumer_channel->declareExchange(default_exchange, AMQP::fanout).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_exchange, message); }); - internal_exchange_name = producer_exchange + "_" + Exchange::DIRECT; - consumer_channel->declareExchange(internal_exchange_name, AMQP::direct).onError([&](const char * message) + default_local_exchange = local_exchange_name; + default_local_exchange += exchange_type_set ? "_default_" + Exchange::DIRECT : "_" + Exchange::DIRECT; + consumer_channel->declareExchange(default_local_exchange, AMQP::direct).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_local_exchange, message); }); - /// With fanout exchange the binding key is ignored - a parameter might be arbitrary - consumer_channel->bindExchange(producer_exchange, internal_exchange_name, routing_keys[0]).onError([&](const char * message) + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. + consumer_channel->bindExchange(default_exchange, default_local_exchange, routing_keys[0]).onError([&](const char * message) { - internal_exchange_declared = false; - LOG_ERROR(log, "Failed to bind exchange: {}", message); + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange. Reason: {}", default_exchange, default_local_exchange, message); }); if (!exchange_type_set) @@ -124,26 +127,29 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting the same - * messages more than once - is having only one consumer with one queue, which is not good.) + * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting + * the same messages more than once - is having only one consumer with one queue, which is not good.) 
*/ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); + LOG_ERROR(log, "Failed to declare client's {} exchange: {}", exchange_type, message); }); - /// No need for declaring hash-exchange if there is only one consumer with one queue and exchange type is not hash - if (!bind_by_id && exchange_type != Exchange::HASH) + /// No need for declaring hash-exchange if there is only one consumer with one queue or exchange type is already hash + if (!bind_by_id) return; hash_exchange = true; + if (exchange_type == Exchange::HASH) + return; + AMQP::Table exchange_arguments; exchange_arguments["hash-property"] = "message_id"; - local_exchange_name = exchange_name + "_" + table_name; - consumer_channel->declareExchange(local_exchange_name, AMQP::consistent_hash, exchange_arguments) + String local_hash_exchange_name = local_exchange_name + "_hash"; + consumer_channel->declareExchange(local_hash_exchange_name, AMQP::consistent_hash, exchange_arguments) .onError([&](const char * message) { local_exchange_declared = false; @@ -152,7 +158,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() for (auto & routing_key : routing_keys) { - consumer_channel->bindExchange(exchange_name, local_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); @@ -164,19 +170,15 @@ void ReadBufferFromRabbitMQConsumer::initExchange() void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) { /// These variables might be updated later from a separate thread in onError callbacks. - if (!internal_exchange_declared || (exchange_type_set && !local_exchange_declared)) + if (!local_exchange_declared || (exchange_type_set && !local_hash_exchange_declared)) { initExchange(); local_exchange_declared = true; - internal_exchange_declared = true; + local_hash_exchange_declared = true; } - /* Internal exchange is a default exchange (by implementstion, not by rabbitmq settings) and is used for INSERT query - * and if exchange_type is not set - there is no local exchange. If it is set - then local exchange is a distributor - * exchange, which is bound to the exchange specified by the client. - */ - bool internal_bindings_created = false, internal_bindings_error = false; - bool local_bindings_created = false, local_bindings_error = false; + bool default_bindings_created = false, default_bindings_error = false; + bool bindings_created = false, bindings_error = false; consumer_channel->declareQueue(AMQP::exclusive) .onSuccess([&](const std::string & queue_name_, int /* msgcount */, int /* consumercount */) @@ -202,14 +204,14 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - consumer_channel->bindQueue(internal_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(default_local_exchange, queue_name_, binding_key) .onSuccess([&] { - internal_bindings_created = true; + default_bindings_created = true; }) .onError([&](const char * message) { - internal_bindings_error = true; + default_bindings_error = true; LOG_ERROR(log, "Failed to bind to key {}. 
Reason: {}", binding_key, message); }); @@ -223,17 +225,22 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) if (exchange_type_set) { - /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary if (hash_exchange) { - consumer_channel->bindQueue(local_exchange_name, queue_name_, binding_key) + /* If exchange_type == hash, then bind directly to this client's exchange (because there is no need for a distributor + * exchange as it is already hash-exchange), otherwise hash-exchange is a local distributor exchange. + */ + String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; + + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) .onSuccess([&] { - local_bindings_created = true; + bindings_created = true; }) .onError([&](const char * message) { - local_bindings_error = true; + bindings_error = true; LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } @@ -246,11 +253,11 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { - local_bindings_created = true; + bindings_created = true; }) .onError([&](const char * message) { - local_bindings_error = true; + bindings_error = true; LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); }); } @@ -259,7 +266,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) }) .onError([&](const char * message) { - internal_bindings_error = true; + default_bindings_error = true; LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); }); @@ -267,8 +274,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) * It is important at this moment to make sure that queue bindings are created before any publishing can happen because * otherwise messages will be routed nowhere. 
*/ - while (!internal_bindings_created && !internal_bindings_error - || (exchange_type_set && !local_bindings_created && !local_bindings_error)) + while (!default_bindings_created && !default_bindings_error || (exchange_type_set && !bindings_created && !bindings_error)) { startEventLoop(loop_started); } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 3d02eeab761..6a2c847357d 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -32,7 +32,7 @@ public: const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String table_name_, + const String & local_exchange_name_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -54,7 +54,7 @@ private: const bool bind_by_id; const size_t num_queues; const String & exchange_type; - const String table_name; + const String & local_exchange_name; Poco::Logger * log; char row_delimiter; @@ -62,8 +62,8 @@ private: bool allowed = true; const std::atomic & stopped; - String internal_exchange_name, local_exchange_name; - bool internal_exchange_declared = false, local_exchange_declared = false; + String default_local_exchange; + bool local_exchange_declared = false, local_hash_exchange_declared = false; bool exchange_type_set = false, hash_exchange = false; std::atomic loop_started = false, consumer_error = false; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 669cfe19aa5..d3811bdb0d2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -104,6 +104,10 @@ StorageRabbitMQ::StorageRabbitMQ( task->deactivate(); bind_by_id = num_consumers > 1 || num_queues > 1; + + auto table_id = getStorageID(); + String table_name = table_id.table_name; + local_exchange_name = exchange_name + "_" + table_name; } @@ -214,17 +218,17 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ChannelPtr consumer_channel = std::make_shared(&connection); - auto table_id = getStorageID(); - String table_name = table_id.getNameForLogs(); - return std::make_shared(consumer_channel, eventHandler, exchange_name, routing_keys, - next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, table_name, stream_cancelled); + next_channel_id, log, row_delimiter, bind_by_id, num_queues, exchange_type, local_exchange_name, stream_cancelled); } ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - String producer_exchange = exchange_type == "default" ? exchange_name : exchange_name + "_default"; + /* If exchange type is set, then there are different exchanges for external publishing and for INSERT query + * as in this case they are of different types. + */ + String producer_exchange = exchange_type == "default" ? 
local_exchange_name : local_exchange_name + "_default"; return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, log, num_consumers * num_queues, bind_by_id, use_transactional_channel, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index e056faa0d65..79e4d5e4ca2 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -74,6 +74,7 @@ private: Names routing_keys; const String exchange_name; + String local_exchange_name; const String format_name; char row_delimiter; From 9c49398728909f0ae375aed8e3de17673405cc3c Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 18:44:17 +0000 Subject: [PATCH 0634/2229] Fix tests --- .../integration/test_storage_rabbitmq/test.py | 64 ++++++++++++++++--- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 37163db06f4..8442a7ecb0a 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -33,7 +33,7 @@ def check_rabbitmq_is_available(): 'exec', '-i', rabbitmq_id, - 'rabbitmqctl', + 'rabbitmqctl', 'await_startup'), stdout=subprocess.PIPE) p.communicate() @@ -774,6 +774,7 @@ def test_rabbitmq_insert(rabbitmq_cluster): CREATE TABLE test.rabbitmq (key UInt64, value UInt64) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'insert', rabbitmq_routing_key_list = 'insert1', rabbitmq_format = 'TSV', rabbitmq_row_delimiter = '\\n'; @@ -784,10 +785,10 @@ def test_rabbitmq_insert(rabbitmq_cluster): consumer_connection = pika.BlockingConnection(parameters) consumer = consumer_connection.channel() - consumer.exchange_declare(exchange='clickhouse-exchange', exchange_type='fanout') + consumer.exchange_declare(exchange='insert_rabbitmq_direct', exchange_type='direct') result = consumer.queue_declare(queue='') queue_name = result.method.queue - consumer.queue_bind(exchange='clickhouse-exchange', queue=queue_name, routing_key='insert1') + consumer.queue_bind(exchange='insert_rabbitmq_direct', queue=queue_name, routing_key='insert1') values = [] for i in range(50): @@ -871,8 +872,9 @@ def test_rabbitmq_many_inserts(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_many; - DROP TABLE test.view_many; + DROP TABLE IF EXISTS test.rabbitmq_many; + DROP TABLE IF EXISTS test.consumer_many; + DROP TABLE IF EXISTS test.view_many; ''') for thread in threads: @@ -932,8 +934,9 @@ def test_rabbitmq_sharding_between_channels_and_queues_insert(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_sharding; - DROP TABLE test.view_sharding; + DROP TABLE IF EXISTS test.rabbitmq_sharding; + DROP TABLE IF EXISTS test.consumer_sharding; + DROP TABLE IF EXISTS test.view_sharding; ''') for thread in threads: @@ -992,8 +995,9 @@ def test_rabbitmq_overloaded_insert(rabbitmq_cluster): break instance.query(''' - DROP TABLE test.consumer_overload; - DROP TABLE test.view_overload; + DROP TABLE IF EXISTS test.rabbitmq_overload; + DROP TABLE IF EXISTS test.consumer_overload; + DROP TABLE IF EXISTS test.view_overload; ''') for thread in threads: @@ -1060,6 +1064,16 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS 
test.direct_exchange_{0}; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1118,6 +1132,16 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.fanout_exchange_{0}; + DROP TABLE IF EXISTS test.fanout_exchange_{0}_mv; + '''.format(consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1201,6 +1225,22 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables + messages_num * num_tables: break + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + '''.format(consumer_id)) + + for consumer_id in range(num_tables): + instance.query(''' + DROP TABLE IF EXISTS test.topic_exchange_{0}; + DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; + '''.format(num_tables + consumer_id)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) @@ -1371,6 +1411,12 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): for thread in threads: thread.join() + instance.query(''' + DROP TABLE IF EXISTS test.bindings_1; + DROP TABLE IF EXISTS test.bindings_2; + DROP TABLE IF EXISTS test.destination; + ''') + assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) From c70f7778fc6144d22c69b3a1972b4720e3b6788e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 13 Jun 2020 23:05:13 +0300 Subject: [PATCH 0635/2229] trigger ci From 5f82cc0021bc04177ab2e34e8f4709e61a1a9744 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jun 2020 16:39:21 +0300 Subject: [PATCH 0636/2229] Fix description for LoadBalancing::IN_ORDER --- src/Core/SettingsCollection.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsCollection.h b/src/Core/SettingsCollection.h index 1fe5762de4c..aed8dc6a929 100644 --- a/src/Core/SettingsCollection.h +++ b/src/Core/SettingsCollection.h @@ -225,7 +225,8 @@ enum class LoadBalancing /// a replica is selected among the replicas with the minimum number of errors /// with the minimum number of distinguished characters in the replica name and local hostname NEAREST_HOSTNAME, - /// replicas are walked through strictly in order; the number of errors does not matter + // replicas with the same number of errors are accessed in the same order + // as they are specified in the configuration. 
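+    // (For example, with replicas r1, r2, r3 that all have the same error count, r1 is always tried first, then r2, then r3.)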
IN_ORDER, /// if the first replica has a higher number of errors, /// pick a random one from replicas with minimum number of errors From 2cd82a25f52bc7bddc6eb642401286ba8dc1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 13 Jun 2020 23:59:20 +0300 Subject: [PATCH 0637/2229] Remove trivial count query optimization if row-level security is set #11352 --- src/Interpreters/ExpressionAnalyzer.h | 2 ++ src/Interpreters/InterpreterSelectQuery.cpp | 17 ++++++++++------- src/Interpreters/InterpreterSelectQuery.h | 4 +++- .../ReplicatedMergeTreeBlockOutputStream.cpp | 3 ++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ed07ab3fe36..c69cb61162f 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -208,7 +208,9 @@ struct ExpressionAnalysisResult const FilterInfoPtr & filter_info, const Block & source_header); + /// Filter for row-level security. bool hasFilter() const { return filter_info.get(); } + bool hasJoin() const { return before_join.get(); } bool hasPrewhere() const { return prewhere_info.get(); } bool hasWhere() const { return before_where.get(); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f9072e6176a..98cf36cc30b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -94,7 +94,8 @@ namespace ErrorCodes } /// Assumes `storage` is set and the table filter (row-level security) is not empty. -String InterpreterSelectQuery::generateFilterActions(ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns) const +String InterpreterSelectQuery::generateFilterActions( + ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns) const { const auto & db_name = table_id.getDatabaseName(); const auto & table_name = table_id.getTableName(); @@ -474,8 +475,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl() second_stage, options.only_analyze, filter_info, - source_header - ); + source_header); if (options.to_stage == QueryProcessingStage::Enum::FetchColumns) { @@ -979,10 +979,13 @@ void InterpreterSelectQuery::executeFetchColumns( /// Optimization for trivial query like SELECT count() FROM table. 
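    /// (Skipped below when a row policy is attached: count() would then have to count only the rows passing the filter, which cannot be read from part-level metadata - hence the new !filter_info condition.)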
bool optimize_trivial_count = - syntax_analyzer_result->optimize_trivial_count && storage && - processing_stage == QueryProcessingStage::FetchColumns && - query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) && - typeid_cast<const AggregateFunctionCount *>(query_analyzer->aggregates()[0].function.get()); + syntax_analyzer_result->optimize_trivial_count + && storage + && !filter_info + && processing_stage == QueryProcessingStage::FetchColumns + && query_analyzer->hasAggregation() + && (query_analyzer->aggregates().size() == 1) + && typeid_cast<const AggregateFunctionCount *>(query_analyzer->aggregates()[0].function.get()); if (optimize_trivial_count) { diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 34d255e398e..c60451d5f4a 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -132,7 +132,8 @@ private: void executeSubqueriesInSetsAndJoins(QueryPipeline & pipeline, const std::unordered_map<String, SubqueryForSet> & subqueries_for_sets); void executeMergeSorted(QueryPipeline & pipeline, const SortDescription & sort_description, UInt64 limit); - String generateFilterActions(ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns = {}) const; + String generateFilterActions( + ExpressionActionsPtr & actions, const ASTPtr & row_policy_filter, const Names & prerequisite_columns = {}) const; enum class Modificator { @@ -159,6 +160,7 @@ private: /// Is calculated in getSampleBlock. Is used later in readImpl. ExpressionAnalysisResult analysis_result; + /// For row-level security. FilterInfoPtr filter_info; QueryProcessingStage::Enum from_stage = QueryProcessingStage::FetchColumns; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index c67ea11f56f..1bbc56d940d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -198,7 +198,8 @@ void ReplicatedMergeTreeBlockOutputStream::writeExistingPart(MergeTreeData::Muta } -void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id) +void ReplicatedMergeTreeBlockOutputStream::commitPart( + zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id) { storage.check(part->getColumns()); assertSessionIsNotExpired(zookeeper); From a421e7e4b40155ddaaab72d0269a68db4f289020 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 00:13:52 +0300 Subject: [PATCH 0638/2229] Added a test --- programs/server/users.d/access_management.xml | 7 +++++++ ...1308_row_policy_and_trivial_count_query.reference | 3 +++ .../01308_row_policy_and_trivial_count_query.sql | 12 ++++++++++++ 3 files changed, 22 insertions(+) create mode 100644 programs/server/users.d/access_management.xml create mode 100644 tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.reference create mode 100644 tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql diff --git a/programs/server/users.d/access_management.xml b/programs/server/users.d/access_management.xml new file mode 100644 index 00000000000..7e799cb7b10 --- /dev/null +++ b/programs/server/users.d/access_management.xml @@ -0,0 +1,7 @@ +<yandex> + <users> + <default> + <access_management>1</access_management> + </default> + </users> +</yandex> diff --git a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.reference 
b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.reference new file mode 100644 index 00000000000..61150aca43c --- /dev/null +++ b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.reference @@ -0,0 +1,3 @@ +3 +2 +3 diff --git a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql new file mode 100644 index 00000000000..c105885cb60 --- /dev/null +++ b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql @@ -0,0 +1,12 @@ +DROP TABLE IF EXISTS t; + +CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY x; +INSERT INTO t VALUES (1), (2), (3); + +SELECT count() FROM t; +CREATE ROW POLICY filter ON t USING (x % 2 = 1) TO ALL; +SELECT count() FROM t; +DROP ROW POLICY filter ON t; +SELECT count() FROM t; + +DROP TABLE t; From dcd7b7351c23bbe01e81493ecbad6fc2504822e4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 13 Jun 2020 21:37:37 +0000 Subject: [PATCH 0639/2229] Support headers-exchange type --- .../ReadBufferFromRabbitMQConsumer.cpp | 57 ++++++++++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 +- .../integration/test_storage_rabbitmq/test.py | 97 +++++++++++++++++++ 3 files changed, 152 insertions(+), 12 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 90485b28a96..31ca4f280e3 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "Poco/Timer.h" #include @@ -122,8 +123,7 @@ void ReadBufferFromRabbitMQConsumer::initExchange() else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else if (exchange_type == Exchange::HEADERS) - throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); + else if (exchange_type == Exchange::HEADERS) type = AMQP::ExchangeType::headers; else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages @@ -156,14 +156,37 @@ void ReadBufferFromRabbitMQConsumer::initExchange() LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - for (auto & routing_key : routing_keys) + if (exchange_type == Exchange::HEADERS) { - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + AMQP::Table binding_arguments; + std::vector matching; + + for (auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + /// Routing key can be arbitrary here. 
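+        /// (For example, rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report' is parsed above into the binding arguments {x-match: all, format: logs, type: report}; with x-match=all a message must carry all listed headers to match, with x-match=any a single matching header is enough.)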
+ consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_keys[0], binding_arguments) + .onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); }); } + else + { + for (auto & routing_key : routing_keys) + { + consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + }); + } + } } @@ -232,7 +255,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) */ String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; - /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary + /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary. consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) .onSuccess([&] { @@ -244,6 +267,30 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } + else if (exchange_type == Exchange::HEADERS) + { + AMQP::Table binding_arguments; + std::vector matching; + + /// It is not parsed for the second time - if it was parsed above, then it would go to the first if statement, not here. + for (auto & header : routing_keys) + { + boost::split(matching, header, [](char c){ return c == '='; }); + binding_arguments[matching[0]] = matching[1]; + matching.clear(); + } + + consumer_channel->bindQueue(exchange_name, queue_name_, routing_keys[0], binding_arguments) + .onSuccess([&] + { + bindings_created = true; + }) + .onError([&](const char * message) + { + bindings_error = true; + LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_keys[0], message); + }); + } else { /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. 
diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index d3811bdb0d2..852edd24726 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -458,13 +458,9 @@ void registerStorageRabbitMQ(StorageFactory & factory) exchange_type = safeGet(ast->value); } - if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" && exchange_type != "consistent_hash") - { - if (exchange_type == "headers") - throw Exception("Headers exchange is not supported", ErrorCodes::BAD_ARGUMENTS); - else - throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); - } + if (exchange_type != "fanout" && exchange_type != "direct" && exchange_type != "topic" + && exchange_type != "headers" && exchange_type != "consistent_hash") + throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); } UInt64 num_consumers = rabbitmq_settings.rabbitmq_num_consumers; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 8442a7ecb0a..f58e898a45f 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1420,6 +1420,103 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): assert int(result) == messages_num * threads_num * 5 * 2, 'ClickHouse lost some messages: {}'.format(result) +@pytest.mark.timeout(420) +def test_rabbitmq_headers_exchange(rabbitmq_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.destination; + CREATE TABLE test.destination(key UInt64, value UInt64, + _consumed_by LowCardinality(String)) + ENGINE = MergeTree() + ORDER BY key; + ''') + + num_tables_to_receive = 3 + for consumer_id in range(num_tables_to_receive): + print("Setting up table {}".format(consumer_id)) + instance.query(''' + DROP TABLE IF EXISTS test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_num_consumers = 4, + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id)) + + num_tables_to_ignore = 2 + for consumer_id in range(num_tables_to_ignore): + print("Setting up table {}".format(consumer_id + num_tables_to_receive)) + instance.query(''' + DROP TABLE IF EXISTS test.headers_exchange_{0}; + DROP TABLE IF EXISTS test.headers_exchange_{0}_mv; + CREATE TABLE test.headers_exchange_{0} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = 'rabbitmq1:5672', + rabbitmq_exchange_name = 'headers_exchange_testing', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'x-match=all,format=logs,type=report,year=2019', + rabbitmq_format = 'JSONEachRow', + rabbitmq_row_delimiter = '\\n'; + CREATE MATERIALIZED VIEW test.headers_exchange_{0}_mv TO test.destination AS + SELECT key, value, '{0}' as _consumed_by FROM test.headers_exchange_{0}; + '''.format(consumer_id + num_tables_to_receive)) + + i = [0] + messages_num = 1000 + + credentials = pika.PlainCredentials('root', 'clickhouse') + parameters = 
pika.ConnectionParameters('localhost', 5672, '/', credentials) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + channel.exchange_declare(exchange='headers_exchange_testing', exchange_type='headers') + + messages = [] + for _ in range(messages_num): + messages.append(json.dumps({'key': i[0], 'value': i[0]})) + i[0] += 1 + + fields={} + fields['format']='logs' + fields['type']='report' + fields['year']='2020' + + key_num = 0 + for message in messages: + channel.basic_publish(exchange='headers_exchange_testing', routing_key='', + properties=pika.BasicProperties(headers=fields), body=message) + + connection.close() + + while True: + result = instance.query('SELECT count() FROM test.destination') + time.sleep(1) + if int(result) == messages_num * num_tables_to_receive: + break + + for consumer_id in range(num_tables_to_receive): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS test.direct_exchange_{0}; + '''.format(consumer_id)) + for consumer_id in range(num_tables_to_ignore): + instance.query(''' + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; + DROP TABLE IF EXISTS test.direct_exchange_{0}; + '''.format(consumer_id + num_tables_to_receive)) + + instance.query(''' + DROP TABLE IF EXISTS test.destination; + ''') + + assert int(result) == messages_num * num_tables_to_receive, 'ClickHouse lost some messages: {}'.format(result) + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") From c139a05370bf1656f5f75e4e35021683bcf72c37 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jun 2020 19:31:28 +0300 Subject: [PATCH 0640/2229] Forward declaration in StorageDistributed --- src/Storages/Distributed/DirectoryMonitor.cpp | 1 + src/Storages/StorageDistributed.cpp | 2 ++ src/Storages/StorageDistributed.h | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index a491cc411b1..8b45573464f 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d80fee1e4dc..ec69a75e1ba 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -46,6 +47,7 @@ #include #include +#include #include diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 4067012c449..c78097d8abe 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -7,8 +7,6 @@ #include #include #include -#include -#include #include #include #include @@ -17,6 +15,7 @@ namespace DB { +struct Settings; class Context; class VolumeJBOD; @@ -25,6 +24,9 @@ using VolumeJBODPtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; +class Cluster; +using ClusterPtr = std::shared_ptr; + /** A distributed table that resides on multiple servers. * Uses data from the specified database and tables on each server. 
* From 0e378590fe5ecf7091730e019d78854ab87ef359 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jun 2020 20:23:51 +0300 Subject: [PATCH 0641/2229] Add load_balancing=round_robin --- docs/en/operations/settings/settings.md | 9 +++++++++ src/Client/ConnectionPoolWithFailover.cpp | 13 ++++++++++++- src/Client/ConnectionPoolWithFailover.h | 1 + src/Core/SettingsCollection.cpp | 3 ++- src/Core/SettingsCollection.h | 2 ++ 5 files changed, 26 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f29866d4980..3ed6e240f29 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -822,6 +822,7 @@ ClickHouse supports the following algorithms of choosing replicas: - [Nearest hostname](#load_balancing-nearest_hostname) - [In order](#load_balancing-in_order) - [First or random](#load_balancing-first_or_random) +- [Round robin](#load_balancing-round_robin) ### Random (by Default) {#load_balancing-random} @@ -865,6 +866,14 @@ This algorithm chooses the first replica in the set or a random replica if the f The `first_or_random` algorithm solves the problem of the `in_order` algorithm. With `in_order`, if one replica goes down, the next one gets a double load while the remaining replicas handle the usual amount of traffic. When using the `first_or_random` algorithm, the load is evenly distributed among replicas that are still available. +### Round robin {#load_balancing-round_robin} + +``` sql +load_balancing = round_robin +``` + +This algorithm uses round robin policy across replicas with the same number of errors (only the queries with `round_robin` policy is accounted). + ## prefer\_localhost\_replica {#settings-prefer-localhost-replica} Enables/disables preferable using the localhost replica when processing distributed queries. diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 713bb33342f..257f4d36c33 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -11,7 +11,6 @@ #include - namespace ProfileEvents { extern const Event DistributedConnectionMissingTable; @@ -71,6 +70,12 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts case LoadBalancing::FIRST_OR_RANDOM: get_priority = [](size_t i) -> size_t { return i >= 1; }; break; + case LoadBalancing::ROUND_ROBIN: + if (last_used >= nested_pools.size()) + last_used = 0; + ++last_used; + get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; + break; } return Base::get(try_get_entry, get_priority); @@ -181,6 +186,12 @@ std::vector ConnectionPoolWithFailover::g case LoadBalancing::FIRST_OR_RANDOM: get_priority = [](size_t i) -> size_t { return i >= 1; }; break; + case LoadBalancing::ROUND_ROBIN: + if (last_used >= nested_pools.size()) + last_used = 0; + ++last_used; + get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; + break; } bool fallback_to_stale_replicas = settings ? 
bool(settings->fallback_to_stale_replicas_for_distributed_queries) : true;

diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h
index bdc06656ff1..10dea98c8f7 100644
--- a/src/Client/ConnectionPoolWithFailover.h
+++ b/src/Client/ConnectionPoolWithFailover.h
@@ -97,6 +97,7 @@ private:
 private:
     std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools.
+    size_t last_used = 0; /// Last used for round_robin policy.
     LoadBalancing default_load_balancing;
 };

diff --git a/src/Core/SettingsCollection.cpp b/src/Core/SettingsCollection.cpp
index 324ad889a65..b36884fad22 100644
--- a/src/Core/SettingsCollection.cpp
+++ b/src/Core/SettingsCollection.cpp
@@ -481,7 +481,8 @@ void SettingURI::deserialize(ReadBuffer & buf, SettingsBinaryFormat)
     M(RANDOM, "random") \
     M(NEAREST_HOSTNAME, "nearest_hostname") \
     M(IN_ORDER, "in_order") \
-    M(FIRST_OR_RANDOM, "first_or_random")
+    M(FIRST_OR_RANDOM, "first_or_random") \
+    M(ROUND_ROBIN, "round_robin")

 IMPLEMENT_SETTING_ENUM(LoadBalancing, LOAD_BALANCING_LIST_OF_NAMES, ErrorCodes::UNKNOWN_LOAD_BALANCING)

diff --git a/src/Core/SettingsCollection.h b/src/Core/SettingsCollection.h
index aed8dc6a929..71a308fb37e 100644
--- a/src/Core/SettingsCollection.h
+++ b/src/Core/SettingsCollection.h
@@ -231,6 +231,8 @@ enum class LoadBalancing
     /// if the first replica has a higher number of errors,
     /// pick a random one from replicas with minimum number of errors
     FIRST_OR_RANDOM,
+    /// Round robin across replicas with the same number of errors.
+    ROUND_ROBIN,
 };

 using SettingLoadBalancing = SettingEnum;

From 9386478a77f5822f074da5742ac287c0b6acc41a Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 13 Jun 2020 21:33:58 +0300
Subject: [PATCH 0642/2229] Add test for load_balancing algorithms

---
 .../__init__.py                               |   0
 .../configs/remote_servers.xml                |  36 ++++++
 .../test_distributed_load_balancing/test.py   | 114 ++++++++++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 tests/integration/test_distributed_load_balancing/__init__.py
 create mode 100644 tests/integration/test_distributed_load_balancing/configs/remote_servers.xml
 create mode 100644 tests/integration/test_distributed_load_balancing/test.py

diff --git a/tests/integration/test_distributed_load_balancing/__init__.py b/tests/integration/test_distributed_load_balancing/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml
new file mode 100644
index 00000000000..9efd681e74e
--- /dev/null
+++ b/tests/integration/test_distributed_load_balancing/configs/remote_servers.xml
@@ -0,0 +1,36 @@
+<yandex>
+    <remote_servers>
+        <replicas_cluster>
+            <shard>
+                <replica>
+                    <host>n1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </replicas_cluster>
+        <shards_cluster>
+            <shard>
+                <replica>
+                    <host>n1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n2</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>n3</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </shards_cluster>
+    </remote_servers>
+</yandex>
diff --git a/tests/integration/test_distributed_load_balancing/test.py b/tests/integration/test_distributed_load_balancing/test.py
new file mode 100644
index 00000000000..fa6dfb20a88
--- /dev/null
+++ b/tests/integration/test_distributed_load_balancing/test.py
@@ -0,0 +1,114 @@
+# pylint: disable=unused-argument
+# pylint: disable=redefined-outer-name
+# pylint: disable=line-too-long
+
+import uuid
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+n1 = cluster.add_instance('n1', main_configs=['configs/remote_servers.xml'])
+n2 = cluster.add_instance('n2', main_configs=['configs/remote_servers.xml'])
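As an aside before the rest of the test file, here is a minimal, self-contained sketch of how the `round_robin` branch above maps pool indices to priorities on successive queries. It is not part of the patch: the pool count, the assumption that the pool with the lowest priority value is tried first, and the output formatting are illustrative only.

```cpp
#include <cstddef>
#include <iostream>

// Standalone illustration of the ROUND_ROBIN priority lambda:
//   get_priority = [&](size_t i) { ++i; return i < last_used ? n - i : i - last_used; };
// Assumption: the pool with the lowest priority value wins.
int main()
{
    const size_t pool_count = 3; // e.g. three replicas
    size_t last_used = 0;        // mirrors the member added to ConnectionPoolWithFailover

    for (int query = 0; query < 4; ++query)
    {
        if (last_used >= pool_count)
            last_used = 0;
        ++last_used;

        std::cout << "query " << query << ':';
        for (size_t i = 0; i < pool_count; ++i)
        {
            size_t j = i + 1; // the lambda increments its own copy of i
            std::cout << ' ' << (j < last_used ? pool_count - j : j - last_used);
        }
        std::cout << '\n';
    }
    // Prints: 0 1 2 / 2 0 1 / 2 1 0 / 0 1 2 - the preferred pool rotates.
}
```

With three pools, the minimum rotates through indices 0, 1, 2 and wraps around, which is exactly what `test_load_balancing_round_robin` below asserts via the set of unique node names.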
+n3 = cluster.add_instance('n3', main_configs=['configs/remote_servers.xml']) + +nodes = len(cluster.instances) +queries = nodes*5 + +def create_tables(): + for n in cluster.instances.values(): + n.query('DROP TABLE IF EXISTS data') + n.query('DROP TABLE IF EXISTS dist') + n.query('CREATE TABLE data (key Int) Engine=Memory()') + n.query(""" + CREATE TABLE dist AS data + Engine=Distributed( + replicas_cluster, + currentDatabase(), + data) + """.format()) + +def make_uuid(): + return uuid.uuid4().hex + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + create_tables() + yield cluster + finally: + cluster.shutdown() + +def get_node(query_node, *args, **kwargs): + query_id = make_uuid() + + settings = { + 'query_id': query_id, + 'log_queries': 1, + 'log_queries_min_type': 'QUERY_START', + 'prefer_localhost_replica': 0, + } + if 'settings' not in kwargs: + kwargs['settings'] = settings + else: + kwargs['settings'].update(settings) + + query_node.query('SELECT * FROM dist', *args, **kwargs) + + for n in cluster.instances.values(): + n.query('SYSTEM FLUSH LOGS') + + rows = query_node.query(""" + SELECT c.host_name + FROM ( + SELECT _shard_num + FROM cluster(shards_cluster, system.query_log) + WHERE + initial_query_id = '{query_id}' AND + is_initial_query = 0 AND + type = 'QueryFinish' + ORDER BY event_date DESC, event_time DESC + LIMIT 1 + ) a + JOIN system.clusters c + ON a._shard_num = c.shard_num AND cluster = 'shards_cluster' + """.format(query_id=query_id)) + return rows.strip() + +# TODO: right now random distribution looks bad, but works +def test_load_balancing_default(): + unique_nodes = set() + for _ in range(0, queries): + unique_nodes.add(get_node(n1, settings={'load_balancing': 'random'})) + assert len(unique_nodes) == nodes, unique_nodes + +def test_load_balancing_nearest_hostname(): + unique_nodes = set() + for _ in range(0, queries): + unique_nodes.add(get_node(n1, settings={'load_balancing': 'nearest_hostname'})) + assert len(unique_nodes) == 1, unique_nodes + assert unique_nodes == set(['n1']) + +def test_load_balancing_in_order(): + unique_nodes = set() + for _ in range(0, queries): + unique_nodes.add(get_node(n1, settings={'load_balancing': 'in_order'})) + assert len(unique_nodes) == 1, unique_nodes + assert unique_nodes == set(['n1']) + +def test_load_balancing_first_or_random(): + unique_nodes = set() + for _ in range(0, queries): + unique_nodes.add(get_node(n1, settings={'load_balancing': 'first_or_random'})) + assert len(unique_nodes) == 1, unique_nodes + assert unique_nodes == set(['n1']) + +# TODO: last_used will be reset on config reload, hence may fail +def test_load_balancing_round_robin(): + unique_nodes = set() + for _ in range(0, nodes): + unique_nodes.add(get_node(n1, settings={'load_balancing': 'round_robin'})) + assert len(unique_nodes) == nodes, unique_nodes + assert unique_nodes == set(['n1', 'n2', 'n3']) From 844140467e6fdb2be464dd48b2153994fbdc0e5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 01:18:48 +0300 Subject: [PATCH 0643/2229] Simplify the code in comparison functions --- src/Functions/FunctionsComparison.h | 187 ++++++---------------------- 1 file changed, 41 insertions(+), 146 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 642995974b5..12da4c772d1 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -812,94 +813,49 @@ 
private:
     }
 }

-    bool executeDateOrDateTimeOrEnumOrUUIDWithConstString(
+    bool executeWithConstString(
         Block & block, size_t result, const IColumn * col_left_untyped, const IColumn * col_right_untyped,
-        const DataTypePtr & left_type, const DataTypePtr & right_type, bool left_is_num, size_t input_rows_count)
+        const DataTypePtr & left_type, const DataTypePtr & right_type, size_t input_rows_count)
     {
-        /// This is no longer very special case - comparing dates, datetimes, and enumerations with a string constant.
-        const IColumn * column_string_untyped = !left_is_num ? col_left_untyped : col_right_untyped;
-        const IColumn * column_number = left_is_num ? col_left_untyped : col_right_untyped;
-        const IDataType * number_type = left_is_num ? left_type.get() : right_type.get();
+        /// To compare something with a const string, we cast the constant to the appropriate type and compare as usual.
+        /// We should deal with possible overflows, e.g. toUInt8(1) = '257' should return false.

-        WhichDataType which(number_type);
+        const ColumnConst * left_const = checkAndGetColumnConstStringOrFixedString(col_left_untyped);
+        const ColumnConst * right_const = checkAndGetColumnConstStringOrFixedString(col_right_untyped);

-        const bool legal_types = which.isDateOrDateTime() || which.isEnum() || which.isUUID();
-
-        const auto column_string = checkAndGetColumnConst(column_string_untyped);
-        if (!column_string || !legal_types)
+        if (!left_const && !right_const)
             return false;

-        StringRef string_value = column_string->getDataAt(0);
+        const IDataType * type_string = left_const ? left_type.get() : right_type.get();
+        const DataTypePtr & type_to_compare = !left_const ? left_type : right_type;

-        if (which.isDate())
+        Field string_value = left_const ? left_const->getField() : right_const->getField();
+        Field converted = convertFieldToType(string_value, *type_to_compare, type_string);
+
+        /// If the conversion is not possible, the comparison yields false.
+        if (converted.isNull())
         {
-            DayNum date;
-            ReadBufferFromMemory in(string_value.data, string_value.size);
-            readDateText(date, in);
-            if (!in.eof())
-                throw Exception("String is too long for Date: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE);
-
-            ColumnPtr parsed_const_date_holder = DataTypeDate().createColumnConst(input_rows_count, date);
-            const ColumnConst * parsed_const_date = assert_cast(parsed_const_date_holder.get());
-            executeNumLeftType(block, result,
-                left_is_num ? col_left_untyped : parsed_const_date,
-                left_is_num ? parsed_const_date : col_right_untyped);
+            block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0);
         }
-        else if (which.isDateTime())
+        else
         {
-            time_t date_time;
-            ReadBufferFromMemory in(string_value.data, string_value.size);
-            readDateTimeText(date_time, in, dynamic_cast(*number_type).getTimeZone());
-            if (!in.eof())
-                throw Exception("String is too long for DateTime: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE);
+            auto column_converted = type_to_compare->createColumnConst(input_rows_count, converted);

-            ColumnPtr parsed_const_date_time_holder = DataTypeDateTime().createColumnConst(input_rows_count, UInt64(date_time));
-            const ColumnConst * parsed_const_date_time = assert_cast(parsed_const_date_time_holder.get());
-            executeNumLeftType(block, result,
-                left_is_num ? col_left_untyped : parsed_const_date_time,
-                left_is_num ? parsed_const_date_time : col_right_untyped);
+            Block tmp_block
+            {
+                { left_const ? column_converted : col_left_untyped->getPtr(), type_to_compare, "" },
+                { !left_const ? column_converted : col_right_untyped->getPtr(), type_to_compare, "" },
+                block.getByPosition(result)
+            };
+
+            executeImpl(tmp_block, {0, 1}, 2, input_rows_count);
+
+            block.getByPosition(result).column = std::move(tmp_block.getByPosition(2).column);
         }
-        else if (which.isUUID())
-        {
-            UUID uuid;
-            ReadBufferFromMemory in(string_value.data, string_value.size);
-            readText(uuid, in);
-            if (!in.eof())
-                throw Exception("String is too long for UUID: " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE);
-
-            ColumnPtr parsed_const_uuid_holder = DataTypeUUID().createColumnConst(input_rows_count, uuid);
-            const ColumnConst * parsed_const_uuid = assert_cast(parsed_const_uuid_holder.get());
-            executeNumLeftType(block, result,
-                left_is_num ? col_left_untyped : parsed_const_uuid,
-                left_is_num ? parsed_const_uuid : col_right_untyped);
-        }
-
-        else if (which.isEnum8())
-            executeEnumWithConstString(block, result, column_number, column_string,
-                number_type, left_is_num, input_rows_count);
-        else if (which.isEnum16())
-            executeEnumWithConstString(block, result, column_number, column_string,
-                number_type, left_is_num, input_rows_count);

         return true;
     }

-    /// Comparison between DataTypeEnum and string constant containing the name of an enum element
-    template
-    void executeEnumWithConstString(
-        Block & block, const size_t result, const IColumn * column_number, const ColumnConst * column_string,
-        const IDataType * type_untyped, const bool left_is_num, size_t input_rows_count)
-    {
-        const auto type = static_cast(type_untyped);
-
-        const Field x = castToNearestFieldType(type->getValue(column_string->getValue()));
-        const auto enum_col = type->createColumnConst(input_rows_count, x);
-
-        executeNumLeftType(block, result,
-            left_is_num ? column_number : enum_col.get(),
-            left_is_num ? enum_col.get() : column_number);
-    }
-
     void executeTuple(Block & block, size_t result, const ColumnWithTypeAndName & c0, const ColumnWithTypeAndName & c1,
         size_t input_rows_count)
     {
@@ -1124,17 +1080,11 @@ public:
         bool has_date = left.isDate() || right.isDate();

         if (!((both_represented_by_number && !has_date)   /// Do not allow comparing date and number.
-            || (left.isStringOrFixedString() && right.isStringOrFixedString())
+            || (left.isStringOrFixedString() || right.isStringOrFixedString())
             /// Everything can be compared with a string by conversion.
             /// You can compare the date, datetime, or DateTime64 and an enumeration with a constant string.
- || (left.isString() && right.isDateOrDateTime()) - || (left.isDateOrDateTime() && right.isString()) || (left.isDateOrDateTime() && right.isDateOrDateTime() && left.idx == right.idx) /// only date vs date, or datetime vs datetime || (left.isUUID() && right.isUUID()) - || (left.isUUID() && right.isString()) - || (left.isString() && right.isUUID()) || (left.isEnum() && right.isEnum() && arguments[0]->getName() == arguments[1]->getName()) /// only equivalent enum type values can be compared against - || (left.isEnum() && right.isString()) - || (left.isString() && right.isEnum()) || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) || (arguments[0]->equals(*arguments[1])))) { @@ -1151,7 +1101,8 @@ public: if (left_tuple && right_tuple) { - auto adaptor = FunctionOverloadResolverAdaptor(std::make_unique(FunctionComparison::create(context))); + auto adaptor = FunctionOverloadResolverAdaptor(std::make_unique( + FunctionComparison::create(context))); size_t size = left_tuple->getElements().size(); for (size_t i = 0; i < size; ++i) @@ -1201,6 +1152,9 @@ public: const bool left_is_num = col_left_untyped->isNumeric(); const bool right_is_num = col_right_untyped->isNumeric(); + const bool left_is_string = isStringOrFixedString(which_left); + const bool right_is_string = isStringOrFixedString(which_right); + bool date_and_datetime = (left_type != right_type) && which_left.isDateOrDateTime() && which_right.isDateOrDateTime(); @@ -1226,64 +1180,14 @@ public: { executeTuple(block, result, col_with_type_and_name_left, col_with_type_and_name_right, input_rows_count); } - else if (which_left.idx != which_right.idx - && (which_left.isDateTime64() || which_right.isDateTime64()) - && (which_left.isStringOrFixedString() || which_right.isStringOrFixedString())) + else if (left_is_string && right_is_string && executeString(block, result, col_left_untyped, col_right_untyped)) + { + } + else if (executeWithConstString( + block, result, col_left_untyped, col_right_untyped, + left_type, right_type, + input_rows_count)) { - /** Special case of comparing DateTime64 against a string. - * - * Can't be moved to executeDateOrDateTimeOrEnumOrUUIDWithConstString() - * since DateTime64 is basically a Decimal, but we do similar things, except type inference. - * Outline: - * - Extract string content - * - Parse it as a ColumnDateTime64 value (same type as DateTime64, means same precision) - * - Fabricate a column with type and name - * - Compare left and right comlumns as DateTime64 columns. - */ - - const size_t datetime64_col_index = which_left.isDateTime64() ? 0 : 1; - const size_t string_col_index = which_left.isStringOrFixedString() ? 0 : 1; - - const auto & datetime64_col_with_type_and_name = block.getByPosition(arguments[datetime64_col_index]); - const auto & string_col_with_type_and_name = block.getByPosition(arguments[string_col_index]); - - if (!isColumnConst(*string_col_with_type_and_name.column)) - throw Exception(getName() + ", illegal column type of argument #" + std::to_string(string_col_index) - + " '" + string_col_with_type_and_name.name + "'" - " expected const String or const FixedString," - " got " + string_col_with_type_and_name.type->getName(), - ErrorCodes::ILLEGAL_COLUMN); - - if (datetime64_col_with_type_and_name.column->size() == 0 || string_col_with_type_and_name.column->size() == 0) - { - // For some reason, when both left and right columns are empty (dry run while building a header block) - // executeDecimal() fills result column with bogus value. 
- block.getByPosition(result).column = ColumnUInt8::create(); - return; - } - - auto parsed_tmp_column_holder = datetime64_col_with_type_and_name.type->createColumn(); - - { - const StringRef string_value = string_col_with_type_and_name.column->getDataAt(0); - ReadBufferFromMemory in(string_value.data, string_value.size); - datetime64_col_with_type_and_name.type->deserializeAsWholeText(*parsed_tmp_column_holder, in, FormatSettings{}); - - if (!in.eof()) - throw Exception(getName() + ": String is too long for " + datetime64_col_with_type_and_name.type->getName() + " : " + string_value.toString(), ErrorCodes::TOO_LARGE_STRING_SIZE); - } - - // It is necessary to wrap tmp column in ColumnConst to avoid overflow when comparing. - // (non-const columns are expected to have same number of rows as every other column in block). - const ColumnWithTypeAndName parsed_tmp_col_with_type_and_name{ - ColumnConst::create(std::move(parsed_tmp_column_holder), 1), - datetime64_col_with_type_and_name.type, - string_col_with_type_and_name.name}; - - executeDecimal(block, result, - which_left.isDateTime64() ? datetime64_col_with_type_and_name : parsed_tmp_col_with_type_and_name, - which_right.isDateTime64() ? datetime64_col_with_type_and_name : parsed_tmp_col_with_type_and_name); - } else if (isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type)) { @@ -1294,19 +1198,10 @@ public: executeDecimal(block, result, col_with_type_and_name_left, col_with_type_and_name_right); } - else if (!left_is_num && !right_is_num && executeString(block, result, col_left_untyped, col_right_untyped)) - { - } else if (left_type->equals(*right_type)) { executeGenericIdenticalTypes(block, result, col_left_untyped, col_right_untyped); } - else if (executeDateOrDateTimeOrEnumOrUUIDWithConstString( - block, result, col_left_untyped, col_right_untyped, - left_type, right_type, - left_is_num, input_rows_count)) - { - } else { executeGeneric(block, result, col_with_type_and_name_left, col_with_type_and_name_right); From f2677a784132adf7b11a2f50e26494a807f0df3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 01:30:17 +0300 Subject: [PATCH 0644/2229] Added a test just in case --- tests/queries/0_stateless/01310_enum_comparison.reference | 2 ++ tests/queries/0_stateless/01310_enum_comparison.sql | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/01310_enum_comparison.reference create mode 100644 tests/queries/0_stateless/01310_enum_comparison.sql diff --git a/tests/queries/0_stateless/01310_enum_comparison.reference b/tests/queries/0_stateless/01310_enum_comparison.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/01310_enum_comparison.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/01310_enum_comparison.sql b/tests/queries/0_stateless/01310_enum_comparison.sql new file mode 100644 index 00000000000..26901a61b2b --- /dev/null +++ b/tests/queries/0_stateless/01310_enum_comparison.sql @@ -0,0 +1,6 @@ +CREATE TEMPORARY TABLE enum (x Enum('hello' = 1, 'world' = 2)); +INSERT INTO enum VALUES ('hello'); + +SELECT count() FROM enum WHERE x = 'hello'; +SELECT count() FROM enum WHERE x = 'world'; +SELECT count() FROM enum WHERE x = 'xyz'; -- { serverError 36 } From 41afea0165aa2bf66fd46903044bb08c874be54e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 02:10:10 +0300 Subject: [PATCH 0645/2229] Fix style --- src/Functions/FunctionsComparison.h | 1 - 1 file changed, 1 deletion(-) 
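As context for the hunks that follow, the essence of the new `executeWithConstString` path can be shown in isolation. This toy model is not ClickHouse code: `to_uint8` is a hypothetical stand-in for `convertFieldToType` plus a const-column comparison. It only demonstrates why converting the string constant, and treating a failed conversion as "not equal", gives `toUInt8(1) = '257'` the expected result of false.

```cpp
#include <cstdint>
#include <iostream>
#include <limits>
#include <optional>
#include <string>

// Toy model: the string constant is converted to the other side's type;
// if the value does not fit, the comparison is simply false.
static std::optional<uint8_t> to_uint8(const std::string & s)
{
    unsigned long v = std::stoul(s);           // parse (error handling elided)
    if (v > std::numeric_limits<uint8_t>::max())
        return std::nullopt;                   // "overflow" => conversion fails
    return static_cast<uint8_t>(v);
}

int main()
{
    uint8_t column_value = 1;
    for (std::string constant : {"1", "257"})
    {
        auto converted = to_uint8(constant);
        bool equal = converted && *converted == column_value;
        std::cout << "toUInt8(1) = '" << constant << "' -> " << equal << '\n';
    }
    // prints 1 for '1' and 0 for '257', matching the comment in the patch
}
```

This is the behaviour pinned down by the overflow comment in the rewritten function and by the enum comparison test added above.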
diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 12da4c772d1..9cd13df826d 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -52,7 +52,6 @@ namespace DB namespace ErrorCodes { - extern const int TOO_LARGE_STRING_SIZE; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; From 713f8f0b2246be3381f2e7d1967b10173e24523c Mon Sep 17 00:00:00 2001 From: Volodymyr Kuznetsov Date: Sat, 13 Jun 2020 17:21:33 -0700 Subject: [PATCH 0646/2229] Added groupArrayArray and groupUniqArrayArray to SimpleAggregateFunction --- .../DataTypeCustomSimpleAggregateFunction.cpp | 2 +- .../00915_simple_aggregate_function.reference | 6 +++--- .../00915_simple_aggregate_function.sql | 14 ++++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 2ddce184cce..8b31a93dfe9 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static const std::vector supported_functions{"any", "anyLast", "min", "max", "sum", "groupBitAnd", "groupBitOr", "groupBitXor", "sumMap"}; +static const std::vector supported_functions{"any", "anyLast", "min", "max", "sum", "groupBitAnd", "groupBitOr", "groupBitXor", "sumMap", "groupArrayArray", "groupUniqArrayArray"}; String DataTypeCustomSimpleAggregateFunction::getName() const diff --git a/tests/queries/0_stateless/00915_simple_aggregate_function.reference b/tests/queries/0_stateless/00915_simple_aggregate_function.reference index d9e0a92cb01..771c19f2227 100644 --- a/tests/queries/0_stateless/00915_simple_aggregate_function.reference +++ b/tests/queries/0_stateless/00915_simple_aggregate_function.reference @@ -39,6 +39,6 @@ SimpleAggregateFunction(sum, Float64) 7 14 8 16 9 18 -1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) -10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) -SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) +1 1 2 2.2.2.2 3 ([1,2,3],[2,1,1]) [1,2,2,3,4] [4,2,1,3] +10 2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 20 20.20.20.20 5 ([2,3,4],[2,1,1]) [] [] +SimpleAggregateFunction(anyLast, Nullable(String)) SimpleAggregateFunction(anyLast, LowCardinality(Nullable(String))) SimpleAggregateFunction(anyLast, IPv4) SimpleAggregateFunction(groupBitOr, UInt32) SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) SimpleAggregateFunction(groupArrayArray, Array(Int32)) SimpleAggregateFunction(groupUniqArrayArray, Array(Int32)) diff --git a/tests/queries/0_stateless/00915_simple_aggregate_function.sql b/tests/queries/0_stateless/00915_simple_aggregate_function.sql index ba4935a6518..8cf0e032702 100644 --- a/tests/queries/0_stateless/00915_simple_aggregate_function.sql +++ b/tests/queries/0_stateless/00915_simple_aggregate_function.sql @@ -25,16 +25,18 @@ create table simple ( low_str SimpleAggregateFunction(anyLast,LowCardinality(Nullable(String))), ip SimpleAggregateFunction(anyLast,IPv4), status 
SimpleAggregateFunction(groupBitOr, UInt32), - tup SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))) + tup SimpleAggregateFunction(sumMap, Tuple(Array(Int32), Array(Int64))), + arr SimpleAggregateFunction(groupArrayArray, Array(Int32)), + uniq_arr SimpleAggregateFunction(groupUniqArrayArray, Array(Int32)) ) engine=AggregatingMergeTree order by id; -insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1])); -insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1])); +insert into simple values(1,'1','1','1.1.1.1', 1, ([1,2], [1,1]), [1,2], [1,2]); +insert into simple values(1,null,'2','2.2.2.2', 2, ([1,3], [1,1]), [2,3,4], [2,3,4]); -- String longer then MAX_SMALL_STRING_SIZE (actual string length is 100) -insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1])); -insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1])); +insert into simple values(10,'10','10','10.10.10.10', 4, ([2,3], [1,1]), [], []); +insert into simple values(10,'2222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222','20','20.20.20.20', 1, ([2, 4], [1,1]), [], []); select * from simple final order by id; -select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup) from simple limit 1; +select toTypeName(nullable_str),toTypeName(low_str),toTypeName(ip),toTypeName(status), toTypeName(tup), toTypeName(arr), toTypeName(uniq_arr) from simple limit 1; optimize table simple final; From 30f1f8811855a75d282eeba5cf2fd23191e2b656 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 03:43:42 +0300 Subject: [PATCH 0647/2229] Allow case-insensitive regexps; added a test #11101 --- src/Common/OptimizedRegularExpression.cpp | 37 ++++++++++++++++++- .../01312_case_insensitive_regexp.reference | 8 ++++ .../01312_case_insensitive_regexp.sql | 8 ++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01312_case_insensitive_regexp.reference create mode 100644 tests/queries/0_stateless/01312_case_insensitive_regexp.sql diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 8c4aa00f171..1464923e6ab 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -38,6 +38,7 @@ void OptimizedRegularExpressionImpl::analyze( required_substring_is_prefix = false; required_substring.clear(); bool has_alternative_on_depth_0 = false; + bool has_case_insensitive_flag = false; /// Substring with a position. using Substring = std::pair; @@ -65,7 +66,17 @@ void OptimizedRegularExpressionImpl::analyze( switch (*pos) { - case '|': case '(': case ')': case '^': case '$': case '.': case '[': case '?': case '*': case '+': case '{': + case '|': + case '(': + case ')': + case '^': + case '$': + case '.': + case '[': + case '?': + case '*': + case '+': + case '{': if (depth == 0 && !in_curly_braces && !in_square_braces) { if (last_substring->first.empty()) @@ -110,6 +121,28 @@ void OptimizedRegularExpressionImpl::analyze( trivial_substrings.resize(trivial_substrings.size() + 1); last_substring = &trivial_substrings.back(); } + + /// Check for case-insensitive flag. 
+                    if (pos + 1 < end && pos[1] == '?')
+                    {
+                        for (size_t offset = 2; pos + offset < end; ++offset)
+                        {
+                            if (pos[offset] == '-'   /// it means flag negation
+                                /// various possible flags, actually only imsU are supported by re2
+                                || (pos[offset] >= 'a' && pos[offset] <= 'z')
+                                || (pos[offset] >= 'A' && pos[offset] <= 'Z'))
+                            {
+                                if (pos[offset] == 'i')
+                                {
+                                    /// Actually it can be a negated case-insensitive flag. But we don't care.
+                                    has_case_insensitive_flag = true;
+                                    break;
+                                }
+                            }
+                            else
+                                break;
+                        }
+                    }
                 }
                 ++pos;
                 break;
@@ -209,7 +242,7 @@ void OptimizedRegularExpressionImpl::analyze(

     if (!is_trivial)
     {
-        if (!has_alternative_on_depth_0)
+        if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
         {
             /// We choose the non-alternative substring of the maximum length for first search.

diff --git a/tests/queries/0_stateless/01312_case_insensitive_regexp.reference b/tests/queries/0_stateless/01312_case_insensitive_regexp.reference
new file mode 100644
index 00000000000..c18b4e9b082
--- /dev/null
+++ b/tests/queries/0_stateless/01312_case_insensitive_regexp.reference
@@ -0,0 +1,8 @@
+1
+1
+1
+1
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/01312_case_insensitive_regexp.sql b/tests/queries/0_stateless/01312_case_insensitive_regexp.sql
new file mode 100644
index 00000000000..ca13989599d
--- /dev/null
+++ b/tests/queries/0_stateless/01312_case_insensitive_regexp.sql
@@ -0,0 +1,8 @@
+SELECT match('Too late', 'Too late');
+select match('Too late', '(?i)Too late');
+select match('Too late', '(?i)too late');
+select match('Too late', '(?i:too late)');
+select match('Too late', '(?i)to{2} late');
+select match('Too late', '(?i)to(?)o late');
+select match('Too late', '(?i)to+ late');
+select match('Too late', '(?i)to(?:o|o) late');
From 970a8e3ecc7fbabb2590fd8a0b093b472e6adfb6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 14 Jun 2020 03:56:13 +0300
Subject: [PATCH 0648/2229] Skip empty URL parameters #10749

---
 src/Server/HTTPHandler.cpp                            |  4 ++++
 .../0_stateless/01312_skip_empty_params.reference     |  3 +++
 tests/queries/0_stateless/01312_skip_empty_params.sh  | 10 ++++++++++
 3 files changed, 17 insertions(+)
 create mode 100644 tests/queries/0_stateless/01312_skip_empty_params.reference
 create mode 100755 tests/queries/0_stateless/01312_skip_empty_params.sh

diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp
index 84d23f10a55..e866af2f49b 100644
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@@ -430,6 +430,10 @@ void HTTPHandler::processQuery(
     auto param_could_be_skipped = [&] (const String & name)
     {
+        /// Empty parameters appear in URLs like ?&a=b or a=b&&c=d. Just skip them for the user's convenience.
+        if (name.empty())
+            return true;
+
         if (reserved_param_names.count(name))
             return true;

diff --git a/tests/queries/0_stateless/01312_skip_empty_params.reference b/tests/queries/0_stateless/01312_skip_empty_params.reference
new file mode 100644
index 00000000000..e8183f05f5d
--- /dev/null
+++ b/tests/queries/0_stateless/01312_skip_empty_params.reference
@@ -0,0 +1,3 @@
+1
+1
+1
diff --git a/tests/queries/0_stateless/01312_skip_empty_params.sh b/tests/queries/0_stateless/01312_skip_empty_params.sh
new file mode 100755
index 00000000000..2e3541aee35
--- /dev/null
+++ b/tests/queries/0_stateless/01312_skip_empty_params.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query=select%201&log_queries=1" +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&&query=select%201&log_queries=1" +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query=select%201&&&log_queries=1" From 6c278fee616e8326b40b2dbf62bed98be77a5ba6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 04:07:47 +0300 Subject: [PATCH 0649/2229] Better exception message --- src/Core/SettingsCollectionImpl.h | 13 ++++++++++++- src/Interpreters/Context.cpp | 11 ++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsCollectionImpl.h b/src/Core/SettingsCollectionImpl.h index 8210b04e2da..877567a7caf 100644 --- a/src/Core/SettingsCollectionImpl.h +++ b/src/Core/SettingsCollectionImpl.h @@ -7,6 +7,8 @@ */ #include +#include + namespace DB { @@ -91,7 +93,16 @@ Field SettingsCollection::const_reference::getValue() const template Field SettingsCollection::valueToCorrespondingType(size_t index, const Field & value) { - return members()[index].value_to_corresponding_type(value); + try + { + return members()[index].value_to_corresponding_type(value); + } + catch (Exception & e) + { + e.addMessage(fmt::format("in attempt to set the value of setting to {}", + applyVisitor(FieldVisitorToString(), value))); + throw; + } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1431f3fd62c..cb780443e03 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -982,7 +982,16 @@ void Context::setSetting(const StringRef & name, const Field & value) void Context::applySettingChange(const SettingChange & change) { - setSetting(change.name, change.value); + try + { + setSetting(change.name, change.value); + } + catch (Exception & e) + { + e.addMessage(fmt::format("in attempt to set the value of setting '{}' to {}", + change.name, applyVisitor(FieldVisitorToString(), change.value))); + throw; + } } From 400e9fb64f68e020907f9b4a9251b72c415455d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 04:23:53 +0300 Subject: [PATCH 0650/2229] Allow to drop table if there is no metadata in ZooKeeper; allow to rename --- src/Storages/StorageReplicatedMergeTree.cpp | 22 +++++++++++++++++++-- src/Storages/StorageReplicatedMergeTree.h | 2 ++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 57535466558..5931bca17ea 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -244,6 +244,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( { LOG_WARNING(log, "No metadata in ZooKeeper: table will be in readonly mode."); is_readonly = true; + has_metadata_in_zookeeper = false; return; } @@ -620,9 +621,14 @@ void StorageReplicatedMergeTree::createReplica() void StorageReplicatedMergeTree::drop() { + /// There is also the case when user has configured ClickHouse to wrong ZooKeeper cluster, + /// in this case, has_metadata_in_zookeeper = false, and we also permit to drop the table. + + if (has_metadata_in_zookeeper) { auto zookeeper = tryGetZooKeeper(); + /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. 
if (is_readonly || !zookeeper) throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); @@ -4032,8 +4038,20 @@ void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, c MergeTreeData::rename(new_path_to_table_data, new_table_id); /// Update table name in zookeeper - auto zookeeper = getZooKeeper(); - zookeeper->set(replica_path + "/host", getReplicatedMergeTreeAddress().toString()); + if (!is_readonly) + { + /// We don't do it for readonly tables, because it will be updated on next table startup. + /// It is also Ok to skip ZK error for the same reason. + try + { + auto zookeeper = getZooKeeper(); + zookeeper->set(replica_path + "/host", getReplicatedMergeTreeAddress().toString()); + } + catch (Coordination::Exception & e) + { + LOG_WARNING(log, "Cannot update the value of 'host' node (replica address) in ZooKeeper: {}", e.displayText()); + } + } /// TODO: You can update names of loggers. } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index ec38eb7e842..52ce1aada08 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -210,6 +210,8 @@ private: /// If true, the table is offline and can not be written to it. std::atomic_bool is_readonly {false}; + /// If false - ZooKeeper is available, but there is no table metadata. It's safe to drop table in this case. + bool has_metadata_in_zookeeper = true; String zookeeper_path; String replica_name; From e1317ef8ae6acc7ac40c3b6985bbd97828880dd2 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Sun, 14 Jun 2020 09:44:05 +0800 Subject: [PATCH 0651/2229] ISSUES-7572 fix test failure --- tests/integration/test_http_handlers_config/test.py | 6 +++--- .../test_prometheus_handler/config.xml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py index c18c22acbb2..b31913ba962 100644 --- a/tests/integration/test_http_handlers_config/test.py +++ b/tests/integration/test_http_handlers_config/test.py @@ -119,10 +119,10 @@ def test_defaults_http_handlers(): assert 'Default server response' == cluster.instance.http_request('', method='GET').content assert 200 == cluster.instance.http_request('ping', method='GET').status_code - assert 'Ok\n' == cluster.instance.http_request('ping', method='GET').content + assert 'Ok.\n' == cluster.instance.http_request('ping', method='GET').content assert 200 == cluster.instance.http_request('replicas_status', method='GET').status_code - assert 'Ok\n' == cluster.instance.http_request('replicas_status', method='GET').content + assert 'Ok.\n' == cluster.instance.http_request('replicas_status', method='GET').content def test_prometheus_handler(): with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "prometheus_handler", "test_prometheus_handler")) as cluster: @@ -144,4 +144,4 @@ def test_replicas_status_handler(): assert 404 == cluster.instance.http_request('test_replicas_status', method='POST', headers={'XXX': 'xxx'}).status_code assert 200 == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'xxx'}).status_code - assert 'Ok\n' == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'xxx'}).content + assert 'Ok.\n' == cluster.instance.http_request('test_replicas_status', method='GET', headers={'XXX': 'xxx'}).content diff --git 
a/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml b/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml index 7c80649cee2..8ace97a66dc 100644 --- a/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml +++ b/tests/integration/test_http_handlers_config/test_prometheus_handler/config.xml @@ -7,7 +7,7 @@ xxx /test_prometheus - replicas_status + prometheus true true true From fae12d5e42048805873ee8c2fb6cd6eee8ae1e3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 04:45:03 +0300 Subject: [PATCH 0652/2229] Return NULL or zero when value is not parsed completely in parseDateTimeBestEffortOr* functions --- src/Functions/FunctionsConversion.h | 4 ++-- ...3_parse_date_time_best_effort_null_zero.reference | 6 ++++++ .../01313_parse_date_time_best_effort_null_zero.sql | 12 ++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.reference create mode 100644 tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 879b885cf66..83417a3229b 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -722,10 +722,10 @@ struct ConvertThroughParsing parsed = ToDataType::tryReadText(vec_to[i], read_buffer, ToDataType::maxPrecision(), vec_to.getScale()); else parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone); - - parsed = parsed && isAllRead(read_buffer); } + parsed = parsed && isAllRead(read_buffer); + if (!parsed) vec_to[i] = 0; diff --git a/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.reference b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.reference new file mode 100644 index 00000000000..90bb776ca10 --- /dev/null +++ b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.reference @@ -0,0 +1,6 @@ +\N +0000-00-00 00:00:00 +\N +0000-00-00 00:00:00.000 +\N +0000-00-00 00:00:00 diff --git a/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql new file mode 100644 index 00000000000..69b66b46df7 --- /dev/null +++ b/tests/queries/0_stateless/01313_parse_date_time_best_effort_null_zero.sql @@ -0,0 +1,12 @@ +SELECT parseDateTimeBestEffort(''); -- { serverError 6 } +SELECT parseDateTimeBestEffortOrNull(''); +SELECT parseDateTimeBestEffortOrZero(''); + +SELECT parseDateTime64BestEffort(''); -- { serverError 6 } +SELECT parseDateTime64BestEffortOrNull(''); +SELECT parseDateTime64BestEffortOrZero(''); + +SET date_time_input_format = 'best_effort'; +SELECT toDateTime(''); -- { serverError 41 } +SELECT toDateTimeOrNull(''); +SELECT toDateTimeOrZero(''); From d6cf62e5872036099bb607bd075e68275aadf642 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 04:56:22 +0300 Subject: [PATCH 0653/2229] Added column "position" to system.columns and "column_position" to system.parts_columns --- src/Storages/System/StorageSystemColumns.cpp | 5 +++++ src/Storages/System/StorageSystemPartsColumns.cpp | 5 ++++- .../01314_position_in_system_columns.reference | 6 ++++++ .../0_stateless/01314_position_in_system_columns.sql | 8 ++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01314_position_in_system_columns.reference create mode 100644 
tests/queries/0_stateless/01314_position_in_system_columns.sql diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index b4f5da22c17..90e52ad373e 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -32,6 +32,7 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "table", std::make_shared() }, { "name", std::make_shared() }, { "type", std::make_shared() }, + { "position", std::make_shared() }, { "default_kind", std::make_shared() }, { "default_expression", std::make_shared() }, { "data_compressed_bytes", std::make_shared() }, @@ -131,8 +132,10 @@ protected: bool check_access_for_columns = check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name); + size_t position = 0; for (const auto & column : columns) { + ++position; if (check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name)) continue; @@ -147,6 +150,8 @@ protected: res_columns[res_index++]->insert(column.name); if (columns_mask[src_index++]) res_columns[res_index++]->insert(column.type->getName()); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(position); if (column.default_desc.expression) { diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index b8acdc5f995..479621fd47f 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -49,6 +49,7 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const std::string & name_) {"column", std::make_shared()}, {"type", std::make_shared()}, + {"column_position", std::make_shared()}, {"default_kind", std::make_shared()}, {"default_expression", std::make_shared()}, {"column_bytes_on_disk", std::make_shared()}, @@ -101,9 +102,10 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns_, co using State = IMergeTreeDataPart::State; + size_t column_position = 0; for (const auto & column : part->getColumns()) - { + ++column_position; size_t j = 0; { WriteBufferFromOwnString out; @@ -143,6 +145,7 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns_, co columns_[j++]->insert(column.name); columns_[j++]->insert(column.type->getName()); + columns_[j++]->insert(column_position); auto column_info_it = columns_info.find(column.name); if (column_info_it != columns_info.end()) diff --git a/tests/queries/0_stateless/01314_position_in_system_columns.reference b/tests/queries/0_stateless/01314_position_in_system_columns.reference new file mode 100644 index 00000000000..32e0ae5900e --- /dev/null +++ b/tests/queries/0_stateless/01314_position_in_system_columns.reference @@ -0,0 +1,6 @@ +x UInt8 1 +y String 2 +z Array(String) 3 +x UInt8 1 +y String 2 +z Array(String) 3 diff --git a/tests/queries/0_stateless/01314_position_in_system_columns.sql b/tests/queries/0_stateless/01314_position_in_system_columns.sql new file mode 100644 index 00000000000..7bb0f3b5a96 --- /dev/null +++ b/tests/queries/0_stateless/01314_position_in_system_columns.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt8, y String, z Array(String)) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO test (x) VALUES (1); + +SELECT name, type, position FROM system.columns WHERE database = currentDatabase() AND table = 'test'; +SELECT column, type, column_position FROM system.parts_columns WHERE database = 
currentDatabase() AND table = 'test'; + +DROP TABLE test; From 4a052f60c7eb40ee0c0a323a5039f9f0931c3bd3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 06:10:35 +0300 Subject: [PATCH 0654/2229] Don't use debug info from ELF file if it doesn't correspond to the running binary. --- src/Common/Elf.cpp | 46 ++++++++++++++++++++++++++++++++++++++ src/Common/Elf.h | 7 ++++++ src/Common/SymbolIndex.cpp | 37 ++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+) diff --git a/src/Common/Elf.cpp b/src/Common/Elf.cpp index 11d454abd71..ee32586d95e 100644 --- a/src/Common/Elf.cpp +++ b/src/Common/Elf.cpp @@ -54,6 +54,18 @@ Elf::Elf(const std::string & path) throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); section_names = reinterpret_cast(mapped + section_names_offset); + + /// Get program headers + + ElfOff program_header_offset = header->e_phoff; + uint16_t program_header_num_entries = header->e_phnum; + + if (!program_header_offset + || !program_header_num_entries + || program_header_offset + program_header_num_entries * sizeof(ElfPhdr) > elf_size) + throw Exception("The ELF is truncated (program header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF); + + program_headers = reinterpret_cast(mapped + program_header_offset); } @@ -104,6 +116,40 @@ std::optional Elf::findSectionByName(const char * name) const } +String Elf::getBuildID() const +{ + for (size_t idx = 0; idx < header->e_phnum; ++idx) + { + const ElfPhdr & phdr = program_headers[idx]; + + if (phdr.p_type == PT_NOTE) + return getBuildID(mapped + phdr.p_offset, phdr.p_filesz); + } + return {}; +} + + +String Elf::getBuildID(const char * nhdr_pos, size_t size) +{ + const char * nhdr_end = nhdr_pos + size; + + while (nhdr_pos < nhdr_end) + { + const ElfNhdr & nhdr = *reinterpret_cast(nhdr_pos); + + nhdr_pos += sizeof(ElfNhdr) + nhdr.n_namesz; + if (nhdr.n_type == NT_GNU_BUILD_ID) + { + const char * build_id = nhdr_pos; + return {build_id, nhdr.n_descsz}; + } + nhdr_pos += nhdr.n_descsz; + } + + return {}; +} + + const char * Elf::Section::name() const { if (!elf.section_names) diff --git a/src/Common/Elf.h b/src/Common/Elf.h index f3aafc8e5a9..632d7e6f0b1 100644 --- a/src/Common/Elf.h +++ b/src/Common/Elf.h @@ -17,6 +17,7 @@ using ElfEhdr = ElfW(Ehdr); using ElfOff = ElfW(Off); using ElfPhdr = ElfW(Phdr); using ElfShdr = ElfW(Shdr); +using ElfNhdr = ElfW(Nhdr); using ElfSym = ElfW(Sym); @@ -53,12 +54,18 @@ public: const char * end() const { return mapped + elf_size; } size_t size() const { return elf_size; } + /// Obtain build id from PT_NOTES section of program headers. Return empty string if does not exist. + /// The string is returned in binary. Note that "readelf -n ./clickhouse-server" prints it in hex. 
+ String getBuildID() const; + static String getBuildID(const char * nhdr_pos, size_t size); + private: MMapReadBufferFromFile in; size_t elf_size; const char * mapped; const ElfEhdr * header; const ElfShdr * section_headers; + const ElfPhdr * program_headers; const char * section_names = nullptr; }; diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 482c6fd0bad..54789695dd1 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -196,6 +196,20 @@ void collectSymbolsFromProgramHeaders(dl_phdr_info * info, } +String getBuildIDFromProgramHeaders(dl_phdr_info * info) +{ + for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) + { + const ElfPhdr & phdr = info->dlpi_phdr[header_index]; + if (phdr.p_type != PT_NOTE) + continue; + + return Elf::getBuildID(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz); + } + return {}; +} + + void collectSymbolsFromELFSymbolTable( dl_phdr_info * info, const Elf & elf, @@ -283,8 +297,31 @@ void collectSymbolsFromELF(dl_phdr_info * info, object_name = std::filesystem::exists(debug_info_path) ? debug_info_path : canonical_path; + /// But we have to compare Build ID to check that debug info corresponds to the same executable. + String our_build_id = getBuildIDFromProgramHeaders(info); + SymbolIndex::Object object; object.elf = std::make_unique(object_name); + + String file_build_id = object.elf->getBuildID(); + + if (our_build_id != file_build_id) + { + /// If debug info doesn't correspond to our binary, fallback to the info in our binary. + if (object_name != canonical_path) + { + object_name = canonical_path; + object.elf = std::make_unique(object_name); + + /// But it can still be outdated, for example, if executable file was deleted from filesystem and replaced by another file. 
+ file_build_id = object.elf->getBuildID(); + if (our_build_id != file_build_id) + return; + } + else + return; + } + object.address_begin = reinterpret_cast(info->dlpi_addr); object.address_end = reinterpret_cast(info->dlpi_addr + object.elf->size()); object.name = object_name; From f9431e88861455ca2e93a39a164bb3c0a9914e76 Mon Sep 17 00:00:00 2001 From: Yuntao Wu Date: Sun, 14 Jun 2020 11:17:51 +0800 Subject: [PATCH 0655/2229] =?UTF-8?q?repacle=20means=20"=E6=9B=BF=E6=8D=A2?= =?UTF-8?q?=E2=80=98=E2=80=99=20or=20=E2=80=9C=E4=BB=A3=E6=9B=BF=E2=80=9D?= =?UTF-8?q?=20in=20Chinese?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../table-engines/mergetree-family/replacingmergetree.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md index 8cf1ab8af57..67d2f4dd56f 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md @@ -1,4 +1,4 @@ -# 更换麦树 {#replacingmergetree} +# 替换合并树 {#replacingmergetree} 该引擎和[MergeTree](mergetree.md)的不同之处在于它会删除具有相同主键的重复项。 @@ -23,7 +23,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 请求参数的描述,参考[请求参数](../../../engines/table-engines/mergetree-family/replacingmergetree.md)。 -**替换树参数** +**替换合并树参数** - `ver` — 版本列。类型为 `UInt*`, `Date` 或 `DateTime`。可选参数。 From cb395ff099ff6f6602356d0ee7ab14690ad5bb80 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:04:19 +0300 Subject: [PATCH 0656/2229] Update test --- tests/queries/0_stateless/00700_decimal_compare.reference | 2 ++ tests/queries/0_stateless/00700_decimal_compare.sql | 2 +- tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00700_decimal_compare.reference b/tests/queries/0_stateless/00700_decimal_compare.reference index 32f0b0a6dea..2325847045f 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.reference +++ b/tests/queries/0_stateless/00700_decimal_compare.reference @@ -1,3 +1,5 @@ +0 +1 -42 -42 1 0 0 0 1 1 42 42 1 0 0 0 1 1 -42 -42.42000 0 0 1 1 0 1 diff --git a/tests/queries/0_stateless/00700_decimal_compare.sql b/tests/queries/0_stateless/00700_decimal_compare.sql index 24b4ce588e5..ae2f5790570 100644 --- a/tests/queries/0_stateless/00700_decimal_compare.sql +++ b/tests/queries/0_stateless/00700_decimal_compare.sql @@ -19,7 +19,7 @@ INSERT INTO decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-42, -42, -42, -0.42, SELECT a > toFloat64(0) FROM decimal; -- { serverError 43 } SELECT g > toFloat32(0) FROM decimal; -- { serverError 43 } -SELECT a > '0.0' FROM decimal; -- { serverError 43 } +SELECT a > '0.0' FROM decimal ORDER BY a; SELECT a, b, a = b, a < b, a > b, a != b, a <= b, a >= b FROM decimal ORDER BY a; SELECT a, g, a = g, a < g, a > g, a != g, a <= g, a >= g FROM decimal ORDER BY a; diff --git a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql index c65bf668d71..7848b4aaf24 100644 --- a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql +++ b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql @@ -1,9 +1,9 @@ -- Error cases: -- non-const string column -WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT DT64 = materialize(S); -- {serverError 44} -WITH '2020-02-05 
14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT materialize(S) = toDateTime64(S, 3); -- {serverError 44} -WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE DT64 = materialize(S); -- {serverError 44} -WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE materialize(S) = DT64; -- {serverError 44} +WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT DT64 = materialize(S); -- {serverError 43} +WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT materialize(S) = toDateTime64(S, 3); -- {serverError 43} +WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE DT64 = materialize(S); -- {serverError 43} +WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE materialize(S) = DT64; -- {serverError 43} SELECT * WHERE toDateTime64(123.345, 3) == 'ABCD'; -- {serverError 131} -- invalid DateTime64 string SELECT * WHERE toDateTime64(123.345, 3) == '2020-02-05 14:34:12.33333333333333333333333333333333333333333333333333333333'; -- {serverError 131} -- invalid string length From 59d4df19f03ab10588f043efa4f24e3292c2698d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:05:31 +0300 Subject: [PATCH 0657/2229] Update test --- tests/queries/0_stateless/00981_no_virtual_columns.reference | 2 +- tests/queries/0_stateless/00981_no_virtual_columns.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00981_no_virtual_columns.reference b/tests/queries/0_stateless/00981_no_virtual_columns.reference index a7ec77dc030..587be6b4c3f 100644 --- a/tests/queries/0_stateless/00981_no_virtual_columns.reference +++ b/tests/queries/0_stateless/00981_no_virtual_columns.reference @@ -1 +1 @@ -default merge_ab x UInt8 0 0 0 0 0 0 0 +x diff --git a/tests/queries/0_stateless/00981_no_virtual_columns.sql b/tests/queries/0_stateless/00981_no_virtual_columns.sql index 476377b4ddf..b3946154581 100644 --- a/tests/queries/0_stateless/00981_no_virtual_columns.sql +++ b/tests/queries/0_stateless/00981_no_virtual_columns.sql @@ -6,7 +6,7 @@ CREATE TABLE merge_a (x UInt8) ENGINE = StripeLog; CREATE TABLE merge_b (x UInt8) ENGINE = StripeLog; CREATE TABLE merge_ab AS merge(currentDatabase(), '^merge_[ab]$'); -SELECT * FROM system.columns WHERE database = currentDatabase() AND table = 'merge_ab'; +SELECT name FROM system.columns WHERE database = currentDatabase() AND table = 'merge_ab'; DROP TABLE merge_a; DROP TABLE merge_b; From fb040ef09fea5b59d573ae874192f62b07f0d5b7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:09:02 +0300 Subject: [PATCH 0658/2229] Update test result (now it is better) --- .../00569_parse_date_time_best_effort.reference | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference index b873e8b848d..bf11e1c1d71 100644 --- a/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference +++ b/tests/queries/0_stateless/00569_parse_date_time_best_effort.reference @@ -13,7 +13,7 @@ 11 Feb 2018 06:40:50 +0300 2018-02-11 03:40:50 2018-02-11 03:40:50 17 Apr 2 1:2:3 2000-04-17 01:02:03 2000-04-17 01:02:03 19700102 01:00:00 1970-01-02 01:00:00 1970-01-02 01:00:00 - 1970010201:00:00 2032-06-06 02:03:21 2032-06-06 02:03:21 + 1970010201:00:00 ᴺᵁᴸᴸ 0000-00-00 00:00:00 19700102010203 1970-01-02 01:02:03 1970-01-02 01:02:03 
19700102010203Z 1970-01-02 01:02:03 1970-01-02 01:02:03 1970/01/02 010203Z 1970-01-02 01:02:03 1970-01-02 01:02:03 @@ -61,7 +61,7 @@ 2017/01/32 0000-00-00 00:00:00 0000-00-00 00:00:00 2017-01 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 201701 MSD Jun 2017-05-31 20:00:00 2017-05-31 20:00:00 - 2017 25 1:2:3 0000-00-00 00:00:00 0000-00-00 00:00:00 + 2017 25 1:2:3 ᴺᵁᴸᴸ 0000-00-00 00:00:00 2017 25 Apr 1:2:3 2017-04-01 01:02:03 2017-04-01 01:02:03 2017 Apr 01 11:22:33 2017-04-01 11:22:33 2017-04-01 11:22:33 2017 Apr 02 01/02/03 UTC+0300 ᴺᵁᴸᴸ 0000-00-00 00:00:00 @@ -74,8 +74,8 @@ 2017 Apr 02 1:2:3 2017-04-02 01:02:03 2017-04-02 01:02:03 2017 Apr 02 1:2:33 2017-04-02 01:02:33 2017-04-02 01:02:33 2017 Apr 02 1:2:3 MSK 2017-04-01 22:02:03 2017-04-01 22:02:03 - 2017 Apr 02 1:2:3 MSK 2017 2017-04-01 22:02:03 2017-04-01 22:02:03 - 2017 Apr 02 1:2:3 MSK 2018 2017-04-01 22:02:03 2017-04-01 22:02:03 + 2017 Apr 02 1:2:3 MSK 2017 ᴺᵁᴸᴸ 0000-00-00 00:00:00 + 2017 Apr 02 1:2:3 MSK 2018 ᴺᵁᴸᴸ 0000-00-00 00:00:00 2017 Apr 02 1:2:3 UTC+0000 2017-04-02 01:02:03 2017-04-02 01:02:03 2017 Apr 02 1:2:3 UTC+0300 2017-04-01 22:02:03 2017-04-01 22:02:03 2017 Apr 02 1:2:3 UTC+0400 2017-04-01 21:02:03 2017-04-01 21:02:03 @@ -101,6 +101,6 @@ 25 Jan 2017 1:2:3 Z PM 2017-01-25 13:02:03 2017-01-25 13:02:03 25 Jan 2017 1:2:3Z PM 2017-01-25 13:02:03 2017-01-25 13:02:03 25 Jan 2017 1:2:3 Z PM +03:00 2017-01-25 10:02:03 2017-01-25 10:02:03 - Jun, 11 Feb 2018 06:40:50 +0300 2000-06-01 00:00:00 2000-06-01 00:00:00 + Jun, 11 Feb 2018 06:40:50 +0300 ᴺᵁᴸᴸ 0000-00-00 00:00:00 Sun 11 Feb 2018 06:40:50 +0300 2018-02-11 03:40:50 2018-02-11 03:40:50 Sun, 11 Feb 2018 06:40:50 +0300 2018-02-11 03:40:50 2018-02-11 03:40:50 From e24576c56c4d12bf5ab66500c4462c9163c85976 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:13:27 +0300 Subject: [PATCH 0659/2229] Update test --- .../0_stateless/00578_merge_table_shadow_virtual_column.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00578_merge_table_shadow_virtual_column.sql b/tests/queries/0_stateless/00578_merge_table_shadow_virtual_column.sql index 3071e307517..e729bfdf188 100644 --- a/tests/queries/0_stateless/00578_merge_table_shadow_virtual_column.sql +++ b/tests/queries/0_stateless/00578_merge_table_shadow_virtual_column.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS numbers2; CREATE TABLE numbers1 ENGINE = Memory AS SELECT number as _table FROM numbers(1000); CREATE TABLE numbers2 ENGINE = Memory AS SELECT number as _table FROM numbers(1000); -SELECT count() FROM merge(currentDatabase(), '^numbers\\d+$') WHERE _table='numbers1'; -- { serverError 43 } +SELECT count() FROM merge(currentDatabase(), '^numbers\\d+$') WHERE _table='numbers1'; -- { serverError 53 } SELECT count() FROM merge(currentDatabase(), '^numbers\\d+$') WHERE _table=1; DROP TABLE numbers1; From eec5abde071d98942446f3292b4a50ed0284cb4e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:16:23 +0300 Subject: [PATCH 0660/2229] Fix test --- src/Interpreters/convertFieldToType.cpp | 65 ------------------------- 1 file changed, 65 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 3cb774596c0..6328ed76924 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -124,42 +124,6 @@ static Field convertDecimalType(const Field & from, const To & type) } -DayNum stringToDate(const String & s) -{ - ReadBufferFromString in(s); - DayNum date{}; - - 
readDateText(date, in); - if (!in.eof()) - throw Exception("String is too long for Date: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); - - return date; -} - -UInt64 stringToDateTime(const String & s) -{ - ReadBufferFromString in(s); - time_t date_time{}; - - readDateTimeText(date_time, in); - if (!in.eof()) - throw Exception("String is too long for DateTime: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); - - return UInt64(date_time); -} - -DateTime64::NativeType stringToDateTime64(const String & s, UInt32 scale) -{ - ReadBufferFromString in(s); - DateTime64 datetime64 {0}; - - readDateTime64Text(datetime64, scale, in); - if (!in.eof()) - throw Exception("String is too long for DateTime64: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); - - return datetime64.value; -} - Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { WhichDataType which_type(type); @@ -215,35 +179,6 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } // TODO (vnemkov): extra cases for DateTime64: converting from integer, converting from Decimal - - if (src.getType() == Field::Types::String) - { - if (which_type.isDate()) - { - /// Convert 'YYYY-MM-DD' Strings to Date - return stringToDate(src.get()); - } - else if (which_type.isDateTime()) - { - /// Convert 'YYYY-MM-DD hh:mm:ss' Strings to DateTime - return stringToDateTime(src.get()); - } - else if (which_type.isDateTime64()) - { - const auto * date_time64 = typeid_cast(&type); - /// Convert 'YYYY-MM-DD hh:mm:ss.NNNNNNNNN' Strings to DateTime - return stringToDateTime64(src.get(), date_time64->getScale()); - } - else if (which_type.isUUID()) - { - return stringToUUID(src.get()); - } - else if (which_type.isEnum()) - { - /// Convert String to Enum's value - return dynamic_cast(type).castToValue(src); - } - } } else if (which_type.isStringOrFixedString()) { From ff3e5e1a2ebaba695b8ebbad8a047f9c08259f74 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 02:09:40 +0300 Subject: [PATCH 0661/2229] Allow implicit conversion from String in IN, VALUES and comparison #11630 --- src/Interpreters/convertFieldToType.cpp | 29 ++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 6328ed76924..b71d2ffbaa7 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -148,7 +148,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); } - else if (type.isValueRepresentedByNumber()) + else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String) { if (which_type.isUInt8()) return convertNumericType(src, type); if (which_type.isUInt16()) return convertNumericType(src, type); @@ -164,9 +164,6 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (const auto * ptype = typeid_cast *>(&type)) return convertDecimalType(src, *ptype); if (const auto * ptype = typeid_cast *>(&type)) return convertDecimalType(src, *ptype); - if (!which_type.isDateOrDateTime() && !which_type.isUUID() && !which_type.isEnum()) - throw Exception{"Cannot convert field to type " + type.getName(), ErrorCodes::CANNOT_CONVERT_TYPE}; - if (which_type.isEnum() && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64)) { /// Convert UInt64 or Int64 to Enum's value @@ -263,17 
+260,29 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } + /// Conversion from string by parsing. if (src.getType() == Field::Types::String) { - const auto col = type.createColumn(); - ReadBufferFromString buffer(src.get()); - type.deserializeAsTextEscaped(*col, buffer, FormatSettings{}); + /// Promote data type to avoid overflows. Note that overflows in the largest data type are still possible. + const IDataType * type_to_parse = &type; + DataTypePtr holder; - return (*col)[0]; + if (type.canBePromoted()) + { + holder = type.promoteNumericType(); + type_to_parse = holder.get(); + } + + const auto col = type_to_parse->createColumn(); + ReadBufferFromString in_buffer(src.get()); + type_to_parse->deserializeAsWholeText(*col, in_buffer, FormatSettings{}); + if (!in_buffer.eof()) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String is too long for {}: {}", type.getName(), src.get()); + + Field parsed = (*col)[0]; + return convertFieldToType(parsed, type, from_type_hint); } - - // TODO (nemkov): should we attempt to parse value using or `type.deserializeAsTextEscaped()` type.deserializeAsTextEscaped() ? throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: " + Field::Types::toString(src.getType()), ErrorCodes::TYPE_MISMATCH); } From 402484079591be39d37079b96757a964c1a0144e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 02:19:16 +0300 Subject: [PATCH 0662/2229] Add a test --- src/Interpreters/convertFieldToType.cpp | 10 +++++++++- ..._comparison_with_constant_string.reference | 20 +++++++++++++++++++ .../01311_comparison_with_constant_string.sql | 20 +++++++++++++++++++ 3 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01311_comparison_with_constant_string.reference create mode 100644 tests/queries/0_stateless/01311_comparison_with_constant_string.sql diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index b71d2ffbaa7..050be6ba956 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -275,7 +275,15 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto col = type_to_parse->createColumn(); ReadBufferFromString in_buffer(src.get()); - type_to_parse->deserializeAsWholeText(*col, in_buffer, FormatSettings{}); + try + { + type_to_parse->deserializeAsWholeText(*col, in_buffer, FormatSettings{}); + } + catch (Exception & e) + { + e.addMessage(fmt::format("while converting '{}' to {}", src.get(), type.getName())); + throw; + } if (!in_buffer.eof()) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String is too long for {}: {}", type.getName(), src.get()); diff --git a/tests/queries/0_stateless/01311_comparison_with_constant_string.reference b/tests/queries/0_stateless/01311_comparison_with_constant_string.reference new file mode 100644 index 00000000000..48c6fc05950 --- /dev/null +++ b/tests/queries/0_stateless/01311_comparison_with_constant_string.reference @@ -0,0 +1,20 @@ +0 +1 +0 +--- +1 +0 +1 +--- +1 +0 +0 +--- +0 +--- +1 +0 +--- +--- +1 +--- diff --git a/tests/queries/0_stateless/01311_comparison_with_constant_string.sql b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql new file mode 100644 index 00000000000..2cfec6ca05e --- /dev/null +++ b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql @@ -0,0 +1,20 @@ +SELECT number = '1' FROM numbers(3); +SELECT '---'; 
+SELECT '1' != number FROM numbers(3); +SELECT '---'; +SELECT '1' > number FROM numbers(3); +SELECT '---'; +SELECT 1 = '257'; +SELECT '---'; +SELECT 1 IN (1.23, '1', 2); +SELECT 1 IN (1.23, '2', 2); +SELECT '---'; + +-- it should work but it doesn't. +SELECT 1 = '1.0'; -- { serverError 131 } +SELECT '---'; + +SELECT toDateTime('2020-06-13 01:02:03') = '2020-06-13T01:02:03'; +SELECT '---'; + +SELECT 0 = ''; -- { serverError 32 } From e2f7a41a1a640f9c4d8e402f992ae8e2fbdcf77d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 02:27:10 +0300 Subject: [PATCH 0663/2229] Fix error; clarify more results in test --- src/Functions/FunctionsComparison.h | 6 ++++-- .../01311_comparison_with_constant_string.reference | 12 ++++++++++++ .../01311_comparison_with_constant_string.sql | 13 +++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 9cd13df826d..91525f84c14 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -817,6 +817,7 @@ private: const DataTypePtr & left_type, const DataTypePtr & right_type, size_t input_rows_count) { /// To compare something with const string, we cast constant to appropriate type and compare as usual. + /// It is ok to throw exception if value is not convertible. /// We should deal with possible overflows, e.g. toUInt8(1) = '257' should return false. const ColumnConst * left_const = checkAndGetColumnConstStringOrFixedString(col_left_untyped); @@ -831,10 +832,11 @@ private: Field string_value = left_const ? left_const->getField() : right_const->getField(); Field converted = convertFieldToType(string_value, *type_to_compare, type_string); - /// If not possible to convert, comparison yields to false. + /// If not possible to convert, comparison with =, <, >, <=, >= yields to false and comparison with != yields to true. 
if (converted.isNull()) { - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, + std::is_same_v, NotEqualsOp>); } else { diff --git a/tests/queries/0_stateless/01311_comparison_with_constant_string.reference b/tests/queries/0_stateless/01311_comparison_with_constant_string.reference index 48c6fc05950..7865f42932d 100644 --- a/tests/queries/0_stateless/01311_comparison_with_constant_string.reference +++ b/tests/queries/0_stateless/01311_comparison_with_constant_string.reference @@ -16,5 +16,17 @@ 0 --- --- +0 +--- +1 +--- +0 +--- +0 +--- +0 +--- +0 +--- 1 --- diff --git a/tests/queries/0_stateless/01311_comparison_with_constant_string.sql b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql index 2cfec6ca05e..6ca736ba146 100644 --- a/tests/queries/0_stateless/01311_comparison_with_constant_string.sql +++ b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql @@ -14,6 +14,19 @@ SELECT '---'; SELECT 1 = '1.0'; -- { serverError 131 } SELECT '---'; +SELECT 1 = '257'; +SELECT '---'; +SELECT 1 != '257'; +SELECT '---'; +SELECT 1 < '257'; -- this is wrong for now +SELECT '---'; +SELECT 1 > '257'; +SELECT '---'; +SELECT 1 <= '257'; -- this is wrong for now +SELECT '---'; +SELECT 1 >= '257'; +SELECT '---'; + SELECT toDateTime('2020-06-13 01:02:03') = '2020-06-13T01:02:03'; SELECT '---'; From fed6843e64a1e7ae9f0cc1d3884a4c11976e8dda Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 03:27:00 +0300 Subject: [PATCH 0664/2229] Fix style --- src/Interpreters/convertFieldToType.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 050be6ba956..9c1136e5df6 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -34,7 +34,6 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; extern const int TYPE_MISMATCH; extern const int TOO_LARGE_STRING_SIZE; - extern const int CANNOT_CONVERT_TYPE; } From bba0140d8fa0e42056bb9f5ba89ffe4002c4ee8f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:20:52 +0300 Subject: [PATCH 0665/2229] Fix tests --- src/Interpreters/convertFieldToType.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 9c1136e5df6..d60bb6cee6c 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -33,7 +33,6 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int TYPE_MISMATCH; - extern const int TOO_LARGE_STRING_SIZE; } @@ -284,7 +283,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID throw; } if (!in_buffer.eof()) - throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String is too long for {}: {}", type.getName(), src.get()); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get(), type.getName()); Field parsed = (*col)[0]; return convertFieldToType(parsed, type, from_type_hint); From 44221139e9f2d637a5e1a10a628692552c062291 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:35:50 +0300 Subject: [PATCH 0666/2229] Fix test --- src/DataTypes/DataTypeUUID.cpp | 7 ++++++- src/DataTypes/DataTypeUUID.h | 3 +++ src/Interpreters/convertFieldToType.cpp | 7 +++++++ 3 files changed, 16 
insertions(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeUUID.cpp b/src/DataTypes/DataTypeUUID.cpp index 87e306cf477..94a043eb472 100644 --- a/src/DataTypes/DataTypeUUID.cpp +++ b/src/DataTypes/DataTypeUUID.cpp @@ -16,13 +16,18 @@ void DataTypeUUID::serializeText(const IColumn & column, size_t row_num, WriteBu writeText(UUID(assert_cast(column).getData()[row_num]), ostr); } -void DataTypeUUID::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void DataTypeUUID::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; readText(x, istr); assert_cast(column).getData().push_back(x); } +void DataTypeUUID::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeText(column, istr, settings); +} + void DataTypeUUID::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 75e634bc625..e9f1d22325b 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -17,6 +17,7 @@ public: bool equals(const IDataType & rhs) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -30,6 +31,8 @@ public: bool canBeUsedInBitOperations() const override { return true; } bool canBeInsideNullable() const override { return true; } + + bool canBePromoted() const override { return false; } }; } diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index d60bb6cee6c..21cf9422c32 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -173,6 +173,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// We don't need any conversion UInt64 is under type of Date and DateTime return src; } + + if (which_type.isUUID() && src.getType() == Field::Types::UInt128) + { + /// Already in needed type. 
+ return src; + } + // TODO (vnemkov): extra cases for DateTime64: converting from integer, converting from Decimal } else if (which_type.isStringOrFixedString()) From 4953b5fc84c2b2b59cb8a9751553caa762961114 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sat, 13 Jun 2020 21:41:55 -0700 Subject: [PATCH 0667/2229] remove unused imports from HTTPHandlerFactory This removes unused imports from `src/Server/HTTPHandlerFactory.cpp`: ```bash - #include - #include - #include ``` --- src/Server/HTTPHandlerFactory.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 2f00aa0aa72..3bcb29672e7 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -1,9 +1,6 @@ #include "HTTPHandlerFactory.h" -#include #include -#include -#include #include #include "HTTPHandler.h" From 3958a032acd8e3da194704505715d9be22b33787 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 08:15:29 +0300 Subject: [PATCH 0668/2229] Added a test --- src/AggregateFunctions/AggregateFunctionNull.cpp | 10 +++++----- ...1315_count_distinct_return_not_nullable.reference | 9 +++++++++ .../01315_count_distinct_return_not_nullable.sql | 12 ++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference create mode 100644 tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 77687f9f328..993cb93c991 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -33,6 +33,11 @@ public: AggregateFunctionPtr transformAggregateFunction( const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override { + /// Special case for 'count' function. It could be called with Nullable arguments + /// - that means - count number of calls, when all arguments are not NULL. + if (nested_function && nested_function->getName() == "count") + return std::make_shared(arguments[0], params); + bool has_nullable_types = false; bool has_null_types = false; for (const auto & arg_type : arguments) @@ -60,11 +65,6 @@ public: if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params)) return adapter; - /// Special case for 'count' function. It could be called with Nullable arguments - /// - that means - count number of calls, when all arguments are not NULL. 
- if (nested_function->getName() == "count") - return std::make_shared(arguments[0], params); - bool return_type_is_nullable = !nested_function->returnDefaultWhenOnlyNull() && nested_function->getReturnType()->canBeInsideNullable(); bool serialize_flag = return_type_is_nullable || nested_function->returnDefaultWhenOnlyNull(); diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference new file mode 100644 index 00000000000..f8b77704aa3 --- /dev/null +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference @@ -0,0 +1,9 @@ +0 +0 +0 +5 +5 +5 +0 +\N +\N diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql new file mode 100644 index 00000000000..2d9b5ef54aa --- /dev/null +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql @@ -0,0 +1,12 @@ +SELECT uniq(number >= 10 ? number : NULL) FROM numbers(10); +SELECT uniqExact(number >= 10 ? number : NULL) FROM numbers(10); +SELECT count(DISTINCT number >= 10 ? number : NULL) FROM numbers(10); + +SELECT uniq(number >= 5 ? number : NULL) FROM numbers(10); +SELECT uniqExact(number >= 5 ? number : NULL) FROM numbers(10); +SELECT count(DISTINCT number >= 5 ? number : NULL) FROM numbers(10); + +SELECT count(NULL); +-- These two returns NULL for now, but we want to change them to return 0. +SELECT uniq(NULL); +SELECT count(DISTINCT NULL); From ad321966f09e8801b902dffa650b8beb57924454 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sun, 14 Jun 2020 09:43:40 +0300 Subject: [PATCH 0669/2229] trigger ci From 1531f0bd0f794abba5ace7b220058d8d055dc883 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sun, 14 Jun 2020 17:52:03 +0300 Subject: [PATCH 0670/2229] Update performance.html --- website/templates/index/performance.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/templates/index/performance.html b/website/templates/index/performance.html index 2a9b766c74b..61cd31a06ca 100644 --- a/website/templates/index/performance.html +++ b/website/templates/index/performance.html @@ -6,7 +6,7 @@

    ClickHouse's performance exceeds comparable column-oriented database management systems currently available on the market. It processes hundreds of millions to more than a billion rows and tens of gigabytes of data per single server per second.

-    Detailed comparison
+    Detailed comparison
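A note on the conversion path introduced in patches 0661-0665 above: convertFieldToType now parses a constant string through a promoted (wider) numeric type, requires the whole string to be consumed, and only then narrows to the target type, which is why `toUInt8(1) = '257'` evaluates to false instead of wrapping modulo 256. Below is a minimal standalone sketch of that promote-parse-narrow idea; `parseViaPromotedType` is an illustrative stand-in, not part of ClickHouse (the real code goes through `IDataType::promoteNumericType` and `deserializeAsWholeText`):

```cpp
#include <cstdint>
#include <charconv>
#include <limits>
#include <optional>
#include <string_view>
#include <system_error>

// Parse in a wider ("promoted") type so out-of-range inputs like "257"
// do not wrap around; narrow only if the parsed value fits the target type.
template <typename Narrow>
std::optional<Narrow> parseViaPromotedType(std::string_view s)
{
    int64_t wide{};  // stand-in for the promoted numeric type
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), wide);
    if (ec != std::errc{} || ptr != s.data() + s.size())
        return std::nullopt;  // require full consumption, like the eof() check above
    if (wide < std::numeric_limits<Narrow>::min() || wide > std::numeric_limits<Narrow>::max())
        return std::nullopt;  // value does not round-trip into the target type
    return static_cast<Narrow>(wide);
}

int main()
{
    auto a = parseViaPromotedType<uint8_t>("255");  // ok: 255
    auto b = parseViaPromotedType<uint8_t>("257");  // nullopt: toUInt8(1) = '257' must be false
    auto c = parseViaPromotedType<uint8_t>("2x");   // nullopt: trailing bytes rejected
    return (a && !b && !c) ? 0 : 1;
}
```

When such a narrowing conversion fails, the FunctionsComparison change in patch 0663 folds the whole comparison into a constant column: false for =, <, >, <= and >=, and true for !=.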
From b8611cf46cd2d6f15f2a6e678961c06c686fa9ed Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Sun, 14 Jun 2020 18:05:15 +0300
Subject: [PATCH 0671/2229] experiment

---
 tests/queries/0_stateless/00600_replace_running_query.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh
index 1331dd3c15b..75006cc56ce 100755
--- a/tests/queries/0_stateless/00600_replace_running_query.sh
+++ b/tests/queries/0_stateless/00600_replace_running_query.sh
@@ -36,5 +36,5 @@ wait
 ${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' &
 wait_for_query_to_start '42'
 ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null
-${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44'
 wait
+${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --query='SELECT 44'

From 93aee32ae4b5846a08069ccb1e7154c2b9418f8b Mon Sep 17 00:00:00 2001
From: Avogar
Date: Sun, 14 Jun 2020 18:35:32 +0300
Subject: [PATCH 0672/2229] Add ORCBlockOutputFormat

---
 src/Formats/FormatFactory.cpp                 |   1 +
 src/Formats/FormatFactory.h                   |   4 +-
 .../Formats/Impl/ORCBlockOutputFormat.cpp     | 409 ++++++++++++++++++
 .../Formats/Impl/ORCBlockOutputFormat.h       |  70 +++
 .../01307_orc_output_format.reference         |   6 +
 .../0_stateless/01307_orc_output_format.sh    |  20 +
 6 files changed, 509 insertions(+), 1 deletion(-)
 create mode 100644 src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
 create mode 100644 src/Processors/Formats/Impl/ORCBlockOutputFormat.h
 create mode 100644 tests/queries/0_stateless/01307_orc_output_format.reference
 create mode 100755 tests/queries/0_stateless/01307_orc_output_format.sh

diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 9182c728600..e1bb40c737c 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -394,6 +394,7 @@ FormatFactory::FormatFactory()
     registerOutputFormatProcessorNull(*this);
     registerOutputFormatProcessorMySQLWrite(*this);
     registerOutputFormatProcessorMarkdown(*this);
+    registerOutputFormatProcessorORC(*this);
 }
 
 FormatFactory & FormatFactory::instance()
diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h
index c8dd97aa940..9c1a23d7164 100644
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@@ -175,6 +175,9 @@ void registerInputFormatProcessorTemplate(FormatFactory & factory);
 void registerOutputFormatProcessorTemplate(FormatFactory & factory);
 void registerInputFormatProcessorMsgPack(FormatFactory & factory);
 void registerOutputFormatProcessorMsgPack(FormatFactory & factory);
+void registerInputFormatProcessorORC(FormatFactory & factory);
+void registerOutputFormatProcessorORC(FormatFactory & factory);
+
 
 
 /// File Segmentation Engines for parallel reading
@@ -206,6 +209,5 @@ void registerOutputFormatProcessorMarkdown(FormatFactory & factory);
 void registerInputFormatProcessorCapnProto(FormatFactory & factory);
 void registerInputFormatProcessorRegexp(FormatFactory & factory);
 void registerInputFormatProcessorJSONAsString(FormatFactory & factory);
-void registerInputFormatProcessorORC(FormatFactory & factory);
 
 }

diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp
new file mode 100644
index 00000000000..3745ee229a8
--- /dev/null
+++ 
b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -0,0 +1,409 @@ +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + +ORCOutputStream::ORCOutputStream(WriteBuffer & out_) : out(out_) {} + +uint64_t ORCOutputStream::getLength() const +{ + return out.count(); +} + +uint64_t ORCOutputStream::getNaturalWriteSize() const +{ + out.nextIfAtEnd(); + return out.available(); +} + +void ORCOutputStream::write(const void* buf, size_t length) +{ + out.write(static_cast(buf), length); +} + +ORCBlockOutputFormat::ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_), format_settings{format_settings_}, output_stream(out_), data_types(header_.getDataTypes()) +{ + schema = orc::createStructType(); + size_t columns_count = header_.columns(); + for (size_t i = 0; i != columns_count; ++i) + { + schema->addStructField(header_.safeGetByPosition(i).name, getORCType(data_types[i])); + } + writer = orc::createWriter(*schema, &output_stream, options); +} + +ORC_UNIQUE_PTR ORCBlockOutputFormat::getORCType(const DataTypePtr & type) +{ + switch (type->getTypeId()) + { + case TypeIndex::UInt8: [[fallthrough]]; + case TypeIndex::Int8: + { + return orc::createPrimitiveType(orc::TypeKind::BYTE); + } + case TypeIndex::UInt16: [[fallthrough]]; + case TypeIndex::Int16: + { + return orc::createPrimitiveType(orc::TypeKind::SHORT); + } + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::Int32: + { + return orc::createPrimitiveType(orc::TypeKind::INT); + } + case TypeIndex::UInt64: [[fallthrough]]; + case TypeIndex::Int64: + { + return orc::createPrimitiveType(orc::TypeKind::LONG); + } + case TypeIndex::Float32: + { + return orc::createPrimitiveType(orc::TypeKind::FLOAT); + } + case TypeIndex::Float64: + { + return orc::createPrimitiveType(orc::TypeKind::DOUBLE); + } + case TypeIndex::Date: + { + return orc::createPrimitiveType(orc::TypeKind::DATE); + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::DateTime64: + { + return orc::createPrimitiveType(orc::TypeKind::TIMESTAMP); + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + return orc::createPrimitiveType(orc::TypeKind::STRING); + } + case TypeIndex::Nullable: + { + return getORCType(removeNullable(type)); + } + /* + case TypeIndex::Array: + { + const auto * array_type = typeid_cast(type.get()); + return orc::createListType(getORCType(array_type->getNestedType())); + } + */ + case TypeIndex::Decimal32: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal64: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + case TypeIndex::Decimal128: + { + const auto * decimal_type = typeid_cast *>(type.get()); + return orc::createDecimalType(decimal_type->getPrecision(), decimal_type->getScale()); + } + default: + { + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } + } +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeNumbers( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + size_t rows_num) +{ + 
NumberVectorBatch * number_orc_column = dynamic_cast(orc_column); + const auto & number_column = assert_cast &>(column); + number_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + number_orc_column->notNull[i] = 0; + continue; + } + number_orc_column->data[i] = number_column.getElement(i); + } + number_orc_column->numElements = rows_num; +} + +template +void ORCBlockOutputFormat::ORCBlockOutputFormat::writeDecimals( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num, + ConvertFunc convert) +{ + DecimalVectorBatch *decimal_orc_column = dynamic_cast(orc_column); + const auto & decimal_column = assert_cast &>(column); + const auto * decimal_type = typeid_cast *>(type.get()); + decimal_orc_column->precision = decimal_type->getPrecision(); + decimal_orc_column->scale = decimal_type->getScale(); + decimal_orc_column->resize(rows_num); + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + decimal_orc_column->notNull[i] = 0; + continue; + } + decimal_orc_column->values[i] = convert(decimal_column.getElement(i).value); + } + decimal_orc_column->numElements = rows_num; +} + +void ORCBlockOutputFormat::writeColumn( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num) +{ + if (null_bytemap) + { + orc_column->hasNulls = true; + } + switch (type->getTypeId()) + { + case TypeIndex::Int8: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt8: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int16: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Int64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::UInt64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Float32: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::Float64: + { + writeNumbers(orc_column, column, null_bytemap, rows_num); + break; + } + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + { + orc::StringVectorBatch * string_orc_column = dynamic_cast(orc_column); + const auto & string_column = assert_cast(column); + string_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + string_orc_column->notNull[i] = 0; + continue; + } + const StringRef & string = string_column.getDataAt(i); + string_orc_column->data[i] = const_cast(string.data); + string_orc_column->length[i] = string.size; + } + string_orc_column->numElements = rows_num; + break; + } + case TypeIndex::DateTime: + { + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + timestamp_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + 
timestamp_orc_column->notNull[i] = 0; + continue; + } + timestamp_orc_column->data[i] = timestamp_column.getElement(i); + timestamp_orc_column->nanoseconds[i] = 0; + } + timestamp_orc_column->numElements = rows_num; + break; + } + case TypeIndex::DateTime64: + { + orc::TimestampVectorBatch * timestamp_orc_column = dynamic_cast(orc_column); + const auto & timestamp_column = assert_cast(column); + const auto * timestamp_type = assert_cast(type.get()); + + UInt32 scale = timestamp_type->getScale(); + timestamp_orc_column->resize(rows_num); + + for (size_t i = 0; i != rows_num; ++i) + { + if (null_bytemap && (*null_bytemap)[i]) + { + timestamp_orc_column->notNull[i] = 0; + continue; + } + UInt64 value = timestamp_column.getElement(i); + timestamp_orc_column->data[i] = value / std::pow(10, scale); + timestamp_orc_column->nanoseconds[i] = (value % UInt64(std::pow(10, scale))) * std::pow(10, 9 - scale); + } + timestamp_orc_column->numElements = rows_num; + break; + } + case TypeIndex::Decimal32:; + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int32 value){ return value; }); + break; + } + case TypeIndex::Decimal64: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int64 value){ return value; }); + break; + } + case TypeIndex::Decimal128: + { + writeDecimals( + orc_column, + column, + type, + null_bytemap, + rows_num, + [](Int128 value){ return orc::Int128(value >> 64, (value << 64) >> 64); }); + break; + } + case TypeIndex::Nullable: + { + const auto & nullable_column = assert_cast(column); + const PaddedPODArray & new_null_bytemap = assert_cast &>(*nullable_column.getNullMapColumnPtr()).getData(); + auto nested_type = removeNullable(type); + writeColumn(orc_column, nullable_column.getNestedColumn(), nested_type, &new_null_bytemap, rows_num); + break; + } + /* Doesn't work + case TypeIndex::Array: + { + orc::ListVectorBatch * list_orc_column = dynamic_cast(orc_column); + const auto & list_column = assert_cast(column); + auto nested_type = assert_cast(*type).getNestedType(); + const ColumnArray::Offsets & offsets = list_column.getOffsets(); + list_orc_column->resize(rows_num); + list_orc_column->offsets[0] = 0; + for (size_t i = 0; i != rows_num; ++i) + { + list_orc_column->offsets[i + 1] = offsets[i]; + } + const IColumn & nested_column = list_column.getData(); + orc::ColumnVectorBatch * nested_orc_column = list_orc_column->elements.get(); + writeColumn(nested_orc_column, nested_column, nested_type, null_bytemap, nested_column.size()); + list_orc_column->numElements = rows_num; + break; + } + */ + default: + throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + } +} + +void ORCBlockOutputFormat::consume(Chunk chunk) +{ + size_t columns_num = chunk.getNumColumns(); + size_t rows_num = chunk.getNumRows(); + ORC_UNIQUE_PTR batch = writer->createRowBatch(rows_num); + orc::StructVectorBatch *root = dynamic_cast(batch.get()); + for (size_t i = 0; i != columns_num; ++i) + { + writeColumn(root->fields[i], *chunk.getColumns()[i], data_types[i], nullptr, rows_num); + } + root->numElements = rows_num; + writer->add(*batch); +} + +void ORCBlockOutputFormat::finalize() +{ + writer->close(); +} + +void registerOutputFormatProcessorORC(FormatFactory & factory) +{ + factory.registerOutputFormatProcessor("ORC", []( + WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, 
format_settings); + }); +} + +} diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h new file mode 100644 index 00000000000..e075169b66f --- /dev/null +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -0,0 +1,70 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class WriteBuffer; + +class ORCOutputStream : public orc::OutputStream +{ +public: + ORCOutputStream(WriteBuffer & out_); + + uint64_t getLength() const override; + uint64_t getNaturalWriteSize() const override; + void write(const void* buf, size_t length) override; + + void close() override {}; + const std::string& getName() const override { return "ORCOutputStream"; }; + +private: + WriteBuffer & out; +}; + +class ORCBlockOutputFormat : public IOutputFormat +{ +public: + ORCBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + + String getName() const override { return "ORCBlockOutputFormat"; } + void consume(Chunk chunk) override; + void finalize() override; + + String getContentType() const override { return "application/octet-stream"; } + +private: + ORC_UNIQUE_PTR getORCType(const DataTypePtr & type); + template + void writeDecimals( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num, + ConvertFunc convert); + template + void writeNumbers( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, + const PaddedPODArray * null_bytemap, + size_t rows_num); + void writeColumn( + orc::ColumnVectorBatch * orc_column, + const IColumn & column, DataTypePtr & type, + const PaddedPODArray * null_bytemap, + size_t rows_num); + + const FormatSettings format_settings; + ORCOutputStream output_stream; + DataTypes data_types; + ORC_UNIQUE_PTR writer; + ORC_UNIQUE_PTR schema; + orc::WriterOptions options; +}; + +} diff --git a/tests/queries/0_stateless/01307_orc_output_format.reference b/tests/queries/0_stateless/01307_orc_output_format.reference new file mode 100644 index 00000000000..bd62476c2df --- /dev/null +++ b/tests/queries/0_stateless/01307_orc_output_format.reference @@ -0,0 +1,6 @@ +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.000 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 0000-00-00 00:00:00.000 42.4200 42.4242424200 424242.42424242424242000000 42 +255 65535 4294967295 100000000000 -128 -32768 -2147483648 -100000000000 2.02 10000.0000001 String 2021-12-19 2021-12-19 03:00:00 2021-12-19 03:00:00.000 1.0001 1.0000000100 100000.00000000000001000000 1 +4 1234 3244467295 500000000000 -1 -256 -14741221 -7000000000 100.1 14321.032141201 Another string 2024-10-04 2028-04-21 01:20:00 2021-12-19 03:14:51.123 34.1234 123123.1231231230 123123123.12312312312312300000 \N +42 42 42 42 42 42 42 42 42.42 42.42 42 1970-02-12 1970-01-01 03:00:42 1970-01-01 03:00:00.042 42.4200 42.4242424200 424242.42424242424242000000 42 diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh new file mode 100755 index 00000000000..8d7e85a03de --- /dev/null +++ 
b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS orc"; + +$CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 UInt32, uint64 UInt64, int8 Int8, int16 Int16, int32 Int32, int64 Int64, float Float32, double Float64, string String, date Date, datetime DateTime, datetime64 DateTime64, decimal32 Decimal32(4), decimal64 Decimal64(10), decimal128 Decimal128(20), nullable Nullable(Int32)) ENGINE = Memory"; + +$CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', 18980, 1639872000, 1639872000000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', 20000, 1839882000, 1639872891123, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', 42, 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > $CURDIR/tmp_orc_test_all_types.orc; + +cat $CURDIR/tmp_orc_test_all_types.orc | $CLICKHOUSE_CLIENT --query="INSERT INTO orc FORMAT ORC"; + +rm $CURDIR/tmp_orc_test_all_types.orc + +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc"; + +$CLICKHOUSE_CLIENT --query="DROP TABLE orc"; From 6a439a5eb530b513cdbecfb4f4e9ce32b58bae84 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 14 Jun 2020 19:17:22 +0300 Subject: [PATCH 0673/2229] fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 18 +++----- src/Parsers/ASTColumnDeclaration.cpp | 24 ++--------- src/Parsers/ASTColumnDeclaration.h | 3 +- src/Parsers/ParserCreateQuery.cpp | 2 +- src/Parsers/ParserCreateQuery.h | 41 +++++++------------ .../01269_creare_with_null.reference | 1 - .../01269_create_with_null.reference | 2 + .../0_stateless/01269_create_with_null.sql | 16 ++++++-- 8 files changed, 39 insertions(+), 68 deletions(-) delete mode 100644 tests/queries/0_stateless/01269_creare_with_null.reference diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 993c9e595e3..5d8c43aed0d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -287,28 +287,22 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( const auto & col_decl = ast->as(); DataTypePtr column_type = nullptr; - if (!col_decl.is_null && col_decl.is_not) - throw Exception{"Cant use NOT without NULL", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; if (col_decl.type) { column_type = DataTypeFactory::instance().get(col_decl.type); - if (col_decl.is_not && col_decl.is_null) + if (col_decl.null_modifier) { if (column_type->isNullable()) - throw Exception{"Cant use NOT NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; - } - else if (col_decl.is_null && !col_decl.is_not) - { - if (column_type->isNullable()) - throw Exception{"Cant use NULL with Nullable", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE}; - else + throw Exception("Cant use [NOT] NULL modifier with Nullable type", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); + if (*col_decl.null_modifier) column_type = makeNullable(column_type); } - - if (context.getSettingsRef().data_type_default_nullable && !column_type->isNullable() && !col_decl.is_not && !col_decl.is_null) + else if 
(context.getSettingsRef().data_type_default_nullable) + { column_type = makeNullable(column_type); + } column_names_and_types.emplace_back(col_decl.name, column_type); } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 08f7813ad06..5dd5fd7d526 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -18,18 +18,6 @@ ASTPtr ASTColumnDeclaration::clone() const res->children.push_back(res->type); } - if (is_null) - { - res->is_null = is_null; - res->children.push_back(res->is_null); - } - - if (is_not) - { - res->is_not = is_not; - res->children.push_back(res->is_not); - } - if (default_expression) { res->default_expression = default_expression->clone(); @@ -73,16 +61,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta type->formatImpl(settings, state, frame); } - if (is_not) + if (null_modifier) { - settings.ostr << ' '; - is_not->formatImpl(settings, state, frame); - } - - if (is_null) - { - settings.ostr << ' '; - is_null->formatImpl(settings, state, frame); + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") + << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); } if (default_expression) diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 34afd771de2..ea17a8b4dfa 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -13,8 +13,7 @@ class ASTColumnDeclaration : public IAST public: String name; ASTPtr type; - ASTPtr is_null; - ASTPtr is_not; + std::optional null_modifier; String default_specifier; ASTPtr default_expression; ASTPtr comment; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c54033bd27d..159b19b28c6 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -157,7 +157,7 @@ bool ParserTablePropertyDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expecte ParserIndexDeclaration index_p; ParserConstraintDeclaration constraint_p; - ParserColumnDeclaration column_p; + ParserColumnDeclaration column_p{true, true}; ASTPtr new_node = nullptr; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 9fae3d60836..a4fc60a2393 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -92,7 +92,8 @@ template class IParserColumnDeclaration : public IParserBase { public: - explicit IParserColumnDeclaration(bool require_type_ = true) : require_type(require_type_) + explicit IParserColumnDeclaration(bool require_type_ = true, bool allow_null_modifiers_ = false) + : require_type(require_type_), allow_null_modifiers(allow_null_modifiers_) { } @@ -104,6 +105,7 @@ protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool require_type = true; + bool allow_null_modifiers = false; }; using ParserColumnDeclaration = IParserColumnDeclaration; @@ -126,8 +128,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserStringLiteral string_literal_parser; ParserCodec codec_parser; ParserExpression expression_parser; - ParserIdentifier null_parser; - ParserCompoundIdentifier not_null_parser; /// mandatory column name ASTPtr name; @@ -139,8 +139,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E */ ASTPtr type; String default_specifier; - ASTPtr is_null; - ASTPtr is_not; + std::optional null_modifier; ASTPtr default_expression; ASTPtr comment_expression; ASTPtr 
codec_expression; @@ -169,19 +168,17 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E if (require_type && !type && !default_expression) return false; /// reject column name without type - // Pos pos_before_null = pos; - - if (s_not.check(pos, expected)) - if (s_null.check(pos, expected)) + if (type && allow_null_modifiers) + { + if (s_not.ignore(pos, expected)) { - is_not = std::make_shared("NOT"); - is_null = std::make_shared("NULL"); + if (!s_null.ignore(pos, expected)) + return false; + null_modifier.emplace(false); } - else - return false; - else - if (s_null.check(pos, expected)) - is_null = std::make_shared("NULL"); + else if (s_null.ignore(pos, expected)) + null_modifier.emplace(true); + } if (s_comment.ignore(pos, expected)) { @@ -212,17 +209,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(type)); } - if (is_null) - { - column_declaration->is_null = is_null; - column_declaration->children.push_back(std::move(is_null)); - } - - if (is_not) - { - column_declaration->is_not = is_not; - column_declaration->children.push_back(std::move(is_not)); - } + column_declaration->null_modifier = null_modifier; if (default_expression) { diff --git a/tests/queries/0_stateless/01269_creare_with_null.reference b/tests/queries/0_stateless/01269_creare_with_null.reference deleted file mode 100644 index fa7b52d9ebf..00000000000 --- a/tests/queries/0_stateless/01269_creare_with_null.reference +++ /dev/null @@ -1 +0,0 @@ -Nullable(Int32) Int32 Nullable(Int32) \ No newline at end of file diff --git a/tests/queries/0_stateless/01269_create_with_null.reference b/tests/queries/0_stateless/01269_create_with_null.reference index 7ef113393d5..739063af67f 100644 --- a/tests/queries/0_stateless/01269_create_with_null.reference +++ b/tests/queries/0_stateless/01269_create_with_null.reference @@ -1,2 +1,4 @@ Nullable(Int32) Int32 Nullable(Int32) Int32 +CREATE TABLE default.data_null\n(\n `a` Nullable(Int32), \n `b` Int32, \n `c` Nullable(Int32), \n `d` Int32\n)\nENGINE = Memory() Nullable(Int32) Int32 Nullable(Int32) Nullable(Int32) +CREATE TABLE default.set_null\n(\n `a` Nullable(Int32), \n `b` Int32, \n `c` Nullable(Int32), \n `d` Nullable(Int32)\n)\nENGINE = Memory() diff --git a/tests/queries/0_stateless/01269_create_with_null.sql b/tests/queries/0_stateless/01269_create_with_null.sql index 68fa130e0da..856b6ea75f4 100644 --- a/tests/queries/0_stateless/01269_create_with_null.sql +++ b/tests/queries/0_stateless/01269_create_with_null.sql @@ -1,6 +1,8 @@ DROP TABLE IF EXISTS data_null; DROP TABLE IF EXISTS set_null; +SET data_type_default_nullable='false'; + CREATE TABLE data_null ( a INT NULL, b INT NOT NULL, @@ -9,19 +11,20 @@ CREATE TABLE data_null ( ) engine=Memory(); -INSERT INTO data_null VALUES (1, 2, 3, 4); +INSERT INTO data_null VALUES (NULL, 2, NULL, 4); SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d) FROM data_null; +SHOW CREATE TABLE data_null; -CREATE TABLE data_null ( +CREATE TABLE data_null_error ( a Nullable(INT) NULL, b INT NOT NULL, c Nullable(INT) ) engine=Memory(); --{serverError 377} -CREATE TABLE data_null ( +CREATE TABLE data_null_error ( a INT NULL, b Nullable(INT) NOT NULL, c Nullable(INT) @@ -37,6 +40,11 @@ CREATE TABLE set_null ( ) engine=Memory(); -INSERT INTO set_null VALUES (1, 2, 3, 4); +INSERT INTO set_null VALUES (NULL, 2, NULL, NULL); SELECT toTypeName(a), toTypeName(b), toTypeName(c), toTypeName(d) FROM set_null; + +SHOW CREATE TABLE set_null; + +DROP TABLE 
data_null; +DROP TABLE set_null; From c30457a3edf98c61cd45b4c4269f3f5d4679e3ba Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sun, 14 Jun 2020 20:34:59 +0300 Subject: [PATCH 0674/2229] trigger ci From 394fb64a9cda376ca8dfa8bac08a4fbcfa9bf3bf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 10:44:02 +0300 Subject: [PATCH 0675/2229] Better way of implementation --- programs/odbc-bridge/CMakeLists.txt | 1 + .../AggregateFunctionArray.cpp | 5 +- .../AggregateFunctionCount.cpp | 8 ++- .../AggregateFunctionCount.h | 10 ++-- .../AggregateFunctionFactory.cpp | 60 +++++++++++-------- .../AggregateFunctionFactory.h | 33 +++++++--- .../AggregateFunctionForEach.cpp | 5 +- .../AggregateFunctionIf.cpp | 5 +- .../AggregateFunctionMerge.cpp | 5 +- .../AggregateFunctionNothing.h | 2 +- .../AggregateFunctionNull.cpp | 24 +++++--- .../AggregateFunctionOrFill.cpp | 1 + .../AggregateFunctionResample.cpp | 1 + .../AggregateFunctionState.cpp | 5 +- .../AggregateFunctionUniq.cpp | 6 +- .../AggregateFunctionUniq.h | 12 ---- .../AggregateFunctionUniqUpTo.cpp | 2 +- src/AggregateFunctions/IAggregateFunction.h | 20 ++++--- .../IAggregateFunctionCombinator.h | 1 + src/CMakeLists.txt | 2 +- src/Common/IFactoryWithAliases.h | 32 +++++----- src/DataStreams/tests/CMakeLists.txt | 2 +- src/DataTypes/DataTypeAggregateFunction.cpp | 3 +- .../DataTypeCustomSimpleAggregateFunction.cpp | 3 +- src/DataTypes/DataTypeFactory.cpp | 4 +- src/DataTypes/DataTypeFactory.h | 10 ++-- src/Formats/tests/CMakeLists.txt | 2 +- src/Functions/FunctionFactory.cpp | 2 +- src/Functions/FunctionFactory.h | 8 +-- src/Functions/FunctionsBitmap.h | 10 ++-- src/Functions/array/arrayReduce.cpp | 3 +- src/Functions/array/arrayReduceInRanges.cpp | 3 +- src/Interpreters/ExpressionAnalyzer.cpp | 3 +- src/Interpreters/tests/CMakeLists.txt | 4 +- src/Interpreters/tests/hash_map.cpp | 7 ++- .../Algorithms/SummingSortedAlgorithm.cpp | 3 +- .../MergeTree/registerStorageMergeTree.cpp | 5 +- src/TableFunctions/TableFunctionFactory.cpp | 2 +- src/TableFunctions/TableFunctionFactory.h | 9 ++- ...unt_distinct_return_not_nullable.reference | 4 +- ...315_count_distinct_return_not_nullable.sql | 1 - .../CMakeLists.txt | 2 +- .../CMakeLists.txt | 2 +- 43 files changed, 196 insertions(+), 136 deletions(-) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index ab8d94f2a0c..628f9ee018a 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -14,6 +14,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES set (CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE clickhouse_parsers + clickhouse_aggregate_functions daemon dbms Poco::Data diff --git a/src/AggregateFunctions/AggregateFunctionArray.cpp b/src/AggregateFunctions/AggregateFunctionArray.cpp index ced95185263..7fe4f1f448b 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionArray.cpp @@ -36,7 +36,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array &) const override { return std::make_shared(nested_function, arguments); } diff --git a/src/AggregateFunctions/AggregateFunctionCount.cpp b/src/AggregateFunctions/AggregateFunctionCount.cpp index 6c22fec87a2..b00adaa0f1a 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.cpp +++ 
b/src/AggregateFunctions/AggregateFunctionCount.cpp @@ -7,6 +7,12 @@ namespace DB { +AggregateFunctionPtr AggregateFunctionCount::getOwnNullAdapter( + const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const +{ + return std::make_shared(types[0], params); +} + namespace { @@ -22,7 +28,7 @@ AggregateFunctionPtr createAggregateFunctionCount(const std::string & name, cons void registerAggregateFunctionCount(AggregateFunctionFactory & factory) { - factory.registerFunction("count", createAggregateFunctionCount, AggregateFunctionFactory::CaseInsensitive); + factory.registerFunction("count", {createAggregateFunctionCount, {true}}, AggregateFunctionFactory::CaseInsensitive); } } diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index e54f014f7a4..feb5725d9f1 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -68,16 +68,14 @@ public: data(place).count = new_count; } - /// The function returns non-Nullable type even when wrapped with Null combinator. - bool returnDefaultWhenOnlyNull() const override - { - return true; - } + AggregateFunctionPtr getOwnNullAdapter( + const AggregateFunctionPtr &, const DataTypes & types, const Array & params) const override; }; /// Simply count number of not-NULL values. -class AggregateFunctionCountNotNullUnary final : public IAggregateFunctionDataHelper +class AggregateFunctionCountNotNullUnary final + : public IAggregateFunctionDataHelper { public: AggregateFunctionCountNotNullUnary(const DataTypePtr & argument, const Array & params) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 3982c48700b..7ff52fe0f70 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -29,18 +29,18 @@ namespace ErrorCodes } -void AggregateFunctionFactory::registerFunction(const String & name, Creator creator, CaseSensitiveness case_sensitiveness) +void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness) { - if (creator == nullptr) + if (creator_with_properties.creator == nullptr) throw Exception("AggregateFunctionFactory: the aggregate function " + name + " has been provided " " a null constructor", ErrorCodes::LOGICAL_ERROR); - if (!aggregate_functions.emplace(name, creator).second) + if (!aggregate_functions.emplace(name, creator_with_properties).second) throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique", ErrorCodes::LOGICAL_ERROR); if (case_sensitiveness == CaseInsensitive - && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator).second) + && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator_with_properties).second) throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique", ErrorCodes::LOGICAL_ERROR); } @@ -59,6 +59,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get( const String & name, const DataTypes & argument_types, const Array & parameters, + AggregateFunctionProperties & out_properties, int recursion_level) const { auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types); @@ -76,18 +77,11 @@ AggregateFunctionPtr AggregateFunctionFactory::get( DataTypes nested_types = 
combinator->transformArguments(type_without_low_cardinality); Array nested_parameters = combinator->transformParameters(parameters); - AggregateFunctionPtr nested_function; - - /// A little hack - if we have NULL arguments, don't even create nested function. - /// Combinator will check if nested_function was created. - if (name == "count" || std::none_of(type_without_low_cardinality.begin(), type_without_low_cardinality.end(), - [](const auto & type) { return type->onlyNull(); })) - nested_function = getImpl(name, nested_types, nested_parameters, recursion_level); - - return combinator->transformAggregateFunction(nested_function, type_without_low_cardinality, parameters); + AggregateFunctionPtr nested_function = getImpl(name, nested_types, nested_parameters, out_properties, recursion_level); + return combinator->transformAggregateFunction(nested_function, out_properties, type_without_low_cardinality, parameters); } - auto res = getImpl(name, type_without_low_cardinality, parameters, recursion_level); + auto res = getImpl(name, type_without_low_cardinality, parameters, out_properties, recursion_level); if (!res) throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR); return res; @@ -98,19 +92,37 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( const String & name_param, const DataTypes & argument_types, const Array & parameters, + AggregateFunctionProperties & out_properties, int recursion_level) const { String name = getAliasToOrName(name_param); + Value found; + /// Find by exact match. if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) - return it->second(name, argument_types, parameters); - + { + found = it->second; + } /// Find by case-insensitive name. /// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names. - if (recursion_level == 0) + else if (recursion_level == 0) { - if (auto it = case_insensitive_aggregate_functions.find(Poco::toLower(name)); it != case_insensitive_aggregate_functions.end()) - return it->second(name, argument_types, parameters); + if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) + found = jt->second; + } + + if (found.creator) + { + out_properties = found.properties; + + /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. + if (!out_properties.returns_default_when_only_null + && std::any_of(argument_types.begin(), argument_types.end(), [](const auto & type) { return type->onlyNull(); })) + { + return nullptr; + } + + return found.creator(name, argument_types, parameters); } /// Combinators of aggregate functions. 
@@ -126,9 +138,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
         DataTypes nested_types = combinator->transformArguments(argument_types);
         Array nested_parameters = combinator->transformParameters(parameters);
 
-        AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, recursion_level + 1);
-
-        return combinator->transformAggregateFunction(nested_function, argument_types, parameters);
+        AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties, recursion_level + 1);
+        return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
     }
 
     auto hints = this->getHints(name);
@@ -140,10 +151,11 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
 }
 
 
-AggregateFunctionPtr AggregateFunctionFactory::tryGet(const String & name, const DataTypes & argument_types, const Array & parameters) const
+AggregateFunctionPtr AggregateFunctionFactory::tryGet(
+    const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
 {
     return isAggregateFunctionName(name)
-        ? get(name, argument_types, parameters)
+        ? get(name, argument_types, parameters, out_properties)
         : nullptr;
 }
 
diff --git a/src/AggregateFunctions/AggregateFunctionFactory.h b/src/AggregateFunctions/AggregateFunctionFactory.h
index 6e755cc9e8c..ab45fcf683f 100644
--- a/src/AggregateFunctions/AggregateFunctionFactory.h
+++ b/src/AggregateFunctions/AggregateFunctionFactory.h
@@ -26,34 +26,50 @@ using DataTypes = std::vector<DataTypePtr>;
   */
 using AggregateFunctionCreator = std::function<AggregateFunctionPtr(const String &, const DataTypes &, const Array &)>;
 
+struct AggregateFunctionWithProperties
+{
+    AggregateFunctionCreator creator;
+    AggregateFunctionProperties properties;
+
+    AggregateFunctionWithProperties() = default;
+    AggregateFunctionWithProperties(const AggregateFunctionWithProperties &) = default;
+
+    template <typename Creator, std::enable_if_t<!std::is_same_v<Creator, AggregateFunctionWithProperties>> * = nullptr>
+    AggregateFunctionWithProperties(Creator creator_, AggregateFunctionProperties properties_ = {})
+        : creator(std::forward<Creator>(creator_)), properties(std::move(properties_))
+    {
+    }
+};
+
 
 /** Creates an aggregate function by name.
   */
-class AggregateFunctionFactory final : private boost::noncopyable, public IFactoryWithAliases<AggregateFunctionCreator>
+class AggregateFunctionFactory final : private boost::noncopyable, public IFactoryWithAliases<AggregateFunctionWithProperties>
 {
 public:
-
     static AggregateFunctionFactory & instance();
 
     /// Register a function by its name.
    /// No locking, you must register all functions before usage of get.
     void registerFunction(
         const String & name,
-        Creator creator,
+        Value creator,
         CaseSensitiveness case_sensitiveness = CaseSensitive);
 
     /// Throws an exception if not found.
     AggregateFunctionPtr get(
         const String & name,
         const DataTypes & argument_types,
-        const Array & parameters = {},
+        const Array & parameters,
+        AggregateFunctionProperties & out_properties,
         int recursion_level = 0) const;
 
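
Per-function Nullable handling moves from the removed `returnDefaultWhenOnlyNull()` virtual into two places: the static `returns_default_when_only_null` property carried by `AggregateFunctionWithProperties`, and the `getOwnNullAdapter` hook through which `count` substitutes `AggregateFunctionCountNotNullUnary` for itself, so the Null combinator no longer needs a hard-coded name check. A stripped-down, runnable illustration of the hook pattern, using stand-in types rather than the real `IAggregateFunction` interface:

```cpp
#include <iostream>
#include <memory>

struct Function;
using FunctionPtr = std::shared_ptr<Function>;

struct Function
{
    virtual ~Function() = default;
    virtual const char * name() const = 0;

    /// Like IAggregateFunction::getOwnNullAdapter: most functions return nullptr,
    /// so the Null combinator wraps them with its generic Nullable adapter.
    virtual FunctionPtr getOwnNullAdapter(const FunctionPtr & /*nested*/) const { return nullptr; }
};

struct CountNotNullUnary : Function
{
    const char * name() const override { return "countNotNull"; }
};

struct Count : Function
{
    const char * name() const override { return "count"; }

    /// count(x) over a Nullable column means "count rows where x IS NOT NULL",
    /// so count supplies its own adapter instead of taking the generic wrapper.
    FunctionPtr getOwnNullAdapter(const FunctionPtr &) const override
    {
        return std::make_shared<CountNotNullUnary>();
    }
};

int main()
{
    FunctionPtr f = std::make_shared<Count>();
    if (auto adapter = f->getOwnNullAdapter(f))
        std::cout << "Null combinator uses: " << adapter->name() << '\n';
}
```
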
     /// Returns nullptr if not found.
     AggregateFunctionPtr tryGet(
         const String & name,
         const DataTypes & argument_types,
-        const Array & parameters = {}) const;
+        const Array & parameters,
+        AggregateFunctionProperties & out_properties) const;
 
     bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
 
@@ -62,19 +78,20 @@ private:
         const String & name,
         const DataTypes & argument_types,
         const Array & parameters,
+        AggregateFunctionProperties & out_properties,
         int recursion_level) const;
 
 private:
-    using AggregateFunctions = std::unordered_map<String, Creator>;
+    using AggregateFunctions = std::unordered_map<String, Value>;
 
     AggregateFunctions aggregate_functions;
 
     /// Case insensitive aggregate functions will be additionally added here with lowercased name.
     AggregateFunctions case_insensitive_aggregate_functions;
 
-    const AggregateFunctions & getCreatorMap() const override { return aggregate_functions; }
+    const AggregateFunctions & getMap() const override { return aggregate_functions; }
 
-    const AggregateFunctions & getCaseInsensitiveCreatorMap() const override { return case_insensitive_aggregate_functions; }
+    const AggregateFunctions & getCaseInsensitiveMap() const override { return case_insensitive_aggregate_functions; }
 
     String getFactoryName() const override { return "AggregateFunctionFactory"; }
 
diff --git a/src/AggregateFunctions/AggregateFunctionForEach.cpp b/src/AggregateFunctions/AggregateFunctionForEach.cpp
index 775dab2dcd9..693bc6839fa 100644
--- a/src/AggregateFunctions/AggregateFunctionForEach.cpp
+++ b/src/AggregateFunctions/AggregateFunctionForEach.cpp
@@ -33,7 +33,10 @@ public:
     }
 
     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
+        const DataTypes & arguments,
+        const Array &) const override
     {
         return std::make_shared<AggregateFunctionForEach>(nested_function, arguments);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionIf.cpp b/src/AggregateFunctions/AggregateFunctionIf.cpp
index cb5f9f15b1c..19a175de911 100644
--- a/src/AggregateFunctions/AggregateFunctionIf.cpp
+++ b/src/AggregateFunctions/AggregateFunctionIf.cpp
@@ -31,7 +31,10 @@ public:
     }
 
     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
+        const DataTypes & arguments,
+        const Array &) const override
     {
         return std::make_shared<AggregateFunctionIf>(nested_function, arguments);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionMerge.cpp b/src/AggregateFunctions/AggregateFunctionMerge.cpp
index 05d941844d9..2ce3f0e11f6 100644
--- a/src/AggregateFunctions/AggregateFunctionMerge.cpp
+++ b/src/AggregateFunctions/AggregateFunctionMerge.cpp
@@ -34,7 +34,10 @@ public:
     }
 
     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
+        const DataTypes & arguments,
+        const Array &) const override
     {
         const DataTypePtr & argument = arguments[0];
 
diff --git a/src/AggregateFunctions/AggregateFunctionNothing.h b/src/AggregateFunctions/AggregateFunctionNothing.h
index 511dbbecd38..b3206f6db6e 100644
--- a/src/AggregateFunctions/AggregateFunctionNothing.h
+++ b/src/AggregateFunctions/AggregateFunctionNothing.h
@@ -25,7 +25,7 @@ public:
 
     DataTypePtr getReturnType() const override
     {
-        return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
+        return argument_types.front();
     }
 
     void create(AggregateDataPtr) const override
diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp
index 993cb93c991..85d960eae62 100644
--- a/src/AggregateFunctions/AggregateFunctionNull.cpp
+++ b/src/AggregateFunctions/AggregateFunctionNull.cpp
@@ -31,13 +31,11 @@ public:
     }
 
     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties & properties,
+        const DataTypes & arguments,
+        const Array & params) const override
     {
-        /// Special case for 'count' function. It could be called with Nullable arguments
-        /// - that means - count number of calls, when all arguments are not NULL.
-        if (nested_function && nested_function->getName() == "count")
-            return std::make_shared<AggregateFunctionCountNotNullUnary>(arguments[0], params);
-
         bool has_nullable_types = false;
         bool has_null_types = false;
         for (const auto & arg_type : arguments)
@@ -58,15 +56,23 @@ public:
             ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         if (has_null_types)
-            return std::make_shared<AggregateFunctionNothing>(arguments, params);
+        {
+            /// Currently the only functions that return non-NULL on all NULL arguments are count and uniq, and they return UInt64.
+            if (properties.returns_default_when_only_null)
+                return std::make_shared<AggregateFunctionNothing>(DataTypes{std::make_shared<DataTypeUInt64>()}, params);
+            else
+                return std::make_shared<AggregateFunctionNothing>(arguments, params);
+        }
 
         assert(nested_function);
 
         if (auto adapter = nested_function->getOwnNullAdapter(nested_function, arguments, params))
             return adapter;
 
-        bool return_type_is_nullable = !nested_function->returnDefaultWhenOnlyNull() && nested_function->getReturnType()->canBeInsideNullable();
-        bool serialize_flag = return_type_is_nullable || nested_function->returnDefaultWhenOnlyNull();
+        bool return_type_is_nullable = !properties.returns_default_when_only_null && nested_function->getReturnType()->canBeInsideNullable();
+        bool serialize_flag = return_type_is_nullable || properties.returns_default_when_only_null;
 
         if (arguments.size() == 1)
         {
diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.cpp b/src/AggregateFunctions/AggregateFunctionOrFill.cpp
index b9cc2f9b8b7..ce8fc8d9ca5 100644
--- a/src/AggregateFunctions/AggregateFunctionOrFill.cpp
+++ b/src/AggregateFunctions/AggregateFunctionOrFill.cpp
@@ -21,6 +21,7 @@ public:
 
     AggregateFunctionPtr transformAggregateFunction(
         const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
         const DataTypes & arguments,
         const Array & params) const override
     {
diff --git a/src/AggregateFunctions/AggregateFunctionResample.cpp b/src/AggregateFunctions/AggregateFunctionResample.cpp
index d8d13e22120..389c9048918 100644
--- a/src/AggregateFunctions/AggregateFunctionResample.cpp
+++ b/src/AggregateFunctions/AggregateFunctionResample.cpp
@@ -43,6 +43,7 @@ public:
 
     AggregateFunctionPtr transformAggregateFunction(
         const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
         const DataTypes & arguments,
         const Array & params) const override
     {
diff --git a/src/AggregateFunctions/AggregateFunctionState.cpp b/src/AggregateFunctions/AggregateFunctionState.cpp
index fd92953d114..9d1c677c0ff 100644
--- a/src/AggregateFunctions/AggregateFunctionState.cpp
+++ b/src/AggregateFunctions/AggregateFunctionState.cpp
@@ -24,7 +24,10 @@ public:
     }
 
     AggregateFunctionPtr transformAggregateFunction(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params) const override
+        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionProperties &,
+        const DataTypes & arguments,
+        const Array & params) const override
     {
         return std::make_shared<AggregateFunctionState>(nested_function, arguments, params);
     }
diff --git a/src/AggregateFunctions/AggregateFunctionUniq.cpp b/src/AggregateFunctions/AggregateFunctionUniq.cpp
index 1d079550124..40742ae336e 100644
--- a/src/AggregateFunctions/AggregateFunctionUniq.cpp
+++ b/src/AggregateFunctions/AggregateFunctionUniq.cpp
@@ -123,13 +123,13 @@ AggregateFunctionPtr createAggregateFunctionUniq(const std::string & name, const
 void registerAggregateFunctionsUniq(AggregateFunctionFactory & factory)
 {
     factory.registerFunction("uniq",
-        createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>);
+        {createAggregateFunctionUniq<AggregateFunctionUniqUniquesHashSetData, AggregateFunctionUniqUniquesHashSetDataForVariadic>, {true}});
 
     factory.registerFunction("uniqHLL12",
-        createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>);
+        {createAggregateFunctionUniq<false, AggregateFunctionUniqHLL12Data, AggregateFunctionUniqHLL12DataForVariadic>, {true}});
 
     factory.registerFunction("uniqExact",
-        createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>);
+        {createAggregateFunctionUniq<true, AggregateFunctionUniqExactData, AggregateFunctionUniqExactData<String>>, {true}});
 }
 
 }
diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h
index 1588611b8a2..334e809ebe7 100644
--- a/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/src/AggregateFunctions/AggregateFunctionUniq.h
@@ -244,12 +244,6 @@ public:
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
-
-    /// The function returns non-Nullable type even when wrapped with Null combinator.
-    bool returnDefaultWhenOnlyNull() const override
-    {
-        return true;
-    }
 };
 
 
@@ -304,12 +298,6 @@ public:
     {
         assert_cast<ColumnUInt64 &>(to).getData().push_back(this->data(place).set.size());
     }
-
-    /// The function returns non-Nullable type even when wrapped with Null combinator.
-    bool returnDefaultWhenOnlyNull() const override
-    {
-        return true;
-    }
 };
 
 }
diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp
index a9a8ae0eaf3..9befc515de6 100644
--- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp
+++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.cpp
@@ -85,7 +85,7 @@ AggregateFunctionPtr createAggregateFunctionUniqUpTo(const std::string & name, c
 
 void registerAggregateFunctionUniqUpTo(AggregateFunctionFactory & factory)
 {
-    factory.registerFunction("uniqUpTo", createAggregateFunctionUniqUpTo);
+    factory.registerFunction("uniqUpTo", {createAggregateFunctionUniqUpTo, {true}});
 }
 
 }
diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h
index 439a5e07c2e..5f4291dd21d 100644
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@@ -166,17 +166,12 @@ public:
       * nested_function is a smart pointer to this aggregate function itself.
       * arguments and params are for nested_function.
       */
-    virtual AggregateFunctionPtr getOwnNullAdapter(const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const
+    virtual AggregateFunctionPtr getOwnNullAdapter(
+        const AggregateFunctionPtr & /*nested_function*/, const DataTypes & /*arguments*/, const Array & /*params*/) const
     {
         return nullptr;
     }
 
-    /** When the function is wrapped with Null combinator,
-      * should we return Nullable type with NULL when no values were aggregated
-      * or we should return non-Nullable type with default value (example: count, countDistinct).
- */ - virtual bool returnDefaultWhenOnlyNull() const { return false; } - const DataTypes & getArgumentTypes() const { return argument_types; } const Array & getParameters() const { return parameters; } @@ -286,4 +281,15 @@ public: }; +/// Properties of aggregate function that are independent of argument types and parameters. +struct AggregateFunctionProperties +{ + /** When the function is wrapped with Null combinator, + * should we return Nullable type with NULL when no values were aggregated + * or we should return non-Nullable type with default value (example: count, countDistinct). + */ + bool returns_default_when_only_null = false; +}; + + } diff --git a/src/AggregateFunctions/IAggregateFunctionCombinator.h b/src/AggregateFunctions/IAggregateFunctionCombinator.h index 03e2766dc2c..89c313567a3 100644 --- a/src/AggregateFunctions/IAggregateFunctionCombinator.h +++ b/src/AggregateFunctions/IAggregateFunctionCombinator.h @@ -59,6 +59,7 @@ public: */ virtual AggregateFunctionPtr transformAggregateFunction( const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties & properties, const DataTypes & arguments, const Array & params) const = 0; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fe223373cf3..321bba1139a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -381,6 +381,6 @@ if (ENABLE_TESTS AND USE_GTEST) -Wno-gnu-zero-variadic-macro-arguments ) - target_link_libraries(unit_tests_dbms PRIVATE ${GTEST_BOTH_LIBRARIES} clickhouse_functions clickhouse_parsers dbms clickhouse_common_zookeeper string_utils) + target_link_libraries(unit_tests_dbms PRIVATE ${GTEST_BOTH_LIBRARIES} clickhouse_functions clickhouse_aggregate_functions clickhouse_parsers dbms clickhouse_common_zookeeper string_utils) add_check(unit_tests_dbms) endif () diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 64703e51082..994b2c1a02c 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -16,14 +16,14 @@ namespace ErrorCodes } /** If stored objects may have several names (aliases) - * this interface may be helpful - * template parameter is available as Creator - */ -template -class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> + * this interface may be helpful + * template parameter is available as Value + */ +template +class IFactoryWithAliases : public IHints<2, IFactoryWithAliases> { protected: - using Creator = CreatorFunc; + using Value = ValueType; String getAliasToOrName(const String & name) const { @@ -43,13 +43,13 @@ public: CaseInsensitive }; - /** Register additional name for creator - * real_name have to be already registered. - */ + /** Register additional name for value + * real_name have to be already registered. 
+ */ void registerAlias(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive) { - const auto & creator_map = getCreatorMap(); - const auto & case_insensitive_creator_map = getCaseInsensitiveCreatorMap(); + const auto & creator_map = getMap(); + const auto & case_insensitive_creator_map = getCaseInsensitiveMap(); const String factory_name = getFactoryName(); String real_dict_name; @@ -80,7 +80,7 @@ public: { std::vector result; auto getter = [](const auto & pair) { return pair.first; }; - std::transform(getCreatorMap().begin(), getCreatorMap().end(), std::back_inserter(result), getter); + std::transform(getMap().begin(), getMap().end(), std::back_inserter(result), getter); std::transform(aliases.begin(), aliases.end(), std::back_inserter(result), getter); return result; } @@ -88,7 +88,7 @@ public: bool isCaseInsensitive(const String & name) const { String name_lowercase = Poco::toLower(name); - return getCaseInsensitiveCreatorMap().count(name_lowercase) || case_insensitive_aliases.count(name_lowercase); + return getCaseInsensitiveMap().count(name_lowercase) || case_insensitive_aliases.count(name_lowercase); } const String & aliasTo(const String & name) const @@ -109,11 +109,11 @@ public: virtual ~IFactoryWithAliases() override {} private: - using InnerMap = std::unordered_map; // name -> creator + using InnerMap = std::unordered_map; // name -> creator using AliasMap = std::unordered_map; // alias -> original type - virtual const InnerMap & getCreatorMap() const = 0; - virtual const InnerMap & getCaseInsensitiveCreatorMap() const = 0; + virtual const InnerMap & getMap() const = 0; + virtual const InnerMap & getCaseInsensitiveMap() const = 0; virtual String getFactoryName() const = 0; /// Alias map to data_types from previous two maps diff --git a/src/DataStreams/tests/CMakeLists.txt b/src/DataStreams/tests/CMakeLists.txt index 14db417b71c..d01c79aee5f 100644 --- a/src/DataStreams/tests/CMakeLists.txt +++ b/src/DataStreams/tests/CMakeLists.txt @@ -1,4 +1,4 @@ set(SRCS) add_executable (finish_sorting_stream finish_sorting_stream.cpp ${SRCS}) -target_link_libraries (finish_sorting_stream PRIVATE dbms) +target_link_libraries (finish_sorting_stream PRIVATE clickhouse_aggregate_functions dbms) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 59811b1cd55..fdb17606f78 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -392,7 +392,8 @@ static DataTypePtr create(const ASTPtr & arguments) if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); - function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row); + AggregateFunctionProperties properties; + function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); return std::make_shared(function, argument_types, params_row); } diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 2ddce184cce..157192642ba 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -110,7 +110,8 @@ static std::pair create(const ASTPtr & argum if (function_name.empty()) throw Exception("Logical error: empty name of aggregate function passed", ErrorCodes::LOGICAL_ERROR); - function = 
AggregateFunctionFactory::instance().get(function_name, argument_types, params_row); + AggregateFunctionProperties properties; + function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); // check function if (std::find(std::begin(supported_functions), std::end(supported_functions), function->getName()) == std::end(supported_functions)) diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 880f25d009d..69dbed10ccc 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -80,7 +80,7 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr } -void DataTypeFactory::registerDataType(const String & family_name, Creator creator, CaseSensitiveness case_sensitiveness) +void DataTypeFactory::registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness) { if (creator == nullptr) throw Exception("DataTypeFactory: the data type family " + family_name + " has been provided " @@ -136,7 +136,7 @@ void DataTypeFactory::registerSimpleDataTypeCustom(const String &name, SimpleCre }, case_sensitiveness); } -const DataTypeFactory::Creator& DataTypeFactory::findCreatorByName(const String & family_name) const +const DataTypeFactory::Value & DataTypeFactory::findCreatorByName(const String & family_name) const { { DataTypesDictionary::const_iterator it = data_types.find(family_name); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 6bf09d31727..67b72945acc 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -23,7 +23,7 @@ class DataTypeFactory final : private boost::noncopyable, public IFactoryWithAli { private: using SimpleCreator = std::function; - using DataTypesDictionary = std::unordered_map; + using DataTypesDictionary = std::unordered_map; using CreatorWithCustom = std::function(const ASTPtr & parameters)>; using SimpleCreatorWithCustom = std::function()>; @@ -35,7 +35,7 @@ public: DataTypePtr get(const ASTPtr & ast) const; /// Register a type family by its name. - void registerDataType(const String & family_name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); /// Register a simple data type, that have no parameters. 
void registerSimpleDataType(const String & name, SimpleCreator creator, CaseSensitiveness case_sensitiveness = CaseSensitive); @@ -47,7 +47,7 @@ public: void registerSimpleDataTypeCustom(const String & name, SimpleCreatorWithCustom creator, CaseSensitiveness case_sensitiveness = CaseSensitive); private: - const Creator& findCreatorByName(const String & family_name) const; + const Value & findCreatorByName(const String & family_name) const; private: DataTypesDictionary data_types; @@ -57,9 +57,9 @@ private: DataTypeFactory(); - const DataTypesDictionary & getCreatorMap() const override { return data_types; } + const DataTypesDictionary & getMap() const override { return data_types; } - const DataTypesDictionary & getCaseInsensitiveCreatorMap() const override { return case_insensitive_data_types; } + const DataTypesDictionary & getCaseInsensitiveMap() const override { return case_insensitive_data_types; } String getFactoryName() const override { return "DataTypeFactory"; } }; diff --git a/src/Formats/tests/CMakeLists.txt b/src/Formats/tests/CMakeLists.txt index 187700dff72..e1cb7604fab 100644 --- a/src/Formats/tests/CMakeLists.txt +++ b/src/Formats/tests/CMakeLists.txt @@ -1,4 +1,4 @@ set(SRCS ) add_executable (tab_separated_streams tab_separated_streams.cpp ${SRCS}) -target_link_libraries (tab_separated_streams PRIVATE dbms) +target_link_libraries (tab_separated_streams PRIVATE clickhouse_aggregate_functions dbms) diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 63f12188771..fbc8e11a9c9 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes void FunctionFactory::registerFunction(const std::string & name, - Creator creator, + Value creator, CaseSensitiveness case_sensitiveness) { if (!functions.emplace(name, creator).second) diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index ccaf2044693..7990e78daf8 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -53,7 +53,7 @@ public: FunctionOverloadResolverImplPtr tryGetImpl(const std::string & name, const Context & context) const; private: - using Functions = std::unordered_map; + using Functions = std::unordered_map; Functions functions; Functions case_insensitive_functions; @@ -64,9 +64,9 @@ private: return std::make_unique(Function::create(context)); } - const Functions & getCreatorMap() const override { return functions; } + const Functions & getMap() const override { return functions; } - const Functions & getCaseInsensitiveCreatorMap() const override { return case_insensitive_functions; } + const Functions & getCaseInsensitiveMap() const override { return case_insensitive_functions; } String getFactoryName() const override { return "FunctionFactory"; } @@ -74,7 +74,7 @@ private: /// No locking, you must register all functions before usage of get. 
void registerFunction( const std::string & name, - Creator creator, + Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); }; diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index bf84bfbe47e..868bf8095a4 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -113,8 +113,9 @@ public: auto nested_type = array_type->getNestedType(); DataTypes argument_types = {nested_type}; Array params_row; - AggregateFunctionPtr bitmap_function - = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); + AggregateFunctionProperties properties; + AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get( + AggregateFunctionGroupBitmapData::name(), argument_types, params_row, properties); return std::make_shared(bitmap_function, argument_types, params_row); } @@ -156,8 +157,9 @@ private: // output data Array params_row; - AggregateFunctionPtr bitmap_function - = AggregateFunctionFactory::instance().get(AggregateFunctionGroupBitmapData::name(), argument_types, params_row); + AggregateFunctionProperties properties; + AggregateFunctionPtr bitmap_function = AggregateFunctionFactory::instance().get( + AggregateFunctionGroupBitmapData::name(), argument_types, params_row, properties); auto col_to = ColumnAggregateFunction::create(bitmap_function); col_to->reserve(offsets.size()); diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 8d44acc82f5..2b37965260f 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -97,7 +97,8 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, aggregate_function_name, params_row, "function " + getName()); - aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row); + AggregateFunctionProperties properties; + aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } return aggregate_function->getReturnType(); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 2dd0cd56343..c3c65c4d9e5 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -115,7 +115,8 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, aggregate_function_name, params_row, "function " + getName()); - aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row); + AggregateFunctionProperties properties; + aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); } return std::make_shared(aggregate_function->getReturnType()); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index ecfa011f1c8..4c2a8b3dcea 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -420,8 +420,9 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ExpressionActionsPtr & action aggregate.argument_names[i] = name; } + AggregateFunctionProperties properties; aggregate.parameters = (node->parameters) ? 
getAggregateFunctionParametersArray(node->parameters) : Array(); - aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters); + aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties); aggregate_descriptions.push_back(aggregate); } diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 324a38b1a17..4ab7da014e4 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -34,11 +34,11 @@ target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHAS target_link_libraries (two_level_hash_map PRIVATE dbms) add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor.cpp) -target_link_libraries (in_join_subqueries_preprocessor PRIVATE dbms clickhouse_parsers) +target_link_libraries (in_join_subqueries_preprocessor PRIVATE clickhouse_aggregate_functions dbms clickhouse_parsers) add_check(in_join_subqueries_preprocessor) add_executable (users users.cpp) -target_link_libraries (users PRIVATE dbms clickhouse_common_config) +target_link_libraries (users PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_config) if (OS_LINUX) add_executable (internal_iotop internal_iotop.cpp) diff --git a/src/Interpreters/tests/hash_map.cpp b/src/Interpreters/tests/hash_map.cpp index 8ddbd3b5886..dc87fd9ddde 100644 --- a/src/Interpreters/tests/hash_map.cpp +++ b/src/Interpreters/tests/hash_map.cpp @@ -103,9 +103,10 @@ int main(int argc, char ** argv) std::vector data(n); Value value; - AggregateFunctionPtr func_count = factory.get("count", data_types_empty); - AggregateFunctionPtr func_avg = factory.get("avg", data_types_uint64); - AggregateFunctionPtr func_uniq = factory.get("uniq", data_types_uint64); + AggregateFunctionProperties properties; + AggregateFunctionPtr func_count = factory.get("count", data_types_empty, {}, properties); + AggregateFunctionPtr func_avg = factory.get("avg", data_types_uint64, {}, properties); + AggregateFunctionPtr func_uniq = factory.get("uniq", data_types_uint64, {}, properties); #define INIT \ { \ diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 89154044ae5..8be4aac4067 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -47,7 +47,8 @@ struct SummingSortedAlgorithm::AggregateDescription void init(const char * function_name, const DataTypes & argument_types) { - function = AggregateFunctionFactory::instance().get(function_name, argument_types); + AggregateFunctionProperties properties; + function = AggregateFunctionFactory::instance().get(function_name, argument_types, {}, properties); add_function = function->getAddressOfAddFunction(); state.reset(function->sizeOfData(), function->alignOfData()); } diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index e08ea1739a5..13cce2b0536 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -117,8 +117,9 @@ static void appendGraphitePattern( aggregate_function_name, params_row, "GraphiteMergeTree storage initialization"); /// TODO Not only Float64 - pattern.function = AggregateFunctionFactory::instance().get(aggregate_function_name, {std::make_shared()}, - params_row); + AggregateFunctionProperties 
properties; + pattern.function = AggregateFunctionFactory::instance().get( + aggregate_function_name, {std::make_shared()}, params_row, properties); } else if (startsWith(key, "retention")) { diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index 1b34c1a1e6f..bc139edfb73 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -15,7 +15,7 @@ namespace ErrorCodes } -void TableFunctionFactory::registerFunction(const std::string & name, Creator creator, CaseSensitiveness case_sensitiveness) +void TableFunctionFactory::registerFunction(const std::string & name, Value creator, CaseSensitiveness case_sensitiveness) { if (!table_functions.emplace(name, creator).second) throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique", diff --git a/src/TableFunctions/TableFunctionFactory.h b/src/TableFunctions/TableFunctionFactory.h index cd87fa9c7f0..6d0302a64ff 100644 --- a/src/TableFunctions/TableFunctionFactory.h +++ b/src/TableFunctions/TableFunctionFactory.h @@ -24,12 +24,11 @@ using TableFunctionCreator = std::function; class TableFunctionFactory final: private boost::noncopyable, public IFactoryWithAliases { public: - static TableFunctionFactory & instance(); /// Register a function by its name. /// No locking, you must register all functions before usage of get. - void registerFunction(const std::string & name, Creator creator, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerFunction(const std::string & name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive); template void registerFunction(CaseSensitiveness case_sensitiveness = CaseSensitive) @@ -50,11 +49,11 @@ public: bool isTableFunctionName(const std::string & name) const; private: - using TableFunctions = std::unordered_map; + using TableFunctions = std::unordered_map; - const TableFunctions & getCreatorMap() const override { return table_functions; } + const TableFunctions & getMap() const override { return table_functions; } - const TableFunctions & getCaseInsensitiveCreatorMap() const override { return case_insensitive_table_functions; } + const TableFunctions & getCaseInsensitiveMap() const override { return case_insensitive_table_functions; } String getFactoryName() const override { return "TableFunctionFactory"; } diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference index f8b77704aa3..76b82419556 100644 --- a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference @@ -5,5 +5,5 @@ 5 5 0 -\N -\N +0 +0 diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql index 2d9b5ef54aa..9787ee2bd70 100644 --- a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql @@ -7,6 +7,5 @@ SELECT uniqExact(number >= 5 ? number : NULL) FROM numbers(10); SELECT count(DISTINCT number >= 5 ? number : NULL) FROM numbers(10); SELECT count(NULL); --- These two returns NULL for now, but we want to change them to return 0. 
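
The reference-file change above, where `\N` becomes `0`, is the user-visible effect of this commit: `uniq(NULL)` and `count(DISTINCT NULL)` now return a non-Nullable `UInt64` value `0` instead of `NULL`, because the Null combinator consults `returns_default_when_only_null` when all argument types are only-NULL. A small sketch of that decision, under the assumption that only the two outcomes shown in the AggregateFunctionNull.cpp diff exist:

```cpp
#include <iostream>
#include <string>

struct Properties { bool returns_default_when_only_null = false; };

/// Mirrors the has_null_types branch of the Null combinator's transformAggregateFunction.
std::string resultTypeForAllNullArguments(const Properties & properties)
{
    if (properties.returns_default_when_only_null)
        return "UInt64 (value 0)";           /// e.g. SELECT uniq(NULL)
    return "Nullable(Nothing) (value NULL)"; /// e.g. SELECT any(NULL)
}

int main()
{
    std::cout << resultTypeForAllNullArguments({true}) << '\n';  /// count/uniq family
    std::cout << resultTypeForAllNullArguments({false}) << '\n'; /// everything else
}
```
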
SELECT uniq(NULL); SELECT count(DISTINCT NULL); diff --git a/utils/convert-month-partitioned-parts/CMakeLists.txt b/utils/convert-month-partitioned-parts/CMakeLists.txt index 14853590c76..ea6429a0610 100644 --- a/utils/convert-month-partitioned-parts/CMakeLists.txt +++ b/utils/convert-month-partitioned-parts/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable (convert-month-partitioned-parts main.cpp) -target_link_libraries(convert-month-partitioned-parts PRIVATE dbms clickhouse_parsers boost::program_options) +target_link_libraries(convert-month-partitioned-parts PRIVATE clickhouse_aggregate_functions dbms clickhouse_parsers boost::program_options) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt index 08907e1c5b9..882c510ea1c 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt +++ b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt @@ -1,3 +1,3 @@ add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS}) target_compile_options(zookeeper-adjust-block-numbers-to-parts PRIVATE -Wno-format) -target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE dbms clickhouse_common_zookeeper boost::program_options) +target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_zookeeper boost::program_options) From 217d05443a654bb35b760c2d144aa3516d30fd97 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 20:41:45 +0300 Subject: [PATCH 0676/2229] Remove old non-automated test --- src/Client/tests/CMakeLists.txt | 2 -- src/Client/tests/test_connect.cpp | 59 ------------------------------- 2 files changed, 61 deletions(-) delete mode 100644 src/Client/tests/test_connect.cpp diff --git a/src/Client/tests/CMakeLists.txt b/src/Client/tests/CMakeLists.txt index d952c006bb5..e69de29bb2d 100644 --- a/src/Client/tests/CMakeLists.txt +++ b/src/Client/tests/CMakeLists.txt @@ -1,2 +0,0 @@ -add_executable(test-connect test_connect.cpp) -target_link_libraries (test-connect PRIVATE dbms) diff --git a/src/Client/tests/test_connect.cpp b/src/Client/tests/test_connect.cpp deleted file mode 100644 index 50075cc24a6..00000000000 --- a/src/Client/tests/test_connect.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - - -/** In a loop it connects to the server and immediately breaks the connection. - * Using the SO_LINGER option, we ensure that the connection is terminated by sending a RST packet (not FIN). - * This behavior causes a bug in the TCPServer implementation in the Poco library. - */ -int main(int argc, char ** argv) -try -{ - for (size_t i = 0, num_iters = argc >= 2 ? 
DB::parse(argv[1]) : 1; i < num_iters; ++i) - { - std::cerr << "."; - - Poco::Net::SocketAddress address("localhost", 9000); - - int fd = socket(PF_INET, SOCK_STREAM, IPPROTO_IP); - - if (fd < 0) - DB::throwFromErrno("Cannot create socket", 0); - - linger linger_value; - linger_value.l_onoff = 1; - linger_value.l_linger = 0; - - if (0 != setsockopt(fd, SOL_SOCKET, SO_LINGER, &linger_value, sizeof(linger_value))) - DB::throwFromErrno("Cannot set linger", 0); - - try - { - int res = connect(fd, address.addr(), address.length()); - - if (res != 0 && errno != EINPROGRESS && errno != EWOULDBLOCK) - { - close(fd); - DB::throwFromErrno("Cannot connect", 0); - } - - close(fd); - } - catch (const Poco::Exception & e) - { - std::cerr << e.displayText() << "\n"; - } - } - - std::cerr << "\n"; -} -catch (const Poco::Exception & e) -{ - std::cerr << e.displayText() << "\n"; -} From fcd23d02eec3fd7029d3090f31a117f75bbe3be1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 20:57:50 +0300 Subject: [PATCH 0677/2229] Fix timeout in sql_fuzzy test --- tests/queries/0_stateless/00746_sql_fuzzy.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00746_sql_fuzzy.sh b/tests/queries/0_stateless/00746_sql_fuzzy.sh index f417f0146c2..539d0659d36 100755 --- a/tests/queries/0_stateless/00746_sql_fuzzy.sh +++ b/tests/queries/0_stateless/00746_sql_fuzzy.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "select name from system.table_functions format TSV;" > $S # if you want long run use: env SQL_FUZZY_RUNS=100000 clickhouse-test sql_fuzzy for SQL_FUZZY_RUN in $(seq ${SQL_FUZZY_RUNS:=10}); do - env SQL_FUZZY_RUN=$SQL_FUZZY_RUN $CURDIR/00746_sql_fuzzy.pl | $CLICKHOUSE_CLIENT --max_execution_time 10 -n --ignore-error >/dev/null 2>&1 + env SQL_FUZZY_RUN=$SQL_FUZZY_RUN $CURDIR/00746_sql_fuzzy.pl | $CLICKHOUSE_CLIENT --format Null --max_execution_time 10 -n --ignore-error >/dev/null 2>&1 if [[ `$CLICKHOUSE_CLIENT -q "SELECT 'Still alive'"` != 'Still alive' ]]; then break fi From 5d891f6c878562329c8a488debd704317c63b90d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 21:13:39 +0300 Subject: [PATCH 0678/2229] Fix tests --- .../0_stateless/01277_convert_field_to_type_logical_error.sql | 2 +- .../0_stateless/01311_comparison_with_constant_string.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01277_convert_field_to_type_logical_error.sql b/tests/queries/0_stateless/01277_convert_field_to_type_logical_error.sql index 05295575cf1..4712c124237 100644 --- a/tests/queries/0_stateless/01277_convert_field_to_type_logical_error.sql +++ b/tests/queries/0_stateless/01277_convert_field_to_type_logical_error.sql @@ -1 +1 @@ -SELECT -2487, globalNullIn(toIntervalMinute(-88074), 'qEkek..'), [-27.537293]; -- { serverError 70 } +SELECT -2487, globalNullIn(toIntervalMinute(-88074), 'qEkek..'), [-27.537293]; -- { serverError 53 } diff --git a/tests/queries/0_stateless/01311_comparison_with_constant_string.sql b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql index 6ca736ba146..d6641a50c45 100644 --- a/tests/queries/0_stateless/01311_comparison_with_constant_string.sql +++ b/tests/queries/0_stateless/01311_comparison_with_constant_string.sql @@ -11,7 +11,7 @@ SELECT 1 IN (1.23, '2', 2); SELECT '---'; -- it should work but it doesn't. 
-SELECT 1 = '1.0'; -- { serverError 131 } +SELECT 1 = '1.0'; -- { serverError 53 } SELECT '---'; SELECT 1 = '257'; From db0fc6c9a661ed1f1443e95f1f31252a12bf8244 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 20:48:10 +0300 Subject: [PATCH 0679/2229] Fix tests --- src/DataTypes/DataTypeDateTime64.h | 2 ++ src/Interpreters/convertFieldToType.cpp | 8 +++++++- tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index b575e9d81c1..249da255eb0 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -45,6 +45,8 @@ public: void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; bool equals(const IDataType & rhs) const override; + + bool canBePromoted() const override { return false; } }; /** Tansform-type wrapper for DateTime64, applies given Transform to DateTime64 value or only to a whole part of it. diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 21cf9422c32..d46573d0461 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -180,7 +180,13 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } - // TODO (vnemkov): extra cases for DateTime64: converting from integer, converting from Decimal + if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) + { + /// Already in needed type. + return src; + } + + /// TODO Conversion from integers to DateTime64 } else if (which_type.isStringOrFixedString()) { diff --git a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql index 7848b4aaf24..4ffcf3be3c9 100644 --- a/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql +++ b/tests/queries/0_stateless/01268_DateTime64_in_WHERE.sql @@ -5,8 +5,8 @@ WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT materiali WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE DT64 = materialize(S); -- {serverError 43} WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT * WHERE materialize(S) = DT64; -- {serverError 43} -SELECT * WHERE toDateTime64(123.345, 3) == 'ABCD'; -- {serverError 131} -- invalid DateTime64 string -SELECT * WHERE toDateTime64(123.345, 3) == '2020-02-05 14:34:12.33333333333333333333333333333333333333333333333333333333'; -- {serverError 131} -- invalid string length +SELECT * WHERE toDateTime64(123.345, 3) == 'ABCD'; -- {serverError 53} -- invalid DateTime64 string +SELECT * WHERE toDateTime64(123.345, 3) == '2020-02-05 14:34:12.33333333333333333333333333333333333333333333333333333333'; -- {serverError 53} -- invalid string length SELECT 'in SELECT'; WITH '2020-02-05 14:34:12.333' as S, toDateTime64(S, 3) as DT64 SELECT DT64 = S; From f6c52fe1c225cc53a3e184d6a8e9433733d2b59c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 03:16:01 +0300 Subject: [PATCH 0680/2229] Allow comparison with String in index analysis; simplify code #11630 --- src/Common/FieldVisitors.h | 303 ++++++++---------------- src/Storages/MergeTree/KeyCondition.cpp | 4 +- 2 files changed, 98 insertions(+), 209 deletions(-) diff --git a/src/Common/FieldVisitors.h b/src/Common/FieldVisitors.h index 90f80974ab1..257994a6bd2 100644 --- a/src/Common/FieldVisitors.h 
+++ b/src/Common/FieldVisitors.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include class SipHash; @@ -184,232 +186,119 @@ template <> constexpr bool isDecimalField>() { return t class FieldVisitorAccurateEquals : public StaticVisitor { public: - bool operator() (const UInt64 &, const Null &) const { return false; } - bool operator() (const UInt64 & l, const UInt64 & r) const { return l == r; } - bool operator() (const UInt64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Int64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const UInt64 & l, const Float64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const UInt64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - bool operator() (const Int64 &, const Null &) const { return false; } - bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const Int64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Int64 & r) const { return l == r; } - bool operator() (const Int64 & l, const Float64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const Int64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - bool operator() (const Float64 &, const Null &) const { return false; } - bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const Float64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Int64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const Float64 & l, const Float64 & r) const { return l == r; } - bool operator() (const Float64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - template - bool operator() (const Null &, const T &) const - { - return std::is_same_v; - } - - template - bool operator() (const String & l, const T & r) const - { - if constexpr (std::is_same_v) - return l == r; - if constexpr (std::is_same_v) - return stringToUUID(l) == r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - - template - bool operator() (const UInt128 & l, const T & r) const - { - if constexpr (std::is_same_v) - return l == r; - if constexpr (std::is_same_v) - return l == stringToUUID(r); - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - - template - bool operator() (const Array & l, const T & r) const - { - if constexpr (std::is_same_v) - return l == r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); 
- } - - template - bool operator() (const Tuple & l, const T & r) const - { - if constexpr (std::is_same_v) - return l == r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - template - bool operator() (const DecimalField & l, const U & r) const + bool operator() (const T & l, const U & r) const { - if constexpr (isDecimalField()) - return l == r; - if constexpr (std::is_same_v || std::is_same_v) - return l == DecimalField(r, 0); - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } + if constexpr (std::is_same_v || std::is_same_v) + return std::is_same_v; + else + { + if constexpr (std::is_same_v) + return l == r; - template bool operator() (const UInt64 & l, const DecimalField & r) const { return DecimalField(l, 0) == r; } - template bool operator() (const Int64 & l, const DecimalField & r) const { return DecimalField(l, 0) == r; } - template bool operator() (const Float64 & l, const DecimalField & r) const { return cantCompare(l, r); } + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) + return accurate::equalsOp(l, r); - template - bool operator() (const AggregateFunctionStateData & l, const T & r) const - { - if constexpr (std::is_same_v) - return l == r; - return cantCompare(l, r); - } + if constexpr (isDecimalField() && isDecimalField()) + return l == r; + + if constexpr (isDecimalField() && std::is_arithmetic_v) + return l == DecimalField(r, 0); + + if constexpr (std::is_arithmetic_v && isDecimalField()) + return DecimalField(l, 0) == r; + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return stringToUUID(l) == r; + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(l); + T parsed; + readText(parsed, in); + return operator()(parsed, r); + } + } + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return l == stringToUUID(r); + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(r); + T parsed; + readText(parsed, in); + return operator()(l, parsed); + } + } + } -private: - template - bool cantCompare(const T &, const U &) const - { - if constexpr (std::is_same_v) - return false; throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), - ErrorCodes::BAD_TYPE_OF_FIELD); + ErrorCodes::BAD_TYPE_OF_FIELD); } }; + class FieldVisitorAccurateLess : public StaticVisitor { public: - bool operator() (const UInt64 &, const Null &) const { return false; } - bool operator() (const UInt64 & l, const UInt64 & r) const { return l < r; } - bool operator() (const UInt64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Int64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const UInt64 & l, const Float64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const UInt64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const UInt64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - bool operator() (const Int64 &, const Null &) const { return false; } - bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const Int64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Int64 & r) const { return 
l < r; } - bool operator() (const Int64 & l, const Float64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const Int64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const Int64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - bool operator() (const Float64 &, const Null &) const { return false; } - bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const Float64 & l, const UInt128 & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Int64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const Float64 & l, const Float64 & r) const { return l < r; } - bool operator() (const Float64 & l, const String & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Array & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const Tuple & r) const { return cantCompare(l, r); } - bool operator() (const Float64 & l, const AggregateFunctionStateData & r) const { return cantCompare(l, r); } - - template - bool operator() (const Null &, const T &) const - { - return !std::is_same_v; - } - - template - bool operator() (const String & l, const T & r) const - { - if constexpr (std::is_same_v) - return l < r; - if constexpr (std::is_same_v) - return stringToUUID(l) < r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - - template - bool operator() (const UInt128 & l, const T & r) const - { - if constexpr (std::is_same_v) - return l < r; - if constexpr (std::is_same_v) - return l < stringToUUID(r); - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - - template - bool operator() (const Array & l, const T & r) const - { - if constexpr (std::is_same_v) - return l < r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - - template - bool operator() (const Tuple & l, const T & r) const - { - if constexpr (std::is_same_v) - return l < r; - if constexpr (std::is_same_v) - return false; - return cantCompare(l, r); - } - template - bool operator() (const DecimalField & l, const U & r) const + bool operator() (const T & l, const U & r) const { - if constexpr (isDecimalField()) - return l < r; - if constexpr (std::is_same_v || std::is_same_v) - return l < DecimalField(r, 0); - if constexpr (std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) return false; - return cantCompare(l, r); - } + else + { + if constexpr (std::is_same_v) + return l < r; - template bool operator() (const UInt64 & l, const DecimalField & r) const { return DecimalField(l, 0) < r; } - template bool operator() (const Int64 & l, const DecimalField & r) const { return DecimalField(l, 0) < r; } - template bool operator() (const Float64 &, const DecimalField &) const { return false; } + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) + return accurate::lessOp(l, r); - template - bool operator() (const AggregateFunctionStateData & l, const T & r) const - { - return cantCompare(l, r); - } + if constexpr (isDecimalField() && isDecimalField()) + return l < r; + + if constexpr (isDecimalField() && std::is_arithmetic_v) + return l < DecimalField(r, 0); + + if constexpr (std::is_arithmetic_v && isDecimalField()) + return 
DecimalField(l, 0) < r; + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return stringToUUID(l) < r; + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(l); + T parsed; + readText(parsed, in); + return operator()(parsed, r); + } + } + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return l < stringToUUID(r); + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(r); + T parsed; + readText(parsed, in); + return operator()(l, parsed); + } + } + } -private: - template - bool cantCompare(const T &, const U &) const - { throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), - ErrorCodes::BAD_TYPE_OF_FIELD); + ErrorCodes::BAD_TYPE_OF_FIELD); } }; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index dad73b6a003..7265e818b51 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -826,8 +826,8 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont } bool cast_not_needed = - is_set_const /// Set args are already casted inside Set::createFromAST - || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast. + is_set_const /// Set args are already casted inside Set::createFromAST + || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast. if (!cast_not_needed) castValueToType(key_expr_type, const_value, const_type, node); From 3aedef99ce8395dd6e2c947dce4e999e95fa9bc6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 03:24:55 +0300 Subject: [PATCH 0681/2229] Added a test --- ...onstant_string_in_index_analysis.reference | 12 +++++++ ...with_constant_string_in_index_analysis.sql | 32 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.reference create mode 100644 tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql diff --git a/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.reference b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.reference new file mode 100644 index 00000000000..ee98bdf033b --- /dev/null +++ b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.reference @@ -0,0 +1,12 @@ +1 +999999 +100000 +899999 +100001 +900000 +1 +999999 +100000 +899999 +100001 +900000 diff --git a/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql new file mode 100644 index 00000000000..e37f647e81f --- /dev/null +++ b/tests/queries/0_stateless/01312_comparison_with_constant_string_in_index_analysis.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 1000; +INSERT INTO test SELECT * FROM numbers(1000000); +OPTIMIZE TABLE test; + +SET max_rows_to_read = 2000; +SELECT count() FROM test WHERE x = 100000; +SET max_rows_to_read = 1000000; +SELECT count() FROM test WHERE x != 100000; +SET max_rows_to_read = 101000; +SELECT count() FROM test WHERE x < 100000; +SET max_rows_to_read = 900000; +SELECT count() FROM test WHERE x > 100000; +SET max_rows_to_read = 101000; +SELECT count() FROM test WHERE x <= 
100000; +SET max_rows_to_read = 901000; +SELECT count() FROM test WHERE x >= 100000; + +SET max_rows_to_read = 2000; +SELECT count() FROM test WHERE x = '100000'; +SET max_rows_to_read = 1000000; +SELECT count() FROM test WHERE x != '100000'; +SET max_rows_to_read = 101000; +SELECT count() FROM test WHERE x < '100000'; +SET max_rows_to_read = 900000; +SELECT count() FROM test WHERE x > '100000'; +SET max_rows_to_read = 101000; +SELECT count() FROM test WHERE x <= '100000'; +SET max_rows_to_read = 901000; +SELECT count() FROM test WHERE x >= '100000'; + +DROP TABLE test; From 067cf4cc403e512d09a9dbcc4e0178d5b29278d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 07:52:28 +0300 Subject: [PATCH 0682/2229] Fix gcc build --- src/Common/Arena.h | 2 +- src/Common/ArenaWithFreeLists.h | 2 +- src/Core/Defines.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/Arena.h b/src/Common/Arena.h index d203a92d4a3..aaf71cac525 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -4,7 +4,7 @@ #include #include #include -#if __has_include() +#if __has_include() && defined(ADDRESS_SANITIZER) # include #endif #include diff --git a/src/Common/ArenaWithFreeLists.h b/src/Common/ArenaWithFreeLists.h index 6092f03ce19..3ae727fdaa5 100644 --- a/src/Common/ArenaWithFreeLists.h +++ b/src/Common/ArenaWithFreeLists.h @@ -1,6 +1,6 @@ #pragma once -#if __has_include() +#if __has_include() && defined(ADDRESS_SANITIZER) # include #endif #include diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 13070c565b4..8b26f486c9d 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -87,7 +87,7 @@ #define DBMS_DISTRIBUTED_SIGNATURE_HEADER 0xCAFEDACEull #define DBMS_DISTRIBUTED_SIGNATURE_HEADER_OLD_FORMAT 0xCAFECABEull -#if !__has_include() +#if !__has_include() || !defined(ADDRESS_SANITIZER) # define ASAN_UNPOISON_MEMORY_REGION(a, b) # define ASAN_POISON_MEMORY_REGION(a, b) #endif From 9e1b8b2872e366663707109eaee60a4df577865f Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 14 Jun 2020 16:26:37 +0000 Subject: [PATCH 0683/2229] Better exchanges, fix build, better comments, better tests --- .../RabbitMQ/RabbitMQBlockInputStream.cpp | 2 - src/Storages/RabbitMQ/RabbitMQHandler.cpp | 13 +- .../ReadBufferFromRabbitMQConsumer.cpp | 136 +++++++++--------- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 7 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 10 +- .../WriteBufferToRabbitMQProducer.cpp | 12 +- .../RabbitMQ/WriteBufferToRabbitMQProducer.h | 5 +- .../integration/test_storage_rabbitmq/test.py | 55 +++---- 8 files changed, 122 insertions(+), 118 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index 2d995d97f18..6257a60d678 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -142,8 +142,6 @@ Block RabbitMQBlockInputStream::readImpl() auto result_block = non_virtual_header.cloneWithColumns(std::move(result_columns)); auto virtual_block = virtual_header.cloneWithColumns(std::move(virtual_columns)); - LOG_DEBUG(log, "Total amount of rows is " + std::to_string(result_block.rows())); - for (const auto & column : virtual_block.getColumnsWithTypeAndName()) { result_block.insert(column); diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.cpp b/src/Storages/RabbitMQ/RabbitMQHandler.cpp index 8667427ee63..71c23bb9bc4 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.cpp +++ b/src/Storages/RabbitMQ/RabbitMQHandler.cpp @@ 
-41,8 +41,9 @@ void RabbitMQHandler::startConsumerLoop(std::atomic & loop_started) */ if (mutex_before_event_loop.try_lock_for(std::chrono::milliseconds(Lock_timeout))) { - loop_started = true; - stop_scheduled.store(false); + loop_started.store(true); + stop_scheduled = false; + event_base_loop(evbase, EVLOOP_NONBLOCK); mutex_before_event_loop.unlock(); } @@ -67,12 +68,8 @@ void RabbitMQHandler::stop() void RabbitMQHandler::stopWithTimeout() { - if (mutex_before_loop_stop.try_lock()) - { - stop_scheduled.store(true); - event_base_loopexit(evbase, &tv); - mutex_before_loop_stop.unlock(); - } + stop_scheduled = true; + event_base_loopexit(evbase, &tv); } } diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index 31ca4f280e3..ef4398753c2 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace Exchange +namespace ExchangeType { /// Note that default here means default by implementation and not by rabbitmq settings static const String DEFAULT = "default"; @@ -42,7 +42,7 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String & local_exchange_name_, + const String & local_exchange_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) , consumer_channel(std::move(consumer_channel_)) @@ -55,13 +55,15 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( , bind_by_id(bind_by_id_) , num_queues(num_queues_) , exchange_type(exchange_type_) - , local_exchange_name(local_exchange_name_) + , local_exchange(local_exchange_) + , local_default_exchange(local_exchange + "_" + ExchangeType::DIRECT) + , local_hash_exchange(local_exchange + "_" + ExchangeType::HASH) , stopped(stopped_) { messages.clear(); current = messages.begin(); - exchange_type_set = exchange_type != Exchange::DEFAULT; + exchange_type_set = exchange_type != ExchangeType::DEFAULT; /* One queue per consumer can handle up to 50000 messages. More queues per consumer can be added. * By default there is one queue per consumer. @@ -87,53 +89,52 @@ ReadBufferFromRabbitMQConsumer::~ReadBufferFromRabbitMQConsumer() void ReadBufferFromRabbitMQConsumer::initExchange() { /* This direct-exchange is used for default implemenation and for INSERT query (so it is always declared). If exchange_type - * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table. + * is not set, then there are only two exchanges - external, defined by the client, and local, unique for each table (default). * This strict division to external and local exchanges is needed to avoid too much complexity with defining exchange_name - * for INSERT query producer and, in general, it is much better to distinguish them into separate ones. + * for INSERT query producer and, in general, it is better to distinguish them into separate ones. */ - String default_exchange = exchange_type_set ? exchange_name + "_" + Exchange::DEFAULT : exchange_name; - consumer_channel->declareExchange(default_exchange, AMQP::fanout).onError([&](const char * message) + consumer_channel->declareExchange(local_default_exchange, AMQP::direct).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange {}. 
Reason: {}", default_exchange, message); - }); - - default_local_exchange = local_exchange_name; - default_local_exchange += exchange_type_set ? "_default_" + Exchange::DIRECT : "_" + Exchange::DIRECT; - consumer_channel->declareExchange(default_local_exchange, AMQP::direct).onError([&](const char * message) - { - local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare exchange {}. Reason: {}", default_local_exchange, message); - }); - - /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. - consumer_channel->bindExchange(default_exchange, default_local_exchange, routing_keys[0]).onError([&](const char * message) - { - local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange. Reason: {}", default_exchange, default_local_exchange, message); + LOG_ERROR(log, "Failed to declare local direct-exchange. Reason: {}", message); }); if (!exchange_type_set) + { + consumer_channel->declareExchange(exchange_name, AMQP::fanout).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to declare default fanout-exchange. Reason: {}", message); + }); + + /// With fanout exchange the binding key is ignored - a parameter might be arbitrary. All distribution lies on local_exchange. + consumer_channel->bindExchange(exchange_name, local_default_exchange, routing_keys[0]).onError([&](const char * message) + { + local_exchange_declared = false; + LOG_ERROR(log, "Failed to bind local direct-exchange to fanout-exchange. Reason: {}", message); + }); + return; + } /// For special purposes to use the flexibility of routing provided by rabbitmq - choosing exchange types is supported. AMQP::ExchangeType type; - if (exchange_type == Exchange::FANOUT) type = AMQP::ExchangeType::fanout; - else if (exchange_type == Exchange::DIRECT) type = AMQP::ExchangeType::direct; - else if (exchange_type == Exchange::TOPIC) type = AMQP::ExchangeType::topic; - else if (exchange_type == Exchange::HASH) type = AMQP::ExchangeType::consistent_hash; - else if (exchange_type == Exchange::HEADERS) type = AMQP::ExchangeType::headers; + if (exchange_type == ExchangeType::FANOUT) type = AMQP::ExchangeType::fanout; + else if (exchange_type == ExchangeType::DIRECT) type = AMQP::ExchangeType::direct; + else if (exchange_type == ExchangeType::TOPIC) type = AMQP::ExchangeType::topic; + else if (exchange_type == ExchangeType::HASH) type = AMQP::ExchangeType::consistent_hash; + else if (exchange_type == ExchangeType::HEADERS) type = AMQP::ExchangeType::headers; else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); - /* Declare exchange of the specified type and bind it to hash-exchange, which will evenly distribute messages - * between all consumers. (This enables better scaling as without hash-exchange - the only option to avoid getting - * the same messages more than once - is having only one consumer with one queue, which is not good.) + /* Declare client's exchange of the specified type and bind it to hash-exchange (if it is not already hash-exchange), which + * will evenly distribute messages between all consumers. (This enables better scaling as without hash-exchange - the only + * option to avoid getting the same messages more than once - is having only one consumer with one queue, which is not good.) 
*/ consumer_channel->declareExchange(exchange_name, type).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to declare client's {} exchange: {}", exchange_type, message); + LOG_ERROR(log, "Failed to declare client's {} exchange. Reason: {}", exchange_type, message); }); /// No need for declaring hash-exchange if there is only one consumer with one queue or exchange type is already hash @@ -142,26 +143,32 @@ void ReadBufferFromRabbitMQConsumer::initExchange() hash_exchange = true; - if (exchange_type == Exchange::HASH) + if (exchange_type == ExchangeType::HASH) return; - AMQP::Table exchange_arguments; - exchange_arguments["hash-property"] = "message_id"; + /* By default hash exchange distributes messages based on a hash value of a routing key, which must be a string integer. But + * in current case we use hash exchange for binding to another exchange of some other type, which needs its own routing keys + * of other types: headers, patterns and string-keys. This means that hash property must be changed. + */ + AMQP::Table binding_arguments; + binding_arguments["hash-property"] = "message_id"; - String local_hash_exchange_name = local_exchange_name + "_hash"; - consumer_channel->declareExchange(local_hash_exchange_name, AMQP::consistent_hash, exchange_arguments) + /// Declare exchange for sharding. + consumer_channel->declareExchange(local_hash_exchange, AMQP::consistent_hash, binding_arguments) .onError([&](const char * message) { local_exchange_declared = false; LOG_ERROR(log, "Failed to declare {} exchange: {}", exchange_type, message); }); - if (exchange_type == Exchange::HEADERS) + /// Then bind client's exchange to sharding exchange (by keys, specified by the client): + + if (exchange_type == ExchangeType::HEADERS) { AMQP::Table binding_arguments; std::vector matching; - for (auto & header : routing_keys) + for (const auto & header : routing_keys) { boost::split(matching, header, [](char c){ return c == '='; }); binding_arguments[matching[0]] = matching[1]; @@ -169,21 +176,21 @@ void ReadBufferFromRabbitMQConsumer::initExchange() } /// Routing key can be arbitrary here. - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_keys[0], binding_arguments) + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_keys[0], binding_arguments) .onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); }); } else { - for (auto & routing_key : routing_keys) + for (const auto & routing_key : routing_keys) { - consumer_channel->bindExchange(exchange_name, local_hash_exchange_name, routing_key).onError([&](const char * message) + consumer_channel->bindExchange(exchange_name, local_hash_exchange, routing_key).onError([&](const char * message) { local_exchange_declared = false; - LOG_ERROR(log, "Failed to bind {} exchange to {} exchange: {}", local_exchange_name, exchange_name, message); + LOG_ERROR(log, "Failed to bind local hash exchange to client's exchange. Reason: {}", message); }); } } @@ -227,7 +234,8 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) } } - consumer_channel->bindQueue(default_local_exchange, queue_name_, binding_key) + /// Bind queue to exchange that is used for INSERT query and also for default implementation. 
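// A condensed sketch of the topology initExchange() assembles above, using the
// same AMQP-CPP calls (exchange/queue names here are hypothetical): the
// client's exchange feeds a local consistent-hash exchange, which spreads
// messages across the per-consumer queues.
//
//   AMQP::Table hash_args;
//   hash_args["hash-property"] = "message_id";  // shard by message_id, not by routing key
//   channel.declareExchange("local_hash", AMQP::consistent_hash, hash_args);
//   channel.bindExchange("clients_exchange", "local_hash", routing_keys[0]);
//   channel.bindQueue("local_hash", "queue_0", "whatever");  // key ignored by the hash exchange
//
// The bindQueue() call below covers the other leg: each queue is also bound to
// the local default direct-exchange so that INSERT queries (and the default
// implementation) reach it.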
+ consumer_channel->bindQueue(local_default_exchange, queue_name_, binding_key) .onSuccess([&] { default_bindings_created = true; @@ -238,13 +246,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to bind to key {}. Reason: {}", binding_key, message); }); - /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed. Also note - * that if moved there, it must(!) be wrapped inside a channel->onReady callback or any other, otherwise consumer might fail - * to subscribe and no resubscription will help. + /* Subscription can probably be moved back to readPrefix(), but not sure whether it is better in regard to speed, because + * if moved there, it must(!) be wrapped inside a channel->onReady callback or any other (and the looping), otherwise + * consumer might fail to subscribe and no resubscription will help. */ subscribe(queues.back()); - LOG_TRACE(log, "Queue " + queue_name_ + " is bound by key " + binding_key); + LOG_DEBUG(log, "Queue " + queue_name_ + " is declared"); if (exchange_type_set) { @@ -253,10 +261,10 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) /* If exchange_type == hash, then bind directly to this client's exchange (because there is no need for a distributor * exchange as it is already hash-exchange), otherwise hash-exchange is a local distributor exchange. */ - String hash_exchange_name = exchange_type == Exchange::HASH ? exchange_name : local_exchange_name + "_hash"; + String current_hash_exchange = exchange_type == ExchangeType::HASH ? exchange_name : local_hash_exchange; /// If hash-exchange is used for messages distribution, then the binding key is ignored - can be arbitrary. - consumer_channel->bindQueue(hash_exchange_name, queue_name_, binding_key) + consumer_channel->bindQueue(current_hash_exchange, queue_name_, binding_key) .onSuccess([&] { bindings_created = true; @@ -267,13 +275,13 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", binding_key, message); }); } - else if (exchange_type == Exchange::HEADERS) + else if (exchange_type == ExchangeType::HEADERS) { AMQP::Table binding_arguments; std::vector matching; /// It is not parsed for the second time - if it was parsed above, then it would go to the first if statement, not here. - for (auto & header : routing_keys) + for (const auto & header : routing_keys) { boost::split(matching, header, [](char c){ return c == '='; }); binding_arguments[matching[0]] = matching[1]; @@ -288,15 +296,15 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_keys[0], message); + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); }); } else { /// Means there is only one queue with one consumer - no even distribution needed - no hash-exchange. - for (auto & routing_key : routing_keys) + for (const auto & routing_key : routing_keys) { - /// Binding directly to exchange, specified by the client + /// Binding directly to exchange, specified by the client. 
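// A sketch of what the headers branch above expects (inputs hypothetical):
// each routing key is a "key=value" pair, split on '=' into an AMQP::Table,
// and a message matches only when its header properties satisfy that table
// (RabbitMQ treats a missing "x-match" binding argument as "all", i.e. every
// listed header must match).
//
//   routing_keys = {"format=logs", "type=report"};
//
//   AMQP::Table match;
//   match["format"] = "logs";
//   match["type"]   = "report";
//   channel.bindQueue("headers_exchange", "queue_0", "", match);  // routing key unused
//
// The branch below is the remaining, simplest case: no hash-exchange and no
// header matching - the queue binds straight to the client's exchange once
// per routing key.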
consumer_channel->bindQueue(exchange_name, queue_name_, routing_key) .onSuccess([&] { @@ -305,7 +313,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { bindings_error = true; - LOG_ERROR(log, "Failed to create queue binding to key {}. Reason: {}", routing_key, message); + LOG_ERROR(log, "Failed to bind queue to key. Reason: {}", message); }); } } @@ -314,7 +322,7 @@ void ReadBufferFromRabbitMQConsumer::initQueueBindings(const size_t queue_id) .onError([&](const char * message) { default_bindings_error = true; - LOG_ERROR(log, "Failed to declare queue on the channel: {}", message); + LOG_ERROR(log, "Failed to declare queue on the channel. Reason: {}", message); }); /* Run event loop (which updates local variables in a separate thread) until bindings are created or failed to be created. @@ -364,7 +372,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) * executing all callbacks on the connection (not only its own), then there should be some point to unblock. * loop_started == 1 if current consumer is started the loop and not another. */ - if (!loop_started.load() && !eventHandler.checkStopIsScheduled().load()) + if (!loop_started.load() && !eventHandler.checkStopIsScheduled()) { stopEventLoopWithTimeout(); } @@ -373,7 +381,7 @@ void ReadBufferFromRabbitMQConsumer::subscribe(const String & queue_name) .onError([&](const char * message) { consumer_error = true; - LOG_ERROR(log, "Consumer {} failed: {}", channel_id, message); + LOG_ERROR(log, "Consumer {} failed. Reason: {}", channel_id, message); }); } @@ -385,7 +393,7 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() wait_subscribed = num_queues; - /// These variables are updated in a separate thread + /// These variables are updated in a separate thread. while (count_subscribed != wait_subscribed && !consumer_error) { startEventLoop(loop_started); @@ -393,11 +401,11 @@ void ReadBufferFromRabbitMQConsumer::checkSubscription() LOG_TRACE(log, "Consumer {} is subscribed to {} queues", channel_id, count_subscribed); - /// Updated in callbacks which are run by the loop + /// Updated in callbacks which are run by the loop. if (count_subscribed == num_queues) return; - /// A case that should never normally happen + /// A case that should never normally happen. 
for (auto & queue : queues) { subscribe(queue); diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 6a2c847357d..d4bf35c00b8 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -32,7 +32,7 @@ public: const bool bind_by_id_, const size_t num_queues_, const String & exchange_type_, - const String & local_exchange_name_, + const String & local_exchange_, const std::atomic & stopped_); ~ReadBufferFromRabbitMQConsumer() override; @@ -53,8 +53,11 @@ private: const size_t channel_id; const bool bind_by_id; const size_t num_queues; + const String & exchange_type; - const String & local_exchange_name; + const String & local_exchange; + const String local_default_exchange; + const String local_hash_exchange; Poco::Logger * log; char row_delimiter; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 852edd24726..3de8d193302 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -107,6 +107,8 @@ StorageRabbitMQ::StorageRabbitMQ( auto table_id = getStorageID(); String table_name = table_id.table_name; + + /// Make sure that local exchange name is unique for each table and is not the same as client's exchange name local_exchange_name = exchange_name + "_" + table_name; } @@ -132,6 +134,7 @@ Pipes StorageRabbitMQ::read( } LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); + return pipes; } @@ -225,12 +228,7 @@ ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() ProducerBufferPtr StorageRabbitMQ::createWriteBuffer() { - /* If exchange type is set, then there are different exchanges for external publishing and for INSERT query - * as in this case they are of different types. - */ - String producer_exchange = exchange_type == "default" ? local_exchange_name : local_exchange_name + "_default"; - - return std::make_shared(parsed_address, login_password, routing_keys[0], producer_exchange, + return std::make_shared(parsed_address, login_password, routing_keys[0], local_exchange_name, log, num_consumers * num_queues, bind_by_id, use_transactional_channel, row_delimiter ? std::optional{row_delimiter} : std::nullopt, 1, 1024); } diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp index 09179b95a15..6d74e2c8298 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.cpp @@ -77,7 +77,7 @@ WriteBufferToRabbitMQProducer::WriteBufferToRabbitMQProducer( WriteBufferToRabbitMQProducer::~WriteBufferToRabbitMQProducer() { - finilize(); + finilizeProducer(); connection.close(); assert(rows == 0 && chunks.empty()); } @@ -118,7 +118,9 @@ void WriteBufferToRabbitMQProducer::countRow() ++message_counter; - /// run event loop to actually publish, checking exchange is just a point to stop the event loop + /* Run event loop to actually publish, checking exchange is just a point to stop the event loop. Messages are not sent + * without looping and looping after every batch is much better than processing all the messages in one time. 
+ */ if ((message_counter %= Batch) == 0) { checkExchange(); @@ -132,7 +134,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() std::atomic exchange_declared = false, exchange_error = false; /* The AMQP::passive flag indicates that it should only be checked if there is a valid exchange with the given name - * and makes it visible from current producer_channel. + * and makes it declared on the current producer_channel. */ producer_channel->declareExchange(exchange_name, AMQP::direct, AMQP::passive) .onSuccess([&]() @@ -142,7 +144,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() .onError([&](const char * message) { exchange_error = true; - LOG_ERROR(log, "Exchange was not declared: {}", message); + LOG_ERROR(log, "Exchange for INSERT query was not declared. Reason: {}", message); }); /// These variables are updated in a separate thread and starting the loop blocks current thread @@ -153,7 +155,7 @@ void WriteBufferToRabbitMQProducer::checkExchange() } -void WriteBufferToRabbitMQProducer::finilize() +void WriteBufferToRabbitMQProducer::finilizeProducer() { checkExchange(); diff --git a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h index 9fd36257561..7d2bb6e598f 100644 --- a/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h +++ b/src/Storages/RabbitMQ/WriteBufferToRabbitMQProducer.h @@ -40,7 +40,7 @@ public: private: void nextImpl() override; void checkExchange(); - void finilize(); + void finilizeProducer(); std::pair & login_password; const String routing_key; @@ -56,9 +56,6 @@ private: size_t next_queue = 0; UInt64 message_counter = 0; - String channel_id; - - Messages messages; Poco::Logger * log; const std::optional delim; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index f58e898a45f..3c4c0b3215b 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -497,7 +497,7 @@ def test_rabbitmq_big_message(rabbitmq_cluster): assert int(result) == rabbitmq_messages*batch_messages, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): NUM_CHANNELS = 5 @@ -560,7 +560,7 @@ def test_rabbitmq_sharding_between_channels_publish(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): NUM_QUEUES = 4 @@ -623,7 +623,7 @@ def test_rabbitmq_sharding_between_queues_publish(rabbitmq_cluster): assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster): NUM_CONSUMERS = 10 @@ -688,7 +688,7 @@ def test_rabbitmq_sharding_between_channels_and_queues_publish(rabbitmq_cluster) assert int(result) == messages_num * threads_num, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_read_only_combo(rabbitmq_cluster): NUM_MV = 5; @@ -768,7 +768,7 @@ def test_rabbitmq_read_only_combo(rabbitmq_cluster): assert int(result) == messages_num * threads_num * NUM_MV, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(180) +@pytest.mark.timeout(240) def 
test_rabbitmq_insert(rabbitmq_cluster): instance.query(''' CREATE TABLE test.rabbitmq (key UInt64, value UInt64) @@ -1054,7 +1054,10 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): key = "direct_" + str(key_num) key_num += 1 for message in messages: - channel.basic_publish(exchange='direct_exchange_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='direct_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1066,8 +1069,8 @@ def test_rabbitmq_direct_exchange(rabbitmq_cluster): for consumer_id in range(num_tables): instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; '''.format(consumer_id)) instance.query(''' @@ -1122,7 +1125,10 @@ def test_rabbitmq_fanout_exchange(rabbitmq_cluster): key_num = 0 for message in messages: - channel.basic_publish(exchange='fanout_exchange_testing', routing_key='', body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='fanout_exchange_testing', routing_key='', + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1215,7 +1221,10 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): key = "random.logs" for message in messages: - channel.basic_publish(exchange='topic_exchange_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish( + exchange='topic_exchange_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1225,18 +1234,12 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables + messages_num * num_tables: break - for consumer_id in range(num_tables): + for consumer_id in range(num_tables * 2): instance.query(''' DROP TABLE IF EXISTS test.topic_exchange_{0}; DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; '''.format(consumer_id)) - for consumer_id in range(num_tables): - instance.query(''' - DROP TABLE IF EXISTS test.topic_exchange_{0}; - DROP TABLE IF EXISTS test.topic_exchange_{0}_mv; - '''.format(num_tables + consumer_id)) - instance.query(''' DROP TABLE IF EXISTS test.destination; ''') @@ -1244,7 +1247,7 @@ def test_rabbitmq_topic_exchange(rabbitmq_cluster): assert int(result) == messages_num * num_tables + messages_num * num_tables, 'ClickHouse lost some messages: {}'.format(result) -@pytest.mark.timeout(320) +@pytest.mark.timeout(420) def test_rabbitmq_hash_exchange(rabbitmq_cluster): instance.query(''' DROP TABLE IF EXISTS test.destination; @@ -1288,8 +1291,8 @@ def test_rabbitmq_hash_exchange(rabbitmq_cluster): for _ in range(messages_num): messages.append(json.dumps({'key': i[0], 'value': i[0]})) i[0] += 1 - key = str(randrange(10)) for message in messages: + key = str(randrange(10)) channel.basic_publish(exchange='hash_exchange_testing', routing_key=key, body=message) connection.close() @@ -1389,7 +1392,9 @@ def test_rabbitmq_multiple_bindings(rabbitmq_cluster): for key in keys: for message in messages: - channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, body=message) + mes_id = str(randrange(10)) + channel.basic_publish(exchange='multiple_bindings_testing', routing_key=key, + properties=pika.BasicProperties(message_id=mes_id), body=message) connection.close() @@ -1488,8 +1493,9 @@ def test_rabbitmq_headers_exchange(rabbitmq_cluster): key_num = 0 for message in 
messages: + mes_id = str(randrange(10)) channel.basic_publish(exchange='headers_exchange_testing', routing_key='', - properties=pika.BasicProperties(headers=fields), body=message) + properties=pika.BasicProperties(headers=fields, message_id=mes_id), body=message) connection.close() @@ -1499,16 +1505,11 @@ def test_rabbitmq_headers_exchange(rabbitmq_cluster): if int(result) == messages_num * num_tables_to_receive: break - for consumer_id in range(num_tables_to_receive): + for consumer_id in range(num_tables_to_receive + num_tables_to_ignore): instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; DROP TABLE IF EXISTS test.direct_exchange_{0}; + DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; '''.format(consumer_id)) - for consumer_id in range(num_tables_to_ignore): - instance.query(''' - DROP TABLE IF EXISTS test.direct_exchange_{0}_mv; - DROP TABLE IF EXISTS test.direct_exchange_{0}; - '''.format(consumer_id + num_tables_to_receive)) instance.query(''' DROP TABLE IF EXISTS test.destination; From 8dac30ae955a1ef0b78826d8d7b06594e583263d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 21:42:10 +0300 Subject: [PATCH 0684/2229] Split file for better build times --- src/Common/FieldVisitors.h | 128 ----------------- src/Common/FieldVisitorsAccurateComparison.h | 142 +++++++++++++++++++ src/Functions/array/arrayIndex.h | 2 +- src/Interpreters/FillingRow.cpp | 2 + src/Interpreters/FillingRow.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 2 +- 7 files changed, 148 insertions(+), 132 deletions(-) create mode 100644 src/Common/FieldVisitorsAccurateComparison.h diff --git a/src/Common/FieldVisitors.h b/src/Common/FieldVisitors.h index 257994a6bd2..ddeddb8fbf6 100644 --- a/src/Common/FieldVisitors.h +++ b/src/Common/FieldVisitors.h @@ -1,10 +1,7 @@ #pragma once #include -#include #include -#include -#include class SipHash; @@ -16,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_CONVERT_TYPE; - extern const int BAD_TYPE_OF_FIELD; extern const int LOGICAL_ERROR; } @@ -179,130 +175,6 @@ template <> constexpr bool isDecimalField>() { return tr template <> constexpr bool isDecimalField>() { return true; } -/** More precise comparison, used for index. - * Differs from Field::operator< and Field::operator== in that it also compares values of different types. - * Comparison rules are same as in FunctionsComparison (to be consistent with expression evaluation in query). 
- */ -class FieldVisitorAccurateEquals : public StaticVisitor -{ -public: - template - bool operator() (const T & l, const U & r) const - { - if constexpr (std::is_same_v || std::is_same_v) - return std::is_same_v; - else - { - if constexpr (std::is_same_v) - return l == r; - - if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) - return accurate::equalsOp(l, r); - - if constexpr (isDecimalField() && isDecimalField()) - return l == r; - - if constexpr (isDecimalField() && std::is_arithmetic_v) - return l == DecimalField(r, 0); - - if constexpr (std::is_arithmetic_v && isDecimalField()) - return DecimalField(l, 0) == r; - - if constexpr (std::is_same_v) - { - if constexpr (std::is_same_v) - return stringToUUID(l) == r; - - if constexpr (std::is_arithmetic_v) - { - ReadBufferFromString in(l); - T parsed; - readText(parsed, in); - return operator()(parsed, r); - } - } - - if constexpr (std::is_same_v) - { - if constexpr (std::is_same_v) - return l == stringToUUID(r); - - if constexpr (std::is_arithmetic_v) - { - ReadBufferFromString in(r); - T parsed; - readText(parsed, in); - return operator()(l, parsed); - } - } - } - - throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), - ErrorCodes::BAD_TYPE_OF_FIELD); - } -}; - - -class FieldVisitorAccurateLess : public StaticVisitor -{ -public: - template - bool operator() (const T & l, const U & r) const - { - if constexpr (std::is_same_v || std::is_same_v) - return false; - else - { - if constexpr (std::is_same_v) - return l < r; - - if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) - return accurate::lessOp(l, r); - - if constexpr (isDecimalField() && isDecimalField()) - return l < r; - - if constexpr (isDecimalField() && std::is_arithmetic_v) - return l < DecimalField(r, 0); - - if constexpr (std::is_arithmetic_v && isDecimalField()) - return DecimalField(l, 0) < r; - - if constexpr (std::is_same_v) - { - if constexpr (std::is_same_v) - return stringToUUID(l) < r; - - if constexpr (std::is_arithmetic_v) - { - ReadBufferFromString in(l); - T parsed; - readText(parsed, in); - return operator()(parsed, r); - } - } - - if constexpr (std::is_same_v) - { - if constexpr (std::is_same_v) - return l < stringToUUID(r); - - if constexpr (std::is_arithmetic_v) - { - ReadBufferFromString in(r); - T parsed; - readText(parsed, in); - return operator()(l, parsed); - } - } - } - - throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), - ErrorCodes::BAD_TYPE_OF_FIELD); - } -}; - - /** Implements `+=` operation. * Returns false if the result is zero. */ diff --git a/src/Common/FieldVisitorsAccurateComparison.h b/src/Common/FieldVisitorsAccurateComparison.h new file mode 100644 index 00000000000..91fa4bf28de --- /dev/null +++ b/src/Common/FieldVisitorsAccurateComparison.h @@ -0,0 +1,142 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_TYPE_OF_FIELD; +} + +/** More precise comparison, used for index. + * Differs from Field::operator< and Field::operator== in that it also compares values of different types. + * Comparison rules are same as in FunctionsComparison (to be consistent with expression evaluation in query). 
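 * For example, comparing UInt64(42) with String("100") parses the string as
 * the numeric operand's type (via ReadBufferFromString/readText) and compares
 * the values accurately, so 42 < '100' holds here just as it does in SQL
 * comparison functions.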
+ */ +class FieldVisitorAccurateEquals : public StaticVisitor +{ +public: + template + bool operator() (const T & l, const U & r) const + { + if constexpr (std::is_same_v || std::is_same_v) + return std::is_same_v; + else + { + if constexpr (std::is_same_v) + return l == r; + + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) + return accurate::equalsOp(l, r); + + if constexpr (isDecimalField() && isDecimalField()) + return l == r; + + if constexpr (isDecimalField() && std::is_arithmetic_v) + return l == DecimalField(r, 0); + + if constexpr (std::is_arithmetic_v && isDecimalField()) + return DecimalField(l, 0) == r; + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return stringToUUID(l) == r; + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(l); + T parsed; + readText(parsed, in); + return operator()(parsed, r); + } + } + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return l == stringToUUID(r); + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(r); + T parsed; + readText(parsed, in); + return operator()(l, parsed); + } + } + } + + throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), + ErrorCodes::BAD_TYPE_OF_FIELD); + } +}; + + +class FieldVisitorAccurateLess : public StaticVisitor +{ +public: + template + bool operator() (const T & l, const U & r) const + { + if constexpr (std::is_same_v || std::is_same_v) + return false; + else + { + if constexpr (std::is_same_v) + return l < r; + + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) + return accurate::lessOp(l, r); + + if constexpr (isDecimalField() && isDecimalField()) + return l < r; + + if constexpr (isDecimalField() && std::is_arithmetic_v) + return l < DecimalField(r, 0); + + if constexpr (std::is_arithmetic_v && isDecimalField()) + return DecimalField(l, 0) < r; + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return stringToUUID(l) < r; + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(l); + T parsed; + readText(parsed, in); + return operator()(parsed, r); + } + } + + if constexpr (std::is_same_v) + { + if constexpr (std::is_same_v) + return l < stringToUUID(r); + + if constexpr (std::is_arithmetic_v) + { + ReadBufferFromString in(r); + T parsed; + readText(parsed, in); + return operator()(l, parsed); + } + } + } + + throw Exception("Cannot compare " + demangle(typeid(T).name()) + " with " + demangle(typeid(U).name()), + ErrorCodes::BAD_TYPE_OF_FIELD); + } +}; + +} diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fab1332cbda..50214ee790f 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Interpreters/FillingRow.cpp b/src/Interpreters/FillingRow.cpp index dc48b5347c4..7e32d9514a6 100644 --- a/src/Interpreters/FillingRow.cpp +++ b/src/Interpreters/FillingRow.cpp @@ -1,4 +1,6 @@ #include +#include + namespace DB { diff --git a/src/Interpreters/FillingRow.h b/src/Interpreters/FillingRow.h index 1753508e139..0e1d60d0d7a 100644 --- a/src/Interpreters/FillingRow.h +++ b/src/Interpreters/FillingRow.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include + namespace DB { diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f9072e6176a..dc32371b6c1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectQuery.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7265e818b51..281f8511a59 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include From 3663e2a47e1ac04ccd7aa0dd3ce41b8685a2de1a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 22:11:32 +0300 Subject: [PATCH 0685/2229] Fix syntax hilite in CREATE USER query --- src/Parsers/ASTCreateUserQuery.cpp | 3 ++- .../0_stateless/01316_create_user_syntax_hilite.reference | 1 + .../0_stateless/01316_create_user_syntax_hilite.sh | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01316_create_user_syntax_hilite.reference create mode 100755 tests/queries/0_stateless/01316_create_user_syntax_hilite.sh diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index e5c1178285b..1d61303860a 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -65,7 +65,8 @@ namespace settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << authentication_type_name << (settings.hilite ? IAST::hilite_none : ""); if (password) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " BY " << quoteString(*password); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " BY " << (settings.hilite ? IAST::hilite_none : "") + << quoteString(*password); } diff --git a/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference new file mode 100644 index 00000000000..ed7daeb3609 --- /dev/null +++ b/tests/queries/0_stateless/01316_create_user_syntax_hilite.reference @@ -0,0 +1 @@ +CREATE USER user IDENTIFIED WITH plaintext_password BY 'hello' diff --git a/tests/queries/0_stateless/01316_create_user_syntax_hilite.sh b/tests/queries/0_stateless/01316_create_user_syntax_hilite.sh new file mode 100755 index 00000000000..1031a96363c --- /dev/null +++ b/tests/queries/0_stateless/01316_create_user_syntax_hilite.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_FORMAT --hilite <<< "CREATE USER user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" From 07ba7ffea52dbc0719cd7eccea77079125e39ebd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 22:23:05 +0300 Subject: [PATCH 0686/2229] Clear password from command line #11624 --- programs/benchmark/Benchmark.cpp | 5 ++++- programs/client/Client.cpp | 2 ++ src/Common/clearPasswordFromCommandLine.cpp | 18 ++++++++++++++++++ src/Common/clearPasswordFromCommandLine.h | 6 ++++++ src/Common/ya.make | 1 + 5 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 src/Common/clearPasswordFromCommandLine.cpp create mode 100644 src/Common/clearPasswordFromCommandLine.h diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index e17320b39ea..bb814f474e3 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -539,7 +540,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("password", value()->default_value(""), "") ("database", value()->default_value("default"), "") ("stacktrace", "print stack traces of exceptions") - ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") + ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ; @@ -550,6 +551,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); boost::program_options::notify(options); + clearPasswordFromCommandLine(argc, argv); + if (options.count("help")) { std::cout << "Usage: " << argv[0] << " [options] < queries.txt\n"; diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 7808120d09e..63467c1129d 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -2006,6 +2007,7 @@ public: argsToConfig(common_arguments, config(), 100); + clearPasswordFromCommandLine(argc, argv); } }; diff --git a/src/Common/clearPasswordFromCommandLine.cpp b/src/Common/clearPasswordFromCommandLine.cpp new file mode 100644 index 00000000000..0ff56e25c3f --- /dev/null +++ b/src/Common/clearPasswordFromCommandLine.cpp @@ -0,0 +1,18 @@ +#include +#include "clearPasswordFromCommandLine.h" + +void clearPasswordFromCommandLine(int argc, char ** argv) +{ + for (int arg = 1; arg < argc; ++arg) + { + if (arg + 1 < argc && 0 == strcmp(argv[arg], "--password")) + { + ++arg; + memset(argv[arg], 0, strlen(argv[arg])); + } + else if (0 == strncmp(argv[arg], "--password=", strlen("--password="))) + { + memset(argv[arg] + strlen("--password="), 0, strlen(argv[arg]) - strlen("--password=")); + } + } +} diff --git a/src/Common/clearPasswordFromCommandLine.h b/src/Common/clearPasswordFromCommandLine.h new file mode 100644 index 00000000000..cf90fea1dc8 --- /dev/null +++ b/src/Common/clearPasswordFromCommandLine.h @@ -0,0 +1,6 @@ +#pragma once + +/** If there are --password=... or --password ... arguments in command line, replace their values with zero bytes. + * This is needed to prevent password exposure in 'ps' and similar tools. 
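 * For example, after `clickhouse-benchmark --password secret ...` passes
 * through this function, /proc/<pid>/cmdline holds zero bytes where "secret"
 * was, so `ps auxw` shows an empty value after --password (illustrative; see
 * the 01317 test added in the next commit).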
+ */ +void clearPasswordFromCommandLine(int argc, char ** argv); diff --git a/src/Common/ya.make b/src/Common/ya.make index 83a419212bd..327089ff31d 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -30,6 +30,7 @@ SRCS( Config/configReadClient.cpp Config/ConfigReloader.cpp createHardLink.cpp + clearPasswordFromCommandLine.cpp CurrentMetrics.cpp CurrentThread.cpp DNSResolver.cpp From 22366471d03876402a46f1bd4e40602022562cf8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Jun 2020 22:31:45 +0300 Subject: [PATCH 0687/2229] Added a test --- ...1317_no_password_in_command_line.reference | 2 ++ .../01317_no_password_in_command_line.sh | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/01317_no_password_in_command_line.reference create mode 100755 tests/queries/0_stateless/01317_no_password_in_command_line.sh diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.reference b/tests/queries/0_stateless/01317_no_password_in_command_line.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01317_no_password_in_command_line.sh b/tests/queries/0_stateless/01317_no_password_in_command_line.sh new file mode 100755 index 00000000000..1a3ae88616a --- /dev/null +++ b/tests/queries/0_stateless/01317_no_password_in_command_line.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP USER IF EXISTS user" +$CLICKHOUSE_CLIENT --query "CREATE USER user IDENTIFIED WITH PLAINTEXT_PASSWORD BY 'hello'" + +# False positive result due to race condition with sleeps is Ok. + +$CLICKHOUSE_CLIENT --user user --password hello --query "SELECT sleep(1)" & +sleep 0.1 +ps auxw | grep -F -- '--password' | grep -F hello ||: +wait + +$CLICKHOUSE_CLIENT --user user --password=hello --query "SELECT sleep(1)" & +sleep 0.1 +ps auxw | grep -F -- '--password' | grep -F hello ||: +wait + +$CLICKHOUSE_CLIENT --query "DROP USER user" From c4f18d2896b8ed5721b34de68788b0b3ac300eb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 00:39:22 +0300 Subject: [PATCH 0688/2229] Fix tests --- src/AggregateFunctions/AggregateFunctionFactory.cpp | 2 +- src/AggregateFunctions/AggregateFunctionNull.cpp | 2 -- tests/queries/0_stateless/00808_array_enumerate_segfault.sql | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionFactory.cpp b/src/AggregateFunctions/AggregateFunctionFactory.cpp index 7ff52fe0f70..c52b4d122dd 100644 --- a/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -117,7 +117,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl( /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. 
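// The change below narrows the bail-out: onlyNull() is also true for
// Nullable(Nothing) (the type of a literal NULL argument), whereas
// WhichDataType(type).isNothing() is true only for the bare Nothing type.
// A sketch, assuming these type semantics:
//
//   DataTypePtr t = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
//   t->onlyNull();                  // true  -- old check would return nullptr here
//   WhichDataType(t).isNothing();   // false -- new check lets the Null combinator handle it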
if (!out_properties.returns_default_when_only_null - && std::any_of(argument_types.begin(), argument_types.end(), [](const auto & type) { return type->onlyNull(); })) + && std::any_of(argument_types.begin(), argument_types.end(), [](const auto & type) { return WhichDataType(type).isNothing(); })) { return nullptr; } diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 85d960eae62..143c9b6246f 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -57,8 +57,6 @@ public: if (has_null_types) { - std::cerr << properties.returns_default_when_only_null << "\n"; - /// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64. if (properties.returns_default_when_only_null) return std::make_shared(DataTypes{std::make_shared()}, params); diff --git a/tests/queries/0_stateless/00808_array_enumerate_segfault.sql b/tests/queries/0_stateless/00808_array_enumerate_segfault.sql index b492d3114f8..e5acba9cb57 100644 --- a/tests/queries/0_stateless/00808_array_enumerate_segfault.sql +++ b/tests/queries/0_stateless/00808_array_enumerate_segfault.sql @@ -1,4 +1,4 @@ SET send_logs_level = 'none'; SELECT arrayEnumerateUniq(anyHeavy([]), []); -SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 190 } +SELECT arrayEnumerateDense([], [sequenceCount(NULL)]); -- { serverError 42 } SELECT arrayEnumerateDense([STDDEV_SAMP(NULL, 910947.571364)], [NULL]); From 303d1ebdafbe81d3b61572dbb5670f85c62b885f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 01:17:06 +0300 Subject: [PATCH 0689/2229] Fix error; more tests --- src/AggregateFunctions/AggregateFunctionNull.cpp | 6 ++++-- .../01315_count_distinct_return_not_nullable.reference | 7 +++++++ .../01315_count_distinct_return_not_nullable.sql | 10 ++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index 143c9b6246f..b8fbad53350 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -59,9 +59,11 @@ public: { /// Currently the only functions that returns not-NULL on all NULL arguments are count and uniq, and they returns UInt64. 
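// Net effect of the two branches below, as pinned by this commit's 01315 test:
// over only-NULL arguments, count()/uniq() keep a plain UInt64 result and
// return 0, while every other aggregate gets Nullable(Nothing) and returns
// NULL. For example:
//
//   SELECT count(NULL);    -- 0   (UInt64)
//   SELECT sum(NULL);      -- \N  (Nullable(Nothing))
//   SELECT corr(1, NULL);  -- \N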
if (properties.returns_default_when_only_null) - return std::make_shared(DataTypes{std::make_shared()}, params); + return std::make_shared(DataTypes{ + std::make_shared()}, params); else - return std::make_shared(arguments, params); + return std::make_shared(DataTypes{ + std::make_shared(std::make_shared())}, params); } assert(nested_function); diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference index 76b82419556..b529a357af4 100644 --- a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.reference @@ -4,6 +4,13 @@ 5 5 5 +--- 0 0 0 +--- +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql index 9787ee2bd70..932cd2f69f9 100644 --- a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql @@ -6,6 +6,16 @@ SELECT uniq(number >= 5 ? number : NULL) FROM numbers(10); SELECT uniqExact(number >= 5 ? number : NULL) FROM numbers(10); SELECT count(DISTINCT number >= 5 ? number : NULL) FROM numbers(10); +SELECT '---'; + SELECT count(NULL); SELECT uniq(NULL); SELECT count(DISTINCT NULL); + +SELECT '---'; + +SELECT avg(NULL); +SELECT sum(NULL); +SELECT corr(NULL, NULL); +SELECT corr(1, NULL); +SELECT corr(NULL, 1); From 6467302ad32b4dd6205542675d54fa89944d0ff1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 01:29:22 +0300 Subject: [PATCH 0690/2229] Fix gcc build --- src/Common/Arena.h | 2 +- src/Common/ArenaWithFreeLists.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/Arena.h b/src/Common/Arena.h index aaf71cac525..44a9b444ff2 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -4,10 +4,10 @@ #include #include #include +#include #if __has_include() && defined(ADDRESS_SANITIZER) # include #endif -#include #include #include #include diff --git a/src/Common/ArenaWithFreeLists.h b/src/Common/ArenaWithFreeLists.h index 3ae727fdaa5..1284c3586c0 100644 --- a/src/Common/ArenaWithFreeLists.h +++ b/src/Common/ArenaWithFreeLists.h @@ -1,9 +1,9 @@ #pragma once +#include #if __has_include() && defined(ADDRESS_SANITIZER) # include #endif -#include #include #include From ceaaf67d3fb6ae409a3515eac4ab0d63ae22303d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Jun 2020 23:44:10 +0300 Subject: [PATCH 0691/2229] Fix parsing CREATE SETTINGS PROFILE with WRITABLE keyword. --- src/Parsers/ParserSettingsProfileElement.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp index 37044e8ccbe..1dccae50cf5 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/ParserSettingsProfileElement.cpp @@ -87,7 +87,7 @@ namespace readonly = true; return true; } - else if (ParserKeyword{"READONLY"}.ignore(pos, expected)) + else if (ParserKeyword{"WRITABLE"}.ignore(pos, expected)) { readonly = false; return true; From ca2fb5932126175fed213a6f040fd34ff7b2d908 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 6 Jun 2020 05:25:27 +0300 Subject: [PATCH 0692/2229] Fix calculating full names of row policies. 
--- src/Access/RowPolicy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/RowPolicy.cpp b/src/Access/RowPolicy.cpp index 4249f351eae..acacaf01c6c 100644 --- a/src/Access/RowPolicy.cpp +++ b/src/Access/RowPolicy.cpp @@ -17,7 +17,7 @@ String RowPolicy::NameParts::getName() const name.reserve(database.length() + table_name.length() + short_name.length() + 6); name += backQuoteIfNeed(short_name); name += " ON "; - if (!name.empty()) + if (!database.empty()) { name += backQuoteIfNeed(database); name += '.'; From 3ffcb8e790434245cfeea7aceb9dbd8daf6a003b Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 15 Jun 2020 00:00:57 +0300 Subject: [PATCH 0693/2229] Fix casting values of settings while reading profiles from users.xml. --- src/Access/UsersConfigAccessStorage.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index f5f48a2390e..4d7d1b4cdfe 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -353,16 +353,17 @@ namespace for (const String & name : names) { SettingsProfileElement profile_element; - profile_element.setting_index = Settings::findIndexStrict(name); + size_t setting_index = Settings::findIndexStrict(name); + profile_element.setting_index = setting_index; Poco::Util::AbstractConfiguration::Keys constraint_types; String path_to_name = path_to_constraints + "." + name; config.keys(path_to_name, constraint_types); for (const String & constraint_type : constraint_types) { if (constraint_type == "min") - profile_element.min_value = config.getString(path_to_name + "." + constraint_type); + profile_element.min_value = Settings::valueToCorrespondingType(setting_index, config.getString(path_to_name + "." + constraint_type)); else if (constraint_type == "max") - profile_element.max_value = config.getString(path_to_name + "." + constraint_type); + profile_element.max_value = Settings::valueToCorrespondingType(setting_index, config.getString(path_to_name + "." + constraint_type)); else if (constraint_type == "readonly") profile_element.readonly = true; else @@ -402,8 +403,9 @@ namespace } SettingsProfileElement profile_element; - profile_element.setting_index = Settings::findIndexStrict(key); - profile_element.value = config.getString(profile_config + "." + key); + size_t setting_index = Settings::findIndexStrict(key); + profile_element.setting_index = setting_index; + profile_element.value = Settings::valueToCorrespondingType(setting_index, config.getString(profile_config + "." + key)); profile->elements.emplace_back(std::move(profile_element)); } From 1c438a133ea1db5efea9526dbafe5c9c4762a368 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 03:59:12 +0300 Subject: [PATCH 0694/2229] Leader election both backward and forward compatible --- src/Storages/MergeTree/LeaderElection.h | 68 ++++++++++++++----------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index ef6b68bbe15..4d3a533d139 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -33,10 +33,8 @@ namespace zkutil * to maintain compatibility when replicas with different versions work on the same cluster * (this is allowed for short time period during cluster update). * - * Replicas with old versions participate in leader election with ephemeral sequential nodes. 
- * If the node is first, then replica is the leader.
- * Replicas with new versions creates persistent sequential nodes.
- * If the first node is persistent, then all replicas with new versions become leaders.
+ * Replicas with new versions create ephemeral sequential nodes with values like "replica_name (multiple leaders Ok)".
+ * If the first node belongs to a replica with a new version, then all replicas with new versions become leaders.
  */
class LeaderElection
{
@@ -55,7 +53,7 @@ public:
        ZooKeeper & zookeeper_,
        LeadershipHandler handler_,
        const std::string & identifier_)
-        : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_)
+        : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_ + suffix)
        , log_name("LeaderElection (" + path + ")")
        , log(&Poco::Logger::get(log_name))
    {
@@ -74,19 +72,23 @@ public:
    ~LeaderElection()
    {
-        shutdown();
+        releaseNode();
    }

private:
+    static inline constexpr auto suffix = " (multiple leaders Ok)";
    DB::BackgroundSchedulePool & pool;
    DB::BackgroundSchedulePool::TaskHolder task;
-    const std::string path;
+    std::string path;
    ZooKeeper & zookeeper;
    LeadershipHandler handler;
    std::string identifier;
    std::string log_name;
    Poco::Logger * log;

+    EphemeralNodeHolderPtr node;
+    std::string node_name;
+
    std::atomic shutdown_called {false};

    CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection};

@@ -94,45 +96,52 @@ private:
    void createNode()
    {
        shutdown_called = false;
+        node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier);

-        /// If there is at least one persistent node, we don't have to create another.
-        Strings children = zookeeper.getChildren(path);
-        for (const auto & child : children)
-        {
-            Coordination::Stat stat;
-            zookeeper.get(path + "/" + child, &stat);
-            if (!stat.ephemeralOwner)
-            {
-                ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership);
-                handler();
-                return;
-            }
-        }
+        std::string node_path = node->getPath();
+        node_name = node_path.substr(node_path.find_last_of('/') + 1);

-        zookeeper.create(path + "/leader_election-", identifier, CreateMode::PersistentSequential);
        task->activateAndSchedule();
    }

+    void releaseNode()
+    {
+        shutdown();
+        node = nullptr;
+    }
+
    void threadFunction()
    {
+        bool success = false;
+
        try
        {
            Strings children = zookeeper.getChildren(path);
-            if (children.empty())
-                throw Poco::Exception("Assertion failed in LeaderElection");
-
            std::sort(children.begin(), children.end());
-            Coordination::Stat stat;
-            zookeeper.get(path + "/" + children.front(), &stat);
+            auto my_node_it = std::lower_bound(children.begin(), children.end(), node_name);
+            if (my_node_it == children.end() || *my_node_it != node_name)
+                throw Poco::Exception("Assertion failed in LeaderElection");

-            if (!stat.ephemeralOwner)
+            String value = zookeeper.get(path + "/" + children.front());
+
+#if !defined(ARCADIA_BUILD) /// C++20; Replicated tables are unused in Arcadia.
+            if (value.ends_with(suffix))
            {
-                /// It is persistent node - we can become leader.
                ProfileEvents::increment(ProfileEvents::LeaderElectionAcquiredLeadership);
                handler();
                return;
            }
+#endif
+            if (my_node_it == children.begin())
+                throw Poco::Exception("Assertion failed in LeaderElection");
+
+            /// Watch for the node in front of us.
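+            /// The first node was created by an old-version replica (its value lacks the suffix),
+            /// so we fall back to single-leader mode. Hypothetical example: with children
+            /// [leader_election-0000000005, leader_election-0000000006] and our node being
+            /// -0000000006, we watch -0000000005; when it disappears, the watch reschedules
+            /// the task and this whole check re-runs from the beginning.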
+            --my_node_it;
+            if (!zookeeper.existsWatch(path + "/" + *my_node_it, nullptr, task->getWatchCallback()))
+                task->schedule();
+
+            success = true;
        }
        catch (const KeeperException & e)
        {
@@ -146,7 +155,8 @@ private:
            DB::tryLogCurrentException(log);
        }

-        task->scheduleAfter(10 * 1000);
+        if (!success)
+            task->scheduleAfter(10 * 1000);
    }
};

From 689b6901f8ac076bcb11249a9a69303b2817e679 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jun 2020 04:04:42 +0300
Subject: [PATCH 0695/2229] Fix typo

---
 src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
index d861173d8a0..f867a39581f 100644
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@@ -298,7 +298,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
    if (parts_to_merge.empty())
    {
        if (out_disable_reason)
-            *out_disable_reason = "There are no need to merge parts according to merge selector algorithm";
+            *out_disable_reason = "There is no need to merge parts according to merge selector algorithm";
        return false;
    }

From b51cbbdf15ea74218792d73168e8596b538f9802 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jun 2020 04:08:56 +0300
Subject: [PATCH 0696/2229] Update test

---
 .../00620_optimize_on_nonleader_replica_zookeeper.sql | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql b/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql
index 9622a5bd3c2..f488502b13b 100644
--- a/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql
+++ b/tests/queries/0_stateless/00620_optimize_on_nonleader_replica_zookeeper.sql
@@ -1,3 +1,5 @@
+-- The test is mostly outdated, as now every replica is a leader and can do OPTIMIZE locally.
+
 DROP TABLE IF EXISTS rename1;
 DROP TABLE IF EXISTS rename2;
 DROP TABLE IF EXISTS rename3;
@@ -14,7 +16,9 @@ SELECT * FROM rename1;
 RENAME TABLE rename2 TO rename3;

 INSERT INTO rename1 VALUES (0, 1, 2);
+SYSTEM SYNC REPLICA rename3; -- Make "rename3" see all data parts.
 OPTIMIZE TABLE rename3;
+SYSTEM SYNC REPLICA rename1; -- Make "rename1" see and process all scheduled merges.
 SELECT * FROM rename1;

 DROP TABLE IF EXISTS rename1;

From 66ccb2f6b121b1e85cb035d6e6a256617722d4d3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jun 2020 04:12:01 +0300
Subject: [PATCH 0697/2229] Remove "current_password" because it is harmful

---
 src/Interpreters/ClientInfo.h | 2 --
 src/Interpreters/Context.cpp | 1 -
 src/Storages/StorageReplicatedMergeTree.cpp | 2 +-
 src/Storages/StorageReplicatedMergeTree.h | 2 ++
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 7a4df63c17a..294eb47e3e9 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -47,8 +47,6 @@ public:
    String current_user;
    String current_query_id;
    Poco::Net::SocketAddress current_address;
-    /// Use current user and password when sending query to replica leader
-    String current_password;

    /// When query_kind == INITIAL_QUERY, these values are equal to current.
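    /// For queries forwarded between servers (e.g. via Distributed tables), the initial_* fields
    /// keep the user, query id and address of the client that issued the very first query.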
String initial_user; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cb780443e03..bd99039c36d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -660,7 +660,6 @@ void Context::setUser(const String & name, const String & password, const Poco:: auto lock = getLock(); client_info.current_user = name; - client_info.current_password = password; client_info.current_address = address; auto new_user_id = getAccessControlManager().find(name); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ac762b3e05f..055e709cbc3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4416,7 +4416,7 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query const auto & query_settings = query_context.getSettingsRef(); const auto & query_client_info = query_context.getClientInfo(); String user = query_client_info.current_user; - String password = query_client_info.current_password; + String password; if (auto address = findClusterAddress(leader_address); address) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 5083abf7ef9..b2bd546b478 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -222,6 +222,7 @@ private: zkutil::EphemeralNodeHolderPtr replica_is_active_node; /** Is this replica "leading". The leader replica selects the parts to merge. + * It can be false only when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders. */ std::atomic is_leader {false}; zkutil::LeaderElectionPtr leader_election; @@ -497,6 +498,7 @@ private: bool waitForReplicaToProcessLogEntry(const String & replica_name, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true); /// Choose leader replica, send requst to it and wait. + /// Only makes sense when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders. void sendRequestToLeaderReplica(const ASTPtr & query, const Context & query_context); /// Throw an exception if the table is readonly. From 5866401f6078010e51f31b6b2bed367c0bccca49 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 05:12:06 +0300 Subject: [PATCH 0698/2229] Less noise in cleanup thread --- .../ReplicatedMergeTreeCleanupThread.cpp | 75 +++++++++++++++---- .../ReplicatedMergeTreeCleanupThread.h | 3 +- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 1bc132eaba4..0870c0fdf72 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -85,8 +86,14 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() int children_count = stat.numChildren; - /// We will wait for 1.1 times more records to accumulate than necessary. - if (static_cast(children_count) < storage_settings->min_replicated_logs_to_keep * 1.1) + /// We will wait for 1.05 to 1.15 times more records to accumulate than necessary. + /// Randomization is needed to spread the time when multiple replicas come here. + /// Numbers are arbitrary. 
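+    /// e.g. with min_replicated_logs_to_keep = 100, a replica starts purging only after roughly
+    /// 105..115 entries have accumulated, so concurrent replicas rarely race on the same batch.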
+ std::uniform_real_distribution distr(1.05, 1.15); + double ratio = distr(rng); + size_t min_replicated_logs_to_keep = storage_settings->min_replicated_logs_to_keep * ratio; + + if (static_cast(children_count) < min_replicated_logs_to_keep) return; Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &stat); @@ -214,10 +221,15 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() if (entries.empty()) return; - markLostReplicas(host_versions_lost_replicas, log_pointers_candidate_lost_replicas, replicas.size() - num_replicas_were_marked_is_lost, zookeeper); + markLostReplicas( + host_versions_lost_replicas, + log_pointers_candidate_lost_replicas, + replicas.size() - num_replicas_were_marked_is_lost, + zookeeper); Coordination::Requests ops; - for (size_t i = 0; i < entries.size(); ++i) + size_t i = 0; + for (; i < entries.size(); ++i) { ops.emplace_back(zkutil::makeRemoveRequest(storage.zookeeper_path + "/log/" + entries[i], -1)); @@ -229,12 +241,25 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() /// Simultaneously with clearing the log, we check to see if replica was added since we received replicas list. ops.emplace_back(zkutil::makeCheckRequest(storage.zookeeper_path + "/replicas", stat.version)); - zookeeper->multi(ops); + + try + { + zookeeper->multi(ops); + } + catch (const zkutil::KeeperMultiException & e) + { + /// Another replica already deleted the same node concurrently. + if (e.code == Coordination::Error::ZNONODE) + break; + + throw; + } ops.clear(); } } - LOG_DEBUG(log, "Removed {} old log entries: {} - {}", entries.size(), entries.front(), entries.back()); + if (i != 0) + LOG_DEBUG(log, "Removed {} old log entries: {} - {}", i, entries[0], entries[i - 1]); } @@ -250,8 +275,10 @@ void ReplicatedMergeTreeCleanupThread::markLostReplicas(const std::unordered_map String replica = pair.first; Coordination::Requests ops; /// If host changed version we can not mark replicas, because replica started to be active. - ops.emplace_back(zkutil::makeCheckRequest(storage.zookeeper_path + "/replicas/" + replica + "/host", host_versions_lost_replicas.at(replica))); - ops.emplace_back(zkutil::makeSetRequest(storage.zookeeper_path + "/replicas/" + replica + "/is_lost", "1", -1)); + ops.emplace_back(zkutil::makeCheckRequest( + storage.zookeeper_path + "/replicas/" + replica + "/host", host_versions_lost_replicas.at(replica))); + ops.emplace_back(zkutil::makeSetRequest( + storage.zookeeper_path + "/replicas/" + replica + "/is_lost", "1", -1)); candidate_lost_replicas.push_back(replica); requests.push_back(ops); } @@ -299,14 +326,17 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() /// Use ZooKeeper's first node (last according to time) timestamp as "current" time. 
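    /// (timed_blocks is sorted newest-first, so front() is the most recently created block.)
    /// e.g. with replicated_deduplication_window_seconds = 3600, every block recorded more than
    /// an hour before the newest one falls past the threshold; ctime is in milliseconds, hence the * 1000.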
Int64 current_time = timed_blocks.front().ctime; - Int64 time_threshold = std::max(static_cast(0), current_time - static_cast(1000 * storage_settings->replicated_deduplication_window_seconds)); + Int64 time_threshold = std::max( + static_cast(0), + current_time - static_cast(1000 * storage_settings->replicated_deduplication_window_seconds)); /// Virtual node, all nodes that are "greater" than this one will be deleted NodeWithStat block_threshold{{}, time_threshold}; size_t current_deduplication_window = std::min(timed_blocks.size(), storage_settings->replicated_deduplication_window); auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window; - auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); + auto first_outdated_block_time_threshold = std::upper_bound( + timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold); zkutil::AsyncResponses try_remove_futures; @@ -326,13 +356,16 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() zookeeper->removeRecursive(path); cached_block_stats.erase(first_outdated_block->node); } - else if (rc != Coordination::Error::ZOK) - LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); - else + else if (rc == Coordination::Error::ZOK || rc == Coordination::Error::ZNONODE) { + /// No node is Ok. Another replica is removing nodes concurrently. /// Successfully removed blocks have to be removed from cache cached_block_stats.erase(first_outdated_block->node); } + else + { + LOG_WARNING(log, "Error while deleting ZooKeeper path `{}`: {}, ignoring.", path, Coordination::errorMessage(rc)); + } first_outdated_block++; } @@ -453,8 +486,20 @@ void ReplicatedMergeTreeCleanupThread::clearOldMutations() { /// Simultaneously with clearing the log, we check to see if replica was added since we received replicas list. ops.emplace_back(zkutil::makeCheckRequest(storage.zookeeper_path + "/replicas", replicas_stat.version)); - zookeeper->multi(ops); - LOG_DEBUG(log, "Removed {} old mutation entries: {} - {}", (i + 1 - batch_start_i), entries[batch_start_i], entries[i]); + try + { + zookeeper->multi(ops); + } + catch (const zkutil::KeeperMultiException & e) + { + /// Another replica already deleted the same node concurrently. 
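+                /// (zookeeper->multi() is all-or-nothing: if one sub-request fails, none of the
+                /// batch is applied, so the remaining entries are simply left to the other replica.)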
+ if (e.code == Coordination::Error::ZNONODE) + break; + + throw; + } + LOG_DEBUG(log, "Removed {} old mutation entries: {} - {}", + i + 1 - batch_start_i, entries[batch_start_i], entries[i]); batch_start_i = i + 1; ops.clear(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index a787f99d907..f4191482d64 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -36,7 +37,7 @@ private: String log_name; Poco::Logger * log; BackgroundSchedulePool::TaskHolder task; - pcg64 rng; + pcg64 rng{randomSeed()}; void run(); void iterate(); From bbe5f4c9090d7dda7d79281f24b75e39a384aae0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 05:13:41 +0300 Subject: [PATCH 0699/2229] Revert "Remove "current_password" because it is harmful" This reverts commit 66ccb2f6b121b1e85cb035d6e6a256617722d4d3. --- src/Interpreters/ClientInfo.h | 2 ++ src/Interpreters/Context.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.h | 2 -- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 294eb47e3e9..7a4df63c17a 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -47,6 +47,8 @@ public: String current_user; String current_query_id; Poco::Net::SocketAddress current_address; + /// Use current user and password when sending query to replica leader + String current_password; /// When query_kind == INITIAL_QUERY, these values are equal to current. String initial_user; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index bd99039c36d..cb780443e03 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -660,6 +660,7 @@ void Context::setUser(const String & name, const String & password, const Poco:: auto lock = getLock(); client_info.current_user = name; + client_info.current_password = password; client_info.current_address = address; auto new_user_id = getAccessControlManager().find(name); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 055e709cbc3..ac762b3e05f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4416,7 +4416,7 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query const auto & query_settings = query_context.getSettingsRef(); const auto & query_client_info = query_context.getClientInfo(); String user = query_client_info.current_user; - String password; + String password = query_client_info.current_password; if (auto address = findClusterAddress(leader_address); address) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index b2bd546b478..5083abf7ef9 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -222,7 +222,6 @@ private: zkutil::EphemeralNodeHolderPtr replica_is_active_node; /** Is this replica "leading". The leader replica selects the parts to merge. - * It can be false only when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders. 
 */
    std::atomic is_leader {false};
    zkutil::LeaderElectionPtr leader_election;
@@ -498,7 +497,6 @@ private:
    bool waitForReplicaToProcessLogEntry(const String & replica_name, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true);

    /// Choose leader replica, send requst to it and wait.
-    /// Only makes sense when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders.
    void sendRequestToLeaderReplica(const ASTPtr & query, const Context & query_context);

    /// Throw an exception if the table is readonly.

From d2c66f96881bcdc18248711a2db3ed6e953437c3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jun 2020 05:14:59 +0300
Subject: [PATCH 0700/2229] Added comments

---
 src/Storages/StorageReplicatedMergeTree.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h
index 5083abf7ef9..b2bd546b478 100644
--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@@ -222,6 +222,7 @@ private:
    zkutil::EphemeralNodeHolderPtr replica_is_active_node;

    /** Is this replica "leading". The leader replica selects the parts to merge.
+      * It can be false only when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders.
      */
    std::atomic is_leader {false};
    zkutil::LeaderElectionPtr leader_election;
@@ -497,6 +498,7 @@ private:
    bool waitForReplicaToProcessLogEntry(const String & replica_name, const ReplicatedMergeTreeLogEntryData & entry, bool wait_for_non_active = true);

    /// Choose leader replica, send requst to it and wait.
+    /// Only makes sense when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders.
    void sendRequestToLeaderReplica(const ASTPtr & query, const Context & query_context);

    /// Throw an exception if the table is readonly.

From 1ab599b0a0598be53720765bebc0565d222addaa Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jun 2020 05:17:08 +0300
Subject: [PATCH 0701/2229] Remove "current_password" but keep it for Arcadians

---
 src/Interpreters/ClientInfo.h | 3 ++-
 src/Interpreters/Context.cpp | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 7a4df63c17a..704fba3b3ef 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -47,7 +47,8 @@ public:
    String current_user;
    String current_query_id;
    Poco::Net::SocketAddress current_address;
-    /// Use current user and password when sending query to replica leader
+
+    /// This field is only used in the foreign "Arcadia" build.
    String current_password;

    /// When query_kind == INITIAL_QUERY, these values are equal to current.
    String initial_user;

diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index cb780443e03..02060534aef 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -660,9 +660,13 @@ void Context::setUser(const String & name, const String & password, const Poco::
    auto lock = getLock();

    client_info.current_user = name;
-    client_info.current_password = password;
    client_info.current_address = address;

+#if defined(ARCADIA_BUILD)
+    /// This is a harmful field that is used only in the foreign "Arcadia" build.
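+    /// (A plaintext password stored here can easily leak, e.g. into logs or system tables,
+    /// which is why the field is no longer filled in the main build.)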
+ client_info.current_password = password; +#endif + auto new_user_id = getAccessControlManager().find(name); std::shared_ptr new_access; if (new_user_id) From 0c1b2d48a30d4006271638de056070ac9cc5a4ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 06:58:07 +0300 Subject: [PATCH 0702/2229] Update test --- .../01249_bad_arguments_for_bloom_filter.reference | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference index 70d176d9b7a..e3f4955d4cf 100644 --- a/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference +++ b/tests/queries/0_stateless/01249_bad_arguments_for_bloom_filter.reference @@ -1,3 +1,3 @@ -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(0., 1.) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(-0.1) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 -CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64, \n `i32` Int32, \n `f64` Float64, \n `d` Decimal(10, 2), \n `s` String, \n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3), \n `dt` Date, \n INDEX bloom_filter_a i32 TYPE bloom_filter(1.01) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(0., 1.) 
GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(-0.1) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 +CREATE TABLE default.bloom_filter_idx_good\n(\n `u64` UInt64,\n `i32` Int32,\n `f64` Float64,\n `d` Decimal(10, 2),\n `s` String,\n `e` Enum8(\'a\' = 1, \'b\' = 2, \'c\' = 3),\n `dt` Date,\n INDEX bloom_filter_a i32 TYPE bloom_filter(1.01) GRANULARITY 1\n)\nENGINE = MergeTree()\nORDER BY u64\nSETTINGS index_granularity = 8192 From 1c5c2f8c690a714e4f53b7809813364989789ef9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:08:20 +0300 Subject: [PATCH 0703/2229] Fix formatting of CREATE DICTIONARY --- src/Parsers/ASTDictionaryAttributeDeclaration.cpp | 3 --- src/Parsers/ParserCreateQuery.cpp | 1 - 2 files changed, 4 deletions(-) diff --git a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp index 2b056cb3743..05ba48ace7b 100644 --- a/src/Parsers/ASTDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ASTDictionaryAttributeDeclaration.cpp @@ -34,9 +34,6 @@ void ASTDictionaryAttributeDeclaration::formatImpl(const FormatSettings & settin { frame.need_parens = false; - if (!settings.one_line) - settings.ostr << settings.nl_or_ws << std::string(4 * frame.indent, ' '); - settings.ostr << backQuote(name); if (type) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c54033bd27d..f8c137fb679 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -796,7 +796,6 @@ bool ParserCreateDictionaryQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, E ParserDictionaryAttributeDeclarationList attributes_p; ParserDictionary dictionary_p; - bool if_not_exists = false; ASTPtr database; From e9eb722d4ac539539fd8a1b803b3b203fa42d91b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:27:33 +0300 Subject: [PATCH 0704/2229] Better formatting of CREATE queries --- src/Parsers/ASTCreateQuery.cpp | 1 + src/Parsers/ASTExpressionList.cpp | 6 ++++-- src/Parsers/IAST.h | 1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index f7481ac3c09..fb6bbaeafb0 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -197,6 +197,7 @@ ASTPtr ASTCreateQuery::clone() const void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; + frame.expression_list_always_start_on_new_line = true; if (!database.empty() && table.empty()) { diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 1395d8b15fe..abab1e895cf 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -39,10 +39,12 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For settings.ostr << separator; } - if (children.size() > 1) + if (children.size() > 1 || frame.expression_list_always_start_on_new_line) settings.ostr << indent_str; - (*it)->formatImpl(settings, state, frame); + FormatStateStacked frame_nested = frame; + frame_nested.expression_list_always_start_on_new_line = false; + (*it)->formatImpl(settings, 
state, frame_nested); } } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 88dedc54d3f..c0c286ac0d2 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -202,6 +202,7 @@ public: { UInt8 indent = 0; bool need_parens = false; + bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. const IAST * current_select = nullptr; }; From e2607f005c334ee8e35b107308b016bd98db7412 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:36:20 +0300 Subject: [PATCH 0705/2229] Fix error with ALTER CONSTRAINT formatting; added a test --- src/Parsers/ASTAlterQuery.cpp | 2 +- .../01318_alter_add_constraint_format.reference | 1 + .../0_stateless/01318_alter_add_constraint_format.sh | 8 ++++++++ 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01318_alter_add_constraint_format.reference create mode 100755 tests/queries/0_stateless/01318_alter_add_constraint_format.sh diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index f323f66be17..1309037ec01 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -146,7 +146,7 @@ void ASTAlterCommand::formatImpl( } else if (type == ASTAlterCommand::ADD_CONSTRAINT) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT" << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD CONSTRAINT " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); constraint_decl->formatImpl(settings, state, frame); } else if (type == ASTAlterCommand::DROP_CONSTRAINT) diff --git a/tests/queries/0_stateless/01318_alter_add_constraint_format.reference b/tests/queries/0_stateless/01318_alter_add_constraint_format.reference new file mode 100644 index 00000000000..4283da7b3af --- /dev/null +++ b/tests/queries/0_stateless/01318_alter_add_constraint_format.reference @@ -0,0 +1 @@ +ALTER TABLE replicated_constraints1 ADD CONSTRAINT IF NOT EXISTS b_constraint CHECK b > 10 diff --git a/tests/queries/0_stateless/01318_alter_add_constraint_format.sh b/tests/queries/0_stateless/01318_alter_add_constraint_format.sh new file mode 100755 index 00000000000..f8eb655a766 --- /dev/null +++ b/tests/queries/0_stateless/01318_alter_add_constraint_format.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_FORMAT --oneline <<<"ALTER TABLE replicated_constraints1 ADD CONSTRAINT IF NOT EXISTS b_constraint CHECK b > 10" From d5e3e7ff761d9fa56b5b6ad75dd81516e45a043f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:36:55 +0300 Subject: [PATCH 0706/2229] Update tests --- .../queries/0_stateless/01018_ddl_dictionaries_create.reference | 2 +- .../01110_dictionary_layout_without_arguments.reference | 2 +- .../0_stateless/01224_no_superfluous_dict_reload.reference | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference index ad16e8ae7f2..7c2eca9cedf 100644 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_create.reference @@ -1,5 +1,5 @@ =DICTIONARY in Ordinary DB -CREATE DICTIONARY ordinary_db.dict1\n(\n `key_column` UInt64 DEFAULT 0, \n `second_column` UInt8 DEFAULT 1, \n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) +CREATE DICTIONARY ordinary_db.dict1\n(\n `key_column` UInt64 DEFAULT 0,\n `second_column` UInt8 DEFAULT 1,\n `third_column` String DEFAULT \'qqq\'\n)\nPRIMARY KEY key_column\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' PASSWORD \'\' DB \'database_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(FLAT()) dict1 1 ordinary_db dict1 diff --git a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference index 852abeea187..69018bef2ef 100644 --- a/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference +++ b/tests/queries/0_stateless/01110_dictionary_layout_without_arguments.reference @@ -1,3 +1,3 @@ World -CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64, \n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) +CREATE DICTIONARY db_for_dict.dict_with_hashed_layout\n(\n `key1` UInt64,\n `value` String\n)\nPRIMARY KEY key1\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9000 USER \'default\' TABLE \'table_for_dict\' DB \'db_for_dict\'))\nLIFETIME(MIN 1 MAX 10)\nLAYOUT(HASHED) Hello diff --git a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference index 96d4393e06b..d80501b3f4d 100644 --- a/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference +++ b/tests/queries/0_stateless/01224_no_superfluous_dict_reload.reference @@ -2,7 +2,7 @@ NOT_LOADED NOT_LOADED CREATE DICTIONARY dict_db_01224.dict ( - `key` UInt64 DEFAULT 0, + `key` UInt64 DEFAULT 0, `val` UInt64 DEFAULT 10 ) PRIMARY KEY key From 94d55abfd13d0a3c0e4299f96e80fabb75b35a01 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:40:03 +0300 Subject: [PATCH 0707/2229] Update tests --- src/Parsers/ASTCreateQuery.cpp | 5 ++++- .../0_stateless/00933_ttl_replicated_zookeeper.reference | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 
fb6bbaeafb0..201e2e45528 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -197,7 +197,6 @@ ASTPtr ASTCreateQuery::clone() const void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; - frame.expression_list_always_start_on_new_line = true; if (!database.empty() && table.empty()) { @@ -271,6 +270,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); } + frame.expression_list_always_start_on_new_line = true; + if (columns_list) { settings.ostr << (settings.one_line ? " (" : "\n("); @@ -290,6 +291,8 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat settings.ostr << (settings.one_line ? ")" : "\n)"); } + frame.expression_list_always_start_on_new_line = false; + if (storage) storage->formatImpl(settings, state, frame); diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference index 629fbf2a4a3..c727c24707d 100644 --- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference +++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.reference @@ -1,3 +1,3 @@ 200 400 -CREATE TABLE test.ttl_repl2\n(\n `d` Date, \n `x` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\')\nPARTITION BY toDayOfMonth(d)\nORDER BY x\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 +CREATE TABLE test.ttl_repl2\n(\n `d` Date,\n `x` UInt32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/ttl_repl\', \'2\')\nPARTITION BY toDayOfMonth(d)\nORDER BY x\nTTL d + toIntervalDay(1)\nSETTINGS index_granularity = 8192 From e99c6d9143163bcc5104d391d620e18b4aaf83a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:42:29 +0300 Subject: [PATCH 0708/2229] Update tests --- ...cated_merge_tree_alter_zookeeper.reference | 48 +++++++++---------- ...om_compression_codecs_replicated.reference | 2 +- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference b/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference index fa5e65d2d60..ac0e0d557cb 100644 --- a/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference +++ b/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference @@ -1,22 +1,22 @@ d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime\n)\nENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 9 41 1992-01-01 08:00:00 2015-01-01 10 42 0000-00-00 00:00:00 d Date @@ -25,14 +25,14 @@ i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] @@ -43,7 +43,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -51,7 +51,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] [] @@ -64,7 +64,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' 
-CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -73,7 +73,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `n.d` Array(Date), \n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -86,7 +86,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -94,7 +94,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 @@ -108,7 +108,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n 
`n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -117,7 +117,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 DEFAULT \'0\' n.d Array(Date) -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.ui8` Array(UInt8), \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\', \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -129,14 +129,14 @@ i32 Int32 dt DateTime n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.s Array(String) s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `n.s` Array(String), \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0 @@ -147,13 +147,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 
2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -166,7 +166,7 @@ dt DateTime s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -174,7 +174,7 @@ dt DateTime s UInt32 DEFAULT \'0\' n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\', \n `n.s` Array(String), \n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 [] [] 2015-01-01 7 39 2014-07-14 13:26:50 0 [] [] 2015-01-01 8 40 2012-12-12 12:12:12 0 [] [] @@ -185,13 +185,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 DEFAULT \'0\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` DateTime, \n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime,\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -202,13 +202,13 @@ k UInt64 i32 Int32 dt Date s DateTime DEFAULT \'0000-00-00 00:00:00\' -CREATE TABLE test.replicated_alter1\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` Date, \n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt Date s DateTime DEFAULT \'0000-00-00 00:00:00\' -CREATE TABLE test.replicated_alter2\n(\n `d` Date, \n `k` UInt64, \n `i32` Int32, \n `dt` Date, \n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime DEFAULT \'0000-00-00 00:00:00\'\n)\nENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 1970-01-02 06:55:00 2015-01-01 7 39 2014-07-14 0000-00-00 00:00:00 2015-01-01 8 40 2012-12-12 0000-00-00 00:00:00 diff --git a/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference b/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference index ee481c88d89..62cea01089a 100644 --- a/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference +++ b/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference @@ -20,7 +20,7 @@ 274972506.6 9175437371954010821 9175437371954010821 -CREATE TABLE test.compression_codec_multiple_more_types_replicated\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), \n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), \n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), \n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\')\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE test.compression_codec_multiple_more_types_replicated\n(\n `id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)),\n `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)),\n `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)),\n `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\')\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 
From 6ddc6d7f085aae86e5ed261be4788a6a19db66cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 07:51:27 +0300 Subject: [PATCH 0709/2229] Make the test faster #11637 --- tests/queries/0_stateless/01307_multiple_leaders.reference | 4 ++-- tests/queries/0_stateless/01307_multiple_leaders.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01307_multiple_leaders.reference b/tests/queries/0_stateless/01307_multiple_leaders.reference index 576441b288d..62cda31dff8 100644 --- a/tests/queries/0_stateless/01307_multiple_leaders.reference +++ b/tests/queries/0_stateless/01307_multiple_leaders.reference @@ -1,2 +1,2 @@ -2000 1999000 -2000 1999000 +400 79800 +400 79800 diff --git a/tests/queries/0_stateless/01307_multiple_leaders.sh b/tests/queries/0_stateless/01307_multiple_leaders.sh index 0bf5e0b13bf..e19a10bcecb 100755 --- a/tests/queries/0_stateless/01307_multiple_leaders.sh +++ b/tests/queries/0_stateless/01307_multiple_leaders.sh @@ -22,8 +22,8 @@ function thread() } -thread 0 1000 & -thread 1 1000 & +thread 0 200 & +thread 1 200 & wait From 65a8fe7cf053b9209c591c249971c8a8b9e4a102 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 09:14:58 +0300 Subject: [PATCH 0710/2229] Update tests --- .../00725_ipv4_ipv6_domains.reference | 4 +-- .../01069_database_memory.reference | 2 +- ..._expressions_in_engine_arguments.reference | 12 +++---- .../01272_suspicious_codecs.reference | 32 +++++++++---------- .../01297_alter_distributed.reference | 4 +-- .../0_stateless/01298_alter_merge.reference | 4 +-- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index 69804e6cd24..28051d15f65 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.ipv4_test\n(`ipv4_` IPv4\n)\nENGINE = Memory +CREATE TABLE default.ipv4_test\n(\n `ipv4_` IPv4\n)\nENGINE = Memory 0.0.0.0 00 8.8.8.8 08080808 127.0.0.1 7F000001 @@ -10,7 +10,7 @@ CREATE TABLE default.ipv4_test\n(`ipv4_` IPv4\n)\nENGINE = Memory > 127.0.0.1 255.255.255.255 = 127.0.0.1 127.0.0.1 euqality of IPv4-mapped IPv6 value and IPv4 promoted to IPv6 with function: 1 -CREATE TABLE default.ipv6_test\n(`ipv6_` IPv6\n)\nENGINE = Memory +CREATE TABLE default.ipv6_test\n(\n `ipv6_` IPv6\n)\nENGINE = Memory :: 00000000000000000000000000000000 :: 00000000000000000000000000000000 ::ffff:8.8.8.8 00000000000000000000FFFF08080808 diff --git a/tests/queries/0_stateless/01069_database_memory.reference b/tests/queries/0_stateless/01069_database_memory.reference index cfccf5b1757..e7486d57276 100644 --- a/tests/queries/0_stateless/01069_database_memory.reference +++ b/tests/queries/0_stateless/01069_database_memory.reference @@ -5,4 +5,4 @@ CREATE DATABASE memory_01069\nENGINE = Memory() 4 3 4 -CREATE TABLE memory_01069.file\n(`n` UInt8\n)\nENGINE = File(\'CSV\') +CREATE TABLE memory_01069.file\n(\n `n` UInt8\n)\nENGINE = File(\'CSV\') diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference index 138f09f2634..d360a046958 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.reference @@ -1,11 +1,11 @@ -CREATE TABLE test_01083.file\n(`n` 
Int8\n)\nENGINE = File(\'TSVWithNamesAndTypes\') -CREATE TABLE test_01083.buffer\n(`n` Int8\n)\nENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) -CREATE TABLE test_01083.merge\n(`n` Int8\n)\nENGINE = Merge(\'test_01083\', \'distributed\') +CREATE TABLE test_01083.file\n(\n `n` Int8\n)\nENGINE = File(\'TSVWithNamesAndTypes\') +CREATE TABLE test_01083.buffer\n(\n `n` Int8\n)\nENGINE = Buffer(\'test_01083\', \'file\', 16, 10, 200, 10000, 1000000, 10000000, 1000000000) +CREATE TABLE test_01083.merge\n(\n `n` Int8\n)\nENGINE = Merge(\'test_01083\', \'distributed\') CREATE TABLE test_01083.merge_tf AS merge(\'test_01083\', \'.*\') -CREATE TABLE test_01083.distributed\n(`n` Int8\n)\nENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') +CREATE TABLE test_01083.distributed\n(\n `n` Int8\n)\nENGINE = Distributed(\'test_shard_localhost\', \'test_01083\', \'file\') CREATE TABLE test_01083.distributed_tf AS cluster(\'test_shard_localhost\', \'test_01083\', \'buffer\') CREATE TABLE test_01083.url\n(\n `n` UInt64,\n `col` String\n)\nENGINE = URL(\'https://localhost:8443/?query=select+n,+_table+from+test_01083.merge+format+CSV\', \'CSV\') CREATE TABLE test_01083.rich_syntax AS remote(\'localhos{x|y|t}\', cluster(\'test_shard_localhost\', remote(\'127.0.0.{1..4}\', \'test_01083\', \'view\'))) -CREATE VIEW test_01083.view\n(`n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM \n(\n SELECT toString(n) AS n\n FROM test_01083.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM test_01083.file -CREATE DICTIONARY test_01083.dict\n(\n \n `n` UInt64,\n \n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) +CREATE VIEW test_01083.view\n(\n `n` Int64\n) AS\nSELECT toInt64(n) AS n\nFROM \n(\n SELECT toString(n) AS n\n FROM test_01083.merge\n WHERE _table != \'qwerty\'\n ORDER BY _table ASC\n)\nUNION ALL\nSELECT *\nFROM test_01083.file +CREATE DICTIONARY test_01083.dict\n(\n `n` UInt64,\n `col` String DEFAULT \'42\'\n)\nPRIMARY KEY n\nSOURCE(CLICKHOUSE(HOST \'localhost\' PORT 9440 SECURE 1 USER \'default\' TABLE \'url\' DB \'test_01083\'))\nLIFETIME(MIN 0 MAX 1)\nLAYOUT(CACHE(SIZE_IN_CELLS 1)) 16 diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.reference b/tests/queries/0_stateless/01272_suspicious_codecs.reference index de91a1ddb25..559b6df2693 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.reference +++ b/tests/queries/0_stateless/01272_suspicious_codecs.reference @@ -1,16 +1,16 @@ -CREATE TABLE default.codecs1\n(`a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs2\n(`a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs3\n(`a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs4\n(`a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs5\n(`a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs6\n(`a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs7\n(`a` UInt8 CODEC(Delta(1), 
Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs8\n(`a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs1\n(`a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs2\n(`a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs3\n(`a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs4\n(`a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs5\n(`a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs6\n(`a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs7\n(`a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE default.codecs8\n(`a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs1\n(\n `a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs2\n(\n `a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs3\n(\n `a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs4\n(\n `a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs1\n(\n `a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs2\n(\n `a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs3\n(\n `a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs4\n(\n `a` UInt8 CODEC(LZ4, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 
8192 diff --git a/tests/queries/0_stateless/01297_alter_distributed.reference b/tests/queries/0_stateless/01297_alter_distributed.reference index bd269322884..8fd8bc7ab72 100644 --- a/tests/queries/0_stateless/01297_alter_distributed.reference +++ b/tests/queries/0_stateless/01297_alter_distributed.reference @@ -6,7 +6,7 @@ VisitID UInt64 UserID UInt64 StartTime DateTime ClickLogID UInt64 -CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32, \n `dummy` String, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') +CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32,\n `dummy` String,\n `StartDate` Date,\n `Sign` Int8,\n `VisitID` UInt64,\n `UserID` UInt64,\n `StartTime` DateTime,\n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') 1 Hello, Alter Table! CounterID UInt32 StartDate Date @@ -15,4 +15,4 @@ VisitID UInt64 UserID UInt64 StartTime DateTime ClickLogID UInt64 -CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') +CREATE TABLE default.merge_distributed\n(\n `CounterID` UInt32,\n `StartDate` Date,\n `Sign` Int8,\n `VisitID` UInt64,\n `UserID` UInt64,\n `StartTime` DateTime,\n `ClickLogID` UInt64\n)\nENGINE = Distributed(\'test_shard_localhost\', \'default\', \'merge_distributed1\') diff --git a/tests/queries/0_stateless/01298_alter_merge.reference b/tests/queries/0_stateless/01298_alter_merge.reference index 393c0a600ff..a012900f978 100644 --- a/tests/queries/0_stateless/01298_alter_merge.reference +++ b/tests/queries/0_stateless/01298_alter_merge.reference @@ -6,7 +6,7 @@ VisitID UInt64 UserID UInt64 StartTime DateTime ClickLogID UInt64 -CREATE TABLE default.merge\n(\n `CounterID` UInt32, \n `dummy` String, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') +CREATE TABLE default.merge\n(\n `CounterID` UInt32,\n `dummy` String,\n `StartDate` Date,\n `Sign` Int8,\n `VisitID` UInt64,\n `UserID` UInt64,\n `StartTime` DateTime,\n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') CounterID UInt32 StartDate Date Sign Int8 @@ -14,4 +14,4 @@ VisitID UInt64 UserID UInt64 StartTime DateTime ClickLogID UInt64 -CREATE TABLE default.merge\n(\n `CounterID` UInt32, \n `StartDate` Date, \n `Sign` Int8, \n `VisitID` UInt64, \n `UserID` UInt64, \n `StartTime` DateTime, \n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') +CREATE TABLE default.merge\n(\n `CounterID` UInt32,\n `StartDate` Date,\n `Sign` Int8,\n `VisitID` UInt64,\n `UserID` UInt64,\n `StartTime` DateTime,\n `ClickLogID` UInt64\n)\nENGINE = Merge(\'default\', \'merge\\\\[0-9\\\\]\') From eccd8d61dd5b67220267a171841827bfebcc2eca Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 15 Jun 2020 10:13:29 +0300 Subject: [PATCH 0711/2229] Update build.py --- docs/tools/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index 95e887f046f..b7ddbc29629 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -220,7 +220,7 @@ if __name__ == 
'__main__': arg_parser.add_argument('--website-dir', default=website_dir) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--enable-stable-releases', action='store_true') - arg_parser.add_argument('--stable-releases-limit', type=int, default='4') + arg_parser.add_argument('--stable-releases-limit', type=int, default='3') arg_parser.add_argument('--lts-releases-limit', type=int, default='2') arg_parser.add_argument('--nav-limit', type=int, default='0') arg_parser.add_argument('--version-prefix', type=str, default='') From 013c03a70918067bc08f3e1f2dbac0a63533590a Mon Sep 17 00:00:00 2001 From: "dependabot-preview[bot]" <27856297+dependabot-preview[bot]@users.noreply.github.com> Date: Mon, 15 Jun 2020 07:54:48 +0000 Subject: [PATCH 0712/2229] Bump googletrans from 2.4.0 to 3.0.0 in /docs/tools Bumps [googletrans](https://github.com/ssut/py-googletrans) from 2.4.0 to 3.0.0. - [Release notes](https://github.com/ssut/py-googletrans/releases) - [Commits](https://github.com/ssut/py-googletrans/commits) Signed-off-by: dependabot-preview[bot] --- docs/tools/translate/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/translate/requirements.txt b/docs/tools/translate/requirements.txt index 0c9d44a346e..370554fa90f 100644 --- a/docs/tools/translate/requirements.txt +++ b/docs/tools/translate/requirements.txt @@ -1,7 +1,7 @@ Babel==2.8.0 certifi==2020.4.5.2 chardet==3.0.4 -googletrans==2.4.0 +googletrans==3.0.0 idna==2.9 Jinja2==2.11.2 pandocfilters==1.4.2 From c20ce687cf21bfc2015a7bf8f0c3bb92e81b52ce Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 11:12:01 +0300 Subject: [PATCH 0713/2229] bump ci --- src/Storages/AlterCommands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 8e8b308ac3d..32b6a94ff23 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -716,7 +716,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co if (!command.ignore) command.apply(metadata_copy, context); - /// Changes in columns may lead to changes in keys expression + /// Changes in columns may lead to changes in keys expression. metadata_copy.sorting_key.recalculateWithNewColumns(metadata_copy.columns, context); if (metadata_copy.primary_key.definition_ast != nullptr) { @@ -728,7 +728,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, const Context & co metadata_copy.primary_key.definition_ast = nullptr; } - /// Changes in columns may lead to changes in TTL expressions + /// Changes in columns may lead to changes in TTL expressions. 
auto column_ttl_asts = metadata_copy.columns.getColumnTTLs(); for (const auto & [name, ast] : column_ttl_asts) { From 2c9ce0f3fa4e7f02294bbb6e6021eee3633cc289 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 13:14:36 +0300 Subject: [PATCH 0714/2229] Bump CI --- src/Storages/AlterCommands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 32b6a94ff23..c0fc53aa8e3 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -327,7 +327,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, const Context & con primary_key = KeyDescription::getKeyFromAST(sorting_key.definition_ast, metadata.columns, context); } - /// Recalculate key with new order_by expression + /// Recalculate key with new order_by expression. sorting_key.recalculateWithNewAST(order_by, metadata.columns, context); } else if (type == COMMENT_COLUMN) From ccf2ceb8769ccf450abcea3a16d541bdf9b5a08a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 14:02:47 +0300 Subject: [PATCH 0715/2229] Fix pipeline stuck for parallel final. --- .../Algorithms/AggregatingSortedAlgorithm.cpp | 23 +++++--- .../Algorithms/AggregatingSortedAlgorithm.h | 4 +- .../Algorithms/CollapsingSortedAlgorithm.cpp | 8 +++ .../GraphiteRollupSortedAlgorithm.cpp | 9 ++- .../Merges/Algorithms/IMergingAlgorithm.h | 28 ++++++++- .../IMergingAlgorithmWithDelayedChunk.cpp | 31 ++++++---- .../IMergingAlgorithmWithDelayedChunk.h | 9 +-- .../IMergingAlgorithmWithSharedChunks.cpp | 38 ++++++------ .../IMergingAlgorithmWithSharedChunks.h | 20 +++++-- .../Algorithms/MergingSortedAlgorithm.cpp | 36 ++++++----- .../Algorithms/MergingSortedAlgorithm.h | 6 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 7 +++ .../Algorithms/SummingSortedAlgorithm.cpp | 23 +++++--- .../Algorithms/SummingSortedAlgorithm.h | 4 +- .../VersionedCollapsingAlgorithm.cpp | 7 +++ src/Processors/Merges/IMergingTransform.cpp | 59 +++++++------------ src/Processors/Merges/IMergingTransform.h | 10 ++-- 17 files changed, 198 insertions(+), 124 deletions(-) diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index be9bf3e354c..86a2f188104 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -280,19 +280,19 @@ AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( { } -void AggregatingSortedAlgorithm::initialize(Chunks chunks) +void AggregatingSortedAlgorithm::initialize(Inputs inputs) { - for (auto & chunk : chunks) - if (chunk) - preprocessChunk(chunk, columns_definition); + for (auto & input : inputs) + if (input.chunk) + preprocessChunk(input.chunk, columns_definition); - initializeQueue(std::move(chunks)); + initializeQueue(std::move(inputs)); } -void AggregatingSortedAlgorithm::consume(Chunk & chunk, size_t source_num) +void AggregatingSortedAlgorithm::consume(Input & input, size_t source_num) { - preprocessChunk(chunk, columns_definition); - updateCursor(chunk, source_num); + preprocessChunk(input.chunk, columns_definition); + updateCursor(input, source_num); } IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() @@ -303,6 +303,13 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() bool key_differs; SortCursor current = queue.current(); + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the 
corresponding source, if there is one. + queue.removeTop(); + return Status(current.impl->order); + } + { detail::RowRef current_key; current_key.set(current); diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index fe1710adc8b..da4ec876b69 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -19,8 +19,8 @@ public: const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size); - void initialize(Chunks chunks) override; - void consume(Chunk & chunk, size_t source_num) override; + void initialize(Inputs inputs) override; + void consume(Input & input, size_t source_num) override; Status merge() override; struct SimpleAggregateDescription; diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 49a3d018098..a1fe2de61f2 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -114,6 +114,14 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() while (queue.isValid()) { auto current = queue.current(); + + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the corresponding source, if there is one. + queue.removeTop(); + return Status(current.impl->order); + } + Int8 sign = assert_cast(*current->all_columns[sign_column_number]).getData()[current->pos]; RowRef current_row; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index f26fe96876f..b4136adbf2d 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -157,6 +157,13 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() { SortCursor current = queue.current(); + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the corresponding source, if there is one. 
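+            /// (Returning Status(current.impl->order) sets required_source, so the
+            /// transform pulls one more chunk from this input before merge() runs again.)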
+ queue.removeTop(); + return Status(current.impl->order); + } + StringRef next_path = current->all_columns[columns_definition.path_column_num]->getDataAt(current->pos); bool new_path = is_first || next_path != current_group_path; @@ -224,7 +231,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() *(*current_subgroup_newest_row.all_columns)[columns_definition.version_column_num], /* nan_direction_hint = */ 1) >= 0) { - current_subgroup_newest_row.set(current, source_chunks[current.impl->order]); + current_subgroup_newest_row.set(current, sources[current.impl->order].chunk); /// Small hack: group and subgroups have the same path, so we can set current_group_path here instead of startNextGroup /// But since we keep in memory current_subgroup_newest_row's block, we could use StringRef for current_group_path and don't diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h index b49209e462e..f86be2a7d1b 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithm.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithm.h @@ -20,8 +20,32 @@ public: explicit Status(size_t source) : required_source(source) {} }; - virtual void initialize(Chunks chunks) = 0; - virtual void consume(Chunk & chunk, size_t source_num) = 0; + struct Input + { + Chunk chunk; + + /// It is a flag which says that last row from chunk should be ignored in result. + /// This row is not ignored in sorting and is needed to synchronize required source + /// between different algorithm objects in parallel FINAL. + bool skip_last_row = false; + + void swap(Input & other) + { + chunk.swap(other.chunk); + std::swap(skip_last_row, other.skip_last_row); + } + + void set(Chunk chunk_) + { + chunk = std::move(chunk_); + skip_last_row = false; + } + }; + + using Inputs = std::vector; + + virtual void initialize(Inputs inputs) = 0; + virtual void consume(Input & input, size_t source_num) = 0; virtual Status merge() = 0; IMergingAlgorithm() = default; diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp index 751a08ce69f..36d622daa9a 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp @@ -8,36 +8,43 @@ IMergingAlgorithmWithDelayedChunk::IMergingAlgorithmWithDelayedChunk( size_t num_inputs, SortDescription description_) : description(std::move(description_)) - , source_chunks(num_inputs) + , current_inputs(num_inputs) , cursors(num_inputs) { } -void IMergingAlgorithmWithDelayedChunk::initializeQueue(Chunks chunks) +void IMergingAlgorithmWithDelayedChunk::initializeQueue(Inputs inputs) { - source_chunks = std::move(chunks); + current_inputs = std::move(inputs); - for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) { - if (!source_chunks[source_num]) + if (!current_inputs[source_num].chunk) continue; - cursors[source_num] = SortCursorImpl(source_chunks[source_num].getColumns(), description, source_num); + cursors[source_num] = SortCursorImpl(current_inputs[source_num].chunk.getColumns(), description, source_num); } queue = SortingHeap(cursors); } -void IMergingAlgorithmWithDelayedChunk::updateCursor(Chunk & chunk, size_t source_num) +void IMergingAlgorithmWithDelayedChunk::updateCursor(Input & input, size_t source_num) { - auto & source_chunk = 
source_chunks[source_num]; + auto & current_input = current_inputs[source_num]; /// Extend lifetime of last chunk. - last_chunk.swap(source_chunk); - last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); + if (current_input.skip_last_row && current_input.chunk.getNumRows() <= 1) + { + /// But if chunk has only single skipped row, ignore it. + } + else + { + last_chunk.swap(current_input.chunk); + last_chunk_sort_columns = std::move(cursors[source_num].sort_columns); + } - source_chunk.swap(chunk); - cursors[source_num].reset(source_chunk.getColumns(), {}); + current_input.swap(input); + cursors[source_num].reset(current_input.chunk.getColumns(), {}); queue.push(cursors[source_num]); } diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h index f7d5f630238..69530a707c2 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.h @@ -23,12 +23,13 @@ protected: ColumnRawPtrs last_chunk_sort_columns; /// Point to last_chunk if valid. - void initializeQueue(Chunks chunks); - void updateCursor(Chunk & chunk, size_t source_num); + void initializeQueue(Inputs inputs); + void updateCursor(Input & input, size_t source_num); + bool skipLastRowFor(size_t input_number) const { return current_inputs[input_number].skip_last_row; } private: - /// Chunks currently being merged. - std::vector source_chunks; + /// Inputs currently being merged. + Inputs current_inputs; SortCursorImpls cursors; /// In merging algorithm, we need to compare current sort key with the last one. diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp index 1fe61653ecc..39abe5c0ec7 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.cpp @@ -11,7 +11,7 @@ IMergingAlgorithmWithSharedChunks::IMergingAlgorithmWithSharedChunks( : description(std::move(description_)) , chunk_allocator(num_inputs + max_row_refs) , cursors(num_inputs) - , source_chunks(num_inputs) + , sources(num_inputs) , out_row_sources_buf(out_row_sources_buf_) { } @@ -26,39 +26,39 @@ static void prepareChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -void IMergingAlgorithmWithSharedChunks::initialize(Chunks chunks) +void IMergingAlgorithmWithSharedChunks::initialize(Inputs inputs) { - source_chunks.resize(chunks.size()); - - for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + for (size_t source_num = 0; source_num < inputs.size(); ++source_num) { - if (!chunks[source_num]) + if (!inputs[source_num].chunk) continue; - prepareChunk(chunks[source_num]); + prepareChunk(inputs[source_num].chunk); - auto & source_chunk = source_chunks[source_num]; + auto & source = sources[source_num]; - source_chunk = chunk_allocator.alloc(chunks[source_num]); - cursors[source_num] = SortCursorImpl(source_chunk->getColumns(), description, source_num); + source.skip_last_row = inputs[source_num].skip_last_row; + source.chunk = chunk_allocator.alloc(inputs[source_num].chunk); + cursors[source_num] = SortCursorImpl(source.chunk->getColumns(), description, source_num); - source_chunk->all_columns = cursors[source_num].all_columns; - source_chunk->sort_columns = cursors[source_num].sort_columns; + source.chunk->all_columns = 
cursors[source_num].all_columns; + source.chunk->sort_columns = cursors[source_num].sort_columns; } queue = SortingHeap(cursors); } -void IMergingAlgorithmWithSharedChunks::consume(Chunk & chunk, size_t source_num) +void IMergingAlgorithmWithSharedChunks::consume(Input & input, size_t source_num) { - prepareChunk(chunk); + prepareChunk(input.chunk); - auto & source_chunk = source_chunks[source_num]; - source_chunk = chunk_allocator.alloc(chunk); - cursors[source_num].reset(source_chunk->getColumns(), {}); + auto & source = sources[source_num]; + source.skip_last_row = input.skip_last_row; + source.chunk = chunk_allocator.alloc(input.chunk); + cursors[source_num].reset(source.chunk->getColumns(), {}); - source_chunk->all_columns = cursors[source_num].all_columns; - source_chunk->sort_columns = cursors[source_num].sort_columns; + source.chunk->all_columns = cursors[source_num].all_columns; + source.chunk->sort_columns = cursors[source_num].sort_columns; queue.push(cursors[source_num]); } diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h index a3dbadc458d..65c456ea44c 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithSharedChunks.h @@ -15,8 +15,8 @@ public: WriteBuffer * out_row_sources_buf_, size_t max_row_refs); - void initialize(Chunks chunks) override; - void consume(Chunk & chunk, size_t source_num) override; + void initialize(Inputs inputs) override; + void consume(Input & input, size_t source_num) override; private: SortDescription description; @@ -27,9 +27,16 @@ private: SortCursorImpls cursors; protected: - /// Chunks currently being merged. - using SourceChunks = std::vector; - SourceChunks source_chunks; + + struct Source + { + detail::SharedChunkPtr chunk; + bool skip_last_row; + }; + + /// Sources currently being merged. + using Sources = std::vector; + Sources sources; SortingHeap queue; @@ -38,7 +45,8 @@ protected: WriteBuffer * out_row_sources_buf = nullptr; using RowRef = detail::RowRefWithOwnedChunk; - void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, source_chunks[cursor.impl->order]); } + void setRowRef(RowRef & row, SortCursor & cursor) { row.set(cursor, sources[cursor.impl->order].chunk); } + bool skipLastRowFor(size_t input_number) const { return sources[input_number].skip_last_row; } }; } diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index f1d6c56809d..77258a8187b 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -22,7 +22,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( , description(std::move(description_)) , limit(limit_) , out_row_sources_buf(out_row_sources_buf_) - , source_chunks(num_inputs) + , current_inputs(num_inputs) , cursors(num_inputs) { /// Replace column names in description to positions. 
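The same skip-last-row guard is inserted at the top of merge() in each algorithm in this commit. The flag it checks is produced by the selector filter in the IMergingTransform.cpp hunk further down; the relevant excerpt, reproduced verbatim here for readability, keeps a filtered-out last row alive (marked as skipped) so that every parallel FINAL instance sees identical chunk boundaries and therefore requests the next chunk from the same source:

    if (!filter.empty() && filter.back() == 0)
    {
        filter.back() = 1;
        ++num_result_rows;
        input.skip_last_row = true;
    }
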
@@ -39,7 +39,7 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( void MergingSortedAlgorithm::addInput() { - source_chunks.emplace_back(); + current_inputs.emplace_back(); cursors.emplace_back(); } @@ -53,13 +53,13 @@ static void prepareChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } -void MergingSortedAlgorithm::initialize(Chunks chunks) +void MergingSortedAlgorithm::initialize(Inputs inputs) { - source_chunks = std::move(chunks); + current_inputs = std::move(inputs); - for (size_t source_num = 0; source_num < source_chunks.size(); ++source_num) + for (size_t source_num = 0; source_num < current_inputs.size(); ++source_num) { - auto & chunk = source_chunks[source_num]; + auto & chunk = current_inputs[source_num].chunk; if (!chunk) continue; @@ -74,11 +74,11 @@ void MergingSortedAlgorithm::initialize(Chunks chunks) queue_without_collation = SortingHeap(cursors); } -void MergingSortedAlgorithm::consume(Chunk & chunk, size_t source_num) +void MergingSortedAlgorithm::consume(Input & input, size_t source_num) { - prepareChunk(chunk); - source_chunks[source_num].swap(chunk); - cursors[source_num].reset(source_chunks[source_num].getColumns(), {}); + prepareChunk(input.chunk); + current_inputs[source_num].swap(input); + cursors[source_num].reset(current_inputs[source_num].chunk.getColumns(), {}); if (has_collation) queue_with_collation.push(cursors[source_num]); @@ -105,10 +105,18 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::mergeImpl(TSortingHeap & queue auto current = queue.current(); + if (current->isLast() && current_inputs[current->pos].skip_last_row) + { + /// Get the next block from the corresponding source, if there is one. + queue.removeTop(); + return Status(current.impl->order); + } + /** And what if the block is totally less or equal than the rest for the current cursor? * Or is there only one data source left in the queue? Then you can take the entire block on current cursor. */ if (current.impl->isFirst() + && !current_inputs[current->pos].skip_last_row /// Ignore optimization if last row should be skipped. 
&& (queue.size() == 1 || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild())))) { @@ -167,7 +175,7 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::insertFromChunk(size_t source_ //std::cerr << "copied columns\n"; - auto num_rows = source_chunks[source_num].getNumRows(); + auto num_rows = current_inputs[source_num].chunk.getNumRows(); UInt64 total_merged_rows_after_insertion = merged_data.mergedRows() + num_rows; bool is_finished = limit && total_merged_rows_after_insertion >= limit; @@ -175,12 +183,12 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::insertFromChunk(size_t source_ if (limit && total_merged_rows_after_insertion > limit) { num_rows -= total_merged_rows_after_insertion - limit; - merged_data.insertFromChunk(std::move(source_chunks[source_num]), num_rows); + merged_data.insertFromChunk(std::move(current_inputs[source_num].chunk), num_rows); } else - merged_data.insertFromChunk(std::move(source_chunks[source_num]), 0); + merged_data.insertFromChunk(std::move(current_inputs[source_num].chunk), 0); - source_chunks[source_num] = Chunk(); + current_inputs[source_num].chunk = Chunk(); /// Write order of rows for other columns /// this data will be used in gather stream diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index 5b361c1000e..054aec94464 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -22,8 +22,8 @@ public: void addInput(); - void initialize(Chunks chunks) override; - void consume(Chunk & chunk, size_t source_num) override; + void initialize(Inputs inputs) override; + void consume(Input & input, size_t source_num) override; Status merge() override; const MergedData & getMergedData() const { return merged_data; } @@ -41,7 +41,7 @@ private: WriteBuffer * out_row_sources_buf = nullptr; /// Chunks currently being merged. - std::vector source_chunks; + Inputs current_inputs; SortCursorImpls cursors; diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 4a0f7493637..ada779ea29b 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -40,6 +40,13 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() { SortCursor current = queue.current(); + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the corresponding source, if there is one. 
+ queue.removeTop(); + return Status(current.impl->order); + } + RowRef current_row; setRowRef(current_row, current); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 89154044ae5..fe21c4f6023 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -623,19 +623,19 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( { } -void SummingSortedAlgorithm::initialize(Chunks chunks) +void SummingSortedAlgorithm::initialize(Inputs inputs) { - for (auto & chunk : chunks) - if (chunk) - preprocessChunk(chunk); + for (auto & input : inputs) + if (input.chunk) + preprocessChunk(input.chunk); - initializeQueue(std::move(chunks)); + initializeQueue(std::move(inputs)); } -void SummingSortedAlgorithm::consume(Chunk & chunk, size_t source_num) +void SummingSortedAlgorithm::consume(Input & input, size_t source_num) { - preprocessChunk(chunk); - updateCursor(chunk, source_num); + preprocessChunk(input.chunk); + updateCursor(input, source_num); } IMergingAlgorithm::Status SummingSortedAlgorithm::merge() @@ -647,6 +647,13 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() SortCursor current = queue.current(); + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the corresponding source, if there is one. + queue.removeTop(); + return Status(current.impl->order); + } + { detail::RowRef current_key; current_key.set(current); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index fc5431f1a08..a188a5fb538 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h @@ -22,8 +22,8 @@ public: const Names & column_names_to_sum, size_t max_block_size); - void initialize(Chunks chunks) override; - void consume(Chunk & chunk, size_t source_num) override; + void initialize(Inputs inputs) override; + void consume(Input & input, size_t source_num) override; Status merge() override; struct AggregateDescription; diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp index e039c383995..66f9865c483 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp @@ -64,6 +64,13 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() { SortCursor current = queue.current(); + if (current->isLast() && skipLastRowFor(current->pos)) + { + /// Get the next block from the corresponding source, if there is one. 
+ queue.removeTop(); + return Status(current.impl->order); + } + RowRef current_row; Int8 sign = assert_cast(*current->all_columns[sign_column_number]).getData()[current->pos]; diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 2037a88733a..eff786b150f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -87,7 +87,7 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs() continue; } - state.init_chunks[i] = std::move(chunk); + state.init_chunks[i].set(std::move(chunk)); input_states[i].is_initialized = true; } @@ -158,8 +158,8 @@ IProcessor::Status IMergingTransformBase::prepare() if (!input.hasData()) return Status::NeedData; - state.input_chunk = input.pull(); - if (!state.input_chunk.hasRows() && !input.isFinished()) + state.input_chunk.set(input.pull()); + if (!state.input_chunk.chunk.hasRows() && !input.isFinished()) return Status::NeedData; state.has_input = true; @@ -174,12 +174,12 @@ IProcessor::Status IMergingTransformBase::prepare() return Status::Ready; } -static void filterChunk(Chunk & chunk, size_t selector_position) +static void filterChunk(IMergingAlgorithm::Input & input, size_t selector_position) { - if (!chunk.getChunkInfo()) + if (!input.chunk.getChunkInfo()) throw Exception("IMergingTransformBase expected ChunkInfo for input chunk", ErrorCodes::LOGICAL_ERROR); - const auto * chunk_info = typeid_cast(chunk.getChunkInfo().get()); + const auto * chunk_info = typeid_cast(input.chunk.getChunkInfo().get()); if (!chunk_info) throw Exception("IMergingTransformBase expected SelectorInfo for input chunk", ErrorCodes::LOGICAL_ERROR); @@ -188,8 +188,8 @@ static void filterChunk(Chunk & chunk, size_t selector_position) IColumn::Filter filter; filter.resize_fill(selector.size()); - size_t num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); + size_t num_rows = input.chunk.getNumRows(); + auto columns = input.chunk.detachColumns(); size_t num_result_rows = 0; @@ -202,54 +202,39 @@ static void filterChunk(Chunk & chunk, size_t selector_position) } } + if (!filter.empty() && filter.back() == 0) + { + filter.back() = 1; + ++num_result_rows; + input.skip_last_row = true; + } + for (auto & column : columns) column = column->filter(filter, num_result_rows); - chunk.clear(); - chunk.setColumns(std::move(columns), num_result_rows); + input.chunk.clear(); + input.chunk.setColumns(std::move(columns), num_result_rows); } -bool IMergingTransformBase::filterChunks() +void IMergingTransformBase::filterChunks() { if (state.selector_position < 0) - return true; - - bool has_empty_chunk = false; + return; if (!state.init_chunks.empty()) { for (size_t i = 0; i < input_states.size(); ++i) { - auto & chunk = state.init_chunks[i]; - if (!chunk || input_states[i].is_filtered) + auto & input = state.init_chunks[i]; + if (!input.chunk) continue; - filterChunk(chunk, state.selector_position); - - if (!chunk.hasRows()) - { - chunk.clear(); - has_empty_chunk = true; - input_states[i].is_initialized = false; - is_initialized = false; - } - else - input_states[i].is_filtered = true; + filterChunk(input, state.selector_position); } } if (state.has_input) - { filterChunk(state.input_chunk, state.selector_position); - if (!state.input_chunk.hasRows()) - { - state.has_input = false; - state.need_data = true; - has_empty_chunk = true; - } - } - - return !has_empty_chunk; } diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h 
index 12a366bf21b..ce673131ab6 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -36,19 +36,19 @@ protected: virtual void onNewInput(); /// Is called when new input is added. Only if have_all_inputs = false. virtual void onFinish() {} /// Is called when all data is processed. - bool filterChunks(); /// Filter chunks if selector position was set. For parallel final. + void filterChunks(); /// Filter chunks if selector position was set. For parallel final. /// Processor state. struct State { Chunk output_chunk; - Chunk input_chunk; + IMergingAlgorithm::Input input_chunk; bool has_input = false; bool is_finished = false; bool need_data = false; size_t next_input_to_read = 0; - Chunks init_chunks; + IMergingAlgorithm::Inputs init_chunks; ssize_t selector_position = -1; }; @@ -61,7 +61,6 @@ private: InputPort & port; bool is_initialized = false; - bool is_filtered = false; }; std::vector input_states; @@ -90,8 +89,7 @@ public: void work() override { - if (!filterChunks()) - return; + filterChunks(); if (!state.init_chunks.empty()) algorithm.initialize(std::move(state.init_chunks)); From def0158638b82c6d2d38ceb80daec6f74b992a15 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 15 Jun 2020 14:33:44 +0300 Subject: [PATCH 0716/2229] configure query handler as default --- src/Server/HTTPHandlerFactory.cpp | 31 ++++++++++++------- src/Server/HTTPHandlerFactory.h | 2 -- .../test_http_handlers_config/test.py | 3 ++ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index 6459b0aab3b..f34852054d1 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -20,6 +20,9 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; } +static void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server); +static void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics); + HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string & name_) : log(&Poco::Logger::get(name_)), name(name_) { @@ -75,7 +78,7 @@ static inline auto createHandlersFactoryFromConfig( for (const auto & key : keys) { if (key == "defaults") - addDefaultHandlersFactory(*main_handler_factory, server, &async_metrics); + addDefaultHandlersFactory(*main_handler_factory, server, async_metrics); else if (startsWith(key, "rule")) { const auto & handler_type = server.config().getString(prefix + "." 
+ key + ".handler.type", ""); @@ -113,12 +116,7 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IS else { auto factory = std::make_unique(name); - addDefaultHandlersFactory(*factory, server, &async_metrics); - - auto query_handler = std::make_unique>(server, "query"); - query_handler->allowPostAndGetParamsRequest(); - factory->addHandler(query_handler.release()); - + addDefaultHandlersFactory(*factory, server, async_metrics); return factory.release(); } } @@ -126,7 +124,7 @@ static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IS static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name) { auto factory = std::make_unique(name); - addDefaultHandlersFactory(*factory, server, nullptr); + addCommonDefaultHandlersFactory(*factory, server); auto main_handler = std::make_unique>(server); main_handler->allowPostAndGetParamsRequest(); @@ -157,7 +155,7 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As static const auto ping_response_expression = "Ok.\n"; static const auto root_response_expression = "config://http_server_default_response"; -void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics * async_metrics) +void addCommonDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server) { auto root_handler = std::make_unique>(server, root_response_expression); root_handler->attachStrictPath("/")->allowGetAndHeadRequest(); @@ -170,13 +168,22 @@ void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer auto replicas_status_handler = std::make_unique>(server); replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest(); factory.addHandler(replicas_status_handler.release()); +} + +void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics & async_metrics) +{ + addCommonDefaultHandlersFactory(factory, server); + + auto query_handler = std::make_unique>(server, "query"); + query_handler->allowPostAndGetParamsRequest(); + factory.addHandler(query_handler.release()); /// We check that prometheus handler will be served on current (default) port. - /// Otherwise it will be created separately, see below. - if (async_metrics && server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) + /// Otherwise it will be created separately, see createHandlerFactory(...). 
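+    /// (A non-zero prometheus.port starts a dedicated HTTP server for metrics,
+    /// so registering the handler here as well would serve it twice.)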
+ if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0) { auto prometheus_handler = std::make_unique>( - server, PrometheusMetricsWriter(server.config(), "prometheus", *async_metrics)); + server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)); prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest(); factory.addHandler(prometheus_handler.release()); } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index 8e21a13ba18..3e8313172eb 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -103,8 +103,6 @@ private: std::function creator; }; -void addDefaultHandlersFactory(HTTPRequestHandlerFactoryMain & factory, IServer & server, AsynchronousMetrics * async_metrics); - Poco::Net::HTTPRequestHandlerFactory * createStaticHandlerFactory(IServer & server, const std::string & config_prefix); Poco::Net::HTTPRequestHandlerFactory * createDynamicHandlerFactory(IServer & server, const std::string & config_prefix); diff --git a/tests/integration/test_http_handlers_config/test.py b/tests/integration/test_http_handlers_config/test.py index b31913ba962..b15cd1fdb89 100644 --- a/tests/integration/test_http_handlers_config/test.py +++ b/tests/integration/test_http_handlers_config/test.py @@ -124,6 +124,9 @@ def test_defaults_http_handlers(): assert 200 == cluster.instance.http_request('replicas_status', method='GET').status_code assert 'Ok.\n' == cluster.instance.http_request('replicas_status', method='GET').content + assert 200 == cluster.instance.http_request('?query=SELECT+1', method='GET').status_code + assert '1\n' == cluster.instance.http_request('?query=SELECT+1', method='GET').content + def test_prometheus_handler(): with contextlib.closing(SimpleCluster(ClickHouseCluster(__file__), "prometheus_handler", "test_prometheus_handler")) as cluster: assert 404 == cluster.instance.http_request('', method='GET', headers={'XXX': 'xxx'}).status_code From 10566e2b43705364fc1d54224a5393e681f16a5b Mon Sep 17 00:00:00 2001 From: Mikhail Malafeev <50805089+demo-99@users.noreply.github.com> Date: Mon, 15 Jun 2020 17:03:01 +0500 Subject: [PATCH 0717/2229] Remove duplicate ORDER BY and DISTINCT from subqueries (#10067) --- src/Core/Settings.h | 1 + src/Interpreters/DuplicateDistinctVisitor.h | 72 ++++++++++ src/Interpreters/DuplicateOrderByVisitor.h | 127 ++++++++++++++++++ src/Interpreters/SyntaxAnalyzer.cpp | 18 +++ .../duplicate_order_by_and_distinct.xml | 10 ++ ..._duplicate_order_by_and_distinct.reference | 14 ++ .../01305_duplicate_order_by_and_distinct.sql | 124 +++++++++++++++++ ...t_optimize_for_distributed_table.reference | 2 + ...istinct_optimize_for_distributed_table.sql | 20 +++ 9 files changed, 388 insertions(+) create mode 100644 src/Interpreters/DuplicateDistinctVisitor.h create mode 100644 src/Interpreters/DuplicateOrderByVisitor.h create mode 100644 tests/performance/duplicate_order_by_and_distinct.xml create mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference create mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql create mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference create mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 
1e7728709ba..adc804c3a28 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -360,6 +360,7 @@ struct Settings : public SettingsCollection M(SettingBool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \ M(SettingUInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \ M(SettingBool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ + M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(SettingBool, optimize_if_chain_to_miltiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \ M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ diff --git a/src/Interpreters/DuplicateDistinctVisitor.h b/src/Interpreters/DuplicateDistinctVisitor.h new file mode 100644 index 00000000000..9ce2624f5bd --- /dev/null +++ b/src/Interpreters/DuplicateDistinctVisitor.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Removes duplicate DISTINCT from queries. +class DuplicateDistinctMatcher +{ +public: + struct Data + { + bool is_distinct; + std::vector last_ids; + }; + + static void visit(const ASTPtr & ast, Data & data) + { + auto * select_query = ast->as(); + if (select_query) + visit(*select_query, data); + } + + static void visit(ASTSelectQuery & select_query, Data & data) + { + if (!select_query.distinct || !select_query.select()) + return; + + /// Optimize shouldn't work for distributed tables + for (const auto & elem : select_query.children) + { + if (elem->as() && !elem->as()->is_standalone) + return; + } + + auto expression_list = select_query.select(); + std::vector current_ids; + + if (expression_list->children.empty()) + return; + + current_ids.reserve(expression_list->children.size()); + for (const auto & id : expression_list->children) + current_ids.push_back(id->getColumnName()); + + if (data.is_distinct && current_ids == data.last_ids) + select_query.distinct = false; + + data.is_distinct = true; + data.last_ids = std::move(current_ids); + } + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) + { + return true; + } + +}; + +using DuplicateDistinctVisitor = InDepthNodeVisitor; + +} diff --git a/src/Interpreters/DuplicateOrderByVisitor.h b/src/Interpreters/DuplicateOrderByVisitor.h new file mode 100644 index 00000000000..85f34377e54 --- /dev/null +++ b/src/Interpreters/DuplicateOrderByVisitor.h @@ -0,0 +1,127 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Checks if SELECT has stateful functions +class ASTFunctionStatefulData +{ +public: + using TypeToVisit = ASTFunction; + + const Context & context; + bool & is_stateful; + void visit(ASTFunction & ast_function, ASTPtr &) + { + if (ast_function.name == "any" || ast_function.name == "groupArray") + { + is_stateful = true; + return; + } + 
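+        /// The two functions above are not registered as stateful, but their
+        /// results depend on the order of input rows, e.g.
+        ///     SELECT any(x) FROM (SELECT x FROM t ORDER BY y)
+        /// would change if the subquery's ORDER BY were removed.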
+ const auto & function = FunctionFactory::instance().tryGet(ast_function.name, context); + + if (function && function->isStateful()) + { + is_stateful = true; + return; + } + } +}; + +using ASTFunctionStatefulMatcher = OneTypeMatcher; +using ASTFunctionStatefulVisitor = InDepthNodeVisitor; + + +/// Erases unnecessary ORDER BY from subquery +class DuplicateOrderByFromSubqueriesData +{ +public: + using TypeToVisit = ASTSelectQuery; + + bool done = false; + + void visit(ASTSelectQuery & select_query, ASTPtr &) + { + if (done) + return; + + if (select_query.orderBy() && !select_query.limitBy() && !select_query.limitByOffset() && + !select_query.limitByLength() && !select_query.limitLength() && !select_query.limitOffset()) + { + select_query.setExpression(ASTSelectQuery::Expression::ORDER_BY, nullptr); + } + + done = true; + } +}; + +using DuplicateOrderByFromSubqueriesMatcher = OneTypeMatcher; +using DuplicateOrderByFromSubqueriesVisitor = InDepthNodeVisitor; + + +/// Finds SELECT that can be optimized +class DuplicateOrderByData +{ +public: + using TypeToVisit = ASTSelectQuery; + + const Context & context; + bool done = false; + + void visit(ASTSelectQuery & select_query, ASTPtr &) + { + if (done) + return; + + /// Disable optimization for distributed tables + for (const auto & elem : select_query.children) + { + if (elem->as() && !elem->as()->is_standalone) + return; + } + + if (select_query.orderBy() || select_query.groupBy()) + { + for (auto & elem : select_query.children) + { + if (elem->as()) + { + bool is_stateful = false; + ASTFunctionStatefulVisitor::Data data{context, is_stateful}; + ASTFunctionStatefulVisitor(data).visit(elem); + if (is_stateful) + return; + } + } + + if (auto select_table_ptr = select_query.tables()) + { + if (auto * select_table = select_table_ptr->as()) + { + if (!select_table->children.empty()) + { + DuplicateOrderByFromSubqueriesVisitor::Data data{false}; + DuplicateOrderByFromSubqueriesVisitor(data).visit(select_table->children[0]); + } + } + } + } + } +}; + +using DuplicateOrderByMatcher = OneTypeMatcher; +using DuplicateOrderByVisitor = InDepthNodeVisitor; + +} diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 8f6d368e6ad..4bfae18f9a5 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -23,12 +23,15 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include #include @@ -370,6 +373,18 @@ void optimizeOrderBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } +/// Optimize duplicate ORDER BY and DISTINCT +void optimizeDuplicateOrderByAndDistinct(ASTPtr & query, bool optimize_duplicate_order_by_and_distinct, const Context & context) +{ + if (optimize_duplicate_order_by_and_distinct) + { + DuplicateOrderByVisitor::Data order_by_data{context, false}; + DuplicateOrderByVisitor(order_by_data).visit(query); + DuplicateDistinctVisitor::Data distinct_data{}; + DuplicateDistinctVisitor(distinct_data).visit(query); + } +} + /// Remove duplicate items from LIMIT BY. void optimizeLimitBy(const ASTSelectQuery * select_query) { @@ -831,6 +846,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( /// Remove duplicate items from ORDER BY. optimizeOrderBy(select_query); + /// Remove duplicate ORDER BY and DISTINCT from subqueries. + optimizeDuplicateOrderByAndDistinct(query, settings.optimize_duplicate_order_by_and_distinct, context); + /// Remove duplicated elements from LIMIT BY clause. 
optimizeLimitBy(select_query); diff --git a/tests/performance/duplicate_order_by_and_distinct.xml b/tests/performance/duplicate_order_by_and_distinct.xml new file mode 100644 index 00000000000..0c05af3fc56 --- /dev/null +++ b/tests/performance/duplicate_order_by_and_distinct.xml @@ -0,0 +1,10 @@ + + + hits_10m_single + + + SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null + SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null + SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null + + diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference new file mode 100644 index 00000000000..208f3d1abe5 --- /dev/null +++ b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference @@ -0,0 +1,14 @@ +SELECT number\nFROM \n(\n SELECT number\n FROM \n (\n SELECT DISTINCT number\n FROM numbers(3)\n )\n)\nORDER BY number ASC +0 +1 +2 +SELECT DISTINCT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number\n FROM numbers(3)\n ORDER BY number ASC\n )\n ORDER BY number ASC\n)\nORDER BY number ASC +0 +1 +2 +SELECT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number % 2 AS number\n FROM numbers(3)\n )\n)\nORDER BY number ASC +0 +1 +SELECT DISTINCT number\nFROM \n(\n SELECT DISTINCT number\n FROM \n (\n SELECT DISTINCT number % 2 AS number\n FROM numbers(3)\n ORDER BY number ASC\n )\n ORDER BY number ASC\n)\nORDER BY number ASC +0 +1 diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql new file mode 100644 index 00000000000..a660e5f0b77 --- /dev/null +++ b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql @@ -0,0 +1,124 @@ +set enable_debug_queries = 1; +set optimize_duplicate_order_by_and_distinct = 1; + +analyze SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT * + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT * + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +set optimize_duplicate_order_by_and_distinct = 0; + +analyze SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT * + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT * + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +set optimize_duplicate_order_by_and_distinct = 1; + +analyze SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT number % 2 + AS number + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT number % 2 + AS number + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +set optimize_duplicate_order_by_and_distinct = 0; + +analyze SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + SELECT DISTINCT number % 2 + AS number + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; + +SELECT DISTINCT * +FROM +( + SELECT DISTINCT * + FROM + ( + 
SELECT DISTINCT number % 2 + AS number + FROM numbers(3) + ORDER BY number + ) + ORDER BY number +) +ORDER BY number; diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql new file mode 100644 index 00000000000..e1467bacf2f --- /dev/null +++ b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql @@ -0,0 +1,20 @@ +set optimize_duplicate_order_by_and_distinct = 1; +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM remote('127.0.0.{1,2}', system.numbers) + LIMIT 1 + SETTINGS distributed_group_by_no_merge = 1 +); + +set optimize_duplicate_order_by_and_distinct = 0; +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM remote('127.0.0.{1,2}', system.numbers) + LIMIT 1 + SETTINGS distributed_group_by_no_merge = 1 +); + From f9c4721ba2e1ec58ae95982c5b834cd3db2458d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 15:04:30 +0300 Subject: [PATCH 0718/2229] Added test. --- .../0_stateless/01318_parallel_final_stuck.reference | 0 tests/queries/0_stateless/01318_parallel_final_stuck.sql | 6 ++++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/01318_parallel_final_stuck.reference create mode 100644 tests/queries/0_stateless/01318_parallel_final_stuck.sql diff --git a/tests/queries/0_stateless/01318_parallel_final_stuck.reference b/tests/queries/0_stateless/01318_parallel_final_stuck.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01318_parallel_final_stuck.sql b/tests/queries/0_stateless/01318_parallel_final_stuck.sql new file mode 100644 index 00000000000..1a54a9e0ae5 --- /dev/null +++ b/tests/queries/0_stateless/01318_parallel_final_stuck.sql @@ -0,0 +1,6 @@ +drop table if exists final_bug; +create table final_bug (x UInt64, y UInt8) engine = ReplacingMergeTree(y) order by x settings index_granularity = 8; +insert into final_bug select number % 10, 1 from numbers(1000); +insert into final_bug select number % 10, 1 from numbers(1000); +select x from final_bug final order by x settings max_threads=2, max_final_threads=2, max_block_size=8 format Null; +drop table if exists final_bug; \ No newline at end of file From 24059efad5dadac02a728d0aedbc419e0a4b0e53 Mon Sep 17 00:00:00 2001 From: Artem Zuikov Date: Mon, 15 Jun 2020 15:36:10 +0300 Subject: [PATCH 0719/2229] Change push down logic in VIEW (#11513) --- src/Interpreters/CrossToInnerJoinVisitor.cpp | 4 +- src/Interpreters/IdentifierSemantic.cpp | 28 ++++--- src/Interpreters/IdentifierSemantic.h | 6 +- src/Interpreters/InterpreterExplainQuery.cpp | 61 ++++----------- src/Interpreters/InterpreterSelectQuery.cpp | 58 +++++++++----- src/Interpreters/InterpreterSelectQuery.h | 2 + .../JoinToSubqueryTransformVisitor.cpp | 3 +- src/Interpreters/JoinedTables.h | 5 +- src/Storages/SelectQueryInfo.h | 2 + src/Storages/StorageView.cpp | 78 +++++++++---------- 
src/Storages/StorageView.h | 8 +- ...76_predicate_optimizer_with_view.reference | 8 +- ...e_with_ambiguous_column_and_view.reference | 3 + ...rewrite_with_ambiguous_column_and_view.sql | 35 +++++++++ 14 files changed, 171 insertions(+), 130 deletions(-) create mode 100644 tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.reference create mode 100644 tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index b2f3f56be4d..5ebebae2578 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -202,11 +202,11 @@ private: { std::optional left_table_pos = IdentifierSemantic::getMembership(left); if (!left_table_pos) - left_table_pos = IdentifierSemantic::chooseTable(left, tables); + left_table_pos = IdentifierSemantic::chooseTableColumnMatch(left, tables); std::optional right_table_pos = IdentifierSemantic::getMembership(right); if (!right_table_pos) - right_table_pos = IdentifierSemantic::chooseTable(right, tables); + right_table_pos = IdentifierSemantic::chooseTableColumnMatch(right, tables); if (left_table_pos && right_table_pos && (*left_table_pos != *right_table_pos)) { diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 8f254b50400..f661ec2ae71 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -16,7 +16,8 @@ namespace { template -std::optional tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, bool allow_ambiguous) +std::optional tryChooseTable(const ASTIdentifier & identifier, const std::vector & tables, + bool allow_ambiguous, bool column_match [[maybe_unused]] = false) { using ColumnMatch = IdentifierSemantic::ColumnMatch; @@ -27,6 +28,13 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std for (size_t i = 0; i < tables.size(); ++i) { auto match = IdentifierSemantic::canReferColumnToTable(identifier, tables[i]); + + if constexpr (std::is_same_v) + { + if (column_match && match == ColumnMatch::NoMatch && identifier.isShort() && tables[i].hasColumn(identifier.shortName())) + match = ColumnMatch::ColumnName; + } + if (match != ColumnMatch::NoMatch) { if (match > best_match) @@ -125,12 +133,17 @@ std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & iden return tryChooseTable(identifier, tables, ambiguous); } -std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const std::vector & tables, - bool ambiguous) +std::optional IdentifierSemantic::chooseTable(const ASTIdentifier & identifier, const TablesWithColumns & tables, bool ambiguous) { return tryChooseTable(identifier, tables, ambiguous); } +std::optional IdentifierSemantic::chooseTableColumnMatch(const ASTIdentifier & identifier, const TablesWithColumns & tables, + bool ambiguous) +{ + return tryChooseTable(identifier, tables, ambiguous, true); +} + StorageID IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) @@ -191,14 +204,9 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const } IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const ASTIdentifier & identifier, - const TableWithColumnNamesAndTypes & db_and_table) + const TableWithColumnNamesAndTypes & table_with_columns) { - ColumnMatch match = 
canReferColumnToTable(identifier, db_and_table.table); -#if 0 - if (match == ColumnMatch::NoMatch && identifier.isShort() && db_and_table.hasColumn(identifier.shortName())) - match = ColumnMatch::ColumnName; -#endif - return match; + return canReferColumnToTable(identifier, table_with_columns.table); } /// Strip qualificators from left side of column name. diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 7e84e10a26f..0aef297c734 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -41,7 +41,7 @@ struct IdentifierSemantic static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & db_and_table); + static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & table_with_columns); static void setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); @@ -52,7 +52,9 @@ struct IdentifierSemantic static std::optional getMembership(const ASTIdentifier & identifier); static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, bool allow_ambiguous = false); - static std::optional chooseTable(const ASTIdentifier &, const std::vector & tables, + static std::optional chooseTable(const ASTIdentifier &, const TablesWithColumns & tables, + bool allow_ambiguous = false); + static std::optional chooseTableColumnMatch(const ASTIdentifier &, const TablesWithColumns & tables, bool allow_ambiguous = false); private: diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index dacd7ca5f20..4890287e81e 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -4,20 +4,15 @@ #include #include #include -#include -#include #include +#include #include #include -#include #include -#include #include #include -#include +#include -#include -#include #include #include @@ -31,56 +26,30 @@ namespace { struct Data { - bool analyzed = false; const Context & context; }; - static bool needChildVisit(ASTPtr &, ASTPtr &) { return true; } + static bool needChildVisit(ASTPtr & node, ASTPtr &) + { + return !node->as(); + } static void visit(ASTPtr & ast, Data & data) { - if (auto * select_query = ast->as()) - visit(*select_query, ast, data); - if (auto * union_select_query = ast->as()) - visit(*union_select_query, ast, data); + if (auto * select = ast->as()) + visit(*select, ast, data); } - static void visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) + static void visit(ASTSelectQuery & select, ASTPtr & node, Data & data) { - if (!select_query.tables()) - return; + InterpreterSelectQuery interpreter( + node, data.context, SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify()); - for (const auto & child : select_query.tables()->children) + const SelectQueryInfo & query_info = interpreter.getQueryInfo(); + if (query_info.view_query) { - auto * tables_element = child->as(); - - if (tables_element && tables_element->table_expression) - visit(*tables_element->table_expression->as(), select_query, data); - } - } - - static 
void visit(ASTSelectWithUnionQuery &, ASTPtr & node, Data & data) - { - if (!data.analyzed) - { - data.analyzed = true; - InterpreterSelectWithUnionQuery interpreter( - node, data.context, SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify()); - } - } - - static void visit(ASTTableExpression & expression, ASTSelectQuery & select_query, Data & data) - { - if (data.context.getSettingsRef().enable_optimize_predicate_expression && expression.database_and_table_name) - { - if (const auto * identifier = expression.database_and_table_name->as()) - { - auto table_id = data.context.resolveStorageID(*identifier); - const auto & storage = DatabaseCatalog::instance().getTable(table_id, data.context); - - if (auto * storage_view = dynamic_cast(storage.get())) - storage_view->getRuntimeViewQuery(&select_query, data.context, true); - } + ASTPtr tmp; + StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp); } } }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 297679c4616..ac17a3042d8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include @@ -186,6 +187,26 @@ static Context getSubqueryContext(const Context & context) return subquery_context; } +static void rewriteMultipleJoins(ASTPtr & query, const TablesWithColumns & tables, const String & database, const Settings & settings) +{ + ASTSelectQuery & select = query->as(); + + Aliases aliases; + if (ASTPtr with = select.with()) + QueryAliasesNoSubqueriesVisitor(aliases).visit(with); + QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); + + CrossToInnerJoinVisitor::Data cross_to_inner{tables, aliases, database}; + CrossToInnerJoinVisitor(cross_to_inner).visit(query); + + size_t rewriter_version = settings.multiple_joins_rewriter_version; + if (!rewriter_version || rewriter_version > 2) + throw Exception("Bad multiple_joins_rewriter_version setting value: " + settings.multiple_joins_rewriter_version.toString(), + ErrorCodes::INVALID_SETTING_VALUE); + JoinToSubqueryTransformVisitor::Data join_to_subs_data{tables, aliases, rewriter_version}; + JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query); +} + InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, @@ -242,29 +263,14 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Rewrite JOINs if (!has_input && joined_tables.tablesCount() > 1) { - ASTSelectQuery & select = getSelectQuery(); + rewriteMultipleJoins(query_ptr, joined_tables.tablesWithColumns(), context->getCurrentDatabase(), settings); - Aliases aliases; - if (ASTPtr with = select.with()) - QueryAliasesNoSubqueriesVisitor(aliases).visit(with); - QueryAliasesNoSubqueriesVisitor(aliases).visit(select.select()); - - CrossToInnerJoinVisitor::Data cross_to_inner{joined_tables.tablesWithColumns(), aliases, context->getCurrentDatabase()}; - CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr); - - size_t rewriter_version = settings.multiple_joins_rewriter_version; - if (!rewriter_version || rewriter_version > 2) - throw Exception("Bad multiple_joins_rewriter_version setting value: " + settings.multiple_joins_rewriter_version.toString(), - ErrorCodes::INVALID_SETTING_VALUE); - JoinToSubqueryTransformVisitor::Data join_to_subs_data{joined_tables.tablesWithColumns(), aliases, rewriter_version}; - JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr); - - 
joined_tables.reset(select); + joined_tables.reset(getSelectQuery()); joined_tables.resolveTables(); if (storage && joined_tables.isLeftTableSubquery()) { - /// Rewritten with subquery. Free storage here locks here. + /// Rewritten with subquery. Free storage locks here. storage = {}; table_lock.release(); table_id = StorageID::createEmpty(); @@ -288,12 +294,28 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) row_policy_filter = context->getRowPolicyCondition(table_id.getDatabaseName(), table_id.getTableName(), RowPolicy::SELECT_FILTER); + StorageView * view = nullptr; + if (storage) + view = dynamic_cast(storage.get()); + auto analyze = [&] (bool try_move_to_prewhere) { + /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. + ASTPtr view_table; + if (view) + view->replaceWithSubquery(getSelectQuery(), view_table); + syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect( query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage), options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); + if (view) + { + /// Restore original view name. Save rewritten subquery for future usage in StorageView. + query_info.view_query = view->restoreViewName(getSelectQuery(), view_table); + view = nullptr; + } + if (try_move_to_prewhere && storage && !row_policy_filter && query.where() && !query.prewhere() && !query.final()) { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index c60451d5f4a..8ed775f60ae 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -88,6 +88,8 @@ public: size_t getMaxStreams() const { return max_streams; } + const SelectQueryInfo & getQueryInfo() const { return query_info; } + private: InterpreterSelectQuery( const ASTPtr & query_ptr_, diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 7ed1bb9d1bb..9bfa9e1e98b 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -585,8 +585,9 @@ std::vector normalizeColumnNamesExtractNeeded( for (ASTIdentifier * ident : identifiers) { bool got_alias = aliases.count(ident->name); + bool allow_ambiguous = got_alias; /// allow ambiguous column overridden by an alias - if (auto table_pos = IdentifierSemantic::chooseTable(*ident, tables)) + if (auto table_pos = IdentifierSemantic::chooseTableColumnMatch(*ident, tables, allow_ambiguous)) { if (!ident->isShort()) { diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 55244e1225c..2591b49527b 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -34,7 +34,8 @@ public: void makeFakeTable(StoragePtr storage, const Block & source_header); std::shared_ptr makeTableJoin(const ASTSelectQuery & select_query); - const std::vector & tablesWithColumns() const { return tables_with_columns; } + const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; } + TablesWithColumns moveTablesWithColumns() { return std::move(tables_with_columns); } bool isLeftTableSubquery() const; bool isLeftTableFunction() const; @@ -49,7 +50,7 @@ public: private: Context context; std::vector table_expressions; - std::vector tables_with_columns; + TablesWithColumns tables_with_columns; /// Legacy (duplicated 
left table values) ASTPtr left_table_expression; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index c4cd1035ea7..26b318f107b 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -70,6 +71,7 @@ using ReadInOrderOptimizerPtr = std::shared_ptr; struct SelectQueryInfo { ASTPtr query; + ASTPtr view_query; /// Optimized VIEW query SyntaxAnalyzerResultPtr syntax_analyzer_result; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 97403a359c3..745fc823703 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -1,8 +1,6 @@ #include #include -#include #include -#include #include #include @@ -30,7 +28,6 @@ namespace ErrorCodes { extern const int INCORRECT_QUERY; extern const int LOGICAL_ERROR; - extern const int ALIAS_REQUIRED; } @@ -60,9 +57,12 @@ Pipes StorageView::read( Pipes pipes; ASTPtr current_inner_query = inner_query; - - if (context.getSettings().enable_optimize_predicate_expression) - current_inner_query = getRuntimeViewQuery(*query_info.query->as(), context); + if (query_info.view_query) + { + if (!query_info.view_query->as()) + throw Exception("Unexpected optimized VIEW query", ErrorCodes::LOGICAL_ERROR); + current_inner_query = query_info.view_query->clone(); + } InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, {}, column_names); @@ -87,60 +87,52 @@ Pipes StorageView::read( return pipes; } -ASTPtr StorageView::getRuntimeViewQuery(const ASTSelectQuery & outer_query, const Context & context) +static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_query) { - auto temp_outer_query = outer_query.clone(); - auto * new_outer_select = temp_outer_query->as(); - return getRuntimeViewQuery(new_outer_select, context, false); -} - - -static void replaceTableNameWithSubquery(ASTSelectQuery * select_query, ASTPtr & subquery) -{ - auto * select_element = select_query->tables()->children[0]->as(); + auto * select_element = select_query.tables()->children[0]->as(); if (!select_element->table_expression) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); - auto * table_expression = select_element->table_expression->as(); + return select_element->table_expression->as(); +} + +void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) +{ + ASTTableExpression * table_expression = getFirstTableExpression(outer_query); if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); - const auto alias = table_expression->database_and_table_name->tryGetAlias(); + DatabaseAndTableWithAlias db_table(table_expression->database_and_table_name); + String alias = db_table.alias.empty() ? 
db_table.table : db_table.alias; + + view_name = table_expression->database_and_table_name; table_expression->database_and_table_name = {}; table_expression->subquery = std::make_shared(); - table_expression->subquery->children.push_back(subquery); - table_expression->children.push_back(table_expression->subquery); - if (!alias.empty()) - table_expression->subquery->setAlias(alias); + table_expression->subquery->children.push_back(view_query); + table_expression->subquery->setAlias(alias); + + for (auto & child : table_expression->children) + if (child.get() == view_name.get()) + child = view_query; } - -ASTPtr StorageView::getRuntimeViewQuery(ASTSelectQuery * outer_query, const Context & context, bool normalize) +ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { - auto runtime_view_query = inner_query->clone(); + ASTTableExpression * table_expression = getFirstTableExpression(select_query); - /// TODO: remove getTableExpressions and getTablesWithColumns - { - const auto & table_expressions = getTableExpressions(*outer_query); - const auto & tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); + if (!table_expression->subquery) + throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); - replaceTableNameWithSubquery(outer_query, runtime_view_query); - if (context.getSettingsRef().joined_subquery_requires_alias && tables_with_columns.size() > 1) - { - for (const auto & pr : tables_with_columns) - if (pr.table.table.empty() && pr.table.alias.empty()) - throw Exception("Not unique subquery in FROM requires an alias (or joined_subquery_requires_alias=0 to disable restriction).", - ErrorCodes::ALIAS_REQUIRED); - } + ASTPtr subquery = table_expression->subquery; + table_expression->subquery = {}; + table_expression->database_and_table_name = view_name; - if (PredicateExpressionsOptimizer(context, tables_with_columns, context.getSettings()).optimize(*outer_query) && normalize) - InterpreterSelectWithUnionQuery( - runtime_view_query, context, SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify(), {}); - } - - return runtime_view_query; + for (auto & child : table_expression->children) + if (child.get() == subquery.get()) + child = view_name; + return subquery->children[0]; } void registerStorageView(StorageFactory & factory) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 86550db83ce..61885460249 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -29,9 +29,13 @@ public: size_t max_block_size, unsigned num_streams) override; - ASTPtr getRuntimeViewQuery(const ASTSelectQuery & outer_query, const Context & context); + void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name) const + { + replaceWithSubquery(select_query, inner_query->clone(), view_name); + } - ASTPtr getRuntimeViewQuery(ASTSelectQuery * outer_query, const Context & context, bool normalize); + static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); + static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); private: ASTPtr inner_query; diff --git a/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference b/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference index 1e92e7b8596..c9b4ed1e1f7 100644 --- a/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference +++ 
b/tests/queries/0_stateless/01076_predicate_optimizer_with_view.reference @@ -1,4 +1,4 @@ -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 -SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 2\n)\nWHERE id = 2 -SELECT id\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n)\nWHERE id = 1 -SELECT id\nFROM \n(\n SELECT \n date, \n id, \n name, \n value\n FROM default.test\n WHERE id = 1\n) AS s\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT *\n FROM default.test\n HAVING id = 1\n) AS test_view\nWHERE id = 1 +SELECT \n date, \n id, \n name, \n value\nFROM \n(\n SELECT *\n FROM default.test\n HAVING id = 2\n) AS test_view\nWHERE id = 2 +SELECT id\nFROM \n(\n SELECT *\n FROM default.test\n HAVING id = 1\n) AS test_view\nWHERE id = 1 +SELECT id\nFROM \n(\n SELECT *\n FROM default.test\n HAVING id = 1\n) AS s\nWHERE id = 1 diff --git a/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.reference b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.reference new file mode 100644 index 00000000000..461a50ea880 --- /dev/null +++ b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.reference @@ -0,0 +1,3 @@ +1 1 1 +2 2 0 +1 val11 val21 val31 diff --git a/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql new file mode 100644 index 00000000000..c90d01ff76d --- /dev/null +++ b/tests/queries/0_stateless/01144_join_rewrite_with_ambiguous_column_and_view.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +DROP TABLE IF EXISTS view1; + +CREATE TABLE t1 (id UInt32, value1 String) ENGINE MergeTree() ORDER BY id; +CREATE TABLE t2 (id UInt32, value2 String) ENGINE MergeTree() ORDER BY id; +CREATE TABLE t3 (id UInt32, value3 String) ENGINE MergeTree() ORDER BY id; + +INSERT INTO t1 (id, value1) VALUES (1, 'val11'); +INSERT INTO t2 (id, value2) VALUES (1, 'val21'); +INSERT INTO t3 (id, value3) VALUES (1, 'val31'); + +SET multiple_joins_rewriter_version = 2; +SET enable_optimize_predicate_expression = 1; + +SELECT t1.id, t2.id as id, t3.id as value +FROM (select number as id, 42 as value from numbers(4)) t1 +LEFT JOIN (select number as id, 42 as value from numbers(3)) t2 ON t1.id = t2.id +LEFT JOIN (select number as id, 42 as value from numbers(2)) t3 ON t1.id = t3.id +WHERE id > 0 AND value < 42; + +CREATE VIEW IF NOT EXISTS view1 AS + SELECT t1.id AS id, t1.value1 AS value1, t2.value2 AS value2, t3.value3 AS value3 + FROM t1 + LEFT JOIN t2 ON t1.id = t2.id + LEFT JOIN t3 ON t1.id = t3.id + WHERE t1.id > 0; + +SELECT * FROM view1 WHERE id = 1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +DROP TABLE IF EXISTS view1; From c7140724a8c2abbd7793744904cf475841d927fa Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 15 Jun 2020 16:25:27 +0300 Subject: [PATCH 0720/2229] Fix that ALTER USER RENAME could change allowed hosts. 
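The regression, reconstructed from the 01075 reference changes in this patch (a sketch; the user names and host values come from that test):

``` sql
CREATE USER 'test_user_01075_x@localhost' HOST LOCAL;

-- Before the fix, RENAME TO re-parsed a host pattern out of the new name,
-- silently turning the allowed hosts into HOST LIKE '192.168.23.15'.
-- After the fix, the renamed user keeps HOST LOCAL.
ALTER USER 'test_user_01075_x@localhost' RENAME TO 'test_user_01075_x@192.168.23.15';
SHOW CREATE USER 'test_user_01075_x@192.168.23.15';
```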
--- src/Parsers/ParserCreateUserQuery.cpp | 16 +++++----------- .../01075_allowed_client_hosts.reference | 4 ++-- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 3bf7e508220..e03f8334d42 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -23,14 +23,14 @@ namespace ErrorCodes namespace { - bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, String & new_name, std::optional & new_host_pattern) + bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, String & new_name) { return IParserBase::wrapParseImpl(pos, [&] { if (!ParserKeyword{"RENAME TO"}.ignore(pos, expected)) return false; - return parseUserName(pos, expected, new_name, new_host_pattern); + return parseUserName(pos, expected, new_name); }); } @@ -274,7 +274,6 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; String new_name; - std::optional new_host_pattern; std::optional authentication; std::optional hosts; std::optional add_hosts; @@ -302,7 +301,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (alter) { - if (new_name.empty() && parseRenameTo(pos, expected, new_name, new_host_pattern)) + if (new_name.empty() && parseRenameTo(pos, expected, new_name)) continue; if (parseHosts(pos, expected, "ADD", add_hosts) || parseHosts(pos, expected, "DROP", remove_hosts)) @@ -312,13 +311,8 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec break; } - if (!hosts) - { - if (!alter && host_pattern) - hosts.emplace().addLikePattern(*host_pattern); - else if (alter && new_host_pattern) - hosts.emplace().addLikePattern(*new_host_pattern); - } + if (!alter && !hosts && host_pattern) + hosts.emplace().addLikePattern(*host_pattern); auto query = std::make_shared(); node = query; diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.reference b/tests/queries/0_stateless/01075_allowed_client_hosts.reference index 3fdea9d1cda..5fb11bae65e 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.reference +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.reference @@ -13,5 +13,5 @@ CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite\\\\.com\', \'.*\\\\ CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite2\\\\.com\', \'.*\\\\.anothersite2\\\\.org\' CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite3\\\\.com\', \'.*\\\\.anothersite3\\\\.org\' CREATE USER `test_user_01075_x@localhost` HOST LOCAL -CREATE USER test_user_01075_x -CREATE USER `test_user_01075_x@192.168.23.15` HOST LIKE \'192.168.23.15\' +CREATE USER test_user_01075_x HOST LOCAL +CREATE USER `test_user_01075_x@192.168.23.15` HOST LOCAL From 8945d0073a9532c8a27264d7d37310f7acb28570 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 16:37:40 +0300 Subject: [PATCH 0721/2229] Fix misunderstanding bug in mutations finalization --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 28 +++++---- .../MergeTree/ReplicatedMergeTreeQueue.h | 11 +++- ..._long_failing_mutation_zookeeper.reference | 6 ++ .../01318_long_failing_mutation_zookeeper.sh | 57 +++++++++++++++++++ 4 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.reference create mode 100755 tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.sh diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp 
b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 8a9dbceba04..7cb46ffda7d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -201,21 +201,28 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( if (is_successful) { - if (!entry->actual_new_part_name.empty()) { /// We don't add bigger fetched part to current_parts because we /// have an invariant `virtual_parts` = `current_parts` + `queue`. - /// But we can remove it from mutations, because we actually have it. - removePartFromMutations(entry->actual_new_part_name); + /// + /// But we remove covered parts from mutations, because we actually + /// have replacing part. + Strings covered_parts = current_parts.getPartsCoveredBy(MergeTreePartInfo::fromPartName(entry->actual_new_part_name, format_version)); + + for (const auto & covered_part : covered_parts) + removePartFromMutations(covered_part); } for (const String & virtual_part_name : entry->getVirtualPartNames()) { - current_parts.add(virtual_part_name); - /// Each processed part may be already mutated, so we try to remove - /// all current parts from mutations. - removePartFromMutations(virtual_part_name); + Strings replaced_parts; + current_parts.add(virtual_part_name, &replaced_parts); + + /// These parts are already covered by newer part, we don't have to + /// mutate it. + for (const auto & replaced_part : replaced_parts) + removePartFromMutations(replaced_part); } String drop_range_part_name; @@ -240,11 +247,11 @@ void ReplicatedMergeTreeQueue::updateStateOnQueueEntryRemoval( { for (const String & virtual_part_name : entry->getVirtualPartNames()) { - /// Because execution of the entry is unsuccessful, `virtual_part_name` will never appear - /// so we won't need to mutate it. + /// Because execution of the entry is unsuccessful, + /// `virtual_part_name` will never appear so we won't need to mutate + /// it. removePartFromMutations(virtual_part_name); } - } } @@ -678,6 +685,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C const String & partition_id = pair.first; Int64 block_num = pair.second; mutations_by_partition[partition_id].emplace(block_num, &mutation); + LOG_TRACE(log, "Adding mutation {} for partition {} for all block numbers less than {}", entry->znode_name, partition_id, block_num); } /// Initialize `mutation.parts_to_do`. First we need to mutate all parts in `current_parts`. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index e093e193381..8f3a7719076 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -107,8 +107,13 @@ private: ReplicatedMergeTreeMutationEntryPtr entry; - /// Parts we have to mutate to complete mutation. We use ActiveDataPartSet structure - /// to be able to manage covering and covered parts. + /// Current parts we have to mutate to complete mutation. + /// + /// current_part_name =mutation> result_part_name + /// ^~~parts_to_do~~^ ^~virtual_parts~^ + /// + /// We use ActiveDataPartSet structure to be able to manage covering and + /// covered parts. 
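+    /// (For example, when a fetched part covers smaller parts, the covered
+    /// parts are removed from parts_to_do: the covering part does not need
+    /// this mutation any more.)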
ActiveDataPartSet parts_to_do; /// Note that is_done is not equivalent to parts_to_do.size() == 0 @@ -204,7 +209,7 @@ private: /// Add part for mutations with block_number > part.getDataVersion() void addPartToMutations(const String & part_name); - /// Remove part from mutations which were assigned to mutate it + /// Remove part from mutations (parts_to_do) which were assigned to mutate it /// with block_number > part.getDataVersion() /// and block_number == part.getDataVersion() /// ^ (this may happen if we downloaded mutated part from other replica) diff --git a/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.reference b/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.reference new file mode 100644 index 00000000000..785123ae030 --- /dev/null +++ b/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.reference @@ -0,0 +1,6 @@ +90001 +2 +waiting default mutation_table 0000000000 MODIFY COLUMN `value` UInt64 +is_done parts_to_do +0 1 +MUTATE_PART 0_0_0_0_2 diff --git a/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.sh new file mode 100755 index 00000000000..3dc8b34fff6 --- /dev/null +++ b/tests/queries/0_stateless/01318_long_failing_mutation_zookeeper.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mutation_table" + +$CLICKHOUSE_CLIENT --query " + CREATE TABLE mutation_table( + key UInt64, + value String + ) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/mutation_table', '1') + ORDER BY key + PARTITION BY key % 10 +" + +$CLICKHOUSE_CLIENT --query "INSERT INTO mutation_table select number, toString(number) from numbers(100000) where number % 10 != 0" + +$CLICKHOUSE_CLIENT --query "INSERT INTO mutation_table VALUES(0, 'hello')" + +$CLICKHOUSE_CLIENT --query "SELECT COUNT() FROM mutation_table" + +$CLICKHOUSE_CLIENT --query "ALTER TABLE mutation_table MODIFY COLUMN value UInt64 SETTINGS replication_alter_partitions_sync=0" + +first_mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations where table='mutation_table' and database='$CLICKHOUSE_DATABASE'") + +# Here we have long sleeps, but they shouldn't lead to flaps. We just check that +# background mutation finalization function will be triggered at least once. In +# rare cases this test doesn't check anything, but will report OK. 
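The invariant the test checks afterwards, stated as plain queries (the script issues them through $CLICKHOUSE_CLIENT; `mutation_table` and `$CLICKHOUSE_DATABASE` come from the test environment):

``` sql
-- Both mutations must be registered before the first one is killed:
SELECT count() FROM system.mutations
WHERE table = 'mutation_table' AND database = '$CLICKHOUSE_DATABASE';

-- After KILL MUTATION, the surviving mutation must not count parts that are
-- already covered by newer parts, so parts_to_do stays small:
SELECT is_done, parts_to_do FROM system.mutations
WHERE table = 'mutation_table' AND database = '$CLICKHOUSE_DATABASE';
```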
+sleep 7 + +$CLICKHOUSE_CLIENT --query "ALTER TABLE mutation_table MODIFY COLUMN value UInt32 SETTINGS replication_alter_partitions_sync=0" + + +#### just check that both mutations started +check_query="SELECT count() FROM system.mutations WHERE table='mutation_table' and database='$CLICKHOUSE_DATABASE'" + +query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1` + +while [ "$query_result" != "2" ] +do + query_result=`$CLICKHOUSE_CLIENT --query="$check_query" 2>&1` + sleep 0.5 +done + +echo $query_result + +$CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$first_mutation_id'" + +sleep 7 + +$CLICKHOUSE_CLIENT --query "SELECT is_done, parts_to_do FROM system.mutations where table='mutation_table' and database='$CLICKHOUSE_DATABASE' FORMAT TSVWithNames" + +$CLICKHOUSE_CLIENT --query "SELECT type, new_part_name FROM system.replication_queue WHERE table='mutation_table' and database='$CLICKHOUSE_DATABASE'" + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS mutation_table" From b5ecef6adf9de711e1e6d50afda63676b13a77f8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 16:56:38 +0300 Subject: [PATCH 0722/2229] Fix tests. --- .../Merges/Algorithms/AggregatingSortedAlgorithm.cpp | 2 ++ .../Algorithms/IMergingAlgorithmWithDelayedChunk.cpp | 11 ++--------- .../Merges/Algorithms/SummingSortedAlgorithm.cpp | 2 ++ 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 86a2f188104..6a7cca3b87a 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -305,6 +305,8 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() if (current->isLast() && skipLastRowFor(current->pos)) { + /// If we skip this row, it's not equals with any key we process. + last_key.reset(); /// Get the next block from the corresponding source, if there is one. queue.removeTop(); return Status(current.impl->order); diff --git a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp index 36d622daa9a..0b13d689636 100644 --- a/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp +++ b/src/Processors/Merges/Algorithms/IMergingAlgorithmWithDelayedChunk.cpp @@ -33,15 +33,8 @@ void IMergingAlgorithmWithDelayedChunk::updateCursor(Input & input, size_t sourc auto & current_input = current_inputs[source_num]; /// Extend lifetime of last chunk. - if (current_input.skip_last_row && current_input.chunk.getNumRows() <= 1) - { - /// But if chunk has only single skipped row, ignore it. 
-    }
-    else
-    {
-        last_chunk.swap(current_input.chunk);
-        last_chunk_sort_columns = std::move(cursors[source_num].sort_columns);
-    }
+    last_chunk.swap(current_input.chunk);
+    last_chunk_sort_columns = std::move(cursors[source_num].sort_columns);

     current_input.swap(input);
     cursors[source_num].reset(current_input.chunk.getColumns(), {});
diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
index fe21c4f6023..2a7514f855c 100644
--- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp
@@ -649,6 +649,8 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge()

         if (current->isLast() && skipLastRowFor(current->pos))
         {
+            /// If we skip this row, it does not equal any key we process.
+            last_key.reset();
             /// Get the next block from the corresponding source, if there is one.
             queue.removeTop();
             return Status(current.impl->order);

From 4c100dcf8f56909a739f2afbd626a353b13819b1 Mon Sep 17 00:00:00 2001
From: Tom Bombadil <565258751@qq.com>
Date: Mon, 15 Jun 2020 22:10:23 +0800
Subject: [PATCH 0723/2229] Update syntax.md (#11679)

Translate the syntax doc to Chinese.
---
 docs/zh/sql-reference/syntax.md | 182 +++++++++++++++++---------------
 1 file changed, 95 insertions(+), 87 deletions(-)

diff --git a/docs/zh/sql-reference/syntax.md b/docs/zh/sql-reference/syntax.md
index b0aa9e7364f..687638a9be6 100644
--- a/docs/zh/sql-reference/syntax.md
+++ b/docs/zh/sql-reference/syntax.md
@@ -1,156 +1,162 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_priority: 31
-toc_title: "Syntax"
+toc_title: SQL syntax
 ---

-# Syntax {#syntax}
-
-There are two types of parsers in the system: the full SQL parser (a recursive descent parser) and the data format parser (a fast stream parser).
-In all cases except the `INSERT` query, only the full SQL parser is used.
-The `INSERT` query uses both parsers:
+# SQL syntax {#syntax}
+CH has two kinds of parsers: the full SQL parser (recursive descent) and the data format parser (a fast streaming parser).
+Except for the `INSERT` query, all other cases use only the full SQL parser.
+The `INSERT` query uses both parsers at the same time:

 ``` sql
 INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
 ```

-The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse the values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it as an SQL [expression](#syntax-expressions).
+The part containing `INSERT INTO t VALUES` is handled by the full SQL parser, and the data part `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is handed to the fast streaming parser. You can also enable the full SQL parser for the data part by setting [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions). When `input_format_values_interpret_expressions = 1`, CH first tries to parse the data with the fast streaming parser. If that fails, CH then tries the full SQL parser, treating the data like an SQL [expression](#syntax-expressions).

-Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream-parsed.
-This allows avoiding issues with large `INSERT` queries.
+Data can be in any format. When CH receives a request, the server first computes up to [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in memory (1 MB by default), and the remainder is handed to the fast streaming parser.
+
+This avoids problems when processing large `INSERT` statements.

-When using the `Values` format in an `INSERT` query, it may seem that the data is parsed the same as expressions in a `SELECT` query, but this is not true. The `Values` format is much more limited.
+When the `Values` form is used in an `INSERT` statement, it may look as if the data part is parsed the same way as expressions in a `SELECT`, but it is not. The `Values` form is very limited.

-The rest of this article covers the full parser. For more information about format parsers, see the [Formats](../interfaces/formats.md) section.
+The rest of this article covers the full SQL parser. For more information about format parsing, see the [Formats](../interfaces/formats.md) section.

-## Spaces {#spaces}
+## Whitespace {#spaces}

-There may be any number of space symbols between syntactical constructions (including the beginning and end of a query). Space symbols include the space, tab, line feed, CR, and form feed.
+An SQL statement may contain any number of whitespace characters (including at its beginning and end); these include spaces, tabs, line feeds, CR, and form feeds.

-## Comments {#comments}
+## Comments {#comments}

-ClickHouse supports SQL-style and C-style comments.
-SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted.
-C-style comments span from `/*` to `*/` and can be multiline; spaces are not required either.
+CH supports SQL-style and C-style comments:
+- SQL-style comments start with `--` and run to the end of the line; the space right after `--` can be omitted
+- C-style comments start with `/*` and end with `*/`, may span multiple lines, and the space after `/*` can likewise be omitted

-## Keywords {#syntax-keywords}
+## Keywords {#syntax-keywords}

-Keywords are case-insensitive when they correspond to:
+Keywords are case-insensitive in the following cases:
+- the SQL standard, e.g. `SELECT`, `select`, and `SeLeCt` are all allowed
+- keywords implemented in some popular RDBMS, e.g. `DateTime` is the same as `datetime`

-- The SQL standard. For example, `SELECT`, `select`, and `SeLeCt` are all valid.
-- Implementations in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`.
-Whether a data type name is case-sensitive can be checked in the `system.data_type_families` table.
+You can check whether a data type name is case-sensitive in the system table [system.data_type_families](../operations/system-tables.md#system_tables-data_type_families).

 In contrast to standard SQL, all other keywords (including function names) are **case-sensitive**.

-Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as keywords, enclose them in double quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has a column named `"FROM"`.
+Keywords are not reserved; they are only handled as keywords in the corresponding context. If you use an [identifier](#syntax-identifiers) with the same name as a keyword, enclose it in double quotes or backticks. For example, if the table `table_name` contains a column `"FROM"`, then `SELECT "FROM" FROM table_name` is valid.

-## Identifiers {#syntax-identifiers}
+## Identifiers {#syntax-identifiers}

-Identifiers are:
+Identifiers include:

-- Cluster, database, table, partition, and column names.
-- Functions.
-- Data types.
-- [Expression aliases](#syntax-expression_aliases).
+- cluster, database, table, partition, and column names
+- functions
+- data types
+- expression aliases

-Identifiers can be quoted or non-quoted. The latter is preferred.
+Identifiers can be enclosed in backticks.

-Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x, _1, X_y__Z123_.`
+Identifiers not enclosed in backticks must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and must not be the same as a [keyword].

-If you want to use identifiers the same as keywords, or to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``.
+If you want to use an identifier with the same name as a keyword, or to use other symbols in an identifier, quote it with double quotes or backticks, for example: `"id"`, `` `id` ``

-## Literals {#literals}
+## Literals {#literals}

-There are numeric, string, compound, and `NULL` literals.
+CH has numeric, string, compound, and NULL literals.

 ### Numeric {#numeric}

-A numeric literal tries to be parsed:
+A numeric literal is parsed as follows:
+- first, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function
+- if that fails, as a 64-bit unsigned number, again using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function

-- First, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function.
-- If unsuccessful, as a 64-bit unsigned number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function.
-- If unsuccessful, as a floating-point number, using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function.
-- Otherwise, an error is returned.
+- if that also fails, it is tried as a floating-point value, using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function

-A literal value has the smallest type that the value fits into.
-For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md).
+- finally, if none of the above apply, an exception is returned

-Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.

+A numeric literal gets the smallest data type that can hold its value.
+For example, 1 is parsed as `UInt8`, while 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md).
+
+Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`.

-### String {#syntax-string-literal}
+### String {#syntax-string-literal}

-Only string literals in single quotes are supported. The enclosed characters can be backslash-escaped. The following escape sequences have a corresponding special value: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, escape sequences in the format `\c`, where `c` is any character, are converted to `c`. This means you can use the sequences `\'` and `\\`. The value will have the [String](../sql-reference/data-types/string.md) type.
+CH supports only string literals enclosed in single quotes. Special characters can be escaped with a backslash. The following escape sequences have corresponding special values: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. In all other cases, an escape sequence of the form `\c`, where `c` is any character, is converted to `c`. This means you can use `\'` and `\\`. The value will have the [String](../sql-reference/data-types/string.md) type.

-In string literals, you need to escape at least `'` and `\`. Single quotes can be escaped with a single quote; the literals `'It\'s'` and `'It''s'` are equal.
+In a string you need to escape at least `'` and `\`. Single quotes can be escaped with a single quote, e.g. `'It\'s'` and `'It''s'` are the same.

-### Compound {#compound}
+### Compound {#compound}

-Arrays are constructed with square brackets `[1, 2, 3]`. Tuples are constructed with round brackets `(1, 'Hello, world!', 2)`.
-Technically, these are not literals but expressions with the array-creation operator and the tuple-creation operator, respectively.
-An array must contain at least one item, and a tuple must contain at least two items.
-There is a separate case when tuples appear in the `IN` clause of a `SELECT` query. Query results can include tuples, but tuples can't be saved to a database (except for tables with the [Memory](../engines/table-engines/special/memory.md) engine).
+Arrays are constructed with square brackets `[1, 2, 3]`, tuples with round brackets `(1, 'Hello, world!', 2)`
+
+Technically these are not literals but expressions built with the array-creation and tuple-creation operators.
+
+An array must contain at least one element, and a tuple at least two elements
+
+There is an exception when a tuple appears in the `IN` part of a `SELECT` query. A query result may contain tuples, but tuples cannot be saved to a database (except for tables using the [Memory](../engines/table-engines/special/memory.md) engine)

-### NULL {#null-literal}
+### NULL {#null-literal}

-Indicates that the value is missing.
+Represents a missing value

-In order to store `NULL` in a table field, it must be of the [Nullable](../sql-reference/data-types/nullable.md) type.
+To store a NULL value in a table field, the field must be declared with the [Nullable](../sql-reference/data-types/nullable.md) type

-Depending on the data format (input or output), `NULL` may have a different representation. For details, see the documentation for [data formats](../interfaces/formats.md#formats).
+Depending on the data format (input or output), NULL may have different representations. For more information, see the documentation for [data formats](../interfaces/formats.md#formats)

-There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For details, read the documentation for each operation.
-In queries, you can check `NULL` using the [IS NULL](operators/index.md#operator-is-null) and [IS NOT NULL](operators/index.md) operators and the related functions `isNull` and `isNotNull`.
+There are many nuances in handling `NULL`. For example, if at least one argument of a comparison is `NULL`, the result is also `NULL`. The same goes for multiplication, addition, and other operations. For details, see the documentation section for each operation.
+
+In statements, you can check for `NULL` values with the [IS NULL](operators/index.md#operator-is-null) and [IS NOT NULL](operators/index.md) operators and the `isNull` / `isNotNull` functions

-## Functions {#functions}
-
-A function call is written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`.
-There are regular and aggregate functions (see the section "Aggregate functions"). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called "parametric" functions, and the arguments in the first list are called "parameters".
-The syntax of aggregate functions without parameters is the same as for regular functions.

+## Functions {#functions}
+A function call is written like an identifier with a list of arguments (possibly empty) in round brackets. Unlike standard SQL, the brackets are required whether or not the argument list is empty. Example: `now()`.
+Functions are divided into regular and aggregate functions (see the "Aggregate functions" chapter). Some aggregate functions take two argument lists; the arguments in the first list are called "parameters". The syntax of aggregate functions without "parameters" is the same as for regular functions.
+

-## Operators {#operators}
+## Operators {#operators}

-During query parsing, operators are converted to the corresponding functions, taking their priority and associativity into account.
-For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, multiply(2, 3)), 4)`.
+During the query parsing stage, operators are converted into the corresponding functions; mind their precedence when using them. For example:
+the expression `1 + 2 * 3 + 4` is parsed into `plus(plus(1, multiply(2, 3)), 4)`.
+

-## Data types and database table engines {#data_types-and-database-table-engines}
+## Data types and database/table engines {#data_types-and-database-table-engines}

-Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain a list of arguments in brackets. For details, see the sections "Data types," "Table engines," and "CREATE".
+Data types and table engines in a `CREATE` statement are written like identifiers or functions.
+In other words, they can take a list of arguments in brackets. For more information, see the chapters "Data types," "Table engines," and "CREATE".

 ## Expression aliases {#syntax-expression_aliases}

-An alias is a user-defined name for an expression in a query.
+An alias is a user-defined name for an expression

 ``` sql
 expr AS alias
 ```

-- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword.
+- `AS` — the keyword for defining aliases. An alias can be defined for a table or for a column in a select statement (`AS` can be omitted)
+    For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.

-    For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
+    In the [CAST function](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast), `AS` has another meaning. See the description of that function.

-    In the [CAST](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function.

-- `expr` — Any expression supported by ClickHouse.
+- `expr` — any expression supported by CH.

-    For example, `SELECT column_name * 2 AS double FROM some_table`.
+    For example, `SELECT column_name * 2 AS double FROM some_table`.

-- `alias` — Name for `expr`. The alias should comply with the [identifiers](#syntax-identifiers) syntax.
+- `alias` — the name for `expr`. The alias must comply with the [identifier](#syntax-identifiers) syntax.

-    For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
+    For example, `SELECT "table t".column_name FROM table_name AS "table t"`.

-### Notes on usage {#notes-on-usage}
+### Notes on usage {#notes-on-usage}

-Aliases are global for a query or subquery, and you can define an alias in any part of a query for any expression. For example, `SELECT (1 AS n) + 2, n`.
+Aliases are globally visible in the current query or subquery; you can define an alias for an expression anywhere in a query

-Aliases are not visible between subqueries. For example, while executing the query `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a`, ClickHouse generates the exception `Unknown identifier: num`.
+Aliases are not visible inside the subqueries of the current query or across different subqueries. For example, when executing the query `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a`, CH raises the exception `Unknown identifier: num`.

-If an alias is defined for the result columns in the `SELECT` clause of a subquery, these columns are visible in the outer query. For example, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`.
-
-Be careful with aliases that are the same as column or table names. Let's consider the following example:
+If an alias is defined for a result column of a select subquery, it can be used in the outer query. For example, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`.
+Note the case where a column alias coincides with a table alias; consider the following example:

 ``` sql
 CREATE TABLE t
 (
     a Int,
     b Int
 )
 ENGINE = TinyLog()

 SELECT
     argMax(a, b),
     sum(b) AS b
 FROM t

 Received exception from server (version 18.14.17):
 Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
 ```

-In this example, we declared the table `t` with the column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. Because aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`.
 This substitution caused the exception.

-## Asterisk {#asterisk}
-
-In a `SELECT` query, an asterisk can replace the expression. For details, see the section "SELECT".
+In this example, we declared the table `t` with the column `b`. Then, when querying the data, we defined the alias `sum(b) AS b`. Because aliases are global, CH replaced the variable `b` inside the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused an exception.
+
+## Asterisk {#asterisk}
+
+In a select query, an asterisk can be used in place of an expression. For details, see the "SELECT" section.
+

 ## Expressions {#syntax-expressions}

 An expression is a function, identifier, literal, application of an operator, expression in brackets, subquery, or asterisk. It can also contain an alias.
 A list of expressions is one or more expressions separated by commas.
 Functions and operators, in turn, can have expressions as arguments.

-[Original article](https://clickhouse.tech/docs/en/sql_reference/syntax/)
+[Original article](https://clickhouse.tech/docs/en/sql_reference/syntax/)
From c33b472f9a02c19b65cea68f57a6fcfa5e59be66 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Mon, 15 Jun 2020 17:25:42 +0300
Subject: [PATCH 0724/2229] fixup

---
 programs/benchmark/Benchmark.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index 6884f6faed3..590e1496fd6 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -363,7 +363,6 @@ private:
             try
             {
                 execute(connection_entries, query, distribution(generator));
-                ++queries_executed;
             }
             catch (...)
             {
@@ -374,7 +373,14 @@ private:
                     shutdown = true;
                     throw;
                 }
+                else
+                {
+                    std::cerr << getCurrentExceptionMessage(print_stacktrace, true);
+                }
             }
+            // Count failed queries toward executed, so that we'd reach
+            // max_iterations even if every run fails.
+            ++queries_executed;
         }
     }

From b725df63667f7cb55a0ecc6d4ebbd4723385a7da Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 15 Jun 2020 18:17:44 +0300
Subject: [PATCH 0725/2229] Added ReadFromStorageStep.
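The step wraps what InterpreterSelectQuery::executeFetchColumns previously did inline; the read-time limits it applies correspond to per-query settings, roughly as follows (a sketch; `some_table` is a placeholder):

``` sql
SELECT *
FROM some_table
SETTINGS
    max_rows_to_read = 1000000,   -- size limits, checked per processed block
    max_execution_time = 10,      -- time limit carried in LocalLimits
    min_execution_speed = 100000; -- only enforced when running to the Complete stage
```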
--- src/Interpreters/InterpreterSelectQuery.cpp | 82 +--------- .../QueryPlan/ReadFromStorageStep.cpp | 154 ++++++++++++++++++ .../QueryPlan/ReadFromStorageStep.h | 50 ++++++ .../Transforms/ExpressionTransform.cpp | 2 +- .../Transforms/ExpressionTransform.h | 2 + src/Processors/Transforms/FilterTransform.cpp | 2 +- src/Processors/Transforms/FilterTransform.h | 6 + src/Processors/ya.make | 1 + 8 files changed, 220 insertions(+), 79 deletions(-) create mode 100644 src/Processors/QueryPlan/ReadFromStorageStep.cpp create mode 100644 src/Processors/QueryPlan/ReadFromStorageStep.h diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 23cb753e96f..cadeb57c03c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -71,6 +71,7 @@ #include #include #include +#include namespace DB @@ -1273,84 +1274,11 @@ void InterpreterSelectQuery::executeFetchColumns( query_info.input_sorting_info = query_info.order_by_optimizer->getInputOrder(storage); } - Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams); + ReadFromStorageStep read_step( + table_lock, options, storage, + required_columns, query_info, *context, processing_stage, max_block_size, max_streams); - if (pipes.empty()) - { - Pipe pipe(std::make_shared(storage->getSampleBlockForColumns(required_columns))); - - if (query_info.prewhere_info) - { - if (query_info.prewhere_info->alias_actions) - pipe.addSimpleTransform(std::make_shared( - pipe.getHeader(), query_info.prewhere_info->alias_actions)); - - pipe.addSimpleTransform(std::make_shared( - pipe.getHeader(), - prewhere_info->prewhere_actions, - prewhere_info->prewhere_column_name, - prewhere_info->remove_prewhere_column)); - - // To remove additional columns - // In some cases, we did not read any marks so that the pipeline.streams is empty - // Thus, some columns in prewhere are not removed as expected - // This leads to mismatched header in distributed table - if (query_info.prewhere_info->remove_columns_actions) - pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), query_info.prewhere_info->remove_columns_actions)); - } - - pipes.emplace_back(std::move(pipe)); - } - - /// Table lock is stored inside pipeline here. - pipeline.addTableLock(table_lock); - - /// Set the limits and quota for reading data, the speed and time of the query. - { - IBlockInputStream::LocalLimits limits; - limits.mode = IBlockInputStream::LIMITS_TOTAL; - limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); - limits.speed_limits.max_execution_time = settings.max_execution_time; - limits.timeout_overflow_mode = settings.timeout_overflow_mode; - - /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, - * because the initiating server has a summary of the execution of the request on all servers. - * - * But limits on data size to read and maximum execution time are reasonable to check both on initiator and - * additionally on each remote server, because these limits are checked per block of data processed, - * and remote servers may process way more blocks of data than are received by initiator. - * - * The limits to throttle maximum execution speed is also checked on all servers. 
- */ - if (options.to_stage == QueryProcessingStage::Complete) - { - limits.speed_limits.min_execution_rps = settings.min_execution_speed; - limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; - } - - limits.speed_limits.max_execution_rps = settings.max_execution_speed; - limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; - limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; - - auto quota = context->getQuota(); - - for (auto & pipe : pipes) - { - if (!options.ignore_limits) - pipe.setLimits(limits); - - if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) - pipe.setQuota(quota); - } - } - - if (pipes.size() == 1) - pipeline.setMaxThreads(1); - - for (auto & pipe : pipes) - pipe.enableQuota(); - - pipeline.init(std::move(pipes)); + pipeline = std::move(*read_step.updatePipeline({})); } else throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp new file mode 100644 index 00000000000..191f501e9fa --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -0,0 +1,154 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +ReadFromStorageStep::ReadFromStorageStep( + TableStructureReadLockHolder table_lock_, + SelectQueryOptions options_, + StoragePtr storage_, + const Names & required_columns_, + const SelectQueryInfo & query_info_, + const Context & context_, + QueryProcessingStage::Enum processing_stage_, + size_t max_block_size_, + size_t max_streams_) + : table_lock(std::move(table_lock_)) + , options(std::move(options_)) + , storage(std::move(storage_)) + , required_columns(required_columns_) + , query_info(query_info_) + , context(context_) + , processing_stage(processing_stage_) + , max_block_size(max_block_size_) + , max_streams(max_streams_) +{ + Block header = storage->getSampleBlockForColumns(required_columns); + + if (query_info.prewhere_info) + { + if (query_info.prewhere_info->alias_actions) + header = ExpressionTransform::transformHeader(std::move(header), query_info.prewhere_info->alias_actions); + + header = FilterTransform::transformHeader( + std::move(header), + query_info.prewhere_info->prewhere_actions, + query_info.prewhere_info->prewhere_column_name, + query_info.prewhere_info->remove_prewhere_column); + + if (query_info.prewhere_info->remove_columns_actions) + header = ExpressionTransform::transformHeader( + std::move(header), + query_info.prewhere_info->remove_columns_actions); + } + + input_streams.emplace_back(DataStream{.header = std::move(header)}); +} + +ReadFromStorageStep::~ReadFromStorageStep() = default; + +QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines) +{ + Pipes pipes = storage->read(required_columns, query_info, context, processing_stage, max_block_size, max_streams); + + if (pipes.empty()) + { + Pipe pipe(std::make_shared(storage->getSampleBlockForColumns(required_columns))); + + if (query_info.prewhere_info) + { + if (query_info.prewhere_info->alias_actions) + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), query_info.prewhere_info->alias_actions)); + + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), + query_info.prewhere_info->prewhere_actions, + query_info.prewhere_info->prewhere_column_name, + 
query_info.prewhere_info->remove_prewhere_column)); + + // To remove additional columns + // In some cases, we did not read any marks so that the pipeline.streams is empty + // Thus, some columns in prewhere are not removed as expected + // This leads to mismatched header in distributed table + if (query_info.prewhere_info->remove_columns_actions) + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), query_info.prewhere_info->remove_columns_actions)); + } + + pipes.emplace_back(std::move(pipe)); + } + + if (!blocksHaveEqualStructure(pipes.front().getHeader(), input_streams.front().header)) + { + for (auto & pipe : pipes) + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), input_streams.front().header, ConvertingTransform::MatchColumnsMode::Name)); + } + + auto pipeline = std::make_unique(); + + /// Table lock is stored inside pipeline here. + pipeline->addTableLock(table_lock); + + /// Set the limits and quota for reading data, the speed and time of the query. + { + const Settings & settings = context.getSettingsRef(); + + IBlockInputStream::LocalLimits limits; + limits.mode = IBlockInputStream::LIMITS_TOTAL; + limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); + limits.speed_limits.max_execution_time = settings.max_execution_time; + limits.timeout_overflow_mode = settings.timeout_overflow_mode; + + /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, + * because the initiating server has a summary of the execution of the request on all servers. + * + * But limits on data size to read and maximum execution time are reasonable to check both on initiator and + * additionally on each remote server, because these limits are checked per block of data processed, + * and remote servers may process way more blocks of data than are received by initiator. + * + * The limits to throttle maximum execution speed is also checked on all servers. + */ + if (options.to_stage == QueryProcessingStage::Complete) + { + limits.speed_limits.min_execution_rps = settings.min_execution_speed; + limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; + } + + limits.speed_limits.max_execution_rps = settings.max_execution_speed; + limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; + limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + + auto quota = context.getQuota(); + + for (auto & pipe : pipes) + { + if (!options.ignore_limits) + pipe.setLimits(limits); + + if (!options.ignore_quota && (options.to_stage == QueryProcessingStage::Complete)) + pipe.setQuota(quota); + } + } + + if (pipes.size() == 1) + pipeline->setMaxThreads(1); + + for (auto & pipe : pipes) + pipe.enableQuota(); + + pipeline->init(std::move(pipes)); + return pipeline; +} + +} diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.h b/src/Processors/QueryPlan/ReadFromStorageStep.h new file mode 100644 index 00000000000..180cf47e6d7 --- /dev/null +++ b/src/Processors/QueryPlan/ReadFromStorageStep.h @@ -0,0 +1,50 @@ +#include +#include +#include +#include + +namespace DB +{ + +class IStorage; +using StoragePtr = std::shared_ptr; + +class SelectQueryInfo; + +class PrewhereInfo; + +/// Reads from storage. 
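+/// The constructor computes the step's output header up front by applying the
+/// PREWHERE transforms (alias actions, filter, column removal) to the storage
+/// sample block; updatePipeline() then builds the matching pipeline: sources
+/// from storage->read(), a ConvertingTransform when headers differ, read
+/// limits and quota taken from the query context, and the table lock kept
+/// alive inside the pipeline.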
+class ReadFromStorageStep : public IQueryPlanStep +{ +public: + ReadFromStorageStep( + TableStructureReadLockHolder table_lock, + SelectQueryOptions options, + StoragePtr storage, + const Names & required_columns, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processing_stage, + size_t max_block_size, + size_t max_streams); + + virtual ~ReadFromStorageStep(); + + String getName() const override { return "ReadFromStorage"; } + + QueryPipelinePtr updatePipeline(QueryPipelines) override; + +private: + TableStructureReadLockHolder table_lock; + SelectQueryOptions options; + + StoragePtr storage; + const Names & required_columns; + const SelectQueryInfo & query_info; + const Context & context; + QueryProcessingStage::Enum processing_stage; + size_t max_block_size; + size_t max_streams; +}; + +} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index bf52a13f08a..bf523d6d7a3 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -5,7 +5,7 @@ namespace DB { -static Block transformHeader(Block header, const ExpressionActionsPtr & expression) +Block ExpressionTransform::transformHeader(Block header, const ExpressionActionsPtr & expression) { expression->execute(header, true); return header; diff --git a/src/Processors/Transforms/ExpressionTransform.h b/src/Processors/Transforms/ExpressionTransform.h index 87f2c01ea1d..60d6dc0f777 100644 --- a/src/Processors/Transforms/ExpressionTransform.h +++ b/src/Processors/Transforms/ExpressionTransform.h @@ -18,6 +18,8 @@ public: String getName() const override { return "ExpressionTransform"; } + static Block transformHeader(Block header, const ExpressionActionsPtr & expression); + protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index e35a399a0ee..aaa44260234 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -27,7 +27,7 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_ } } -static Block transformHeader( +Block FilterTransform::transformHeader( Block header, const ExpressionActionsPtr & expression, const String & filter_column_name, diff --git a/src/Processors/Transforms/FilterTransform.h b/src/Processors/Transforms/FilterTransform.h index 45ec9da2ea3..0497a339c82 100644 --- a/src/Processors/Transforms/FilterTransform.h +++ b/src/Processors/Transforms/FilterTransform.h @@ -19,6 +19,12 @@ public: const Block & header_, ExpressionActionsPtr expression_, String filter_column_name_, bool remove_filter_column_, bool on_totals_ = false); + static Block transformHeader( + Block header, + const ExpressionActionsPtr & expression, + const String & filter_column_name, + bool remove_filter_column); + String getName() const override { return "FilterTransform"; } Status prepare() override; diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 5cbc5dfd291..6ced4fc28cc 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -135,6 +135,7 @@ SRCS( Transforms/SortingTransform.cpp Transforms/TotalsHavingTransform.cpp QueryPlan/IQueryPlanStep.cpp + QueryPlan/ReadFromStorageStep.cpp QueryPlan/QueryPlan.cpp ) From 5436ef38bf2507638117ff976eea887ac5853d30 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 18:21:10 +0300 Subject: [PATCH 0726/2229] Fix 
MergingSortedAlgorithm. --- src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index 77258a8187b..ee13ef70203 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -105,7 +105,7 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::mergeImpl(TSortingHeap & queue auto current = queue.current(); - if (current->isLast() && current_inputs[current->pos].skip_last_row) + if (current.impl->isLast() && current_inputs[current.impl->order].skip_last_row) { /// Get the next block from the corresponding source, if there is one. queue.removeTop(); @@ -116,7 +116,7 @@ IMergingAlgorithm::Status MergingSortedAlgorithm::mergeImpl(TSortingHeap & queue * Or is there only one data source left in the queue? Then you can take the entire block on current cursor. */ if (current.impl->isFirst() - && !current_inputs[current->pos].skip_last_row /// Ignore optimization if last row should be skipped. + && !current_inputs[current.impl->order].skip_last_row /// Ignore optimization if last row should be skipped. && (queue.size() == 1 || (queue.size() >= 2 && current.totallyLessOrEquals(queue.nextChild())))) { From 6af27d6c324222eec5c51284391518741d2a31b1 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Mon, 15 Jun 2020 18:22:11 +0300 Subject: [PATCH 0727/2229] Update simpleaggregatefunction.md --- docs/en/sql-reference/data-types/simpleaggregatefunction.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 5f4c408f939..8b7e498e535 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -12,6 +12,8 @@ The following aggregate functions are supported: - [`groupBitAnd`](../../sql-reference/aggregate-functions/reference.md#groupbitand) - [`groupBitOr`](../../sql-reference/aggregate-functions/reference.md#groupbitor) - [`groupBitXor`](../../sql-reference/aggregate-functions/reference.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference.md#groupuniqarrayx-groupuniqarraymax-sizex) Values of the `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with the same aggregation function.
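A note on the MergingSortedAlgorithm fix above (patch 0726): in the cursor returned by queue.current(), `pos` is the current row inside the cursor's block, while `order` identifies which input the cursor was created for, so per-input flags such as `skip_last_row` must be indexed by `order`. A minimal standalone illustration of the distinction, using simplified hypothetical types rather than the real SortCursor API:

#include <cstddef>
#include <vector>

struct CursorSketch
{
    size_t pos = 0;     // current row within this cursor's block
    size_t order = 0;   // index of the input this cursor reads from
    bool last = false;  // whether the cursor is at the last row of its block
};

struct InputSketch { bool skip_last_row = false; };

// Indexing the per-input array with `pos` (a row number) instead of `order`
// (an input number) reads the wrong element; that is the bug fixed above.
bool shouldSkipLastRow(const CursorSketch & cursor, const std::vector<InputSketch> & inputs)
{
    return cursor.last && inputs[cursor.order].skip_last_row;
}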
From 6e18001bd81809e4b8d85c5b388e2111f685cb6f Mon Sep 17 00:00:00 2001 From: long2ice Date: Mon, 15 Jun 2020 23:25:13 +0800 Subject: [PATCH 0728/2229] add mysql2ch in docs (#11680) * add mysql2ch add mysql2ch * Update integrations.md * Update integrations.md * Update integrations.md * Update integrations.md * Update integrations.md * Update integrations.md * Update integrations.md --- docs/en/interfaces/third-party/integrations.md | 1 + docs/es/interfaces/third-party/integrations.md | 1 + docs/fa/interfaces/third-party/integrations.md | 1 + docs/fr/interfaces/third-party/integrations.md | 1 + docs/ja/interfaces/third-party/integrations.md | 1 + docs/ru/interfaces/third-party/integrations.md | 1 + docs/tr/interfaces/third-party/integrations.md | 1 + docs/zh/interfaces/third-party/integrations.md | 1 + 8 files changed, 8 insertions(+) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 716e774871b..17e7f1f18cc 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -12,6 +12,7 @@ toc_title: Integrations - Relational database management systems - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/es/interfaces/third-party/integrations.md b/docs/es/interfaces/third-party/integrations.md index 716e774871b..17e7f1f18cc 100644 --- a/docs/es/interfaces/third-party/integrations.md +++ b/docs/es/interfaces/third-party/integrations.md @@ -12,6 +12,7 @@ toc_title: Integrations - Relational database management systems - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index 657432c7958..df864ef71e6 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "\u06CC\u06A9\u067E\u0627\u0631\u0686\u06AF\u06CC" - سیستم های مدیریت پایگاه داده رابطه ای - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [در حال بارگذاری](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [تاتر-خروجی زیر-داده خوان](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/fr/interfaces/third-party/integrations.md b/docs/fr/interfaces/third-party/integrations.md index f252fd6229b..8332ffe5e59 100644 --- a/docs/fr/interfaces/third-party/integrations.md +++ b/docs/fr/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "Int\xE9gration" - Systèmes de gestion de bases de données relationnelles - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-lecteur de données](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-réplicateur](https://github.com/larsnovikov/horgh-replicator) diff --git 
a/docs/ja/interfaces/third-party/integrations.md b/docs/ja/interfaces/third-party/integrations.md index 3e38d578093..2ac2ad24410 100644 --- a/docs/ja/interfaces/third-party/integrations.md +++ b/docs/ja/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: "\u7D71\u5408" - リレーショナルデータベース管理システム - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-データリーダー](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-レプリケーター](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 39449b54df8..19a72edc4d3 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -7,6 +7,7 @@ - Реляционные системы управления базами данных - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/tr/interfaces/third-party/integrations.md b/docs/tr/interfaces/third-party/integrations.md index 8a1d5c239f6..a5e5a60c72f 100644 --- a/docs/tr/interfaces/third-party/integrations.md +++ b/docs/tr/interfaces/third-party/integrations.md @@ -14,6 +14,7 @@ toc_title: Entegrasyonlar - İlişkisel veritabanı yönetim sistemleri - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-çoğaltıcı](https://github.com/larsnovikov/horgh-replicator) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 014fdc88304..e0f308fecde 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -7,6 +7,7 @@ - 关系数据库管理系统 - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [horgh-复制器](https://github.com/larsnovikov/horgh-replicator) From 3878254e0cf09213dab28ea26d28961fc0c42133 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Mon, 15 Jun 2020 19:21:52 +0300 Subject: [PATCH 0729/2229] Trigger CI --- .../0_stateless/01315_count_distinct_return_not_nullable.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql index 932cd2f69f9..0558d2cfd15 100644 --- a/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql +++ b/tests/queries/0_stateless/01315_count_distinct_return_not_nullable.sql @@ -7,13 +7,11 @@ SELECT uniqExact(number >= 5 ? number : NULL) FROM numbers(10); SELECT count(DISTINCT number >= 5 ? 
number : NULL) FROM numbers(10); SELECT '---'; - SELECT count(NULL); SELECT uniq(NULL); SELECT count(DISTINCT NULL); SELECT '---'; - SELECT avg(NULL); SELECT sum(NULL); SELECT corr(NULL, NULL); From 857582245e894ef71e59decef44555760f8f9908 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 15 Jun 2020 19:39:00 +0300 Subject: [PATCH 0730/2229] fixup --- programs/benchmark/Benchmark.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index af56aaa6db5..b8e4a0c346a 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -62,12 +62,13 @@ public: bool randomize_, size_t max_iterations_, double max_time_, const String & json_path_, size_t confidence_, const String & query_id_, bool continue_on_errors_, - const Settings & settings_) + bool print_stacktrace_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), confidence(confidence_), query_id(query_id_), - continue_on_errors(continue_on_errors_), settings(settings_), + continue_on_errors(continue_on_errors_), + print_stacktrace(print_stacktrace_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) { @@ -154,6 +155,7 @@ private: size_t confidence; std::string query_id; bool continue_on_errors; + bool print_stacktrace; Settings settings; SharedContextHolder shared_context; Context global_context; @@ -376,7 +378,8 @@ private: } else { - std::cerr << getCurrentExceptionMessage(print_stacktrace, true) ; + std::cerr << getCurrentExceptionMessage(print_stacktrace, + true /*check embedded stack trace*/) ; } } // Count failed queries toward executed, so that we'd reach @@ -605,6 +608,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["confidence"].as(), options["query_id"].as(), options.count("continue_on_errors") > 0, + print_stacktrace, settings); return benchmark.run(); } From af2fe2ba553e7112ef474d73473fbff047c5ae60 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 19:55:33 +0300 Subject: [PATCH 0731/2229] Compilable setColumns, setConstraints, setIndices --- src/Storages/IStorage.cpp | 105 ++++++++---------- src/Storages/IStorage.h | 21 ++-- src/Storages/Kafka/StorageKafka.cpp | 4 +- src/Storages/LiveView/StorageBlocks.h | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 6 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 7 +- src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageDictionary.cpp | 4 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/StorageFile.cpp | 11 +- src/Storages/StorageGenerateRandom.cpp | 4 +- src/Storages/StorageHDFS.cpp | 7 +- src/Storages/StorageInMemoryMetadata.cpp | 15 +++ src/Storages/StorageInMemoryMetadata.h | 13 +++ src/Storages/StorageInput.cpp | 4 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMaterializedView.cpp | 6 +- src/Storages/StorageMemory.cpp | 6 +- src/Storages/StorageMerge.cpp | 6 +- src/Storages/StorageMySQL.cpp | 6 +- src/Storages/StorageNull.cpp | 2 +- src/Storages/StorageNull.h | 6 +- src/Storages/StorageS3.cpp | 6 +- src/Storages/StorageSet.cpp | 7 +- src/Storages/StorageStripeLog.cpp | 6 +- src/Storages/StorageTinyLog.cpp | 6 +- src/Storages/StorageURL.cpp | 7 +- src/Storages/StorageValues.cpp | 4 +- 
src/Storages/StorageView.cpp | 4 +- src/Storages/System/IStorageSystemOneBlock.h | 4 +- src/Storages/System/StorageSystemColumns.cpp | 4 +- .../System/StorageSystemDetachedParts.cpp | 4 +- src/Storages/System/StorageSystemDisks.cpp | 4 +- src/Storages/System/StorageSystemNumbers.cpp | 4 +- src/Storages/System/StorageSystemOne.cpp | 4 +- .../System/StorageSystemPartsBase.cpp | 4 +- src/Storages/System/StorageSystemReplicas.cpp | 4 +- .../System/StorageSystemStoragePolicies.cpp | 4 +- src/Storages/System/StorageSystemTables.cpp | 4 +- src/Storages/System/StorageSystemZeros.cpp | 5 +- 41 files changed, 211 insertions(+), 137 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index a244f836f5c..6c045a6f365 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -34,22 +34,22 @@ namespace ErrorCodes const ColumnsDescription & IStorage::getColumns() const { - return metadata.columns; + return metadata->columns; } const IndicesDescription & IStorage::getSecondaryIndices() const { - return metadata.secondary_indices; + return metadata->secondary_indices; } bool IStorage::hasSecondaryIndices() const { - return !metadata.secondary_indices.empty(); + return !metadata->secondary_indices.empty(); } const ConstraintsDescription & IStorage::getConstraints() const { - return metadata.constraints; + return metadata->constraints; } Block IStorage::getSampleBlock() const @@ -287,23 +287,6 @@ void IStorage::check(const Block & block, bool need_all) const } } -void IStorage::setColumns(ColumnsDescription columns_) -{ - if (columns_.getOrdinary().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - metadata.columns = std::move(columns_); -} - -void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_) -{ - metadata.secondary_indices = std::move(secondary_indices_); -} - -void IStorage::setConstraints(ConstraintsDescription constraints_) -{ - metadata.constraints = std::move(constraints_); -} - bool IStorage::isVirtualColumn(const String & column_name) const { /// Virtual column maybe overriden by real column @@ -382,7 +365,7 @@ void IStorage::alter( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } @@ -417,137 +400,137 @@ NamesAndTypesList IStorage::getVirtuals() const const KeyDescription & IStorage::getPartitionKey() const { - return metadata.partition_key; + return metadata->partition_key; } void IStorage::setPartitionKey(const KeyDescription & partition_key_) { - metadata.partition_key = partition_key_; + metadata->partition_key = partition_key_; } bool IStorage::isPartitionKeyDefined() const { - return metadata.partition_key.definition_ast != nullptr; + return metadata->partition_key.definition_ast != nullptr; } bool IStorage::hasPartitionKey() const { - return !metadata.partition_key.column_names.empty(); + return !metadata->partition_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPartitionKey() const { if (hasPartitionKey()) - return metadata.partition_key.expression->getRequiredColumns(); + return metadata->partition_key.expression->getRequiredColumns(); return {}; } const KeyDescription & IStorage::getSortingKey() const { - return metadata.sorting_key; + return metadata->sorting_key; } void IStorage::setSortingKey(const 
KeyDescription & sorting_key_) { - metadata.sorting_key = sorting_key_; + metadata->sorting_key = sorting_key_; } bool IStorage::isSortingKeyDefined() const { - return metadata.sorting_key.definition_ast != nullptr; + return metadata->sorting_key.definition_ast != nullptr; } bool IStorage::hasSortingKey() const { - return !metadata.sorting_key.column_names.empty(); + return !metadata->sorting_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSortingKey() const { if (hasSortingKey()) - return metadata.sorting_key.expression->getRequiredColumns(); + return metadata->sorting_key.expression->getRequiredColumns(); return {}; } Names IStorage::getSortingKeyColumns() const { if (hasSortingKey()) - return metadata.sorting_key.column_names; + return metadata->sorting_key.column_names; return {}; } const KeyDescription & IStorage::getPrimaryKey() const { - return metadata.primary_key; + return metadata->primary_key; } void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { - metadata.primary_key = primary_key_; + metadata->primary_key = primary_key_; } bool IStorage::isPrimaryKeyDefined() const { - return metadata.primary_key.definition_ast != nullptr; + return metadata->primary_key.definition_ast != nullptr; } bool IStorage::hasPrimaryKey() const { - return !metadata.primary_key.column_names.empty(); + return !metadata->primary_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPrimaryKey() const { if (hasPrimaryKey()) - return metadata.primary_key.expression->getRequiredColumns(); + return metadata->primary_key.expression->getRequiredColumns(); return {}; } Names IStorage::getPrimaryKeyColumns() const { - if (!metadata.primary_key.column_names.empty()) - return metadata.primary_key.column_names; + if (!metadata->primary_key.column_names.empty()) + return metadata->primary_key.column_names; return {}; } const KeyDescription & IStorage::getSamplingKey() const { - return metadata.sampling_key; + return metadata->sampling_key; } void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { - metadata.sampling_key = sampling_key_; + metadata->sampling_key = sampling_key_; } bool IStorage::isSamplingKeyDefined() const { - return metadata.sampling_key.definition_ast != nullptr; + return metadata->sampling_key.definition_ast != nullptr; } bool IStorage::hasSamplingKey() const { - return !metadata.sampling_key.column_names.empty(); + return !metadata->sampling_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSampling() const { if (hasSamplingKey()) - return metadata.sampling_key.expression->getRequiredColumns(); + return metadata->sampling_key.expression->getRequiredColumns(); return {}; } TTLTableDescription IStorage::getTableTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl; + return metadata->table_ttl; } void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) { std::lock_guard lock(ttl_mutex); - metadata.table_ttl = table_ttl_; + metadata->table_ttl = table_ttl_; } bool IStorage::hasAnyTableTTL() const @@ -558,43 +541,43 @@ bool IStorage::hasAnyTableTTL() const TTLColumnsDescription IStorage::getColumnTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.column_ttls_by_name; + return metadata->column_ttls_by_name; } void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) { std::lock_guard lock(ttl_mutex); - metadata.column_ttls_by_name = column_ttls_by_name_; + metadata->column_ttls_by_name = column_ttls_by_name_; } bool IStorage::hasAnyColumnTTL() const { 
std::lock_guard lock(ttl_mutex); - return !metadata.column_ttls_by_name.empty(); + return !metadata->column_ttls_by_name.empty(); } TTLDescription IStorage::getRowsTTL() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.rows_ttl; + return metadata->table_ttl.rows_ttl; } bool IStorage::hasRowsTTL() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.rows_ttl.expression != nullptr; + return metadata->table_ttl.rows_ttl.expression != nullptr; } TTLDescriptions IStorage::getMoveTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.move_ttl; + return metadata->table_ttl.move_ttl; } bool IStorage::hasAnyMoveTTL() const { std::lock_guard lock(ttl_mutex); - return !metadata.table_ttl.move_ttl.empty(); + return !metadata->table_ttl.move_ttl.empty(); } @@ -660,32 +643,32 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum ASTPtr IStorage::getSettingsChanges() const { - if (metadata.settings_changes) - return metadata.settings_changes->clone(); + if (metadata->settings_changes) + return metadata->settings_changes->clone(); return nullptr; } void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { if (settings_changes_) - metadata.settings_changes = settings_changes_->clone(); + metadata->settings_changes = settings_changes_->clone(); else - metadata.settings_changes = nullptr; + metadata->settings_changes = nullptr; } const SelectQueryDescription & IStorage::getSelectQuery() const { - return metadata.select; + return metadata->select; } void IStorage::setSelectQuery(const SelectQueryDescription & select_) { - metadata.select = select_; + metadata->select = select_; } bool IStorage::hasSelectQuery() const { - return metadata.select.select_query != nullptr; + return metadata->select.select_query != nullptr; } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index c7c8e382a87..4d01bb5370d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -140,27 +140,24 @@ public: public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. - - void setSecondaryIndices(IndicesDescription secondary_indices_); const IndicesDescription & getSecondaryIndices() const; /// Has at least one non primary index bool hasSecondaryIndices() const; const ConstraintsDescription & getConstraints() const; - void setConstraints(ConstraintsDescription constraints_); /// Storage settings ASTPtr getSettingsChanges() const; void setSettingsChanges(const ASTPtr & settings_changes_); - bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } + bool hasSettingsChanges() const { return metadata->settings_changes != nullptr; } /// Select query for *View storages. const SelectQueryDescription & getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; - StorageInMemoryMetadata getInMemoryMetadata() const { return metadata; } + StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } + void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. 
@@ -207,7 +204,7 @@ private: /// TODO (alesap) just use multiversion for atomic metadata mutable std::mutex ttl_mutex; - StorageInMemoryMetadata metadata; + StorageMetadataPtr metadata; private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; @@ -354,7 +351,7 @@ public: /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters. * This method must fully execute the ALTER query, taking care of the locks itself. - * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. + * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata-> */ virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); @@ -445,7 +442,7 @@ public: /// struct). void setPartitionKey(const KeyDescription & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. - ASTPtr getPartitionKeyAST() const { return metadata.partition_key.definition_ast; } + ASTPtr getPartitionKeyAST() const { return metadata->partition_key.definition_ast; } /// Storage has user-defined (in CREATE query) partition key. bool isPartitionKeyDefined() const; /// Storage has partition key. @@ -460,7 +457,7 @@ public: /// struct). void setSortingKey(const KeyDescription & sorting_key_); /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. - ASTPtr getSortingKeyAST() const { return metadata.sorting_key.definition_ast; } + ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. bool isSortingKeyDefined() const; /// Storage has sorting key. It means, that it contains at least one column. @@ -477,7 +474,7 @@ public: /// struct). void setPrimaryKey(const KeyDescription & primary_key_); /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. - ASTPtr getPrimaryKeyAST() const { return metadata.primary_key.definition_ast; } + ASTPtr getPrimaryKeyAST() const { return metadata->primary_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. bool isPrimaryKeyDefined() const; /// Storage has primary key (maybe part of some other key). It means, that @@ -495,7 +492,7 @@ public: /// struct). void setSamplingKey(const KeyDescription & sampling_key_); /// Returns sampling expression AST for storage or nullptr if there is none. - ASTPtr getSamplingKeyAST() const { return metadata.sampling_key.definition_ast; } + ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; } /// Storage has user-defined (in CREATE query) sampling key. bool isSamplingKeyDefined() const; /// Storage has sampling key. 
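The IStorage changes above route every accessor through a shared `metadata` pointer instead of a plain member, which is what lets ALTER later publish a complete new metadata object in one step rather than mutating fields one by one. A minimal sketch of that snapshot pattern, with hypothetical simplified types (the real class also guards TTLs with `ttl_mutex` and exposes far more accessors):

#include <memory>
#include <mutex>

struct MetadataSketch
{
    // columns, keys, TTLs, constraints... collapsed into one placeholder field
    int version = 0;
};

using MetadataSnapshotPtr = std::shared_ptr<const MetadataSketch>;

class StorageSketch
{
public:
    // Readers copy the pointer once and then see one consistent metadata
    // object for the whole query, even if an ALTER publishes a new one.
    MetadataSnapshotPtr getInMemoryMetadataPtr() const
    {
        std::lock_guard lock(mutex);
        return metadata;
    }

    // Writers (ALTER) build a complete new object and swap it in one step,
    // instead of updating columns, constraints and keys field by field.
    void setInMemoryMetadata(const MetadataSketch & new_metadata)
    {
        auto fresh = std::make_shared<const MetadataSketch>(new_metadata);
        std::lock_guard lock(mutex);
        metadata = std::move(fresh);
    }

private:
    mutable std::mutex mutex;
    MetadataSnapshotPtr metadata = std::make_shared<MetadataSketch>();
};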
diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index bb721417c5b..2109afed932 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -137,7 +137,9 @@ StorageKafka::StorageKafka( , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 2a9d7766fd7..78d60163d5e 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -18,7 +18,9 @@ public: QueryProcessingStage::Enum to_stage_) : IStorage(table_id_), pipes(std::move(pipes_)), to_stage(to_stage_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } static StoragePtr createStorage(const StorageID & table_id, const ColumnsDescription & columns, Pipes pipes, QueryProcessingStage::Enum to_stage) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 8a04a9e49e4..ade2d1c967d 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -251,7 +251,9 @@ StorageLiveView::StorageLiveView( live_view_context = std::make_unique(global_context); live_view_context->makeQueryContext(); - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 84470088ebe..14be526d7f6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -387,11 +387,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, checkProperties(new_metadata, attach); /// Other parts of metadata initialized is separate methods - setColumns(std::move(new_metadata.columns)); - setSecondaryIndices(std::move(new_metadata.secondary_indices)); - setConstraints(std::move(new_metadata.constraints)); - setSortingKey(std::move(new_metadata.sorting_key)); - setPrimaryKey(std::move(new_metadata.primary_key)); + setInMemoryMetadata(new_metadata); } namespace diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 3031402715a..342a89c38ea 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -49,12 +49,7 @@ protected: : IStorage(getIDFromPart(part_)) , part(part_) { - setColumns(part_->storage.getColumns()); - setSecondaryIndices(part_->storage.getSecondaryIndices()); - setPrimaryKey(part_->storage.getPrimaryKey()); - setSortingKey(part_->storage.getSortingKey()); - setColumnTTLs(part->storage.getColumnTTLs()); - setTableTTLs(part->storage.getTableTTLs()); + setInMemoryMetadata(part_->storage.getInMemoryMetadata()); } private: diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 08dc81b4945..007625790f4 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -77,8 
+77,10 @@ StorageBuffer::StorageBuffer( , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")")) , bg_pool(global_context.getBufferFlushSchedulePool()) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } @@ -778,7 +780,7 @@ void StorageBuffer::alter(const AlterCommands & params, const Context & context, StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 9b2c5784d85..4348973ec60 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -100,7 +100,9 @@ StorageDictionary::StorageDictionary( : IStorage(table_id_) , dictionary_name(dictionary_name_) { - setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 3383c609520..bf5f729ed19 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -283,8 +283,10 @@ StorageDistributed::StorageDistributed( , storage_policy(storage_policy_) , relative_data_path(relative_data_path_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (sharding_key_) { @@ -562,7 +564,7 @@ void StorageDistributed::alter(const AlterCommands & params, const Context & con StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0bcb624bec4..f94a7b71e56 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -166,7 +166,10 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us auto & first_path = paths[0]; Block header = StorageDistributedDirectoryMonitor::createStreamFromFile(first_path)->getHeader(); - setColumns(ColumnsDescription(header.getNamesAndTypesList())); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription(header.getNamesAndTypesList())); + setInMemoryMetadata(metadata_); } } } @@ -188,10 +191,12 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.context.getPath()) { + StorageInMemoryMetadata metadata_; if (args.format_name != "Distributed") - setColumns(args.columns); + metadata_.setColumns(args.columns); - setConstraints(args.constraints); + metadata_.setConstraints(args.constraints); + setInMemoryMetadata(metadata_); } class StorageFileSource : public SourceWithProgress diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 8c186f38943..f69478a4bdd 100644 --- 
a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -388,7 +388,9 @@ StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { random_seed = random_seed_ ? sipHash64(*random_seed_) : randomSeed(); - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 352e0a43f39..082e40f6d6d 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -49,8 +49,11 @@ StorageHDFS::StorageHDFS(const String & uri_, , compression_method(compression_method_) { context.getRemoteHostFilter().checkURL(Poco::URI(uri)); - setColumns(columns_); - setConstraints(constraints_); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } namespace diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 8d23bd7bccf..ac2c0417c45 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -51,4 +51,19 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo } +void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) +{ + columns = std::move(columns_); +} + +void StorageInMemoryMetadata::setSecondaryIndices(IndicesDescription secondary_indices_) +{ + secondary_indices = std::move(secondary_indices_); +} + +void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_) +{ + constraints = std::move(constraints_); +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 889f8e49f69..f4d6e9b38b3 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -8,6 +8,7 @@ #include #include +#include namespace DB { @@ -47,6 +48,18 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); + + + //////////////////////////////////////////////////////////////////////// + void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. 
+ + void setSecondaryIndices(IndicesDescription secondary_indices_); + + void setConstraints(ConstraintsDescription constraints_); + }; +using StorageMetadataPtr = std::shared_ptr; +using MultiVersionStorageMetadataPtr = MultiVersion; + } diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index e30ae55e715..92287051bf3 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -21,7 +21,9 @@ namespace ErrorCodes StorageInput::StorageInput(const StorageID & table_id, const ColumnsDescription & columns_) : IStorage(table_id) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index bc2bbb2ce67..09be868bcfa 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -431,8 +431,10 @@ StorageLog::StorageLog( , max_compress_block_size(max_compress_block_size_) , file_checker(disk, table_path + "sizes.json") { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a0c2fa87eb2..34d5e1d4374 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -50,7 +50,9 @@ StorageMaterializedView::StorageMaterializedView( bool attach_) : IStorage(table_id_), global_context(local_context.getGlobalContext()) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -209,7 +211,7 @@ void StorageMaterializedView::alter( /// end modify query DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b55a6227127..bb89bdb5c48 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -78,8 +78,10 @@ private: StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_) : IStorage(table_id_) { - setColumns(std::move(columns_description_)); - setConstraints(std::move(constraints_)); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(std::move(columns_description_)); + metadata_.setConstraints(std::move(constraints_)); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8264eaa4cb6..3685a777bf0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -51,7 +51,9 @@ StorageMerge::StorageMerge( , table_name_regexp(table_name_regexp_) , global_context(context_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } template @@ -393,7 +395,7 @@ void StorageMerge::alter( StorageInMemoryMetadata storage_metadata = getInMemoryMetadata(); params.apply(storage_metadata, context); 
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, storage_metadata); - setColumns(storage_metadata.columns); + setInMemoryMetadata(storage_metadata); } Block StorageMerge::getQueryHeader( diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index ee44ca7948e..f9aad8a58a7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -56,8 +56,10 @@ StorageMySQL::StorageMySQL( , pool(std::move(pool_)) , global_context(context_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 182ce09ef96..7589c4b44dc 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -54,7 +54,7 @@ void StorageNull::alter( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } } diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 5fb4a16a24b..fe8bd05d53a 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -59,8 +59,10 @@ protected: StorageNull(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_) : IStorage(table_id_) { - setColumns(std::move(columns_description_)); - setConstraints(std::move(constraints_)); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_description_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 397d064ba15..acaa2bcc7d6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -206,8 +206,10 @@ StorageS3::StorageS3( , compression_method(compression_method_) { context_global.getRemoteHostFilter().checkURL(uri_.uri); - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); auto settings = context_.getStorageS3Settings().getSettings(uri.endpoint); Aws::Auth::AWSCredentials credentials(access_key_id_, secret_access_key_); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 86bfed5ac84..38b4d30c25b 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -96,8 +96,11 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( const Context & context_) : IStorage(table_id_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); + if (relative_path_.empty()) throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index b61d52657dd..b68505fa147 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -223,8 +223,10 @@ StorageStripeLog::StorageStripeLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageStripeLog")) { - setColumns(columns_); - setConstraints(constraints_); + 
StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 2a62068516e..5bca6072da0 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -336,8 +336,10 @@ StorageTinyLog::StorageTinyLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageTinyLog")) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index a69e140fe5a..0301412e029 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -43,8 +43,11 @@ IStorageURLBase::IStorageURLBase( , format_name(format_name_) { context_global.getRemoteHostFilter().checkURL(uri); - setColumns(columns_); - setConstraints(constraints_); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } namespace diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index cf0b39df8f1..5ba36a936e2 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -16,7 +16,9 @@ StorageValues::StorageValues( const NamesAndTypesList & virtuals_) : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } Pipes StorageValues::read( diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 055faed5899..60ae681e002 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -38,7 +38,9 @@ StorageView::StorageView( const ColumnsDescription & columns_) : IStorage(table_id_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index b3951bc3f75..1ceff26ba83 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -23,7 +23,9 @@ protected: public: IStorageSystemOneBlock(const String & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription(Self::getNamesAndTypes())); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription(Self::getNamesAndTypes())); + setInMemoryMetadata(metadata_); } Pipes read(const Names & column_names, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 90e52ad373e..6359e367106 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -26,7 +26,8 @@ namespace ErrorCodes StorageSystemColumns::StorageSystemColumns(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata 
metadata_; + metadata_.setColumns(ColumnsDescription( { { "database", std::make_shared() }, { "table", std::make_shared() }, @@ -45,6 +46,7 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "is_in_sampling_key", std::make_shared() }, { "compression_codec", std::make_shared() }, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f3fd51330d9..ef88c3ca058 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -30,7 +30,8 @@ protected: explicit StorageSystemDetachedParts() : IStorage({"system", "detached_parts"}) { - setColumns(ColumnsDescription{{ + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription{{ {"database", std::make_shared()}, {"table", std::make_shared()}, {"partition_id", std::make_shared(std::make_shared())}, @@ -41,6 +42,7 @@ protected: {"max_block_number", std::make_shared(std::make_shared())}, {"level", std::make_shared(std::make_shared())} }}); + setInMemoryMetadata(metadata_); } Pipes read( diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index b5a5026b2e7..5905080539e 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -14,7 +14,8 @@ namespace ErrorCodes StorageSystemDisks::StorageSystemDisks(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription( { {"name", std::make_shared()}, {"path", std::make_shared()}, @@ -22,6 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, })); + setInMemoryMetadata(metadata_); } Pipes StorageSystemDisks::read( diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 20dcc58f652..0fa7b71555e 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -118,7 +118,9 @@ private: StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, std::optional limit_, UInt64 offset_, bool even_distribution_) : IStorage(table_id), multithreaded(multithreaded_), even_distribution(even_distribution_), limit(limit_), offset(offset_) { - setColumns(ColumnsDescription({{"number", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"number", std::make_shared()}})); + setInMemoryMetadata(metadata_); } Pipes StorageSystemNumbers::read( diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 6cbb634d2b7..e7c8c446847 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -14,7 +14,9 @@ namespace DB StorageSystemOne::StorageSystemOne(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 925a5df889e..42a432489f4 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ 
b/src/Storages/System/StorageSystemPartsBase.cpp @@ -277,7 +277,9 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL add_alias("bytes", "bytes_on_disk"); add_alias("marks_size", "marks_bytes"); - setColumns(tmp_columns); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(tmp_columns); + setInMemoryMetadata(metadata_); } NamesAndTypesList StorageSystemPartsBase::getVirtuals() const diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index c2cd3a1e4b1..ca71e7e5f74 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -19,7 +19,8 @@ namespace DB StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription({ + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({ { "database", std::make_shared() }, { "table", std::make_shared() }, { "engine", std::make_shared() }, @@ -52,6 +53,7 @@ StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) { "active_replicas", std::make_shared() }, { "zookeeper_exception", std::make_shared() }, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index acbc9d72a20..dbb47dc771a 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -17,7 +17,8 @@ namespace ErrorCodes StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & name_) : IStorage({"system", name_}) { - setColumns( + StorageInMemoryMetadata metadata_; + metadata_.setColumns( ColumnsDescription({ {"policy_name", std::make_shared()}, {"volume_name", std::make_shared()}, @@ -26,6 +27,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n {"max_data_part_size", std::make_shared()}, {"move_factor", std::make_shared()} })); + setInMemoryMetadata(metadata_); } Pipes StorageSystemStoragePolicies::read( diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 2bf6595bf53..84d441a8c6e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -33,7 +33,8 @@ namespace ErrorCodes StorageSystemTables::StorageSystemTables(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription( { {"database", std::make_shared()}, {"name", std::make_shared()}, @@ -55,6 +56,7 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"total_rows", std::make_shared(std::make_shared())}, {"total_bytes", std::make_shared(std::make_shared())}, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index cd2fa0a6059..438d31e7e02 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -84,7 +84,10 @@ private: StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multithreaded_, std::optional limit_) : IStorage(table_id_), multithreaded(multithreaded_), limit(limit_) { - setColumns(ColumnsDescription({{"zero", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"zero", std::make_shared()}})); + 
setInMemoryMetadata(metadata_); + } Pipes StorageSystemZeros::read( From aa30649ce5eb3edc14641b595ccca6c3cba38dfa Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:10:14 +0300 Subject: [PATCH 0732/2229] Working setColumns, setConstraints, setIndices --- src/Storages/IStorage.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4d01bb5370d..403f5293588 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -82,7 +82,7 @@ public: IStorage() = delete; /// Storage fields should be initialized in separate methods like setColumns /// or setTableTTLs. - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} //-V730 + explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_shared()) {} //-V730 virtual ~IStorage() = default; IStorage(const IStorage &) = delete; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 14be526d7f6..3414143c46b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -142,9 +142,8 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - setSettingsChanges(metadata_.settings_changes); - const auto settings = getSettings(); setProperties(metadata_, attach); + const auto settings = getSettings(); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); @@ -385,8 +384,6 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool attach) { checkProperties(new_metadata, attach); - - /// Other parts of metadata initialized is separate methods setInMemoryMetadata(new_metadata); } From 9f31184d7624e8220392108fd70942e87774958c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 30 May 2020 17:18:08 +0300 Subject: [PATCH 0733/2229] Support for multiple names in one CREATE/ALTER command. 
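With this change one CREATE/ALTER/DROP/SHOW CREATE statement can mention several access entity names at once. An illustrative sketch of the syntax this enables (the entity names here are made up; the authoritative grammar is whatever the parsers changed below accept):

    CREATE USER user1, user2@'%.mydomain.com';
    ALTER ROLE role1, role2 SETTINGS max_memory_usage = 100000000;
    DROP ROW POLICY filter1, filter2 ON db.table;
    SHOW CREATE USER user1, user2;

Internally, the single `name` field of these queries becomes a list: plain `Strings` for roles, quotas and settings profiles, and the new AST nodes `ASTUserNamesWithHost` and `ASTRowPolicyNames` for users and row policies. The interpreters then loop over that list and create or update one access entity per name. `RENAME TO` is only accepted in the single-name form (the parsers check `names.size() == 1`).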
--- src/Access/RowPolicy.cpp | 16 -- src/Access/RowPolicy.h | 18 ++ .../InterpreterCreateQuotaQuery.cpp | 55 ++++--- .../InterpreterCreateRoleQuery.cpp | 52 +++--- .../InterpreterCreateRowPolicyQuery.cpp | 58 +++---- .../InterpreterCreateSettingsProfileQuery.cpp | 65 ++++---- .../InterpreterCreateUserQuery.cpp | 86 +++++----- .../InterpreterDropAccessEntityQuery.cpp | 32 ++-- ...InterpreterShowCreateAccessEntityQuery.cpp | 59 ++++--- .../InterpreterShowCreateAccessEntityQuery.h | 2 +- src/Parsers/ASTCreateQuotaQuery.cpp | 16 +- src/Parsers/ASTCreateQuotaQuery.h | 2 +- src/Parsers/ASTCreateRoleQuery.cpp | 15 +- src/Parsers/ASTCreateRoleQuery.h | 2 +- src/Parsers/ASTCreateRowPolicyQuery.cpp | 15 +- src/Parsers/ASTCreateRowPolicyQuery.h | 7 +- src/Parsers/ASTCreateSettingsProfileQuery.cpp | 15 +- src/Parsers/ASTCreateSettingsProfileQuery.h | 2 +- src/Parsers/ASTCreateUserQuery.cpp | 4 +- src/Parsers/ASTCreateUserQuery.h | 3 +- src/Parsers/ASTDropAccessEntityQuery.cpp | 50 +++--- src/Parsers/ASTDropAccessEntityQuery.h | 5 +- src/Parsers/ASTRowPolicyName.cpp | 134 +++++++++++++++ src/Parsers/ASTRowPolicyName.h | 49 ++++++ .../ASTShowCreateAccessEntityQuery.cpp | 35 +++- src/Parsers/ASTShowCreateAccessEntityQuery.h | 12 +- src/Parsers/ASTUserNameWithHost.cpp | 74 +++++++++ src/Parsers/ASTUserNameWithHost.h | 53 ++++++ src/Parsers/ParserCreateQuotaQuery.cpp | 8 +- src/Parsers/ParserCreateRoleQuery.cpp | 8 +- src/Parsers/ParserCreateRowPolicyQuery.cpp | 20 ++- .../ParserCreateSettingsProfileQuery.cpp | 8 +- src/Parsers/ParserCreateUserQuery.cpp | 31 ++-- src/Parsers/ParserDropAccessEntityQuery.cpp | 83 ++-------- src/Parsers/ParserRowPolicyName.cpp | 154 ++++++++++++++++++ src/Parsers/ParserRowPolicyName.h | 47 ++++++ .../ParserShowCreateAccessEntityQuery.cpp | 29 ++-- src/Parsers/ParserShowGrantsQuery.cpp | 4 +- src/Parsers/ParserUserNameWithHost.cpp | 56 +++++++ src/Parsers/ParserUserNameWithHost.h | 26 +++ .../parseIdentifierOrStringLiteral.cpp | 22 +++ src/Parsers/parseIdentifierOrStringLiteral.h | 3 + src/Parsers/parseUserName.cpp | 56 +++---- src/Parsers/parseUserName.h | 17 +- src/Parsers/ya.make | 4 + .../test_access_control_on_cluster/test.py | 1 - 46 files changed, 1090 insertions(+), 423 deletions(-) create mode 100644 src/Parsers/ASTRowPolicyName.cpp create mode 100644 src/Parsers/ASTRowPolicyName.h create mode 100644 src/Parsers/ASTUserNameWithHost.cpp create mode 100644 src/Parsers/ASTUserNameWithHost.h create mode 100644 src/Parsers/ParserRowPolicyName.cpp create mode 100644 src/Parsers/ParserRowPolicyName.h create mode 100644 src/Parsers/ParserUserNameWithHost.cpp create mode 100644 src/Parsers/ParserUserNameWithHost.h diff --git a/src/Access/RowPolicy.cpp b/src/Access/RowPolicy.cpp index acacaf01c6c..7441f915a46 100644 --- a/src/Access/RowPolicy.cpp +++ b/src/Access/RowPolicy.cpp @@ -11,22 +11,6 @@ namespace ErrorCodes } -String RowPolicy::NameParts::getName() const -{ - String name; - name.reserve(database.length() + table_name.length() + short_name.length() + 6); - name += backQuoteIfNeed(short_name); - name += " ON "; - if (!database.empty()) - { - name += backQuoteIfNeed(database); - name += '.'; - } - name += backQuoteIfNeed(table_name); - return name; -} - - void RowPolicy::setDatabase(const String & database) { name_parts.database = database; diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index 7febf5991fb..9d5b00b427d 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -23,7 +23,9 @@ struct RowPolicy : public IAccessEntity String database; 
String table_name; + bool empty() const { return short_name.empty(); } String getName() const; + String toString() const { return getName(); } auto toTuple() const { return std::tie(short_name, database, table_name); } friend bool operator ==(const NameParts & left, const NameParts & right) { return left.toTuple() == right.toTuple(); } friend bool operator !=(const NameParts & left, const NameParts & right) { return left.toTuple() != right.toTuple(); } @@ -153,4 +155,20 @@ inline String toString(RowPolicy::ConditionType type) return RowPolicy::ConditionTypeInfo::get(type).raw_name; } + +inline String RowPolicy::NameParts::getName() const +{ + String name; + name.reserve(database.length() + table_name.length() + short_name.length() + 6); + name += backQuoteIfNeed(short_name); + name += " ON "; + if (!database.empty()) + { + name += backQuoteIfNeed(database); + name += '.'; + } + name += backQuoteIfNeed(table_name); + return name; +} + } diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/InterpreterCreateQuotaQuery.cpp index 907532c3d89..0cca163beec 100644 --- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuotaQuery.cpp @@ -15,15 +15,18 @@ namespace DB { namespace { -void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, const std::optional & roles_from_query = {}) + void updateQuotaFromQueryImpl( + Quota & quota, + const ASTCreateQuotaQuery & query, + const String & override_name, + const std::optional & override_to_roles) { - if (query.alter) - { - if (!query.new_name.empty()) - quota.setName(query.new_name); - } - else - quota.setName(query.name); + if (!override_name.empty()) + quota.setName(override_name); + else if (!query.new_name.empty()) + quota.setName(query.new_name); + else if (query.names.size() == 1) + quota.setName(query.names.front()); if (query.key_type) quota.key_type = *query.key_type; @@ -59,15 +62,10 @@ void updateQuotaFromQueryImpl(Quota & quota, const ASTCreateQuotaQuery & query, quota_limits.max[resource_type] = query_limits.max[resource_type]; } - const ExtendedRoleSet * roles = nullptr; - std::optional temp_role_set; - if (roles_from_query) - roles = &*roles_from_query; + if (override_to_roles) + quota.to_roles = *override_to_roles; else if (query.roles) - roles = &temp_role_set.emplace(*query.roles); - - if (roles) - quota.to_roles = *roles; + quota.to_roles = *query.roles; } } @@ -93,28 +91,33 @@ BlockIO InterpreterCreateQuotaQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_quota = typeid_cast>(entity->clone()); - updateQuotaFromQueryImpl(*updated_quota, query, roles_from_query); + updateQuotaFromQueryImpl(*updated_quota, query, {}, roles_from_query); return updated_quota; }; if (query.if_exists) { - if (auto id = access_control.find(query.name)) - access_control.tryUpdate(*id, update_func); + auto ids = access_control.find(query.names); + access_control.tryUpdate(ids, update_func); } else - access_control.update(access_control.getID(query.name), update_func); + access_control.update(access_control.getIDs(query.names), update_func); } else { - auto new_quota = std::make_shared(); - updateQuotaFromQueryImpl(*new_quota, query, roles_from_query); + std::vector new_quotas; + for (const String & name : query.names) + { + auto new_quota = std::make_shared(); + updateQuotaFromQueryImpl(*new_quota, query, name, roles_from_query); + new_quotas.emplace_back(std::move(new_quota)); + } if (query.if_not_exists) - 
access_control.tryInsert(new_quota); + access_control.tryInsert(new_quotas); else if (query.or_replace) - access_control.insertOrReplace(new_quota); + access_control.insertOrReplace(new_quotas); else - access_control.insert(new_quota); + access_control.insert(new_quotas); } return {}; @@ -123,7 +126,7 @@ BlockIO InterpreterCreateQuotaQuery::execute() void InterpreterCreateQuotaQuery::updateQuotaFromQuery(Quota & quota, const ASTCreateQuotaQuery & query) { - updateQuotaFromQueryImpl(quota, query); + updateQuotaFromQueryImpl(quota, query, {}, {}); } } diff --git a/src/Interpreters/InterpreterCreateRoleQuery.cpp b/src/Interpreters/InterpreterCreateRoleQuery.cpp index ed9135b2bb6..2fa04eebae1 100644 --- a/src/Interpreters/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/InterpreterCreateRoleQuery.cpp @@ -13,25 +13,20 @@ namespace void updateRoleFromQueryImpl( Role & role, const ASTCreateRoleQuery & query, - const std::optional & settings_from_query = {}) + const String & override_name, + const std::optional & override_settings) { - if (query.alter) - { - if (!query.new_name.empty()) - role.setName(query.new_name); - } - else - role.setName(query.name); + if (!override_name.empty()) + role.setName(override_name); + else if (!query.new_name.empty()) + role.setName(query.new_name); + else if (query.names.size() == 1) + role.setName(query.names.front()); - const SettingsProfileElements * settings = nullptr; - std::optional temp_settings; - if (settings_from_query) - settings = &*settings_from_query; + if (override_settings) + role.settings = *override_settings; else if (query.settings) - settings = &temp_settings.emplace(*query.settings); - - if (settings) - role.settings = *settings; + role.settings = *query.settings; } } @@ -57,28 +52,33 @@ BlockIO InterpreterCreateRoleQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_role = typeid_cast>(entity->clone()); - updateRoleFromQueryImpl(*updated_role, query, settings_from_query); + updateRoleFromQueryImpl(*updated_role, query, {}, settings_from_query); return updated_role; }; if (query.if_exists) { - if (auto id = access_control.find(query.name)) - access_control.tryUpdate(*id, update_func); + auto ids = access_control.find(query.names); + access_control.tryUpdate(ids, update_func); } else - access_control.update(access_control.getID(query.name), update_func); + access_control.update(access_control.getIDs(query.names), update_func); } else { - auto new_role = std::make_shared(); - updateRoleFromQueryImpl(*new_role, query, settings_from_query); + std::vector new_roles; + for (const auto & name : query.names) + { + auto new_role = std::make_shared(); + updateRoleFromQueryImpl(*new_role, query, name, settings_from_query); + new_roles.emplace_back(std::move(new_role)); + } if (query.if_not_exists) - access_control.tryInsert(new_role); + access_control.tryInsert(new_roles); else if (query.or_replace) - access_control.insertOrReplace(new_role); + access_control.insertOrReplace(new_roles); else - access_control.insert(new_role); + access_control.insert(new_roles); } return {}; @@ -87,6 +87,6 @@ BlockIO InterpreterCreateRoleQuery::execute() void InterpreterCreateRoleQuery::updateRoleFromQuery(Role & role, const ASTCreateRoleQuery & query) { - updateRoleFromQueryImpl(role, query); + updateRoleFromQueryImpl(role, query, {}, {}); } } diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp index 778a32019d9..bfd7d60b397 100644 --- 
a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -16,15 +17,15 @@ namespace void updateRowPolicyFromQueryImpl( RowPolicy & policy, const ASTCreateRowPolicyQuery & query, - const std::optional & roles_from_query = {}) + const RowPolicy::NameParts & override_name, + const std::optional & override_to_roles) { - if (query.alter) - { - if (!query.new_short_name.empty()) - policy.setShortName(query.new_short_name); - } - else - policy.setNameParts(query.name_parts); + if (!override_name.empty()) + policy.setNameParts(override_name); + else if (!query.new_short_name.empty()) + policy.setShortName(query.new_short_name); + else if (query.names->name_parts.size() == 1) + policy.setNameParts(query.names->name_parts.front()); if (query.is_restrictive) policy.setRestrictive(*query.is_restrictive); @@ -36,15 +37,10 @@ namespace policy.conditions[condition_type] = *condition ? serializeAST(**condition) : String{}; } - const ExtendedRoleSet * roles = nullptr; - std::optional temp_role_set; - if (roles_from_query) - roles = &*roles_from_query; + if (override_to_roles) + policy.to_roles = *override_to_roles; else if (query.roles) - roles = &temp_role_set.emplace(*query.roles); - - if (roles) - policy.to_roles = *roles; + policy.to_roles = *query.roles; } } @@ -61,40 +57,46 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() return executeDDLQueryOnCluster(query_ptr, context); } + assert(query.names->cluster.empty()); std::optional roles_from_query; if (query.roles) roles_from_query = ExtendedRoleSet{*query.roles, access_control, context.getUserID()}; - if (query.name_parts.database.empty()) - query.name_parts.database = context.getCurrentDatabase(); + query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_policy = typeid_cast>(entity->clone()); - updateRowPolicyFromQueryImpl(*updated_policy, query, roles_from_query); + updateRowPolicyFromQueryImpl(*updated_policy, query, {}, roles_from_query); return updated_policy; }; + Strings names = query.names->toStrings(); if (query.if_exists) { - if (auto id = access_control.find(query.name_parts.getName())) - access_control.tryUpdate(*id, update_func); + auto ids = access_control.find(names); + access_control.tryUpdate(ids, update_func); } else - access_control.update(access_control.getID(query.name_parts.getName()), update_func); + access_control.update(access_control.getIDs(names), update_func); } else { - auto new_policy = std::make_shared(); - updateRowPolicyFromQueryImpl(*new_policy, query, roles_from_query); + std::vector new_policies; + for (const auto & name_parts : query.names->name_parts) + { + auto new_policy = std::make_shared(); + updateRowPolicyFromQueryImpl(*new_policy, query, name_parts, roles_from_query); + new_policies.emplace_back(std::move(new_policy)); + } if (query.if_not_exists) - access_control.tryInsert(new_policy); + access_control.tryInsert(new_policies); else if (query.or_replace) - access_control.insertOrReplace(new_policy); + access_control.insertOrReplace(new_policies); else - access_control.insert(new_policy); + access_control.insert(new_policies); } return {}; @@ -103,7 +105,7 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() void InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(RowPolicy & policy, const ASTCreateRowPolicyQuery & query) { - 
updateRowPolicyFromQueryImpl(policy, query); + updateRowPolicyFromQueryImpl(policy, query, {}, {}); } } diff --git a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp index cb0b5587bdc..ac2a4249986 100644 --- a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp @@ -15,36 +15,26 @@ namespace void updateSettingsProfileFromQueryImpl( SettingsProfile & profile, const ASTCreateSettingsProfileQuery & query, - const std::optional & settings_from_query = {}, - const std::optional & roles_from_query = {}) + const String & override_name, + const std::optional & override_settings, + const std::optional & override_to_roles) { - if (query.alter) - { - if (!query.new_name.empty()) - profile.setName(query.new_name); - } - else - profile.setName(query.name); + if (!override_name.empty()) + profile.setName(override_name); + else if (!query.new_name.empty()) + profile.setName(query.new_name); + else if (query.names.size() == 1) + profile.setName(query.names.front()); - const SettingsProfileElements * settings = nullptr; - std::optional temp_settings; - if (settings_from_query) - settings = &*settings_from_query; + if (override_settings) + profile.elements = *override_settings; else if (query.settings) - settings = &temp_settings.emplace(*query.settings); + profile.elements = *query.settings; - if (settings) - profile.elements = *settings; - - const ExtendedRoleSet * roles = nullptr; - std::optional temp_role_set; - if (roles_from_query) - roles = &*roles_from_query; + if (override_to_roles) + profile.to_roles = *override_to_roles; else if (query.to_roles) - roles = &temp_role_set.emplace(*query.to_roles); - - if (roles) - profile.to_roles = *roles; + profile.to_roles = *query.to_roles; } } @@ -77,28 +67,33 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_profile = typeid_cast>(entity->clone()); - updateSettingsProfileFromQueryImpl(*updated_profile, query, settings_from_query, roles_from_query); + updateSettingsProfileFromQueryImpl(*updated_profile, query, {}, settings_from_query, roles_from_query); return updated_profile; }; if (query.if_exists) { - if (auto id = access_control.find(query.name)) - access_control.tryUpdate(*id, update_func); + auto ids = access_control.find(query.names); + access_control.tryUpdate(ids, update_func); } else - access_control.update(access_control.getID(query.name), update_func); + access_control.update(access_control.getIDs(query.names), update_func); } else { - auto new_profile = std::make_shared(); - updateSettingsProfileFromQueryImpl(*new_profile, query, settings_from_query, roles_from_query); + std::vector new_profiles; + for (const auto & name : query.names) + { + auto new_profile = std::make_shared(); + updateSettingsProfileFromQueryImpl(*new_profile, query, name, settings_from_query, roles_from_query); + new_profiles.emplace_back(std::move(new_profile)); + } if (query.if_not_exists) - access_control.tryInsert(new_profile); + access_control.tryInsert(new_profiles); else if (query.or_replace) - access_control.insertOrReplace(new_profile); + access_control.insertOrReplace(new_profiles); else - access_control.insert(new_profile); + access_control.insert(new_profiles); } return {}; @@ -107,6 +102,6 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() void 
InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(SettingsProfile & SettingsProfile, const ASTCreateSettingsProfileQuery & query) { - updateSettingsProfileFromQueryImpl(SettingsProfile, query); + updateSettingsProfileFromQueryImpl(SettingsProfile, query, {}, {}, {}); } } diff --git a/src/Interpreters/InterpreterCreateUserQuery.cpp b/src/Interpreters/InterpreterCreateUserQuery.cpp index 7c488ddf8e9..8b57703f08c 100644 --- a/src/Interpreters/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/InterpreterCreateUserQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -17,51 +18,50 @@ namespace void updateUserFromQueryImpl( User & user, const ASTCreateUserQuery & query, - const std::optional & default_roles_from_query = {}, - const std::optional & settings_from_query = {}) + const std::shared_ptr & override_name, + const std::optional & override_default_roles, + const std::optional & override_settings) { - if (query.alter) - { - if (!query.new_name.empty()) - user.setName(query.new_name); - } - else - user.setName(query.name); + if (override_name) + user.setName(override_name->toString()); + else if (!query.new_name.empty()) + user.setName(query.new_name); + else if (query.names->size() == 1) + user.setName(query.names->front()->toString()); if (query.authentication) user.authentication = *query.authentication; - if (query.hosts) + if (override_name && !override_name->host_pattern.empty()) + { + user.allowed_client_hosts = AllowedClientHosts{}; + user.allowed_client_hosts.addLikePattern(override_name->host_pattern); + } + else if (query.hosts) user.allowed_client_hosts = *query.hosts; + if (query.remove_hosts) user.allowed_client_hosts.remove(*query.remove_hosts); if (query.add_hosts) user.allowed_client_hosts.add(*query.add_hosts); - const ExtendedRoleSet * default_roles = nullptr; - std::optional temp_role_set; - if (default_roles_from_query) - default_roles = &*default_roles_from_query; - else if (query.default_roles) - default_roles = &temp_role_set.emplace(*query.default_roles); - - if (default_roles) + auto set_default_roles = [&](const ExtendedRoleSet & default_roles_) { - if (!query.alter && !default_roles->all) - user.granted_roles.grant(default_roles->getMatchingIDs()); + if (!query.alter && !default_roles_.all) + user.granted_roles.grant(default_roles_.getMatchingIDs()); - InterpreterSetRoleQuery::updateUserSetDefaultRoles(user, *default_roles); - } + InterpreterSetRoleQuery::updateUserSetDefaultRoles(user, default_roles_); + }; - const SettingsProfileElements * settings = nullptr; - std::optional temp_settings; - if (settings_from_query) - settings = &*settings_from_query; + if (override_default_roles) + set_default_roles(*override_default_roles); + else if (query.default_roles) + set_default_roles(*query.default_roles); + + if (override_settings) + user.settings = *override_settings; else if (query.settings) - settings = &temp_settings.emplace(*query.settings); - - if (settings) - user.settings = *settings; + user.settings = *query.settings; } } @@ -96,28 +96,34 @@ BlockIO InterpreterCreateUserQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_user = typeid_cast>(entity->clone()); - updateUserFromQueryImpl(*updated_user, query, default_roles_from_query, settings_from_query); + updateUserFromQueryImpl(*updated_user, query, {}, default_roles_from_query, settings_from_query); return updated_user; }; + Strings names = query.names->toStrings(); if (query.if_exists) { - if 
(auto id = access_control.find(query.name)) - access_control.tryUpdate(*id, update_func); + auto ids = access_control.find(names); + access_control.tryUpdate(ids, update_func); } else - access_control.update(access_control.getID(query.name), update_func); + access_control.update(access_control.getIDs(names), update_func); } else { - auto new_user = std::make_shared(); - updateUserFromQueryImpl(*new_user, query, default_roles_from_query, settings_from_query); + std::vector new_users; + for (const auto & name : *query.names) + { + auto new_user = std::make_shared(); + updateUserFromQueryImpl(*new_user, query, name, default_roles_from_query, settings_from_query); + new_users.emplace_back(std::move(new_user)); + } if (query.if_not_exists) - access_control.tryInsert(new_user); + access_control.tryInsert(new_users); else if (query.or_replace) - access_control.insertOrReplace(new_user); + access_control.insertOrReplace(new_users); else - access_control.insert(new_user); + access_control.insert(new_users); } return {}; @@ -126,7 +132,7 @@ BlockIO InterpreterCreateUserQuery::execute() void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query) { - updateUserFromQueryImpl(user, query); + updateUserFromQueryImpl(user, query, {}, {}, {}); } } diff --git a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/InterpreterDropAccessEntityQuery.cpp index be82147a322..d79d239ee12 100644 --- a/src/Interpreters/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterDropAccessEntityQuery.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -30,26 +31,21 @@ BlockIO InterpreterDropAccessEntityQuery::execute() if (!query.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, context); - if (query.type == EntityType::ROW_POLICY) - { - Strings names; - for (auto & name_parts : query.row_policies_name_parts) - { - if (name_parts.database.empty()) - name_parts.database = context.getCurrentDatabase(); - names.emplace_back(name_parts.getName()); - } - if (query.if_exists) - access_control.tryRemove(access_control.find(names)); - else - access_control.remove(access_control.getIDs(names)); - return {}; - } + query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); - if (query.if_exists) - access_control.tryRemove(access_control.find(query.type, query.names)); + auto do_drop = [&](const Strings & names) + { + if (query.if_exists) + access_control.tryRemove(access_control.find(query.type, names)); + else + access_control.remove(access_control.getIDs(query.type, names)); + }; + + if (query.type == EntityType::ROW_POLICY) + do_drop(query.row_policy_names->toStrings()); else - access_control.remove(access_control.getIDs(query.type, query.names)); + do_drop(query.names); + return {}; } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index e37c31aab22..d3ab5ac5001 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -6,8 +6,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -42,7 +44,8 @@ namespace bool attach_mode) { auto query = std::make_shared(); - query->name = user.getName(); + query->names = std::make_shared(); + query->names->push_back(user.getName()); query->attach = attach_mode; if (user.allowed_client_hosts != AllowedClientHosts::AnyHostTag{}) @@ -77,7 +80,7 @@ namespace ASTPtr 
getCreateQueryImpl(const Role & role, const AccessControlManager * manager, bool attach_mode) { auto query = std::make_shared<ASTCreateRoleQuery>(); - query->name = role.getName(); + query->names.emplace_back(role.getName()); query->attach = attach_mode; if (!role.settings.empty()) @@ -95,7 +98,7 @@ namespace ASTPtr getCreateQueryImpl(const SettingsProfile & profile, const AccessControlManager * manager, bool attach_mode) { auto query = std::make_shared<ASTCreateSettingsProfileQuery>(); - query->name = profile.getName(); + query->names.emplace_back(profile.getName()); query->attach = attach_mode; if (!profile.elements.empty()) @@ -126,7 +129,7 @@ namespace bool attach_mode) { auto query = std::make_shared<ASTCreateQuotaQuery>(); - query->name = quota.getName(); + query->names.emplace_back(quota.getName()); query->attach = attach_mode; query->key_type = quota.key_type; @@ -160,7 +163,8 @@ namespace bool attach_mode) { auto query = std::make_shared<ASTCreateRowPolicyQuery>(); - query->name_parts = policy.getNameParts(); + query->names = std::make_shared<ASTRowPolicyNames>(); + query->names->name_parts.emplace_back(policy.getNameParts()); query->attach = attach_mode; if (policy.isRestrictive()) @@ -228,17 +232,17 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() { auto & show_query = query_ptr->as<ASTShowCreateAccessEntityQuery &>(); - /// Build a create query. - ASTPtr create_query = getCreateQuery(show_query); + /// Build the create queries. + ASTs create_queries = getCreateQueries(show_query); /// Build the result column. MutableColumnPtr column = ColumnString::create(); - if (create_query) + std::stringstream create_query_ss; + for (const auto & create_query : create_queries) { - std::stringstream create_query_ss; formatAST(*create_query, create_query_ss, false, true); - String create_query_str = create_query_ss.str(); - column->insert(create_query_str); + column->insert(create_query_ss.str()); + create_query_ss.str(""); } /// Prepare description of the result column. 
@@ -253,38 +257,49 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() } -ASTPtr InterpreterShowCreateAccessEntityQuery::getCreateQuery(ASTShowCreateAccessEntityQuery & show_query) const +ASTs InterpreterShowCreateAccessEntityQuery::getCreateQueries(ASTShowCreateAccessEntityQuery & show_query) const { const auto & access_control = context.getAccessControlManager(); context.checkAccess(getRequiredAccess()); + show_query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); if (show_query.current_user) { auto user = context.getUser(); if (!user) - return nullptr; - return getCreateQueryImpl(*user, &access_control, false); + return {}; + return {getCreateQueryImpl(*user, &access_control, false)}; } if (show_query.current_quota) { auto usage = context.getQuotaUsage(); if (!usage) - return nullptr; + return {}; auto quota = access_control.read(usage->quota_id); - return getCreateQueryImpl(*quota, &access_control, false); + return {getCreateQueryImpl(*quota, &access_control, false)}; } + ASTs list; + if (show_query.type == EntityType::ROW_POLICY) { - if (show_query.row_policy_name_parts.database.empty()) - show_query.row_policy_name_parts.database = context.getCurrentDatabase(); - RowPolicyPtr policy = access_control.read(show_query.row_policy_name_parts.getName()); - return getCreateQueryImpl(*policy, &access_control, false); + for (const String & name : show_query.row_policy_names->toStrings()) + { + RowPolicyPtr policy = access_control.read(name); + list.push_back(getCreateQueryImpl(*policy, &access_control, false)); + } + } + else + { + for (const String & name : show_query.names) + { + auto entity = access_control.read(access_control.getID(show_query.type, name)); + list.push_back(getCreateQueryImpl(*entity, &access_control, false)); + } } - auto entity = access_control.read(access_control.getID(show_query.type, show_query.name)); - return getCreateQueryImpl(*entity, &access_control, false); + return list; } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h index ee28bfbdc4a..0d2978cff6c 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h @@ -29,7 +29,7 @@ public: private: BlockInputStreamPtr executeImpl(); - ASTPtr getCreateQuery(ASTShowCreateAccessEntityQuery & show_query) const; + ASTs getCreateQueries(ASTShowCreateAccessEntityQuery & show_query) const; AccessRightsElements getRequiredAccess() const; ASTPtr query_ptr; diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/ASTCreateQuotaQuery.cpp index bdfd1b32e96..d33af6126f1 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/ASTCreateQuotaQuery.cpp @@ -23,6 +23,19 @@ namespace } + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + settings.ostr << " "; + bool need_comma = false; + for (const String & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << backQuoteIfNeed(name); + } + } + + void formatRenameTo(const String & new_name, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " RENAME TO " << (settings.hilite ? IAST::hilite_none : "") @@ -130,8 +143,7 @@ void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatStat else if (or_replace) settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? 
hilite_none : ""); - settings.ostr << " " << backQuoteIfNeed(name); - + formatNames(names, settings); formatOnCluster(settings); if (!new_name.empty()) diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/ASTCreateQuotaQuery.h index b9994e8ec3a..370083f4e25 100644 --- a/src/Parsers/ASTCreateQuotaQuery.h +++ b/src/Parsers/ASTCreateQuotaQuery.h @@ -38,7 +38,7 @@ public: using KeyType = Quota::KeyType; using ResourceAmount = Quota::ResourceAmount; - String name; + Strings names; String new_name; std::optional key_type; diff --git a/src/Parsers/ASTCreateRoleQuery.cpp b/src/Parsers/ASTCreateRoleQuery.cpp index f3873f7a3eb..5ccfd9c6bd5 100644 --- a/src/Parsers/ASTCreateRoleQuery.cpp +++ b/src/Parsers/ASTCreateRoleQuery.cpp @@ -7,6 +7,18 @@ namespace DB { namespace { + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + settings.ostr << " "; + bool need_comma = false; + for (const String & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << backQuoteIfNeed(name); + } + } + void formatRenameTo(const String & new_name, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " RENAME TO " << (settings.hilite ? IAST::hilite_none : "") @@ -52,8 +64,7 @@ void ASTCreateRoleQuery::formatImpl(const FormatSettings & format, FormatState & else if (or_replace) format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); - format.ostr << " " << backQuoteIfNeed(name); - + formatNames(names, format); formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/ASTCreateRoleQuery.h b/src/Parsers/ASTCreateRoleQuery.h index ab306dd5dec..422bb0e681d 100644 --- a/src/Parsers/ASTCreateRoleQuery.h +++ b/src/Parsers/ASTCreateRoleQuery.h @@ -26,7 +26,7 @@ public: bool if_not_exists = false; bool or_replace = false; - String name; + Strings names; String new_name; std::shared_ptr settings; diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/ASTCreateRowPolicyQuery.cpp index 973f3c6b930..caa52e3ac58 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/ASTCreateRowPolicyQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -154,13 +155,11 @@ void ASTCreateRowPolicyQuery::formatImpl(const FormatSettings & settings, Format else if (or_replace) settings.ostr << (settings.hilite ? hilite_keyword : "") << " OR REPLACE" << (settings.hilite ? hilite_none : ""); - const String & database = name_parts.database; - const String & table_name = name_parts.table_name; - const String & short_name = name_parts.short_name; - settings.ostr << " " << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " - << (settings.hilite ? hilite_none : "") << (database.empty() ? 
String{} : backQuoteIfNeed(database) + ".") << table_name; + settings.ostr << " "; + names->format(settings); formatOnCluster(settings); + assert(names->cluster.empty()); if (!new_short_name.empty()) formatRenameTo(new_short_name, settings); @@ -180,4 +179,10 @@ void ASTCreateRowPolicyQuery::replaceCurrentUserTagWithName(const String & curre if (roles) roles->replaceCurrentUserTagWithName(current_user_name); } + +void ASTCreateRowPolicyQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) const +{ + if (names) + names->replaceEmptyDatabaseWithCurrent(current_database); +} } diff --git a/src/Parsers/ASTCreateRowPolicyQuery.h b/src/Parsers/ASTCreateRowPolicyQuery.h index 8aa44b784aa..af561b47e12 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/ASTCreateRowPolicyQuery.h @@ -9,6 +9,7 @@ namespace DB { +class ASTRowPolicyNames; class ASTExtendedRoleSet; /** CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] name ON [database.]table @@ -36,7 +37,7 @@ public: bool if_not_exists = false; bool or_replace = false; - RowPolicy::NameParts name_parts; + std::shared_ptr names; String new_short_name; std::optional is_restrictive; @@ -47,7 +48,9 @@ public: String getID(char) const override; ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; - void replaceCurrentUserTagWithName(const String & current_user_name) const; ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } + + void replaceCurrentUserTagWithName(const String & current_user_name) const; + void replaceEmptyDatabaseWithCurrent(const String & current_database) const; }; } diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/ASTCreateSettingsProfileQuery.cpp index 601425d3446..21d8c20ffc1 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ASTCreateSettingsProfileQuery.cpp @@ -8,6 +8,18 @@ namespace DB { namespace { + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + settings.ostr << " "; + bool need_comma = false; + for (const String & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << backQuoteIfNeed(name); + } + } + void formatRenameTo(const String & new_name, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " RENAME TO " << (settings.hilite ? IAST::hilite_none : "") @@ -59,8 +71,7 @@ void ASTCreateSettingsProfileQuery::formatImpl(const FormatSettings & format, Fo else if (or_replace) format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? 
hilite_none : ""); - format.ostr << " " << backQuoteIfNeed(name); - + formatNames(names, format); formatOnCluster(format); if (!new_name.empty()) diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.h b/src/Parsers/ASTCreateSettingsProfileQuery.h index cd470283410..bb2a9474504 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/ASTCreateSettingsProfileQuery.h @@ -29,7 +29,7 @@ public: bool if_not_exists = false; bool or_replace = false; - String name; + Strings names; String new_name; std::shared_ptr settings; diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index e5c1178285b..60f61fbf51f 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -212,7 +213,8 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & else if (or_replace) format.ostr << (format.hilite ? hilite_keyword : "") << " OR REPLACE" << (format.hilite ? hilite_none : ""); - format.ostr << " " << backQuoteIfNeed(name); + format.ostr << " "; + names->format(format); formatOnCluster(format); diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 28ef6c059da..565c82bc98e 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -8,6 +8,7 @@ namespace DB { +class ASTUserNamesWithHost; class ASTExtendedRoleSet; class ASTSettingsProfileElements; @@ -34,7 +35,7 @@ public: bool if_not_exists = false; bool or_replace = false; - String name; + std::shared_ptr names; String new_name; std::optional authentication; diff --git a/src/Parsers/ASTDropAccessEntityQuery.cpp b/src/Parsers/ASTDropAccessEntityQuery.cpp index 9f7a1d86221..fe98d8b4158 100644 --- a/src/Parsers/ASTDropAccessEntityQuery.cpp +++ b/src/Parsers/ASTDropAccessEntityQuery.cpp @@ -1,10 +1,25 @@ #include +#include #include namespace DB { -using EntityTypeInfo = IAccessEntity::TypeInfo; +namespace +{ + using EntityTypeInfo = IAccessEntity::TypeInfo; + + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ','; + settings.ostr << ' ' << backQuoteIfNeed(name); + } + } +} String ASTDropAccessEntityQuery::getID(char) const @@ -28,32 +43,19 @@ void ASTDropAccessEntityQuery::formatImpl(const FormatSettings & settings, Forma if (type == EntityType::ROW_POLICY) { - bool need_comma = false; - for (const auto & name_parts : row_policies_name_parts) - { - if (need_comma) - settings.ostr << ','; - need_comma = true; - const String & database = name_parts.database; - const String & table_name = name_parts.table_name; - const String & short_name = name_parts.short_name; - settings.ostr << ' ' << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " - << (settings.hilite ? hilite_none : "") << (database.empty() ? 
String{} : backQuoteIfNeed(database) + ".") - << backQuoteIfNeed(table_name); - } + settings.ostr << " "; + row_policy_names->format(settings); } else - { - bool need_comma = false; - for (const auto & name : names) - { - if (need_comma) - settings.ostr << ','; - need_comma = true; - settings.ostr << ' ' << backQuoteIfNeed(name); - } - } + formatNames(names, settings); formatOnCluster(settings); } + + +void ASTDropAccessEntityQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) const +{ + if (row_policy_names) + row_policy_names->replaceEmptyDatabaseWithCurrent(current_database); +} } diff --git a/src/Parsers/ASTDropAccessEntityQuery.h b/src/Parsers/ASTDropAccessEntityQuery.h index 160b0c2e212..76a5f450566 100644 --- a/src/Parsers/ASTDropAccessEntityQuery.h +++ b/src/Parsers/ASTDropAccessEntityQuery.h @@ -7,6 +7,7 @@ namespace DB { +class ASTRowPolicyNames; /** DROP USER [IF EXISTS] name [,...] * DROP ROLE [IF EXISTS] name [,...] @@ -22,11 +23,13 @@ public: EntityType type; bool if_exists = false; Strings names; - std::vector row_policies_name_parts; + std::shared_ptr row_policy_names; String getID(char) const override; ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabaseWithCurrent(const String & current_database) const; }; } diff --git a/src/Parsers/ASTRowPolicyName.cpp b/src/Parsers/ASTRowPolicyName.cpp new file mode 100644 index 00000000000..5e3c494ccd3 --- /dev/null +++ b/src/Parsers/ASTRowPolicyName.cpp @@ -0,0 +1,134 @@ +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +void ASTRowPolicyName::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + const String & database = name_parts.database; + const String & table_name = name_parts.table_name; + const String & short_name = name_parts.short_name; + settings.ostr << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " + << (settings.hilite ? hilite_none : "") << (database.empty() ? String{} : backQuoteIfNeed(database) + ".") + << backQuoteIfNeed(table_name); + + formatOnCluster(settings); +} + + +void ASTRowPolicyName::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + if (name_parts.database.empty()) + name_parts.database = current_database; +} + + +void ASTRowPolicyNames::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + if (name_parts.empty()) + throw Exception("No names of row policies in AST", ErrorCodes::LOGICAL_ERROR); + + bool same_short_name = true; + if (name_parts.size() > 1) + { + for (size_t i = 1; i != name_parts.size(); ++i) + if (name_parts[i].short_name != name_parts[0].short_name) + { + same_short_name = false; + break; + } + } + + bool same_db_and_table_name = true; + if (name_parts.size() > 1) + { + for (size_t i = 1; i != name_parts.size(); ++i) + if ((name_parts[i].database != name_parts[0].database) || (name_parts[i].table_name != name_parts[0].table_name)) + { + same_db_and_table_name = false; + break; + } + } + + if (same_short_name) + { + const String & short_name = name_parts[0].short_name; + settings.ostr << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " + << (settings.hilite ? 
hilite_none : ""); + + bool need_comma = false; + for (const auto & np : name_parts) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + const String & database = np.database; + const String & table_name = np.table_name; + if (!database.empty()) + settings.ostr << backQuoteIfNeed(database) + "."; + settings.ostr << backQuoteIfNeed(table_name); + } + } + else if (same_db_and_table_name) + { + bool need_comma = false; + for (const auto & np : name_parts) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + const String & short_name = np.short_name; + settings.ostr << backQuoteIfNeed(short_name); + } + + const String & database = name_parts[0].database; + const String & table_name = name_parts[0].table_name; + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); + if (!database.empty()) + settings.ostr << backQuoteIfNeed(database) + "."; + settings.ostr << backQuoteIfNeed(table_name); + } + else + { + bool need_comma = false; + for (const auto & np : name_parts) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + const String & short_name = np.short_name; + const String & database = np.database; + const String & table_name = np.table_name; + settings.ostr << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " + << (settings.hilite ? hilite_none : ""); + if (!database.empty()) + settings.ostr << backQuoteIfNeed(database) + "."; + settings.ostr << backQuoteIfNeed(table_name); + } + } + + formatOnCluster(settings); +} + + +Strings ASTRowPolicyNames::toStrings() const +{ + Strings res; + res.reserve(name_parts.size()); + for (const auto & np : name_parts) + res.emplace_back(np.toString()); + return res; +} + + +void ASTRowPolicyNames::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + for (auto & np : name_parts) + if (np.database.empty()) + np.database = current_database; +} + +} diff --git a/src/Parsers/ASTRowPolicyName.h b/src/Parsers/ASTRowPolicyName.h new file mode 100644 index 00000000000..ac2f84f5d8b --- /dev/null +++ b/src/Parsers/ASTRowPolicyName.h @@ -0,0 +1,49 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +/** Represents a row policy's name in one of the following forms: + * short_name ON [db.]table_name [ON CLUSTER 'cluster_name'] + * short_name [ON CLUSTER 'cluster_name'] ON [db.]table_name + */ +class ASTRowPolicyName : public IAST, public ASTQueryWithOnCluster +{ +public: + RowPolicy::NameParts name_parts; + String toString() const { return name_parts.getName(); } + + String getID(char) const override { return "RowPolicyName"; } + ASTPtr clone() const override { return std::make_shared(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabaseWithCurrent(const String & current_database); +}; + + +/** Represents multiple names of row policies, comma-separated, in one of the following forms: + * short_name1 ON [db1.]table_name1 [, short_name2 ON [db2.]table_name2 ...] [ON CLUSTER 'cluster_name'] + * short_name1 [, short_name2 ...] ON [db.]table_name [ON CLUSTER 'cluster_name'] + * short_name1 [, short_name2 ...] [ON CLUSTER 'cluster_name'] ON [db.]table_name + * short_name ON [db1.]table_name1 [, [db2.]table_name2 ...] 
[ON CLUSTER 'cluster_name'] + * short_name [ON CLUSTER 'cluster_name'] ON [db1.]table_name1 [, [db2.]table_name2 ...] + */ +class ASTRowPolicyNames : public IAST, public ASTQueryWithOnCluster +{ +public: + std::vector name_parts; + Strings toStrings() const; + + String getID(char) const override { return "RowPolicyNames"; } + ASTPtr clone() const override { return std::make_shared(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; + ASTPtr getRewrittenASTWithoutOnCluster(const std::string &) const override { return removeOnCluster(clone()); } + + void replaceEmptyDatabaseWithCurrent(const String & current_database); +}; +} diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp b/src/Parsers/ASTShowCreateAccessEntityQuery.cpp index f81b30428fb..cbd31d0d53c 100644 --- a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/ASTShowCreateAccessEntityQuery.cpp @@ -1,10 +1,25 @@ #include +#include #include namespace DB { -using EntityTypeInfo = IAccessEntity::TypeInfo; +namespace +{ + using EntityTypeInfo = IAccessEntity::TypeInfo; + + void formatNames(const Strings & names, const IAST::FormatSettings & settings) + { + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ','; + settings.ostr << ' ' << backQuoteIfNeed(name); + } + } +} String ASTShowCreateAccessEntityQuery::getID(char) const @@ -30,14 +45,18 @@ void ASTShowCreateAccessEntityQuery::formatQueryImpl(const FormatSettings & sett } else if (type == EntityType::ROW_POLICY) { - const String & database = row_policy_name_parts.database; - const String & table_name = row_policy_name_parts.table_name; - const String & short_name = row_policy_name_parts.short_name; - settings.ostr << ' ' << backQuoteIfNeed(short_name) << (settings.hilite ? hilite_keyword : "") << " ON " - << (settings.hilite ? hilite_none : "") << (database.empty() ? 
String{} : backQuoteIfNeed(database) + ".") - << backQuoteIfNeed(table_name); + settings.ostr << " "; + row_policy_names->format(settings); } else - settings.ostr << " " << backQuoteIfNeed(name); + formatNames(names, settings); } + + +void ASTShowCreateAccessEntityQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + if (row_policy_names) + row_policy_names->replaceEmptyDatabaseWithCurrent(current_database); +} + } diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.h b/src/Parsers/ASTShowCreateAccessEntityQuery.h index df7be2e257c..5fd16d622f7 100644 --- a/src/Parsers/ASTShowCreateAccessEntityQuery.h +++ b/src/Parsers/ASTShowCreateAccessEntityQuery.h @@ -1,12 +1,14 @@ #pragma once #include -#include +#include namespace DB { -/** SHOW CREATE QUOTA [name | CURRENT] +class ASTRowPolicyNames; + +/** SHOW CREATE QUOTA [name] * SHOW CREATE [ROW] POLICY name ON [database.]table * SHOW CREATE USER [name | CURRENT_USER] * SHOW CREATE ROLE name @@ -18,14 +20,16 @@ public: using EntityType = IAccessEntity::Type; EntityType type; - String name; + Strings names; bool current_quota = false; bool current_user = false; - RowPolicy::NameParts row_policy_name_parts; + std::shared_ptr row_policy_names; String getID(char) const override; ASTPtr clone() const override; + void replaceEmptyDatabaseWithCurrent(const String & current_database); + protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; diff --git a/src/Parsers/ASTUserNameWithHost.cpp b/src/Parsers/ASTUserNameWithHost.cpp new file mode 100644 index 00000000000..13d34b99b3d --- /dev/null +++ b/src/Parsers/ASTUserNameWithHost.cpp @@ -0,0 +1,74 @@ +#include +#include + + +namespace DB +{ + +void ASTUserNameWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + settings.ostr << backQuoteIfNeed(base_name); + + if (!host_pattern.empty()) + settings.ostr << "@" << backQuoteIfNeed(host_pattern); +} + +String ASTUserNameWithHost::toString() const +{ + String res = base_name; + if (!host_pattern.empty()) + res += '@' + host_pattern; + return res; +} + +void ASTUserNameWithHost::concatParts() +{ + base_name = toString(); + host_pattern.clear(); +} + + +void ASTUserNamesWithHost::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +{ + assert(!names.empty()); + bool need_comma = false; + for (const auto & name : names) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + name->format(settings); + } +} + +Strings ASTUserNamesWithHost::toStrings() const +{ + Strings res; + res.reserve(names.size()); + for (const auto & name : names) + res.emplace_back(name->toString()); + return res; +} + +void ASTUserNamesWithHost::concatParts() +{ + for (auto & name : names) + name->concatParts(); +} + + +bool ASTUserNamesWithHost::getHostPatternIfCommon(String & out_common_host_pattern) const +{ + out_common_host_pattern.clear(); + + if (names.empty()) + return true; + + for (size_t i = 1; i != names.size(); ++i) + if (names[i]->host_pattern != names[0]->host_pattern) + return false; + + out_common_host_pattern = names[0]->host_pattern; + return true; +} + +} diff --git a/src/Parsers/ASTUserNameWithHost.h b/src/Parsers/ASTUserNameWithHost.h new file mode 100644 index 00000000000..00b1570e062 --- /dev/null +++ b/src/Parsers/ASTUserNameWithHost.h @@ -0,0 +1,53 @@ +#pragma once + +#include + + +namespace DB +{ + +/** Represents a user name. 
+ * It can be a simple string or identifier, or something like `name@host`. + * In the latter case `host` specifies the hosts the user is allowed to connect from. + * The `host` can be an IP address, an IP subnet, or a host name. + * The % and _ wildcard characters are permitted in `host`. + * These have the same meaning as for pattern-matching operations performed with the LIKE operator. + */ +class ASTUserNameWithHost : public IAST +{ +public: + String base_name; + String host_pattern; + + String toString() const; + void concatParts(); + + ASTUserNameWithHost() = default; + ASTUserNameWithHost(const String & name_) : base_name(name_) {} + String getID(char) const override { return "UserNameWithHost"; } + ASTPtr clone() const override { return std::make_shared<ASTUserNameWithHost>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + + +class ASTUserNamesWithHost : public IAST +{ +public: + std::vector<std::shared_ptr<ASTUserNameWithHost>> names; + + size_t size() const { return names.size(); } + auto begin() const { return names.begin(); } + auto end() const { return names.end(); } + auto front() const { return *begin(); } + void push_back(const String & name_) { names.push_back(std::make_shared<ASTUserNameWithHost>(name_)); } + + Strings toStrings() const; + void concatParts(); + bool getHostPatternIfCommon(String & out_common_host_pattern) const; + + String getID(char) const override { return "UserNamesWithHost"; } + ASTPtr clone() const override { return std::make_shared<ASTUserNamesWithHost>(*this); } + void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; +}; + +} diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index b505fd25a95..5eb138d6be0 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -240,8 +240,8 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe or_replace = true; } - String name; - if (!parseIdentifierOrStringLiteral(pos, expected, name)) + Strings names; + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) return false; String new_name; @@ -251,7 +251,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe while (true) { - if (alter && new_name.empty() && parseRenameTo(pos, expected, new_name)) + if (alter && new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; if (!key_type && parseKeyType(pos, expected, key_type)) @@ -280,7 +280,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->if_not_exists = if_not_exists; query->or_replace = or_replace; query->cluster = std::move(cluster); - query->name = std::move(name); + query->names = std::move(names); query->new_name = std::move(new_name); query->key_type = key_type; query->all_limits = std::move(all_limits); diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/ParserCreateRoleQuery.cpp index 2a6f2dd2c90..08dd31c51a3 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/ParserCreateRoleQuery.cpp @@ -84,8 +84,8 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec or_replace = true; } - String name; - if (!parseRoleName(pos, expected, name)) + Strings names; + if (!parseRoleNames(pos, expected, names)) return false; String new_name; @@ -94,7 +94,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - if (alter && parseRenameTo(pos, expected, new_name)) + if (alter && 
new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; if (parseSettings(pos, expected, attach_mode, settings)) @@ -115,7 +115,7 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->if_not_exists = if_not_exists; query->or_replace = or_replace; query->cluster = std::move(cluster); - query->name = std::move(name); + query->names = std::move(names); query->new_name = std::move(new_name); query->settings = std::move(settings); diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 2456cb80368..4f5f2989a7b 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -1,6 +1,8 @@ #include #include #include +#include +#include #include #include #include @@ -227,22 +229,22 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & or_replace = true; } - RowPolicy::NameParts name_parts; - String & database = name_parts.database; - String & table_name = name_parts.table_name; - String & short_name = name_parts.short_name; - if (!parseIdentifierOrStringLiteral(pos, expected, short_name) || !ParserKeyword{"ON"}.ignore(pos, expected) - || !parseDatabaseAndTableName(pos, expected, database, table_name)) + ParserRowPolicyNames names_parser; + names_parser.allowOnCluster(); + ASTPtr names_ast; + if (!names_parser.parse(pos, names_ast, expected)) return false; + auto names = typeid_cast>(names_ast); + String cluster = std::exchange(names->cluster, ""); + String new_short_name; std::optional is_restrictive; std::array, MAX_CONDITION_TYPE> conditions; - String cluster; while (true) { - if (alter && new_short_name.empty() && parseRenameTo(pos, expected, new_short_name)) + if (alter && new_short_name.empty() && (names->name_parts.size() == 1) && parseRenameTo(pos, expected, new_short_name)) continue; if (!is_restrictive && parseAsRestrictiveOrPermissive(pos, expected, is_restrictive)) @@ -272,7 +274,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->if_not_exists = if_not_exists; query->or_replace = or_replace; query->cluster = std::move(cluster); - query->name_parts = std::move(name_parts); + query->names = std::move(names); query->new_short_name = std::move(new_short_name); query->is_restrictive = is_restrictive; query->conditions = std::move(conditions); diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 83d0f0c1d91..4d5a9c09ad8 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -100,8 +100,8 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec or_replace = true; } - String name; - if (!parseIdentifierOrStringLiteral(pos, expected, name)) + Strings names; + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) return false; String new_name; @@ -110,7 +110,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec while (true) { - if (alter && parseRenameTo(pos, expected, new_name)) + if (alter && new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; if (parseSettings(pos, expected, attach_mode, settings)) @@ -137,7 +137,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec query->if_not_exists = if_not_exists; query->or_replace = or_replace; query->cluster = std::move(cluster); - query->name = 
std::move(name); + query->names = std::move(names); query->new_name = std::move(new_name); query->settings = std::move(settings); query->to_roles = std::move(to_roles); diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index e03f8334d42..e99457a2f87 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -6,7 +6,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -16,11 +18,6 @@ namespace DB { -namespace ErrorCodes -{ -} - - namespace { bool parseRenameTo(IParserBase::Pos & pos, Expected & expected, String & new_name) @@ -268,10 +265,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec or_replace = true; } - String name; - std::optional host_pattern; - if (!parseUserName(pos, expected, name, host_pattern)) + ASTPtr names_ast; + if (!ParserUserNamesWithHost{}.parse(pos, names_ast, expected)) return false; + auto names = typeid_cast>(names_ast); + auto names_ref = names->names; String new_name; std::optional authentication; @@ -301,7 +299,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (alter) { - if (new_name.empty() && parseRenameTo(pos, expected, new_name)) + if (new_name.empty() && (names->size() == 1) && parseRenameTo(pos, expected, new_name)) continue; if (parseHosts(pos, expected, "ADD", add_hosts) || parseHosts(pos, expected, "DROP", remove_hosts)) @@ -311,8 +309,17 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec break; } - if (!alter && !hosts && host_pattern) - hosts.emplace().addLikePattern(*host_pattern); + if (!alter && !hosts) + { + String common_host_pattern; + if (names->getHostPatternIfCommon(common_host_pattern) && !common_host_pattern.empty()) + { + hosts.emplace().addLikePattern(common_host_pattern); + names->concatParts(); + } + } + else if (alter) + names->concatParts(); auto query = std::make_shared(); node = query; @@ -323,7 +330,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->if_not_exists = if_not_exists; query->or_replace = or_replace; query->cluster = std::move(cluster); - query->name = std::move(name); + query->names = std::move(names); query->new_name = std::move(new_name); query->authentication = std::move(authentication); query->hosts = std::move(hosts); diff --git a/src/Parsers/ParserDropAccessEntityQuery.cpp b/src/Parsers/ParserDropAccessEntityQuery.cpp index 15f8bbf0a62..d5eac710631 100644 --- a/src/Parsers/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/ParserDropAccessEntityQuery.cpp @@ -1,8 +1,9 @@ #include #include #include +#include +#include #include -#include #include #include @@ -14,65 +15,11 @@ namespace using EntityType = IAccessEntity::Type; using EntityTypeInfo = IAccessEntity::TypeInfo; - bool parseNames(IParserBase::Pos & pos, Expected & expected, Strings & names) + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) { return IParserBase::wrapParseImpl(pos, [&] { - Strings res_names; - do - { - String name; - if (!parseIdentifierOrStringLiteral(pos, expected, name)) - return false; - - res_names.push_back(std::move(name)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - - names = std::move(res_names); - return true; - }); - } - - bool parseRowPolicyNames(IParserBase::Pos & pos, Expected & expected, std::vector & name_parts) - { - return IParserBase::wrapParseImpl(pos, [&] - { - std::vector res_name_parts; - do - { - 
Strings short_names; - if (!parseNames(pos, expected, short_names)) - return false; - String database, table_name; - if (!ParserKeyword{"ON"}.ignore(pos, expected) || !parseDatabaseAndTableName(pos, expected, database, table_name)) - return false; - for (String & short_name : short_names) - res_name_parts.push_back({std::move(short_name), database, table_name}); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - - name_parts = std::move(res_name_parts); - return true; - }); - } - - bool parseUserNames(IParserBase::Pos & pos, Expected & expected, Strings & names) - { - return IParserBase::wrapParseImpl(pos, [&] - { - Strings res_names; - do - { - String name; - if (!parseUserName(pos, expected, name)) - return false; - - res_names.emplace_back(std::move(name)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - names = std::move(res_names); - return true; + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); }); } } @@ -101,7 +48,8 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if_exists = true; Strings names; - std::vector row_policies_name_parts; + std::shared_ptr row_policy_names; + String cluster; if ((type == EntityType::USER) || (type == EntityType::ROLE)) { @@ -110,21 +58,22 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & } else if (type == EntityType::ROW_POLICY) { - if (!parseRowPolicyNames(pos, expected, row_policies_name_parts)) + ParserRowPolicyNames parser; + ASTPtr ast; + parser.allowOnCluster(); + if (!parser.parse(pos, ast, expected)) return false; + row_policy_names = typeid_cast>(ast); + cluster = std::exchange(row_policy_names->cluster, ""); } else { - if (!parseNames(pos, expected, names)) + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) return false; } - String cluster; - if (ParserKeyword{"ON"}.ignore(pos, expected)) - { - if (!ASTQueryWithOnCluster::parse(pos, cluster, expected)) - return false; - } + if (cluster.empty()) + parseOnCluster(pos, expected, cluster); auto query = std::make_shared(); node = query; @@ -133,7 +82,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & query->if_exists = if_exists; query->cluster = std::move(cluster); query->names = std::move(names); - query->row_policies_name_parts = std::move(row_policies_name_parts); + query->row_policy_names = std::move(row_policy_names); return true; } diff --git a/src/Parsers/ParserRowPolicyName.cpp b/src/Parsers/ParserRowPolicyName.cpp new file mode 100644 index 00000000000..8f1ef91f7c1 --- /dev/null +++ b/src/Parsers/ParserRowPolicyName.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace +{ + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && ASTQueryWithOnCluster::parse(pos, cluster, expected); + }); + } + + + bool parseOnDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database, String & table_name) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!ParserKeyword{"ON"}.ignore(pos, expected)) + return false; + + return parseDatabaseAndTableName(pos, expected, database, table_name); + }); + } + + + bool parseOnDatabaseAndTableName(IParser::Pos & pos, Expected & expected, std::pair & database_and_table_name) + { + return parseOnDatabaseAndTableName(pos, expected, 
database_and_table_name.first, database_and_table_name.second);
+    }
+
+
+    bool parseOnDatabaseAndTableNames(IParser::Pos & pos, Expected & expected, std::vector<std::pair<String, String>> & database_and_table_names)
+    {
+        return IParserBase::wrapParseImpl(pos, [&]
+        {
+            if (!ParserKeyword{"ON"}.ignore(pos, expected))
+                return false;
+
+            std::vector<std::pair<String, String>> res;
+            std::optional<IParser::Pos> pos_before_comma;
+            do
+            {
+                String database, table_name;
+                if (!parseDatabaseAndTableName(pos, expected, database, table_name))
+                    return false;
+
+                String unused;
+                if (pos_before_comma && database.empty() && ParserKeyword{"ON"}.ignore(pos, expected)
+                    && !ASTQueryWithOnCluster::parse(pos, unused, expected))
+                {
+                    pos = *pos_before_comma;
+                    break;
+                }
+
+                res.emplace_back(std::move(database), std::move(table_name));
+                pos_before_comma = pos;
+            }
+            while (ParserToken{TokenType::Comma}.ignore(pos, expected));
+            database_and_table_names = std::move(res);
+            return true;
+        });
+    }
+}
+
+
+bool ParserRowPolicyName::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    RowPolicy::NameParts name_parts;
+    if (!parseIdentifierOrStringLiteral(pos, expected, name_parts.short_name))
+        return false;
+
+    String cluster;
+    parseOnCluster(pos, expected, cluster);
+
+    if (!parseOnDatabaseAndTableName(pos, expected, name_parts.database, name_parts.table_name))
+        return false;
+
+    if (cluster.empty())
+        parseOnCluster(pos, expected, cluster);
+
+    auto result = std::make_shared<ASTRowPolicyName>();
+    result->name_parts = std::move(name_parts);
+    result->cluster = std::move(cluster);
+    node = result;
+    return true;
+}
+
+
+bool ParserRowPolicyNames::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    std::vector<RowPolicy::NameParts> name_parts;
+    String cluster;
+
+    do
+    {
+        std::vector<String> short_names;
+        bool allowed_multiple_short_names = name_parts.empty();
+        if (allowed_multiple_short_names)
+        {
+            if (!parseIdentifiersOrStringLiterals(pos, expected, short_names))
+                return false;
+        }
+        else
+        {
+            if (!parseIdentifierOrStringLiteral(pos, expected, short_names.emplace_back()))
+                return false;
+        }
+
+        bool allowed_on_cluster = allow_on_cluster && name_parts.empty();
+        if (allowed_on_cluster)
+            parseOnCluster(pos, expected, cluster);
+
+        std::vector<std::pair<String, String>> database_and_table_names;
+        bool allowed_multiple_db_and_table_names = ((name_parts.empty()) && (short_names.size() == 1));
+        if (allowed_multiple_db_and_table_names)
+        {
+            if (!parseOnDatabaseAndTableNames(pos, expected, database_and_table_names))
+                return false;
+        }
+        else
+        {
+            if (!parseOnDatabaseAndTableName(pos, expected, database_and_table_names.emplace_back()))
+                return false;
+        }
+
+        allowed_on_cluster &= cluster.empty();
+        if (allowed_on_cluster)
+            parseOnCluster(pos, expected, cluster);
+
+        for (const String & short_name : short_names)
+            for (const auto & [database, table_name] : database_and_table_names)
+                name_parts.push_back({short_name, database, table_name});
+
+        if ((short_names.size() != 1) || (database_and_table_names.size() != 1) || !cluster.empty())
+            break;
+    }
+    while (ParserToken{TokenType::Comma}.ignore(pos, expected));
+
+    auto result = std::make_shared<ASTRowPolicyNames>();
+    result->name_parts = std::move(name_parts);
+    result->cluster = std::move(cluster);
+    node = result;
+    return true;
+}
+
+}
diff --git a/src/Parsers/ParserRowPolicyName.h b/src/Parsers/ParserRowPolicyName.h
new file mode 100644
index 00000000000..6af0519d161
--- /dev/null
+++ b/src/Parsers/ParserRowPolicyName.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include 
+#include 
+
+
+namespace DB
+{
+/** Parses a string in one of the following forms:
+  * short_name ON [db.]table_name [ON CLUSTER 'cluster_name']
+  * short_name [ON CLUSTER 'cluster_name'] ON [db.]table_name
+  */
+class ParserRowPolicyName : public IParserBase
+{
+public:
+    void allowOnCluster(bool allow = true) { allow_on_cluster = allow; }
+
+protected:
+    const char * getName() const override { return "RowPolicyName"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+    bool allow_on_cluster = false;
+};
+
+
+/** Parses a string in one of the following forms:
+  * short_name1 ON [db1.]table_name1 [, short_name2 ON [db2.]table_name2 ...] [ON CLUSTER 'cluster_name']
+  * short_name1 [, short_name2 ...] ON [db.]table_name [ON CLUSTER 'cluster_name']
+  * short_name1 [, short_name2 ...] [ON CLUSTER 'cluster_name'] ON [db.]table_name
+  * short_name ON [db1.]table_name1 [, [db2.]table_name2 ...] [ON CLUSTER 'cluster_name']
+  * short_name [ON CLUSTER 'cluster_name'] ON [db1.]table_name1 [, [db2.]table_name2 ...]
+  */
+class ParserRowPolicyNames : public IParserBase
+{
+public:
+    void allowOnCluster(bool allow = true) { allow_on_cluster = allow; }
+
+protected:
+    const char * getName() const override { return "RowPolicyNames"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+
+private:
+    bool allow_on_cluster = false;
+};
+
+}
diff --git a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp b/src/Parsers/ParserShowCreateAccessEntityQuery.cpp
index ca520f4df6f..465a6c380b1 100644
--- a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp
+++ b/src/Parsers/ParserShowCreateAccessEntityQuery.cpp
@@ -2,7 +2,8 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -32,33 +33,33 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe
     if (!type)
         return false;
 
-    String name;
+    Strings names;
     bool current_quota = false;
     bool current_user = false;
-    RowPolicy::NameParts row_policy_name_parts;
+    std::shared_ptr<ASTRowPolicyNames> row_policy_names;
 
     if (type == EntityType::USER)
     {
-        if (!parseUserNameOrCurrentUserTag(pos, expected, name, current_user))
+        if (parseCurrentUserTag(pos, expected))
+            current_user = true;
+        else if (!parseUserNames(pos, expected, names))
             return false;
     }
     else if (type == EntityType::ROLE)
     {
-        if (!parseRoleName(pos, expected, name))
+        if (!parseRoleNames(pos, expected, names))
             return false;
     }
     else if (type == EntityType::ROW_POLICY)
    {
-        String & database = row_policy_name_parts.database;
-        String & table_name = row_policy_name_parts.table_name;
-        String & short_name = row_policy_name_parts.short_name;
-        if (!parseIdentifierOrStringLiteral(pos, expected, short_name) || !ParserKeyword{"ON"}.ignore(pos, expected)
-            || !parseDatabaseAndTableName(pos, expected, database, table_name))
+        ASTPtr ast;
+        if (!ParserRowPolicyNames{}.parse(pos, ast, expected))
             return false;
+        row_policy_names = typeid_cast<std::shared_ptr<ASTRowPolicyNames>>(ast);
     }
     else if (type == EntityType::QUOTA)
     {
-        if (!parseIdentifierOrStringLiteral(pos, expected, name))
+        if (!parseIdentifiersOrStringLiterals(pos, expected, names))
         {
             /// SHOW CREATE QUOTA
             current_quota = true;
@@ -66,7 +67,7 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe
     }
     else if (type == EntityType::SETTINGS_PROFILE)
     {
-        if (!parseIdentifierOrStringLiteral(pos, expected, name))
+        if (!parseIdentifiersOrStringLiterals(pos, expected, names))
             return false;
     }
 
@@ -74,10 +75,10 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe
     node = query;
 
     query->type = *type;
-    query->name = std::move(name);
+    query->names = std::move(names);
query->current_quota = current_quota; query->current_user = current_user; - query->row_policy_name_parts = std::move(row_policy_name_parts); + query->row_policy_names = std::move(row_policy_names); return true; } diff --git a/src/Parsers/ParserShowGrantsQuery.cpp b/src/Parsers/ParserShowGrantsQuery.cpp index c667894f1d7..993346d2eeb 100644 --- a/src/Parsers/ParserShowGrantsQuery.cpp +++ b/src/Parsers/ParserShowGrantsQuery.cpp @@ -16,7 +16,9 @@ bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (ParserKeyword{"FOR"}.ignore(pos, expected)) { - if (!parseUserNameOrCurrentUserTag(pos, expected, name, current_user)) + if (parseCurrentUserTag(pos, expected)) + current_user = true; + else if (!parseUserName(pos, expected, name)) return false; } else diff --git a/src/Parsers/ParserUserNameWithHost.cpp b/src/Parsers/ParserUserNameWithHost.cpp new file mode 100644 index 00000000000..19ec7a9bbd1 --- /dev/null +++ b/src/Parsers/ParserUserNameWithHost.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ +bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + String base_name; + if (!parseIdentifierOrStringLiteral(pos, expected, base_name)) + return false; + + boost::algorithm::trim(base_name); + + String host_pattern; + if (ParserToken{TokenType::At}.ignore(pos, expected)) + { + if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern)) + return false; + + boost::algorithm::trim(host_pattern); + if (host_pattern == "%") + host_pattern.clear(); + } + + auto result = std::make_shared(); + result->base_name = std::move(base_name); + result->host_pattern = std::move(host_pattern); + node = result; + return true; +} + + +bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::vector> names; + do + { + ASTPtr ast; + if (!ParserUserNameWithHost{}.parse(pos, ast, expected)) + return false; + + names.emplace_back(typeid_cast>(ast)); + } + while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + auto result = std::make_shared(); + result->names = std::move(names); + node = result; + return true; +} + +} diff --git a/src/Parsers/ParserUserNameWithHost.h b/src/Parsers/ParserUserNameWithHost.h new file mode 100644 index 00000000000..453b816a98d --- /dev/null +++ b/src/Parsers/ParserUserNameWithHost.h @@ -0,0 +1,26 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses a user name. + * It can be a simple string or identifier or something like `name@host`. 
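+ * For example: john, `john`, 'john', or john@'192.168.%.%' (illustrative
+ * samples of the accepted forms, not taken from this patch).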
+ */ +class ParserUserNameWithHost : public IParserBase +{ +protected: + const char * getName() const override { return "UserNameWithHost"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + +class ParserUserNamesWithHost : public IParserBase +{ +protected: + const char * getName() const override { return "UserNamesWithHost"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/parseIdentifierOrStringLiteral.cpp b/src/Parsers/parseIdentifierOrStringLiteral.cpp index 7258d3e39da..22c77af0b09 100644 --- a/src/Parsers/parseIdentifierOrStringLiteral.cpp +++ b/src/Parsers/parseIdentifierOrStringLiteral.cpp @@ -3,6 +3,7 @@ #include "ExpressionElementParsers.h" #include "ASTLiteral.h" #include "ASTIdentifier.h" +#include #include namespace DB @@ -25,4 +26,25 @@ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str return true; } + +bool parseIdentifiersOrStringLiterals(IParser::Pos & pos, Expected & expected, Strings & result) +{ + return IParserBase::wrapParseImpl(pos, [&] + { + Strings strs; + do + { + String str; + if (!parseIdentifierOrStringLiteral(pos, expected, str)) + return false; + + strs.push_back(std::move(str)); + } + while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + result = std::move(strs); + return true; + }); +} + } diff --git a/src/Parsers/parseIdentifierOrStringLiteral.h b/src/Parsers/parseIdentifierOrStringLiteral.h index 0f49d44ee01..0174c481704 100644 --- a/src/Parsers/parseIdentifierOrStringLiteral.h +++ b/src/Parsers/parseIdentifierOrStringLiteral.h @@ -9,4 +9,7 @@ namespace DB * name, `name` or 'name' */ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result); +/** Parse a list of identifiers or string literals. 
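+  * For example, "name1, `name2`, 'name 3'" is parsed into three strings
+  * (an illustrative sample; each element uses one of the forms accepted above).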
*/
+bool parseIdentifiersOrStringLiterals(IParser::Pos & pos, Expected & expected, Strings & result);
+
 }
diff --git a/src/Parsers/parseUserName.cpp b/src/Parsers/parseUserName.cpp
index e6b91ba4af3..1f25f51ef22 100644
--- a/src/Parsers/parseUserName.cpp
+++ b/src/Parsers/parseUserName.cpp
@@ -1,64 +1,46 @@
 #include 
-#include 
+#include 
+#include 
 #include 
-#include 
 
 
 namespace DB
 {
-bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name, std::optional & host_like_pattern)
+
+bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name)
 {
-    String name;
-    if (!parseIdentifierOrStringLiteral(pos, expected, name))
+    ASTPtr ast;
+    if (!ParserUserNameWithHost{}.parse(pos, ast, expected))
         return false;
-
-    boost::algorithm::trim(name);
-
-    std::optional pattern;
-    if (ParserToken{TokenType::At}.ignore(pos, expected))
-    {
-        if (!parseIdentifierOrStringLiteral(pos, expected, pattern.emplace()))
-            return false;
-
-        boost::algorithm::trim(*pattern);
-    }
-
-    if (pattern && (pattern != "%"))
-        name += '@' + *pattern;
-
-    user_name = std::move(name);
-    host_like_pattern = std::move(pattern);
+    user_name = ast->as<ASTUserNameWithHost &>().toString();
     return true;
 }
 
 
-bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name)
+bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names)
 {
-    std::optional unused_pattern;
-    return parseUserName(pos, expected, user_name, unused_pattern);
+    ASTPtr ast;
+    if (!ParserUserNamesWithHost{}.parse(pos, ast, expected))
+        return false;
+    user_names = ast->as<ASTUserNamesWithHost &>().toStrings();
+    return true;
 }
 
 
-bool parseUserNameOrCurrentUserTag(IParser::Pos & pos, Expected & expected, String & user_name, bool & current_user)
+bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected)
 {
-    if (ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) || ParserKeyword{"currentUser"}.ignore(pos, expected))
+    return IParserBase::wrapParseImpl(pos, [&]
     {
+        if (!ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) && !ParserKeyword{"currentUser"}.ignore(pos, expected))
+            return false;
+
         if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected))
         {
             if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected))
                 return false;
         }
-        current_user = true;
         return true;
-    }
-
-    if (parseUserName(pos, expected, user_name))
-    {
-        current_user = false;
-        return true;
-    }
-
-    return false;
+    });
 }
 
 }
diff --git a/src/Parsers/parseUserName.h b/src/Parsers/parseUserName.h
index 641aa09d1f3..c1ad36c936e 100644
--- a/src/Parsers/parseUserName.h
+++ b/src/Parsers/parseUserName.h
@@ -10,11 +10,15 @@ namespace DB
 /// The `host` can be an ip address, ip subnet, or a host name.
 /// The % and _ wildcard characters are permitted in `host`.
 /// These have the same meaning as for pattern-matching operations performed with the LIKE operator.
-bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name, std::optional & host_like_pattern);
 bool parseUserName(IParser::Pos & pos, Expected & expected, String & user_name);
 
-/// Parses either a user name or the 'CURRENT_USER' keyword (or some of the aliases).
-bool parseUserNameOrCurrentUserTag(IParser::Pos & pos, Expected & expected, String & user_name, bool & current_user);
+/// Parses a comma-separated list of user names.
+bool parseUserNames(IParser::Pos & pos, Expected & expected, Strings & user_names);
+
+
+/// Parses the 'CURRENT_USER' keyword (or one of its aliases).
+bool parseCurrentUserTag(IParser::Pos & pos, Expected & expected);
+
 /// Parses a role name. 
It follows the same rules as a user name, but allowed hosts are never checked /// (because roles are not used to connect to server). @@ -22,4 +26,11 @@ inline bool parseRoleName(IParser::Pos & pos, Expected & expected, String & role { return parseUserName(pos, expected, role_name); } + +/// Parses a comma-separated list of role names. +inline bool parseRoleNames(IParser::Pos & pos, Expected & expected, Strings & role_names) +{ + return parseUserNames(pos, expected, role_names); +} + } diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 8c7e4ff68af..60672c0c116 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -37,6 +37,7 @@ SRCS( ASTQueryWithOnCluster.cpp ASTQueryWithOutput.cpp ASTQueryWithTableAndOutput.cpp + ASTRowPolicyName.cpp ASTSampleRatio.cpp ASTSelectQuery.cpp ASTSelectWithUnionQuery.cpp @@ -51,6 +52,7 @@ SRCS( ASTSystemQuery.cpp ASTTablesInSelectQuery.cpp ASTTTLElement.cpp + ASTUserNameWithHost.cpp ASTWithAlias.cpp CommonParsers.cpp ExpressionElementParsers.cpp @@ -88,6 +90,7 @@ SRCS( ParserQuery.cpp ParserQueryWithOutput.cpp ParserRenameQuery.cpp + ParserRowPolicyName.cpp ParserSampleRatio.cpp ParserSelectQuery.cpp ParserSelectWithUnionQuery.cpp @@ -104,6 +107,7 @@ SRCS( ParserTablesInSelectQuery.cpp ParserUnionQueryElement.cpp ParserUseQuery.cpp + ParserUserNameWithHost.cpp ParserWatchQuery.cpp parseUserName.cpp queryToString.cpp diff --git a/tests/integration/test_access_control_on_cluster/test.py b/tests/integration/test_access_control_on_cluster/test.py index 4dc9baca0a0..07c72e94be0 100644 --- a/tests/integration/test_access_control_on_cluster/test.py +++ b/tests/integration/test_access_control_on_cluster/test.py @@ -38,4 +38,3 @@ def test_access_control_on_cluster(): assert "There is no user `Alex`" in ch1.query_and_get_error("SHOW CREATE USER Alex") assert "There is no user `Alex`" in ch2.query_and_get_error("SHOW CREATE USER Alex") assert "There is no user `Alex`" in ch3.query_and_get_error("SHOW CREATE USER Alex") - From 92b9f4a88d6b79cc7626376d50b02a3a7985544d Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 30 May 2020 23:10:45 +0300 Subject: [PATCH 0734/2229] Rename ExtendedRoleSet => RolesOrUsersSet. 
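
The new name reflects what the set actually holds: references to both
roles and users, with optional ALL and EXCEPT parts. Roughly, in terms
of the renamed class (an illustrative sketch, not code from this patch;
`user_id` and `other_id` stand for arbitrary UUIDs):

    RolesOrUsersSet set{RolesOrUsersSet::AllTag{}}; // represents ALL
    set.except_ids.insert(user_id);                 // ALL EXCEPT user
    bool a = set.match(user_id);                    // false: excluded
    bool b = set.match(other_id);                   // true: covered by ALL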
--- src/Access/DiskAccessStorage.cpp | 30 ++++---- src/Access/Quota.h | 4 +- src/Access/QuotaCache.h | 2 +- ...xtendedRoleSet.cpp => RolesOrUsersSet.cpp} | 69 +++++++++---------- .../{ExtendedRoleSet.h => RolesOrUsersSet.h} | 42 +++++------ src/Access/RowPolicy.h | 4 +- src/Access/RowPolicyCache.h | 2 +- src/Access/SettingsProfile.h | 4 +- src/Access/User.h | 4 +- src/Access/ya.make | 2 +- .../InterpreterCreateQuotaQuery.cpp | 8 +-- .../InterpreterCreateRowPolicyQuery.cpp | 8 +-- .../InterpreterCreateSettingsProfileQuery.cpp | 8 +-- .../InterpreterCreateUserQuery.cpp | 10 +-- src/Interpreters/InterpreterGrantQuery.cpp | 12 ++-- src/Interpreters/InterpreterSetRoleQuery.cpp | 12 ++-- src/Interpreters/InterpreterSetRoleQuery.h | 4 +- ...InterpreterShowCreateAccessEntityQuery.cpp | 4 +- .../InterpreterShowGrantsQuery.cpp | 8 +-- src/Parsers/ASTCreateQuotaQuery.cpp | 4 +- src/Parsers/ASTCreateQuotaQuery.h | 4 +- src/Parsers/ASTCreateRowPolicyQuery.cpp | 4 +- src/Parsers/ASTCreateRowPolicyQuery.h | 4 +- src/Parsers/ASTCreateSettingsProfileQuery.cpp | 4 +- src/Parsers/ASTCreateSettingsProfileQuery.h | 4 +- src/Parsers/ASTCreateUserQuery.cpp | 4 +- src/Parsers/ASTCreateUserQuery.h | 4 +- src/Parsers/ASTGrantQuery.cpp | 4 +- src/Parsers/ASTGrantQuery.h | 6 +- ...ndedRoleSet.cpp => ASTRolesOrUsersSet.cpp} | 6 +- ...ExtendedRoleSet.h => ASTRolesOrUsersSet.h} | 12 ++-- src/Parsers/ASTSetRoleQuery.cpp | 2 +- src/Parsers/ASTSetRoleQuery.h | 6 +- src/Parsers/ParserCreateQuotaQuery.cpp | 14 ++-- src/Parsers/ParserCreateQuotaQuery.h | 2 +- src/Parsers/ParserCreateRoleQuery.cpp | 4 +- src/Parsers/ParserCreateRoleQuery.h | 2 +- src/Parsers/ParserCreateRowPolicyQuery.cpp | 18 +++-- src/Parsers/ParserCreateRowPolicyQuery.h | 2 +- .../ParserCreateSettingsProfileQuery.cpp | 22 +++--- .../ParserCreateSettingsProfileQuery.h | 2 +- src/Parsers/ParserCreateUserQuery.cpp | 20 +++--- src/Parsers/ParserCreateUserQuery.h | 2 +- src/Parsers/ParserExtendedRoleSet.h | 28 -------- src/Parsers/ParserGrantQuery.cpp | 24 ++++--- src/Parsers/ParserGrantQuery.h | 2 +- ...dRoleSet.cpp => ParserRolesOrUsersSet.cpp} | 26 +++---- src/Parsers/ParserRolesOrUsersSet.h | 32 +++++++++ src/Parsers/ParserSetRoleQuery.cpp | 28 ++++---- src/Parsers/ParserSettingsProfileElement.cpp | 8 ++- src/Parsers/ParserSettingsProfileElement.h | 12 ++-- src/Parsers/ya.make | 4 +- .../System/StorageSystemPrivileges.cpp | 2 +- src/Storages/System/StorageSystemQuotas.cpp | 4 +- .../System/StorageSystemRoleGrants.cpp | 4 +- .../System/StorageSystemRowPolicies.cpp | 4 +- .../System/StorageSystemSettingsProfiles.cpp | 4 +- src/Storages/System/StorageSystemUsers.cpp | 4 +- 58 files changed, 308 insertions(+), 271 deletions(-) rename src/Access/{ExtendedRoleSet.cpp => RolesOrUsersSet.cpp} (76%) rename src/Access/{ExtendedRoleSet.h => RolesOrUsersSet.h} (57%) rename src/Parsers/{ASTExtendedRoleSet.cpp => ASTRolesOrUsersSet.cpp} (93%) rename src/Parsers/{ASTExtendedRoleSet.h => ASTRolesOrUsersSet.h} (57%) delete mode 100644 src/Parsers/ParserExtendedRoleSet.h rename src/Parsers/{ParserExtendedRoleSet.cpp => ParserRolesOrUsersSet.cpp} (81%) create mode 100644 src/Parsers/ParserRolesOrUsersSet.h diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 1195bcf842c..4dc91cd8937 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -64,19 +64,23 @@ namespace bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override { - if (ParserCreateUserQuery{}.enableAttachMode(true).parse(pos, node, 
expected)) - return true; - if (ParserCreateRoleQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateRowPolicyQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateQuotaQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserCreateSettingsProfileQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - if (ParserGrantQuery{}.enableAttachMode(true).parse(pos, node, expected)) - return true; - return false; + ParserCreateUserQuery create_user_p; + ParserCreateRoleQuery create_role_p; + ParserCreateRowPolicyQuery create_policy_p; + ParserCreateQuotaQuery create_quota_p; + ParserCreateSettingsProfileQuery create_profile_p; + ParserGrantQuery grant_p; + + create_user_p.useAttachMode(); + create_role_p.useAttachMode(); + create_policy_p.useAttachMode(); + create_quota_p.useAttachMode(); + create_profile_p.useAttachMode(); + grant_p.useAttachMode(); + + return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected) + || create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected) + || create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected); } }; diff --git a/src/Access/Quota.h b/src/Access/Quota.h index 25b56756dc1..101263e76a5 100644 --- a/src/Access/Quota.h +++ b/src/Access/Quota.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include @@ -91,7 +91,7 @@ struct Quota : public IAccessEntity KeyType key_type = KeyType::NONE; /// Which roles or users should use this quota. - ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; bool equal(const IAccessEntity & other) const override; std::shared_ptr clone() const override { return cloneImpl(); } diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index 6e794f0bbd2..0bb5c11a82b 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -39,7 +39,7 @@ private: QuotaPtr quota; UUID quota_id; - const ExtendedRoleSet * roles = nullptr; + const RolesOrUsersSet * roles = nullptr; std::unordered_map> key_to_intervals; }; diff --git a/src/Access/ExtendedRoleSet.cpp b/src/Access/RolesOrUsersSet.cpp similarity index 76% rename from src/Access/ExtendedRoleSet.cpp rename to src/Access/RolesOrUsersSet.cpp index a8e674b3722..cb0beb42700 100644 --- a/src/Access/ExtendedRoleSet.cpp +++ b/src/Access/RolesOrUsersSet.cpp @@ -1,9 +1,8 @@ - -#include +#include #include #include #include -#include +#include #include #include #include @@ -20,51 +19,51 @@ namespace ErrorCodes } -ExtendedRoleSet::ExtendedRoleSet() = default; -ExtendedRoleSet::ExtendedRoleSet(const ExtendedRoleSet & src) = default; -ExtendedRoleSet & ExtendedRoleSet::operator =(const ExtendedRoleSet & src) = default; -ExtendedRoleSet::ExtendedRoleSet(ExtendedRoleSet && src) = default; -ExtendedRoleSet & ExtendedRoleSet::operator =(ExtendedRoleSet && src) = default; +RolesOrUsersSet::RolesOrUsersSet() = default; +RolesOrUsersSet::RolesOrUsersSet(const RolesOrUsersSet & src) = default; +RolesOrUsersSet & RolesOrUsersSet::operator =(const RolesOrUsersSet & src) = default; +RolesOrUsersSet::RolesOrUsersSet(RolesOrUsersSet && src) = default; +RolesOrUsersSet & RolesOrUsersSet::operator =(RolesOrUsersSet && src) = default; -ExtendedRoleSet::ExtendedRoleSet(AllTag) +RolesOrUsersSet::RolesOrUsersSet(AllTag) { all = true; } -ExtendedRoleSet::ExtendedRoleSet(const UUID & id) +RolesOrUsersSet::RolesOrUsersSet(const UUID & id) { add(id); } 
-ExtendedRoleSet::ExtendedRoleSet(const std::vector & ids_) +RolesOrUsersSet::RolesOrUsersSet(const std::vector & ids_) { add(ids_); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast) { init(ast, nullptr); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const std::optional & current_user_id) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::optional & current_user_id) { init(ast, nullptr, current_user_id); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager) { init(ast, &manager); } -ExtendedRoleSet::ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager, const std::optional & current_user_id) +RolesOrUsersSet::RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id) { init(ast, &manager, current_user_id); } -void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlManager * manager, const std::optional & current_user_id) +void RolesOrUsersSet::init(const ASTRolesOrUsersSet & ast, const AccessControlManager * manager, const std::optional & current_user_id) { all = ast.all; @@ -73,20 +72,20 @@ void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlMa if (ast.id_mode) return parse(name); assert(manager); - if (ast.can_contain_users && ast.can_contain_roles) + if (ast.allow_user_names && ast.allow_role_names) { auto id = manager->find(name); if (id) return *id; return manager->getID(name); } - else if (ast.can_contain_users) + else if (ast.allow_user_names) { return manager->getID(name); } else { - assert(ast.can_contain_roles); + assert(ast.allow_role_names); return manager->getID(name); } }; @@ -122,9 +121,9 @@ void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlMa } -std::shared_ptr ExtendedRoleSet::toAST() const +std::shared_ptr RolesOrUsersSet::toAST() const { - auto ast = std::make_shared(); + auto ast = std::make_shared(); ast->id_mode = true; ast->all = all; @@ -148,9 +147,9 @@ std::shared_ptr ExtendedRoleSet::toAST() const } -std::shared_ptr ExtendedRoleSet::toASTWithNames(const AccessControlManager & manager) const +std::shared_ptr RolesOrUsersSet::toASTWithNames(const AccessControlManager & manager) const { - auto ast = std::make_shared(); + auto ast = std::make_shared(); ast->all = all; if (!ids.empty()) @@ -181,21 +180,21 @@ std::shared_ptr ExtendedRoleSet::toASTWithNames(const Access } -String ExtendedRoleSet::toString() const +String RolesOrUsersSet::toString() const { auto ast = toAST(); return serializeAST(*ast); } -String ExtendedRoleSet::toStringWithNames(const AccessControlManager & manager) const +String RolesOrUsersSet::toStringWithNames(const AccessControlManager & manager) const { auto ast = toASTWithNames(manager); return serializeAST(*ast); } -Strings ExtendedRoleSet::toStringsWithNames(const AccessControlManager & manager) const +Strings RolesOrUsersSet::toStringsWithNames(const AccessControlManager & manager) const { if (!all && ids.empty()) return {}; @@ -233,13 +232,13 @@ Strings ExtendedRoleSet::toStringsWithNames(const AccessControlManager & manager } -bool ExtendedRoleSet::empty() const +bool RolesOrUsersSet::empty() const { return ids.empty() && !all; } -void ExtendedRoleSet::clear() +void 
RolesOrUsersSet::clear() { ids.clear(); all = false; @@ -247,26 +246,26 @@ void ExtendedRoleSet::clear() } -void ExtendedRoleSet::add(const UUID & id) +void RolesOrUsersSet::add(const UUID & id) { ids.insert(id); } -void ExtendedRoleSet::add(const std::vector & ids_) +void RolesOrUsersSet::add(const std::vector & ids_) { for (const auto & id : ids_) add(id); } -bool ExtendedRoleSet::match(const UUID & id) const +bool RolesOrUsersSet::match(const UUID & id) const { return (all || ids.count(id)) && !except_ids.count(id); } -bool ExtendedRoleSet::match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const +bool RolesOrUsersSet::match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const { if (!all && !ids.count(user_id)) { @@ -285,7 +284,7 @@ bool ExtendedRoleSet::match(const UUID & user_id, const boost::container::flat_s } -std::vector ExtendedRoleSet::getMatchingIDs() const +std::vector RolesOrUsersSet::getMatchingIDs() const { if (all) throw Exception("getAllMatchingIDs() can't get ALL ids without manager", ErrorCodes::LOGICAL_ERROR); @@ -295,7 +294,7 @@ std::vector ExtendedRoleSet::getMatchingIDs() const } -std::vector ExtendedRoleSet::getMatchingIDs(const AccessControlManager & manager) const +std::vector RolesOrUsersSet::getMatchingIDs(const AccessControlManager & manager) const { if (!all) return getMatchingIDs(); @@ -316,7 +315,7 @@ std::vector ExtendedRoleSet::getMatchingIDs(const AccessControlManager & m } -bool operator ==(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs) +bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return (lhs.all == rhs.all) && (lhs.ids == rhs.ids) && (lhs.except_ids == rhs.except_ids); } diff --git a/src/Access/ExtendedRoleSet.h b/src/Access/RolesOrUsersSet.h similarity index 57% rename from src/Access/ExtendedRoleSet.h rename to src/Access/RolesOrUsersSet.h index eeb4af84f78..bae7f52a574 100644 --- a/src/Access/ExtendedRoleSet.h +++ b/src/Access/RolesOrUsersSet.h @@ -8,35 +8,35 @@ namespace DB { -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; class AccessControlManager; /// Represents a set of users/roles like /// {user_name | role_name | CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {user_name | role_name | CURRENT_USER} [,...] -/// Similar to ASTExtendedRoleSet, but with IDs instead of names. -struct ExtendedRoleSet +/// Similar to ASTRolesOrUsersSet, but with IDs instead of names. +struct RolesOrUsersSet { - ExtendedRoleSet(); - ExtendedRoleSet(const ExtendedRoleSet & src); - ExtendedRoleSet & operator =(const ExtendedRoleSet & src); - ExtendedRoleSet(ExtendedRoleSet && src); - ExtendedRoleSet & operator =(ExtendedRoleSet && src); + RolesOrUsersSet(); + RolesOrUsersSet(const RolesOrUsersSet & src); + RolesOrUsersSet & operator =(const RolesOrUsersSet & src); + RolesOrUsersSet(RolesOrUsersSet && src); + RolesOrUsersSet & operator =(RolesOrUsersSet && src); struct AllTag {}; - ExtendedRoleSet(AllTag); + RolesOrUsersSet(AllTag); - ExtendedRoleSet(const UUID & id); - ExtendedRoleSet(const std::vector & ids_); + RolesOrUsersSet(const UUID & id); + RolesOrUsersSet(const std::vector & ids_); /// The constructor from AST requires the AccessControlManager if `ast.id_mode == false`. 
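+    /// (`id_mode == true` means the AST carries UUIDs rather than names, as produced by toAST(), so no name lookup is needed.)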
- ExtendedRoleSet(const ASTExtendedRoleSet & ast); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const std::optional & current_user_id); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager); - ExtendedRoleSet(const ASTExtendedRoleSet & ast, const AccessControlManager & manager, const std::optional & current_user_id); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const std::optional & current_user_id); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager); + RolesOrUsersSet(const ASTRolesOrUsersSet & ast, const AccessControlManager & manager, const std::optional & current_user_id); - std::shared_ptr toAST() const; - std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; + std::shared_ptr toAST() const; + std::shared_ptr toASTWithNames(const AccessControlManager & manager) const; String toString() const; String toStringWithNames(const AccessControlManager & manager) const; @@ -47,7 +47,7 @@ struct ExtendedRoleSet void add(const UUID & id); void add(const std::vector & ids_); - /// Checks if a specified ID matches this ExtendedRoleSet. + /// Checks if a specified ID matches this RolesOrUsersSet. bool match(const UUID & id) const; bool match(const UUID & user_id, const boost::container::flat_set & enabled_roles) const; @@ -57,15 +57,15 @@ struct ExtendedRoleSet /// Returns a list of matching users and roles. std::vector getMatchingIDs(const AccessControlManager & manager) const; - friend bool operator ==(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs); - friend bool operator !=(const ExtendedRoleSet & lhs, const ExtendedRoleSet & rhs) { return !(lhs == rhs); } + friend bool operator ==(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs); + friend bool operator !=(const RolesOrUsersSet & lhs, const RolesOrUsersSet & rhs) { return !(lhs == rhs); } boost::container::flat_set ids; bool all = false; boost::container::flat_set except_ids; private: - void init(const ASTExtendedRoleSet & ast, const AccessControlManager * manager = nullptr, const std::optional & current_user_id = {}); + void init(const ASTRolesOrUsersSet & ast, const AccessControlManager * manager = nullptr, const std::optional & current_user_id = {}); }; } diff --git a/src/Access/RowPolicy.h b/src/Access/RowPolicy.h index 9d5b00b427d..c9b4d69152d 100644 --- a/src/Access/RowPolicy.h +++ b/src/Access/RowPolicy.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -91,7 +91,7 @@ struct RowPolicy : public IAccessEntity Type getType() const override { return TYPE; } /// Which roles or users should use this row policy. 
- ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; private: void setName(const String & name_) override; diff --git a/src/Access/RowPolicyCache.h b/src/Access/RowPolicyCache.h index 139949ae815..f7270c6fce9 100644 --- a/src/Access/RowPolicyCache.h +++ b/src/Access/RowPolicyCache.h @@ -27,7 +27,7 @@ private: void setPolicy(const RowPolicyPtr & policy_); RowPolicyPtr policy; - const ExtendedRoleSet * roles = nullptr; + const RolesOrUsersSet * roles = nullptr; std::shared_ptr> database_and_table_name; ASTPtr parsed_conditions[RowPolicy::MAX_CONDITION_TYPE]; }; diff --git a/src/Access/SettingsProfile.h b/src/Access/SettingsProfile.h index 9589b5b3eb5..210aa47c358 100644 --- a/src/Access/SettingsProfile.h +++ b/src/Access/SettingsProfile.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -14,7 +14,7 @@ struct SettingsProfile : public IAccessEntity SettingsProfileElements elements; /// Which roles or users should use this settings profile. - ExtendedRoleSet to_roles; + RolesOrUsersSet to_roles; bool equal(const IAccessEntity & other) const override; std::shared_ptr clone() const override { return cloneImpl(); } diff --git a/src/Access/User.h b/src/Access/User.h index da2fb14e131..4852fce375d 100644 --- a/src/Access/User.h +++ b/src/Access/User.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include @@ -19,7 +19,7 @@ struct User : public IAccessEntity AllowedClientHosts allowed_client_hosts = AllowedClientHosts::AnyHostTag{}; GrantedAccess access; GrantedRoles granted_roles; - ExtendedRoleSet default_roles = ExtendedRoleSet::AllTag{}; + RolesOrUsersSet default_roles = RolesOrUsersSet::AllTag{}; SettingsProfileElements settings; bool equal(const IAccessEntity & other) const override; diff --git a/src/Access/ya.make b/src/Access/ya.make index 970c0714a93..bdd62ae2b7b 100644 --- a/src/Access/ya.make +++ b/src/Access/ya.make @@ -17,7 +17,6 @@ SRCS( EnabledRolesInfo.cpp EnabledRowPolicies.cpp EnabledSettings.cpp - ExtendedRoleSet.cpp GrantedAccess.cpp GrantedRoles.cpp IAccessEntity.cpp @@ -29,6 +28,7 @@ SRCS( QuotaUsage.cpp Role.cpp RoleCache.cpp + RolesOrUsersSet.cpp RowPolicy.cpp RowPolicyCache.cpp SettingsConstraints.cpp diff --git a/src/Interpreters/InterpreterCreateQuotaQuery.cpp b/src/Interpreters/InterpreterCreateQuotaQuery.cpp index 0cca163beec..f45c2c9709d 100644 --- a/src/Interpreters/InterpreterCreateQuotaQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuotaQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -19,7 +19,7 @@ namespace Quota & quota, const ASTCreateQuotaQuery & query, const String & override_name, - const std::optional & override_to_roles) + const std::optional & override_to_roles) { if (!override_name.empty()) quota.setName(override_name); @@ -82,9 +82,9 @@ BlockIO InterpreterCreateQuotaQuery::execute() return executeDDLQueryOnCluster(query_ptr, context); } - std::optional roles_from_query; + std::optional roles_from_query; if (query.roles) - roles_from_query = ExtendedRoleSet{*query.roles, access_control, context.getUserID()}; + roles_from_query = RolesOrUsersSet{*query.roles, access_control, context.getUserID()}; if (query.alter) { diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp index bfd7d60b397..3a0ee3f16a1 100644 --- a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include 
#include @@ -18,7 +18,7 @@ namespace RowPolicy & policy, const ASTCreateRowPolicyQuery & query, const RowPolicy::NameParts & override_name, - const std::optional & override_to_roles) + const std::optional & override_to_roles) { if (!override_name.empty()) policy.setNameParts(override_name); @@ -58,9 +58,9 @@ BlockIO InterpreterCreateRowPolicyQuery::execute() } assert(query.names->cluster.empty()); - std::optional roles_from_query; + std::optional roles_from_query; if (query.roles) - roles_from_query = ExtendedRoleSet{*query.roles, access_control, context.getUserID()}; + roles_from_query = RolesOrUsersSet{*query.roles, access_control, context.getUserID()}; query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); diff --git a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp index ac2a4249986..2d5f4d499b7 100644 --- a/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/InterpreterCreateSettingsProfileQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -17,7 +17,7 @@ namespace const ASTCreateSettingsProfileQuery & query, const String & override_name, const std::optional & override_settings, - const std::optional & override_to_roles) + const std::optional & override_to_roles) { if (!override_name.empty()) profile.setName(override_name); @@ -58,9 +58,9 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() if (query.settings) settings_from_query = SettingsProfileElements{*query.settings, access_control}; - std::optional roles_from_query; + std::optional roles_from_query; if (query.to_roles) - roles_from_query = ExtendedRoleSet{*query.to_roles, access_control, context.getUserID()}; + roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, context.getUserID()}; if (query.alter) { diff --git a/src/Interpreters/InterpreterCreateUserQuery.cpp b/src/Interpreters/InterpreterCreateUserQuery.cpp index 8b57703f08c..111f698beb9 100644 --- a/src/Interpreters/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/InterpreterCreateUserQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -19,7 +19,7 @@ namespace User & user, const ASTCreateUserQuery & query, const std::shared_ptr & override_name, - const std::optional & override_default_roles, + const std::optional & override_default_roles, const std::optional & override_settings) { if (override_name) @@ -45,7 +45,7 @@ namespace if (query.add_hosts) user.allowed_client_hosts.add(*query.add_hosts); - auto set_default_roles = [&](const ExtendedRoleSet & default_roles_) + auto set_default_roles = [&](const RolesOrUsersSet & default_roles_) { if (!query.alter && !default_roles_.all) user.granted_roles.grant(default_roles_.getMatchingIDs()); @@ -73,10 +73,10 @@ BlockIO InterpreterCreateUserQuery::execute() auto access = context.getAccess(); access->checkAccess(query.alter ? 
AccessType::ALTER_USER : AccessType::CREATE_USER); - std::optional default_roles_from_query; + std::optional default_roles_from_query; if (query.default_roles) { - default_roles_from_query = ExtendedRoleSet{*query.default_roles, access_control}; + default_roles_from_query = RolesOrUsersSet{*query.default_roles, access_control}; if (!query.alter && !default_roles_from_query->all) { for (const UUID & role : default_roles_from_query->getMatchingIDs()) diff --git a/src/Interpreters/InterpreterGrantQuery.cpp b/src/Interpreters/InterpreterGrantQuery.cpp index c72e48c2019..8981c06f962 100644 --- a/src/Interpreters/InterpreterGrantQuery.cpp +++ b/src/Interpreters/InterpreterGrantQuery.cpp @@ -1,11 +1,11 @@ #include #include -#include +#include #include #include #include #include -#include +#include #include #include #include @@ -74,7 +74,7 @@ BlockIO InterpreterGrantQuery::execute() std::vector roles_from_query; if (query.roles) { - roles_from_query = ExtendedRoleSet{*query.roles, access_control}.getMatchingIDs(access_control); + roles_from_query = RolesOrUsersSet{*query.roles, access_control}.getMatchingIDs(access_control); for (const UUID & role_from_query : roles_from_query) access->checkAdminOption(role_from_query); } @@ -85,7 +85,7 @@ BlockIO InterpreterGrantQuery::execute() return executeDDLQueryOnCluster(query_ptr, context); } - std::vector to_roles = ExtendedRoleSet{*query.to_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); + std::vector to_roles = RolesOrUsersSet{*query.to_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); String current_database = context.getCurrentDatabase(); auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr @@ -115,7 +115,7 @@ void InterpreterGrantQuery::updateUserFromQuery(User & user, const ASTGrantQuery { std::vector roles_from_query; if (query.roles) - roles_from_query = ExtendedRoleSet{*query.roles}.getMatchingIDs(); + roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); updateFromQueryImpl(user, query, roles_from_query, {}); } @@ -124,7 +124,7 @@ void InterpreterGrantQuery::updateRoleFromQuery(Role & role, const ASTGrantQuery { std::vector roles_from_query; if (query.roles) - roles_from_query = ExtendedRoleSet{*query.roles}.getMatchingIDs(); + roles_from_query = RolesOrUsersSet{*query.roles}.getMatchingIDs(); updateFromQueryImpl(role, query, roles_from_query, {}); } diff --git a/src/Interpreters/InterpreterSetRoleQuery.cpp b/src/Interpreters/InterpreterSetRoleQuery.cpp index c627061dd51..f955c881b2e 100644 --- a/src/Interpreters/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/InterpreterSetRoleQuery.cpp @@ -1,8 +1,8 @@ #include #include -#include +#include #include -#include +#include #include #include @@ -38,7 +38,7 @@ void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) } else { - ExtendedRoleSet roles_from_query{*query.roles, access_control}; + RolesOrUsersSet roles_from_query{*query.roles, access_control}; boost::container::flat_set new_current_roles; if (roles_from_query.all) { @@ -65,8 +65,8 @@ void InterpreterSetRoleQuery::setDefaultRole(const ASTSetRoleQuery & query) context.checkAccess(AccessType::ALTER_USER); auto & access_control = context.getAccessControlManager(); - std::vector to_users = ExtendedRoleSet{*query.to_users, access_control, context.getUserID()}.getMatchingIDs(access_control); - ExtendedRoleSet roles_from_query{*query.roles, access_control}; + std::vector to_users = RolesOrUsersSet{*query.to_users, access_control, 
context.getUserID()}.getMatchingIDs(access_control); + RolesOrUsersSet roles_from_query{*query.roles, access_control}; auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { @@ -79,7 +79,7 @@ void InterpreterSetRoleQuery::setDefaultRole(const ASTSetRoleQuery & query) } -void InterpreterSetRoleQuery::updateUserSetDefaultRoles(User & user, const ExtendedRoleSet & roles_from_query) +void InterpreterSetRoleQuery::updateUserSetDefaultRoles(User & user, const RolesOrUsersSet & roles_from_query) { if (!roles_from_query.all) { diff --git a/src/Interpreters/InterpreterSetRoleQuery.h b/src/Interpreters/InterpreterSetRoleQuery.h index 91cf5fc1b2e..0919b0f23f9 100644 --- a/src/Interpreters/InterpreterSetRoleQuery.h +++ b/src/Interpreters/InterpreterSetRoleQuery.h @@ -9,7 +9,7 @@ namespace DB class Context; class ASTSetRoleQuery; -struct ExtendedRoleSet; +struct RolesOrUsersSet; struct User; @@ -20,7 +20,7 @@ public: BlockIO execute() override; - static void updateUserSetDefaultRoles(User & user, const ExtendedRoleSet & roles_from_query); + static void updateUserSetDefaultRoles(User & user, const RolesOrUsersSet & roles_from_query); private: void setRole(const ASTSetRoleQuery & query); diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index d3ab5ac5001..9c28c3d0bd2 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -51,7 +51,7 @@ namespace if (user.allowed_client_hosts != AllowedClientHosts::AnyHostTag{}) query->hosts = user.allowed_client_hosts; - if (user.default_roles != ExtendedRoleSet::AllTag{}) + if (user.default_roles != RolesOrUsersSet::AllTag{}) { if (attach_mode) query->default_roles = user.default_roles.toAST(); diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index 130749526c7..c6e3ccce7c7 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include @@ -29,7 +29,7 @@ namespace { ASTs res; - std::shared_ptr to_roles = std::make_shared(); + std::shared_ptr to_roles = std::make_shared(); to_roles->names.push_back(grantee.getName()); auto grants_and_partial_revokes = grantee.access.getGrantsAndPartialRevokes(); @@ -87,9 +87,9 @@ namespace grant_query->admin_option = admin_option; grant_query->to_roles = to_roles; if (attach_mode) - grant_query->roles = ExtendedRoleSet{roles}.toAST(); + grant_query->roles = RolesOrUsersSet{roles}.toAST(); else - grant_query->roles = ExtendedRoleSet{roles}.toASTWithNames(*manager); + grant_query->roles = RolesOrUsersSet{roles}.toASTWithNames(*manager); res.push_back(std::move(grant_query)); } diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/ASTCreateQuotaQuery.cpp index d33af6126f1..fc4e2edb9e7 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/ASTCreateQuotaQuery.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -104,7 +104,7 @@ namespace } } - void formatToRoles(const ASTExtendedRoleSet & roles, const IAST::FormatSettings & settings) + void formatToRoles(const ASTRolesOrUsersSet & roles, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? 
IAST::hilite_keyword : "") << " TO " << (settings.hilite ? IAST::hilite_none : ""); roles.format(settings); diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/ASTCreateQuotaQuery.h index 370083f4e25..002c374322f 100644 --- a/src/Parsers/ASTCreateQuotaQuery.h +++ b/src/Parsers/ASTCreateQuotaQuery.h @@ -7,7 +7,7 @@ namespace DB { -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; /** CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name @@ -51,7 +51,7 @@ public: }; std::vector all_limits; - std::shared_ptr roles; + std::shared_ptr roles; String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/ASTCreateRowPolicyQuery.cpp index caa52e3ac58..580642f2da5 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/ASTCreateRowPolicyQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -116,7 +116,7 @@ namespace } - void formatToRoles(const ASTExtendedRoleSet & roles, const IAST::FormatSettings & settings) + void formatToRoles(const ASTRolesOrUsersSet & roles, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TO " << (settings.hilite ? IAST::hilite_none : ""); roles.format(settings); diff --git a/src/Parsers/ASTCreateRowPolicyQuery.h b/src/Parsers/ASTCreateRowPolicyQuery.h index af561b47e12..4a7572eaefd 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/ASTCreateRowPolicyQuery.h @@ -10,7 +10,7 @@ namespace DB { class ASTRowPolicyNames; -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; /** CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] name ON [database.]table * [AS {PERMISSIVE | RESTRICTIVE}] @@ -43,7 +43,7 @@ public: std::optional is_restrictive; std::array, RowPolicy::MAX_CONDITION_TYPE> conditions; /// `nullopt` means "not set", `nullptr` means set to NONE. - std::shared_ptr roles; + std::shared_ptr roles; String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.cpp b/src/Parsers/ASTCreateSettingsProfileQuery.cpp index 21d8c20ffc1..77c2f1b22d7 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ASTCreateSettingsProfileQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include @@ -32,7 +32,7 @@ namespace settings.format(format); } - void formatToRoles(const ASTExtendedRoleSet & roles, const IAST::FormatSettings & settings) + void formatToRoles(const ASTRolesOrUsersSet & roles, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TO " << (settings.hilite ? 
IAST::hilite_none : ""); roles.format(settings); diff --git a/src/Parsers/ASTCreateSettingsProfileQuery.h b/src/Parsers/ASTCreateSettingsProfileQuery.h index bb2a9474504..119019093b2 100644 --- a/src/Parsers/ASTCreateSettingsProfileQuery.h +++ b/src/Parsers/ASTCreateSettingsProfileQuery.h @@ -7,7 +7,7 @@ namespace DB { class ASTSettingsProfileElements; -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; /** CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name @@ -34,7 +34,7 @@ public: std::shared_ptr settings; - std::shared_ptr to_roles; + std::shared_ptr to_roles; String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index 60f61fbf51f..2ba454b3d65 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -167,7 +167,7 @@ namespace } - void formatDefaultRoles(const ASTExtendedRoleSet & default_roles, const IAST::FormatSettings & settings) + void formatDefaultRoles(const ASTRolesOrUsersSet & default_roles, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " DEFAULT ROLE " << (settings.hilite ? IAST::hilite_none : ""); default_roles.format(settings); diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 565c82bc98e..5c8f8fcf563 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -9,7 +9,7 @@ namespace DB { class ASTUserNamesWithHost; -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name @@ -45,7 +45,7 @@ public: std::optional add_hosts; std::optional remove_hosts; - std::shared_ptr default_roles; + std::shared_ptr default_roles; std::shared_ptr settings; diff --git a/src/Parsers/ASTGrantQuery.cpp b/src/Parsers/ASTGrantQuery.cpp index 8114bef0766..cf1943477b2 100644 --- a/src/Parsers/ASTGrantQuery.cpp +++ b/src/Parsers/ASTGrantQuery.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include @@ -75,7 +75,7 @@ namespace } - void formatToRoles(const ASTExtendedRoleSet & to_roles, ASTGrantQuery::Kind kind, const IAST::FormatSettings & settings) + void formatToRoles(const ASTRolesOrUsersSet & to_roles, ASTGrantQuery::Kind kind, const IAST::FormatSettings & settings) { using Kind = ASTGrantQuery::Kind; settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << ((kind == Kind::GRANT) ? " TO " : " FROM ") diff --git a/src/Parsers/ASTGrantQuery.h b/src/Parsers/ASTGrantQuery.h index 7e3321799fb..9a11f5dc509 100644 --- a/src/Parsers/ASTGrantQuery.h +++ b/src/Parsers/ASTGrantQuery.h @@ -7,7 +7,7 @@ namespace DB { -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; /** GRANT access_type[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user_name | CURRENT_USER} [,...] 
[WITH GRANT OPTION] @@ -27,8 +27,8 @@ public: Kind kind = Kind::GRANT; bool attach = false; AccessRightsElements access_rights_elements; - std::shared_ptr roles; - std::shared_ptr to_roles; + std::shared_ptr roles; + std::shared_ptr to_roles; bool grant_option = false; bool admin_option = false; diff --git a/src/Parsers/ASTExtendedRoleSet.cpp b/src/Parsers/ASTRolesOrUsersSet.cpp similarity index 93% rename from src/Parsers/ASTExtendedRoleSet.cpp rename to src/Parsers/ASTRolesOrUsersSet.cpp index 1803af11ab3..a666d8ae1d5 100644 --- a/src/Parsers/ASTExtendedRoleSet.cpp +++ b/src/Parsers/ASTRolesOrUsersSet.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -20,7 +20,7 @@ namespace } } -void ASTExtendedRoleSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const +void ASTRolesOrUsersSet::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { if (empty()) { @@ -74,7 +74,7 @@ void ASTExtendedRoleSet::formatImpl(const FormatSettings & settings, FormatState } -void ASTExtendedRoleSet::replaceCurrentUserTagWithName(const String & current_user_name) +void ASTRolesOrUsersSet::replaceCurrentUserTagWithName(const String & current_user_name) { if (current_user) { diff --git a/src/Parsers/ASTExtendedRoleSet.h b/src/Parsers/ASTRolesOrUsersSet.h similarity index 57% rename from src/Parsers/ASTExtendedRoleSet.h rename to src/Parsers/ASTRolesOrUsersSet.h index 656f563bd9a..f257ce1066c 100644 --- a/src/Parsers/ASTExtendedRoleSet.h +++ b/src/Parsers/ASTRolesOrUsersSet.h @@ -7,7 +7,7 @@ namespace DB { /// Represents a set of users/roles like /// {user_name | role_name | CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {user_name | role_name | CURRENT_USER} [,...] -class ASTExtendedRoleSet : public IAST +class ASTRolesOrUsersSet : public IAST { public: Strings names; @@ -16,15 +16,15 @@ public: Strings except_names; bool except_current_user = false; - bool id_mode = false; /// true if `names` and `except_names` keep UUIDs, not names. - bool can_contain_roles = true; /// true if this set can contain names of roles. - bool can_contain_users = true; /// true if this set can contain names of users. + bool id_mode = false; /// true if `names` and `except_names` keep UUIDs, not names. + bool allow_role_names = true; /// true if this set can contain names of roles. + bool allow_user_names = true; /// true if this set can contain names of users. bool empty() const { return names.empty() && !current_user && !all; } void replaceCurrentUserTagWithName(const String & current_user_name); - String getID(char) const override { return "ExtendedRoleSet"; } - ASTPtr clone() const override { return std::make_shared(*this); } + String getID(char) const override { return "RolesOrUsersSet"; } + ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; } diff --git a/src/Parsers/ASTSetRoleQuery.cpp b/src/Parsers/ASTSetRoleQuery.cpp index 0c8842fdac6..b5e0c05e083 100644 --- a/src/Parsers/ASTSetRoleQuery.cpp +++ b/src/Parsers/ASTSetRoleQuery.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include diff --git a/src/Parsers/ASTSetRoleQuery.h b/src/Parsers/ASTSetRoleQuery.h index 8f1fb357d86..f0170ae6af2 100644 --- a/src/Parsers/ASTSetRoleQuery.h +++ b/src/Parsers/ASTSetRoleQuery.h @@ -5,7 +5,7 @@ namespace DB { -class ASTExtendedRoleSet; +class ASTRolesOrUsersSet; /** SET ROLE {DEFAULT | NONE | role [,...] 
| ALL | ALL EXCEPT role [,...]} * SET DEFAULT ROLE {NONE | role [,...] | ALL | ALL EXCEPT role [,...]} TO {user|CURRENT_USER} [,...] @@ -21,8 +21,8 @@ public: }; Kind kind = Kind::SET_ROLE; - std::shared_ptr roles; - std::shared_ptr to_users; + std::shared_ptr roles; + std::shared_ptr to_users; String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index 5eb138d6be0..e953f698de0 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -3,10 +3,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include @@ -185,15 +185,17 @@ namespace }); } - bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr node; - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) || !ParserExtendedRoleSet{}.useIDMode(id_mode).parse(pos, node, expected)) + ParserRolesOrUsersSet roles_p; + roles_p.allowAll().allowRoleNames().allowUserNames().allowCurrentUser().useIDMode(id_mode); + if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) || !roles_p.parse(pos, node, expected)) return false; - roles = std::static_pointer_cast(node); + roles = std::static_pointer_cast(node); return true; }); } @@ -266,7 +268,7 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe break; } - std::shared_ptr roles; + std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); if (cluster.empty()) diff --git a/src/Parsers/ParserCreateQuotaQuery.h b/src/Parsers/ParserCreateQuotaQuery.h index 786c8292b15..e2185d4d5ff 100644 --- a/src/Parsers/ParserCreateQuotaQuery.h +++ b/src/Parsers/ParserCreateQuotaQuery.h @@ -24,7 +24,7 @@ namespace DB class ParserCreateQuotaQuery : public IParserBase { public: - ParserCreateQuotaQuery & enableAttachMode(bool enable_) { attach_mode = enable_; return *this; } + void useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; } protected: const char * getName() const override { return "CREATE QUOTA or ALTER QUOTA query"; } diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/ParserCreateRoleQuery.cpp index 08dd31c51a3..6feeefa4657 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/ParserCreateRoleQuery.cpp @@ -31,7 +31,9 @@ namespace return false; ASTPtr new_settings_ast; - if (!ParserSettingsProfileElements{}.useIDMode(id_mode).parse(pos, new_settings_ast, expected)) + ParserSettingsProfileElements elements_p; + elements_p.useIDMode(id_mode); + if (!elements_p.parse(pos, new_settings_ast, expected)) return false; if (!settings) diff --git a/src/Parsers/ParserCreateRoleQuery.h b/src/Parsers/ParserCreateRoleQuery.h index 2afeb7f7ec4..1fdee67eaab 100644 --- a/src/Parsers/ParserCreateRoleQuery.h +++ b/src/Parsers/ParserCreateRoleQuery.h @@ -16,7 +16,7 @@ namespace DB class ParserCreateRoleQuery : public IParserBase { public: - ParserCreateRoleQuery & enableAttachMode(bool enable) { attach_mode = enable; return *this; } + void useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; } protected: const char * getName() const override { return "CREATE ROLE or ALTER ROLE query"; } diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 4f5f2989a7b..061cca4ce63 100644 --- 
a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -173,16 +173,20 @@ namespace }); } - bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) - || !ParserExtendedRoleSet{}.useIDMode(id_mode).parse(pos, ast, expected)) + if (roles || !ParserKeyword{"TO"}.ignore(pos, expected)) return false; - roles = std::static_pointer_cast(ast); + ParserRolesOrUsersSet roles_p; + roles_p.allowAll().allowRoleNames().allowUserNames().allowCurrentUser().useIDMode(id_mode); + if (!roles_p.parse(pos, ast, expected)) + return false; + + roles = std::static_pointer_cast(ast); return true; }); } @@ -259,7 +263,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & break; } - std::shared_ptr roles; + std::shared_ptr roles; parseToRoles(pos, expected, attach_mode, roles); if (cluster.empty()) diff --git a/src/Parsers/ParserCreateRowPolicyQuery.h b/src/Parsers/ParserCreateRowPolicyQuery.h index 5110998e53d..1a64f2e893c 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.h +++ b/src/Parsers/ParserCreateRowPolicyQuery.h @@ -24,7 +24,7 @@ namespace DB class ParserCreateRowPolicyQuery : public IParserBase { public: - ParserCreateRowPolicyQuery & enableAttachMode(bool enable_) { attach_mode = enable_; return *this; } + void useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; } protected: const char * getName() const override { return "CREATE ROW POLICY or ALTER ROW POLICY query"; } diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 4d5a9c09ad8..56bd39b9230 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include +#include #include @@ -33,7 +33,9 @@ namespace return false; ASTPtr new_settings_ast; - if (!ParserSettingsProfileElements{}.useIDMode(id_mode).enableInheritKeyword(true).parse(pos, new_settings_ast, expected)) + ParserSettingsProfileElements elements_p; + elements_p.useInheritKeyword(true).useIDMode(id_mode); + if (!elements_p.parse(pos, new_settings_ast, expected)) return false; if (!settings) @@ -44,16 +46,20 @@ namespace }); } - bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) - || !ParserExtendedRoleSet{}.useIDMode(id_mode).parse(pos, ast, expected)) + if (roles || !ParserKeyword{"TO"}.ignore(pos, expected)) return false; - roles = std::static_pointer_cast(ast); + ParserRolesOrUsersSet roles_p; + roles_p.allowAll().allowRoleNames().allowUserNames().allowCurrentUser().useIDMode(id_mode); + if (!roles_p.parse(pos, ast, expected)) + return false; + + roles = std::static_pointer_cast(ast); return true; }); } @@ -122,7 +128,7 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec break; } - std::shared_ptr to_roles; + std::shared_ptr to_roles; 
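// [Editorial note, not part of the original patch.] The change repeated across
// these parser files replaces the old chained one-liner, whose capabilities
// all defaulted to enabled:
//
//     ParserExtendedRoleSet{}.useIDMode(id_mode).parse(pos, ast, expected)
//
// with a named parser whose capabilities default to disabled and must be
// switched on explicitly. An illustrative sketch of the new call shape, taken
// from the patch's own parseToRoles helpers:
//
//     ParserRolesOrUsersSet roles_p;
//     roles_p.allowAll().allowRoleNames().allowUserNames().allowCurrentUser().useIDMode(id_mode);
//     if (!roles_p.parse(pos, ast, expected))
//         return false;
//
// With "disallowed by default", each call site now states exactly which of
// ALL / role names / user names / CURRENT_USER it accepts.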
parseToRoles(pos, expected, attach_mode, to_roles); if (cluster.empty()) diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.h b/src/Parsers/ParserCreateSettingsProfileQuery.h index 073a8ca75ae..ab730fcd8eb 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.h +++ b/src/Parsers/ParserCreateSettingsProfileQuery.h @@ -16,7 +16,7 @@ namespace DB class ParserCreateSettingsProfileQuery : public IParserBase { public: - ParserCreateSettingsProfileQuery & enableAttachMode(bool enable) { attach_mode = enable; return *this; } + void useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; } protected: const char * getName() const override { return "CREATE SETTINGS PROFILE or ALTER SETTINGS PROFILE query"; } diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index e99457a2f87..ff7f2dc8790 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -7,9 +7,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include #include @@ -186,7 +186,7 @@ namespace } - bool parseDefaultRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & default_roles) + bool parseDefaultRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & default_roles) { return IParserBase::wrapParseImpl(pos, [&] { @@ -194,11 +194,13 @@ namespace return false; ASTPtr ast; - if (!ParserExtendedRoleSet{}.enableCurrentUserKeyword(false).useIDMode(id_mode).parse(pos, ast, expected)) + ParserRolesOrUsersSet default_roles_p; + default_roles_p.allowAll().allowRoleNames().useIDMode(id_mode); + if (!default_roles_p.parse(pos, ast, expected)) return false; - default_roles = typeid_cast>(ast); - default_roles->can_contain_users = false; + default_roles = typeid_cast>(ast); + default_roles->allow_user_names = false; return true; }); } @@ -212,7 +214,9 @@ namespace return false; ASTPtr new_settings_ast; - if (!ParserSettingsProfileElements{}.useIDMode(id_mode).parse(pos, new_settings_ast, expected)) + ParserSettingsProfileElements elements_p; + elements_p.useInheritKeyword(true).useIDMode(id_mode); + if (!elements_p.parse(pos, new_settings_ast, expected)) return false; if (!settings) @@ -276,7 +280,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::optional hosts; std::optional add_hosts; std::optional remove_hosts; - std::shared_ptr default_roles; + std::shared_ptr default_roles; std::shared_ptr settings; String cluster; diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index 2a890f41060..1628f5ea5b9 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -20,7 +20,7 @@ namespace DB class ParserCreateUserQuery : public IParserBase { public: - ParserCreateUserQuery & enableAttachMode(bool enable) { attach_mode = enable; return *this; } + ParserCreateUserQuery & useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; return *this; } protected: const char * getName() const override { return "CREATE USER or ALTER USER query"; } diff --git a/src/Parsers/ParserExtendedRoleSet.h b/src/Parsers/ParserExtendedRoleSet.h deleted file mode 100644 index df723786bd9..00000000000 --- a/src/Parsers/ParserExtendedRoleSet.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ -/** Parses a string like this: - * {role|CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {role|CURRENT_USER} [,...] 
- */ -class ParserExtendedRoleSet : public IParserBase -{ -public: - ParserExtendedRoleSet & enableAllKeyword(bool enable_) { all_keyword = enable_; return *this; } - ParserExtendedRoleSet & enableCurrentUserKeyword(bool enable_) { current_user_keyword = enable_; return *this; } - ParserExtendedRoleSet & useIDMode(bool enable_) { id_mode = enable_; return *this; } - -protected: - const char * getName() const override { return "ExtendedRoleSet"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - bool all_keyword = true; - bool current_user_keyword = true; - bool id_mode = false; -}; - -} diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 64dde8f6524..03c0daa08a3 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -1,10 +1,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include @@ -199,21 +199,23 @@ namespace } - bool parseRoles(IParser::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) + bool parseRoles(IParser::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (!ParserExtendedRoleSet{}.enableAllKeyword(false).enableCurrentUserKeyword(false).useIDMode(id_mode).parse(pos, ast, expected)) + ParserRolesOrUsersSet roles_p; + roles_p.allowRoleNames().useIDMode(id_mode); + if (!roles_p.parse(pos, ast, expected)) return false; - roles = typeid_cast>(ast); + roles = typeid_cast>(ast); return true; }); } - bool parseToRoles(IParser::Pos & pos, Expected & expected, ASTGrantQuery::Kind kind, std::shared_ptr & to_roles) + bool parseToRoles(IParser::Pos & pos, Expected & expected, ASTGrantQuery::Kind kind, std::shared_ptr & to_roles) { return IParserBase::wrapParseImpl(pos, [&] { @@ -230,10 +232,12 @@ namespace } ASTPtr ast; - if (!ParserExtendedRoleSet{}.enableAllKeyword(kind == Kind::REVOKE).parse(pos, ast, expected)) + ParserRolesOrUsersSet roles_p; + roles_p.allowRoleNames().allowUserNames().allowCurrentUser().allowAll(kind == Kind::REVOKE); + if (!roles_p.parse(pos, ast, expected)) return false; - to_roles = typeid_cast>(ast); + to_roles = typeid_cast>(ast); return true; }); } @@ -282,14 +286,14 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } AccessRightsElements elements; - std::shared_ptr roles; + std::shared_ptr roles; if (!parseAccessRightsElements(pos, expected, elements) && !parseRoles(pos, expected, attach, roles)) return false; if (cluster.empty()) parseOnCluster(pos, expected, cluster); - std::shared_ptr to_roles; + std::shared_ptr to_roles; if (!parseToRoles(pos, expected, kind, to_roles)) return false; diff --git a/src/Parsers/ParserGrantQuery.h b/src/Parsers/ParserGrantQuery.h index ac5bfbb7538..b14f175c12b 100644 --- a/src/Parsers/ParserGrantQuery.h +++ b/src/Parsers/ParserGrantQuery.h @@ -12,7 +12,7 @@ namespace DB class ParserGrantQuery : public IParserBase { public: - ParserGrantQuery & enableAttachMode(bool enable) { attach_mode = enable; return *this; } + ParserGrantQuery & useAttachMode(bool attach_mode_ = true) { attach_mode = attach_mode_; return *this; } protected: const char * getName() const override { return "GRANT or REVOKE query"; } diff --git a/src/Parsers/ParserExtendedRoleSet.cpp b/src/Parsers/ParserRolesOrUsersSet.cpp similarity index 81% rename from src/Parsers/ParserExtendedRoleSet.cpp rename to src/Parsers/ParserRolesOrUsersSet.cpp index 80f05c45f5b..1ba2c05f671 100644 --- 
a/src/Parsers/ParserExtendedRoleSet.cpp +++ b/src/Parsers/ParserRolesOrUsersSet.cpp @@ -1,8 +1,8 @@ -#include +#include #include #include #include -#include +#include #include #include @@ -39,8 +39,8 @@ namespace IParserBase::Pos & pos, Expected & expected, bool id_mode, - bool all_keyword_enabled, - bool current_user_keyword_enabled, + bool allow_all, + bool allow_current_user_tag, Strings & names, bool & all, bool & current_user) @@ -56,7 +56,7 @@ namespace { } else if ( - current_user_keyword_enabled + allow_current_user_tag && (ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) || ParserKeyword{"currentUser"}.ignore(pos, expected))) { if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) @@ -66,7 +66,7 @@ namespace } res_current_user = true; } - else if (all_keyword_enabled && ParserKeyword{"ALL"}.ignore(pos, expected)) + else if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected)) { res_all = true; } @@ -93,7 +93,7 @@ namespace IParserBase::Pos & pos, Expected & expected, bool id_mode, - bool current_user_keyword_enabled, + bool allow_current_user_tag, Strings & except_names, bool & except_current_user) { @@ -103,13 +103,13 @@ namespace return false; bool dummy; - return parseBeforeExcept(pos, expected, id_mode, false, current_user_keyword_enabled, except_names, dummy, except_current_user); + return parseBeforeExcept(pos, expected, id_mode, false, allow_current_user_tag, except_names, dummy, except_current_user); }); } } -bool ParserExtendedRoleSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +bool ParserRolesOrUsersSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { Strings names; bool current_user = false; @@ -117,21 +117,23 @@ bool ParserExtendedRoleSet::parseImpl(Pos & pos, ASTPtr & node, Expected & expec Strings except_names; bool except_current_user = false; - if (!parseBeforeExcept(pos, expected, id_mode, all_keyword, current_user_keyword, names, all, current_user)) + if (!parseBeforeExcept(pos, expected, id_mode, allow_all, allow_current_user, names, all, current_user)) return false; - parseExceptAndAfterExcept(pos, expected, id_mode, current_user_keyword, except_names, except_current_user); + parseExceptAndAfterExcept(pos, expected, id_mode, allow_current_user, except_names, except_current_user); if (all) names.clear(); - auto result = std::make_shared(); + auto result = std::make_shared(); result->names = std::move(names); result->current_user = current_user; result->all = all; result->except_names = std::move(except_names); result->except_current_user = except_current_user; result->id_mode = id_mode; + result->allow_user_names = allow_user_names; + result->allow_role_names = allow_role_names; node = result; return true; } diff --git a/src/Parsers/ParserRolesOrUsersSet.h b/src/Parsers/ParserRolesOrUsersSet.h new file mode 100644 index 00000000000..c71012e874c --- /dev/null +++ b/src/Parsers/ParserRolesOrUsersSet.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +namespace DB +{ +/** Parses a string like this: + * {role|CURRENT_USER} [,...] | NONE | ALL | ALL EXCEPT {role|CURRENT_USER} [,...] 
+ */ +class ParserRolesOrUsersSet : public IParserBase +{ +public: + ParserRolesOrUsersSet & allowAll(bool allow_all_ = true) { allow_all = allow_all_; return *this; } + ParserRolesOrUsersSet & allowUserNames(bool allow_user_names_ = true) { allow_user_names = allow_user_names_; return *this; } + ParserRolesOrUsersSet & allowRoleNames(bool allow_role_names_ = true) { allow_role_names = allow_role_names_; return *this; } + ParserRolesOrUsersSet & allowCurrentUser(bool allow_current_user_ = true) { allow_current_user = allow_current_user_; return *this; } + ParserRolesOrUsersSet & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + +protected: + const char * getName() const override { return "RolesOrUsersSet"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + +private: + bool allow_all = false; + bool allow_user_names = false; + bool allow_role_names = false; + bool allow_current_user = false; + bool id_mode = false; +}; + +} diff --git a/src/Parsers/ParserSetRoleQuery.cpp b/src/Parsers/ParserSetRoleQuery.cpp index a69480f89eb..e8734f8dfc1 100644 --- a/src/Parsers/ParserSetRoleQuery.cpp +++ b/src/Parsers/ParserSetRoleQuery.cpp @@ -1,29 +1,31 @@ #include #include #include -#include -#include +#include +#include namespace DB { namespace { - bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & roles) + bool parseRoles(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (!ParserExtendedRoleSet{}.enableCurrentUserKeyword(false).parse(pos, ast, expected)) + ParserRolesOrUsersSet roles_p; + roles_p.allowRoleNames().allowAll(); + if (!roles_p.parse(pos, ast, expected)) return false; - roles = typeid_cast>(ast); - roles->can_contain_users = false; + roles = typeid_cast>(ast); + roles->allow_user_names = false; return true; }); } - bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & to_users) + bool parseToUsers(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & to_users) { return IParserBase::wrapParseImpl(pos, [&] { @@ -31,11 +33,13 @@ namespace return false; ASTPtr ast; - if (!ParserExtendedRoleSet{}.enableAllKeyword(false).parse(pos, ast, expected)) + ParserRolesOrUsersSet users_p; + users_p.allowUserNames().allowCurrentUser(); + if (!users_p.parse(pos, ast, expected)) return false; - to_users = typeid_cast>(ast); - to_users->can_contain_roles = false; + to_users = typeid_cast>(ast); + to_users->allow_role_names = false; return true; }); } @@ -55,8 +59,8 @@ bool ParserSetRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else return false; - std::shared_ptr roles; - std::shared_ptr to_users; + std::shared_ptr roles; + std::shared_ptr to_users; if ((kind == Kind::SET_ROLE) || (kind == Kind::SET_DEFAULT_ROLE)) { diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp index 1dccae50cf5..2dd65e6ae7b 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/ParserSettingsProfileElement.cpp @@ -109,7 +109,7 @@ bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected std::optional readonly; if (ParserKeyword{"PROFILE"}.ignore(pos, expected) || - (enable_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected))) + (use_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected))) { if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile)) return false; @@ -140,7 +140,7 @@ bool 
ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected result->max_value = std::move(max_value); result->readonly = readonly; result->id_mode = id_mode; - result->use_inherit_keyword = enable_inherit_keyword; + result->use_inherit_keyword = use_inherit_keyword; node = result; return true; } @@ -155,10 +155,12 @@ bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected } else { + ParserSettingsProfileElement element_p; + element_p.useIDMode(id_mode).useInheritKeyword(use_inherit_keyword); do { ASTPtr ast; - if (!ParserSettingsProfileElement{}.useIDMode(id_mode).enableInheritKeyword(enable_inherit_keyword).parse(pos, ast, expected)) + if (!element_p.parse(pos, ast, expected)) return false; auto element = typeid_cast>(ast); elements.push_back(std::move(element)); diff --git a/src/Parsers/ParserSettingsProfileElement.h b/src/Parsers/ParserSettingsProfileElement.h index 309c797e645..8843591a56c 100644 --- a/src/Parsers/ParserSettingsProfileElement.h +++ b/src/Parsers/ParserSettingsProfileElement.h @@ -11,8 +11,8 @@ namespace DB class ParserSettingsProfileElement : public IParserBase { public: - ParserSettingsProfileElement & useIDMode(bool enable_) { id_mode = enable_; return *this; } - ParserSettingsProfileElement & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; } + ParserSettingsProfileElement & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + ParserSettingsProfileElement & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } protected: const char * getName() const override { return "SettingsProfileElement"; } @@ -20,15 +20,15 @@ protected: private: bool id_mode = false; - bool enable_inherit_keyword = false; + bool use_inherit_keyword = false; }; class ParserSettingsProfileElements : public IParserBase { public: - ParserSettingsProfileElements & useIDMode(bool enable_) { id_mode = enable_; return *this; } - ParserSettingsProfileElements & enableInheritKeyword(bool enable_) { enable_inherit_keyword = enable_; return *this; } + ParserSettingsProfileElements & useIDMode(bool id_mode_ = true) { id_mode = id_mode_; return *this; } + ParserSettingsProfileElements & useInheritKeyword(bool use_inherit_keyword_ = true) { use_inherit_keyword = use_inherit_keyword_; return *this; } protected: const char * getName() const override { return "SettingsProfileElements"; } @@ -36,7 +36,7 @@ protected: private: bool id_mode = false; - bool enable_inherit_keyword = false; + bool use_inherit_keyword = false; }; } diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 60672c0c116..fd60c2f3551 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -21,7 +21,6 @@ SRCS( ASTDropAccessEntityQuery.cpp ASTDropQuery.cpp ASTExpressionList.cpp - ASTExtendedRoleSet.cpp ASTFunction.cpp ASTFunctionWithKeyValueArguments.cpp ASTGrantQuery.cpp @@ -37,6 +36,7 @@ SRCS( ASTQueryWithOnCluster.cpp ASTQueryWithOutput.cpp ASTQueryWithTableAndOutput.cpp + ASTRolesOrUsersSet.cpp ASTRowPolicyName.cpp ASTSampleRatio.cpp ASTSelectQuery.cpp @@ -81,7 +81,6 @@ SRCS( ParserDictionaryAttributeDeclaration.cpp ParserDropAccessEntityQuery.cpp ParserDropQuery.cpp - ParserExtendedRoleSet.cpp ParserGrantQuery.cpp ParserInsertQuery.cpp ParserKillQueryQuery.cpp @@ -90,6 +89,7 @@ SRCS( ParserQuery.cpp ParserQueryWithOutput.cpp ParserRenameQuery.cpp + ParserRolesOrUsersSet.cpp ParserRowPolicyName.cpp ParserSampleRatio.cpp ParserSelectQuery.cpp diff --git 
a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp index f0a1b21368e..5dda0caf201 100644 --- a/src/Storages/System/StorageSystemPrivileges.cpp +++ b/src/Storages/System/StorageSystemPrivileges.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index a3b687dc011..5d8c0be5861 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -72,7 +72,7 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, const Context & const String & storage_name, const std::vector & all_limits, KeyType key_type, - const ExtendedRoleSet & apply_to) + const RolesOrUsersSet & apply_to) { column_name.insertData(name.data(), name.length()); column_id.push_back(id); diff --git a/src/Storages/System/StorageSystemRoleGrants.cpp b/src/Storages/System/StorageSystemRoleGrants.cpp index 00147a0dae6..0f0fcd831d9 100644 --- a/src/Storages/System/StorageSystemRoleGrants.cpp +++ b/src/Storages/System/StorageSystemRoleGrants.cpp @@ -78,7 +78,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, const Conte auto add_rows = [&](const String & grantee_name, IAccessEntity::Type grantee_type, const GrantedRoles & granted_roles, - const ExtendedRoleSet * default_roles) + const RolesOrUsersSet * default_roles) { for (const auto & role_id : granted_roles.roles) { @@ -99,7 +99,7 @@ void StorageSystemRoleGrants::fillData(MutableColumns & res_columns, const Conte continue; const GrantedRoles * granted_roles = nullptr; - const ExtendedRoleSet * default_roles = nullptr; + const RolesOrUsersSet * default_roles = nullptr; if (auto role = typeid_cast(entity)) granted_roles = &role->granted_roles; else if (auto user = typeid_cast(entity)) diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index ca77a5182a5..9f5267b3a9b 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -87,7 +87,7 @@ void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, const Cont const String & storage_name, const std::array & conditions, bool is_restrictive, - const ExtendedRoleSet & apply_to) + const RolesOrUsersSet & apply_to) { column_name.insertData(name.data(), name.length()); column_short_name.insertData(name_parts.short_name.data(), name_parts.short_name.length()); diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index d02c5910608..610e5bb68f5 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -51,7 +51,7 @@ void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, const const UUID & id, const String & storage_name, const SettingsProfileElements & elements, - const ExtendedRoleSet & apply_to) + const RolesOrUsersSet & apply_to) { column_name.insertData(name.data(), name.length()); column_id.push_back(id); diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 
e0755fe59ab..7f3fe058d9e 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,7 +80,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & const String & storage_name, const Authentication & authentication, const AllowedClientHosts & allowed_hosts, - const ExtendedRoleSet & default_roles) + const RolesOrUsersSet & default_roles) { column_name.insertData(name.data(), name.length()); column_id.push_back(id); From 7d1951a79b6d5b6826cb0da7e4fb35ee18e8dc09 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 1 Jun 2020 13:45:07 +0300 Subject: [PATCH 0735/2229] Improve messages for errors in access storages. --- src/Access/IAccessStorage.cpp | 328 ++++++++++++++++++---------------- 1 file changed, 169 insertions(+), 159 deletions(-) diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index a7af61c7712..6813b5eb558 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -24,16 +24,141 @@ namespace using EntityType = IAccessStorage::EntityType; using EntityTypeInfo = IAccessStorage::EntityTypeInfo; - bool isNotFoundErrorCode(int error_code) + + String outputID(const UUID & id) { - if (error_code == ErrorCodes::ACCESS_ENTITY_NOT_FOUND) - return true; + return "ID(" + toString(id) + ")"; + } - for (auto type : ext::range(EntityType::MAX)) - if (error_code == EntityTypeInfo::get(type).not_found_error_code) - return true; + String outputTypeAndNameOrID(const IAccessStorage & storage, const UUID & id) + { + auto entity = storage.tryRead(id); + if (entity) + return entity->outputTypeAndName(); + return outputID(id); + } - return false; + + template > + ResultType doTry(const Func & func) + { + try + { + return func(); + } + catch (Exception &) + { + return {}; + } + } + + + template , + typename ResultType = std::conditional_t, void, std::vector>> + ResultType applyToMultipleEntities( + const std::vector & multiple_entities, + const ApplyFunc & apply_function, + const char * error_message_format [[maybe_unused]] = nullptr, + const GetNameFunc & get_name_function [[maybe_unused]] = nullptr) + { + std::optional exception; + std::vector success; + + auto helper = [&](const auto & apply_and_store_result_function) + { + for (size_t i = 0; i != multiple_entities.size(); ++i) + { + try + { + apply_and_store_result_function(multiple_entities[i]); + if constexpr (!ignore_errors) + success[i] = true; + } + catch (Exception & e) + { + if (!ignore_errors && !exception) + exception.emplace(e); + } + catch (Poco::Exception & e) + { + if (!ignore_errors && !exception) + exception.emplace(Exception::CreateFromPocoTag{}, e); + } + catch (std::exception & e) + { + if (!ignore_errors && !exception) + exception.emplace(Exception::CreateFromSTDTag{}, e); + } + } + }; + + if constexpr (std::is_same_v) + { + if (multiple_entities.empty()) + return; + + if (multiple_entities.size() == 1) + { + apply_function(multiple_entities.front()); + return; + } + + if constexpr (!ignore_errors) + success.resize(multiple_entities.size(), false); + + helper(apply_function); + + if (ignore_errors || !exception) + return; + } + else + { + ResultType result; + if (multiple_entities.empty()) + return result; + + if (multiple_entities.size() == 1) + { + result.emplace_back(apply_function(multiple_entities.front())); + return result; + } + + result.reserve(multiple_entities.size()); + if constexpr (!ignore_errors) + 
success.resize(multiple_entities.size(), false); + + helper([&](const T & entity) { result.emplace_back(apply_function(entity)); }); + + if (ignore_errors || !exception) + return result; + } + + if constexpr (!ignore_errors) + { + Strings succeeded_names_list; + Strings failed_names_list; + for (size_t i = 0; i != multiple_entities.size(); ++i) + { + const auto & entity = multiple_entities[i]; + String name = get_name_function(entity); + if (success[i]) + succeeded_names_list.emplace_back(name); + else + failed_names_list.emplace_back(name); + } + String succeeded_names = boost::algorithm::join(succeeded_names_list, ", "); + String failed_names = boost::algorithm::join(failed_names_list, ", "); + if (succeeded_names.empty()) + succeeded_names = "none"; + + String error_message = error_message_format; + boost::replace_all(error_message, "{succeeded_names}", succeeded_names); + boost::replace_all(error_message, "{failed_names}", failed_names); + exception->addMessage(error_message); + exception->rethrow(); + } + __builtin_unreachable(); } } @@ -91,14 +216,7 @@ bool IAccessStorage::exists(const UUID & id) const AccessEntityPtr IAccessStorage::tryReadBase(const UUID & id) const { - try - { - return readImpl(id); - } - catch (Exception &) - { - return nullptr; - } + return doTry([&] { return readImpl(id); }); } @@ -110,14 +228,7 @@ String IAccessStorage::readName(const UUID & id) const std::optional IAccessStorage::tryReadName(const UUID & id) const { - try - { - return readNameImpl(id); - } - catch (Exception &) - { - return {}; - } + return doTry([&] { return std::optional{readNameImpl(id)}; }); } @@ -129,56 +240,25 @@ UUID IAccessStorage::insert(const AccessEntityPtr & entity) std::vector IAccessStorage::insert(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - String error_message; - for (const auto & entity : multiple_entities) - { - try - { - ids.push_back(insertImpl(entity, false)); - } - catch (Exception & e) - { - if (e.code() != ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS) - throw; - error_message += (error_message.empty() ? "" : ". ") + e.message(); - } - } - if (!error_message.empty()) - throw Exception(error_message, ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ false); }, + "Couldn't insert {failed_names}. 
Successfully inserted: {succeeded_names}", + [](const AccessEntityPtr & entity) { return entity->outputTypeAndName(); }); } std::optional IAccessStorage::tryInsert(const AccessEntityPtr & entity) { - try - { - return insertImpl(entity, false); - } - catch (Exception &) - { - return {}; - } + return doTry([&] { return std::optional{insertImpl(entity, false)}; }); } std::vector IAccessStorage::tryInsert(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - for (const auto & entity : multiple_entities) - { - try - { - ids.push_back(insertImpl(entity, false)); - } - catch (Exception &) - { - } - } - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ false); }); } @@ -190,11 +270,11 @@ UUID IAccessStorage::insertOrReplace(const AccessEntityPtr & entity) std::vector IAccessStorage::insertOrReplace(const std::vector & multiple_entities) { - std::vector ids; - ids.reserve(multiple_entities.size()); - for (const auto & entity : multiple_entities) - ids.push_back(insertImpl(entity, true)); - return ids; + return applyToMultipleEntities( + multiple_entities, + [this](const AccessEntityPtr & entity) { return insertImpl(entity, /* replace_if_exists = */ true); }, + "Couldn't insert {failed_names}. Successfully inserted: {succeeded_names}", + [](const AccessEntityPtr & entity) -> String { return entity->outputTypeAndName(); }); } @@ -206,60 +286,25 @@ void IAccessStorage::remove(const UUID & id) void IAccessStorage::remove(const std::vector & ids) { - String error_message; - std::optional error_code; - for (const auto & id : ids) - { - try - { - removeImpl(id); - } - catch (Exception & e) - { - if (!isNotFoundErrorCode(e.code())) - throw; - error_message += (error_message.empty() ? "" : ". ") + e.message(); - if (error_code && (*error_code != e.code())) - error_code = ErrorCodes::ACCESS_ENTITY_NOT_FOUND; - else - error_code = e.code(); - } - } - if (!error_message.empty()) - throw Exception(error_message, *error_code); + applyToMultipleEntities( + ids, + [this](const UUID & id) { removeImpl(id); }, + "Couldn't remove {failed_names}. Successfully removed: {succeeded_names}", + [this](const UUID & id) { return outputTypeAndNameOrID(*this, id); }); } bool IAccessStorage::tryRemove(const UUID & id) { - try - { - removeImpl(id); - return true; - } - catch (Exception &) - { - return false; - } + return doTry([&] { removeImpl(id); return true; }); } std::vector IAccessStorage::tryRemove(const std::vector & ids) { - std::vector removed; - removed.reserve(ids.size()); - for (const auto & id : ids) - { - try - { - removeImpl(id); - removed.push_back(id); - } - catch (Exception &) - { - } - } - return removed; + return applyToMultipleEntities( + ids, + [this](const UUID & id) { removeImpl(id); return id; }); } @@ -271,60 +316,25 @@ void IAccessStorage::update(const UUID & id, const UpdateFunc & update_func) void IAccessStorage::update(const std::vector & ids, const UpdateFunc & update_func) { - String error_message; - std::optional error_code; - for (const auto & id : ids) - { - try - { - updateImpl(id, update_func); - } - catch (Exception & e) - { - if (!isNotFoundErrorCode(e.code())) - throw; - error_message += (error_message.empty() ? "" : ". 
") + e.message(); - if (error_code && (*error_code != e.code())) - error_code = ErrorCodes::ACCESS_ENTITY_NOT_FOUND; - else - error_code = e.code(); - } - } - if (!error_message.empty()) - throw Exception(error_message, *error_code); + applyToMultipleEntities( + ids, + [this, &update_func](const UUID & id) { updateImpl(id, update_func); }, + "Couldn't update {failed_names}. Successfully updated: {succeeded_names}", + [this](const UUID & id) { return outputTypeAndNameOrID(*this, id); }); } bool IAccessStorage::tryUpdate(const UUID & id, const UpdateFunc & update_func) { - try - { - updateImpl(id, update_func); - return true; - } - catch (Exception &) - { - return false; - } + return doTry([&] { updateImpl(id, update_func); return true; }); } std::vector IAccessStorage::tryUpdate(const std::vector & ids, const UpdateFunc & update_func) { - std::vector updated; - updated.reserve(ids.size()); - for (const auto & id : ids) - { - try - { - updateImpl(id, update_func); - updated.push_back(id); - } - catch (Exception &) - { - } - } - return updated; + return applyToMultipleEntities( + ids, + [this, &update_func](const UUID & id) { updateImpl(id, update_func); return id; }); } @@ -388,7 +398,7 @@ Poco::Logger * IAccessStorage::getLogger() const void IAccessStorage::throwNotFound(const UUID & id) const { - throw Exception("ID {" + toString(id) + "} not found in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_NOT_FOUND); + throw Exception(outputID(id) + " not found in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_NOT_FOUND); } @@ -402,7 +412,7 @@ void IAccessStorage::throwNotFound(EntityType type, const String & name) const void IAccessStorage::throwBadCast(const UUID & id, EntityType type, const String & name, EntityType required_type) { throw Exception( - "ID {" + toString(id) + "}: " + outputEntityTypeAndName(type, name) + " expected to be of type " + toString(required_type), + outputID(id) + ": " + outputEntityTypeAndName(type, name) + " expected to be of type " + toString(required_type), ErrorCodes::LOGICAL_ERROR); } @@ -410,7 +420,7 @@ void IAccessStorage::throwBadCast(const UUID & id, EntityType type, const String void IAccessStorage::throwIDCollisionCannotInsert(const UUID & id, EntityType type, const String & name, EntityType existing_type, const String & existing_name) const { throw Exception( - outputEntityTypeAndName(type, name) + ": cannot insert because the ID {" + toString(id) + "} is already used by " + outputEntityTypeAndName(type, name) + ": cannot insert because the " + outputID(id) + " is already used by " + outputEntityTypeAndName(existing_type, existing_name) + " in [" + getStorageName() + "]", ErrorCodes::ACCESS_ENTITY_ALREADY_EXISTS); } From 4bd00b02e217be02ed128ce78bc13b12db62dc69 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Jun 2020 20:57:33 +0300 Subject: [PATCH 0736/2229] Improve syntax of CREATE QUOTA. Now resource types and key types could be written with underscores. Also rename columns key_type=>keys and source=>storage in table system.quotas. 
--- src/Access/Quota.h | 22 ++- src/Common/IntervalKind.cpp | 17 ++ src/Common/IntervalKind.h | 2 + ...InterpreterShowCreateAccessEntityQuery.cpp | 4 +- src/Parsers/ASTCreateQuotaQuery.cpp | 65 +++++--- src/Parsers/ASTCreateQuotaQuery.h | 12 +- src/Parsers/ParserCreateQuotaQuery.cpp | 149 ++++++++++++------ src/Parsers/ParserCreateQuotaQuery.h | 12 +- src/Storages/System/StorageSystemQuotas.cpp | 24 ++- .../test_disk_access_storage/test.py | 2 +- tests/integration/test_quota/test.py | 49 +++--- .../0_stateless/01033_quota_dcl.reference | 2 +- 12 files changed, 244 insertions(+), 116 deletions(-) diff --git a/src/Access/Quota.h b/src/Access/Quota.h index 101263e76a5..5bbea36cfda 100644 --- a/src/Access/Quota.h +++ b/src/Access/Quota.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include @@ -84,7 +86,8 @@ struct Quota : public IAccessEntity struct KeyTypeInfo { const char * const raw_name; - const String name; /// Lowercased with spaces, e.g. "client key". + const String name; /// Lowercased with underscores, e.g. "client_key". + const std::vector base_types; /// For combined types keeps base types, e.g. for CLIENT_KEY_OR_USER_NAME it keeps [KeyType::CLIENT_KEY, KeyType::USER_NAME]. static const KeyTypeInfo & get(KeyType type); }; @@ -195,8 +198,21 @@ inline const Quota::KeyTypeInfo & Quota::KeyTypeInfo::get(KeyType type) { String init_name = raw_name_; boost::to_lower(init_name); - boost::replace_all(init_name, "_", " "); - return KeyTypeInfo{raw_name_, std::move(init_name)}; + std::vector init_base_types; + String replaced = boost::algorithm::replace_all_copy(init_name, "_or_", "|"); + Strings tokens; + boost::algorithm::split(tokens, replaced, boost::is_any_of("|")); + if (tokens.size() > 1) + { + for (const auto & token : tokens) + for (auto kt : ext::range(KeyType::MAX)) + if (KeyTypeInfo::get(kt).name == token) + { + init_base_types.push_back(kt); + break; + } + } + return KeyTypeInfo{raw_name_, std::move(init_name), std::move(init_base_types)}; }; switch (type) diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 9443844a54b..1582889eff2 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -83,6 +83,23 @@ const char * IntervalKind::toKeyword() const } +const char * IntervalKind::toLowercasedKeyword() const +{ + switch (kind) + { + case IntervalKind::Second: return "second"; + case IntervalKind::Minute: return "minute"; + case IntervalKind::Hour: return "hour"; + case IntervalKind::Day: return "day"; + case IntervalKind::Week: return "week"; + case IntervalKind::Month: return "month"; + case IntervalKind::Quarter: return "quarter"; + case IntervalKind::Year: return "year"; + } + __builtin_unreachable(); +} + + const char * IntervalKind::toDateDiffUnit() const { switch (kind) diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index 9b7c4bd504e..d8a569b8de4 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -37,6 +37,8 @@ struct IntervalKind /// Returns an uppercased version of what `toString()` returns. const char * toKeyword() const; + const char * toLowercasedKeyword() const; + /// Returns the string which can be passed to the `unit` parameter of the dateDiff() function. /// For example, `IntervalKind{IntervalKind::Day}.getDateDiffParameter()` returns "day". 
const char * toDateDiffUnit() const; diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 9c28c3d0bd2..18a08d21e93 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -132,7 +132,9 @@ namespace query->names.emplace_back(quota.getName()); query->attach = attach_mode; - query->key_type = quota.key_type; + if (quota.key_type != Quota::KeyType::NONE) + query->key_type = quota.key_type; + query->all_limits.reserve(quota.all_limits.size()); for (const auto & limits : quota.all_limits) diff --git a/src/Parsers/ASTCreateQuotaQuery.cpp b/src/Parsers/ASTCreateQuotaQuery.cpp index fc4e2edb9e7..88516fb6eac 100644 --- a/src/Parsers/ASTCreateQuotaQuery.cpp +++ b/src/Parsers/ASTCreateQuotaQuery.cpp @@ -18,8 +18,28 @@ namespace void formatKeyType(const KeyType & key_type, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " KEYED BY " << (settings.hilite ? IAST::hilite_none : "") << "'" - << KeyTypeInfo::get(key_type).name << "'"; + const auto & type_info = KeyTypeInfo::get(key_type); + if (key_type == KeyType::NONE) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NOT KEYED" << (settings.hilite ? IAST::hilite_none : ""); + return; + } + + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " KEYED BY " << (settings.hilite ? IAST::hilite_none : ""); + + if (!type_info.base_types.empty()) + { + bool need_comma = false; + for (const auto & base_type : type_info.base_types) + { + if (std::exchange(need_comma, true)) + settings.ostr << ", "; + settings.ostr << KeyTypeInfo::get(base_type).name; + } + return; + } + + settings.ostr << type_info.name; } @@ -43,20 +63,14 @@ namespace } - void formatLimit(ResourceType resource_type, ResourceAmount max, bool first, const IAST::FormatSettings & settings) + void formatLimit(ResourceType resource_type, ResourceAmount max, const IAST::FormatSettings & settings) { - if (first) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX" << (settings.hilite ? IAST::hilite_none : ""); - else - settings.ostr << ","; - const auto & type_info = ResourceTypeInfo::get(resource_type); - settings.ostr << " " << (settings.hilite ? IAST::hilite_keyword : "") << type_info.keyword - << (settings.hilite ? IAST::hilite_none : "") << " " << type_info.amountToString(max); + settings.ostr << " " << type_info.name << " = " << type_info.amountToString(max); } - void formatLimits(const ASTCreateQuotaQuery::Limits & limits, const IAST::FormatSettings & settings) + void formatIntervalWithLimits(const ASTCreateQuotaQuery::Limits & limits, const IAST::FormatSettings & settings) { auto interval_kind = IntervalKind::fromAvgSeconds(limits.duration.count()); Int64 num_intervals = limits.duration.count() / interval_kind.toAvgSeconds(); @@ -64,11 +78,11 @@ namespace settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " FOR" << (limits.randomize_interval ? " RANDOMIZED" : "") - << " INTERVAL " + << " INTERVAL" << (settings.hilite ? IAST::hilite_none : "") - << num_intervals << " " + << " " << num_intervals << " " << (settings.hilite ? IAST::hilite_keyword : "") - << interval_kind.toKeyword() + << interval_kind.toLowercasedKeyword() << (settings.hilite ? 
IAST::hilite_none : ""); if (limits.drop) @@ -81,17 +95,28 @@ namespace for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) { if (limits.max[resource_type]) - { - formatLimit(resource_type, *limits.max[resource_type], !limit_found, settings); limit_found = true; + } + if (limit_found) + { + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " MAX" << (settings.hilite ? IAST::hilite_none : ""); + bool need_comma = false; + for (auto resource_type : ext::range(Quota::MAX_RESOURCE_TYPE)) + { + if (limits.max[resource_type]) + { + if (std::exchange(need_comma, true)) + settings.ostr << ","; + formatLimit(resource_type, *limits.max[resource_type], settings); + } } } - if (!limit_found) + else settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " TRACKING ONLY" << (settings.hilite ? IAST::hilite_none : ""); } } - void formatAllLimits(const std::vector & all_limits, const IAST::FormatSettings & settings) + void formatIntervalsWithLimits(const std::vector & all_limits, const IAST::FormatSettings & settings) { bool need_comma = false; for (const auto & limits : all_limits) @@ -100,7 +125,7 @@ namespace settings.ostr << ","; need_comma = true; - formatLimits(limits, settings); + formatIntervalWithLimits(limits, settings); } } @@ -152,7 +177,7 @@ void ASTCreateQuotaQuery::formatImpl(const FormatSettings & settings, FormatStat if (key_type) formatKeyType(*key_type, settings); - formatAllLimits(all_limits, settings); + formatIntervalsWithLimits(all_limits, settings); if (roles && (!roles->empty() || alter)) formatToRoles(*roles, settings); diff --git a/src/Parsers/ASTCreateQuotaQuery.h b/src/Parsers/ASTCreateQuotaQuery.h index 002c374322f..1671ae1d00f 100644 --- a/src/Parsers/ASTCreateQuotaQuery.h +++ b/src/Parsers/ASTCreateQuotaQuery.h @@ -11,17 +11,17 @@ class ASTRolesOrUsersSet; /** CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name - * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * [KEYED BY {none | user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + * [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day} + * {MAX {{queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number} [,...] | * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] - * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * [KEYED BY {none | user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + * [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day} + * {MAX {{queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number} [,...] | * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] */ diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index e953f698de0..f83bac975b0 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -39,84 +40,126 @@ namespace }); } - bool parseKeyType(IParserBase::Pos & pos, Expected & expected, std::optional & key_type) + bool parseKeyType(IParserBase::Pos & pos, Expected & expected, KeyType & key_type) { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{"KEYED BY"}.ignore(pos, expected)) + if (ParserKeyword{"NOT KEYED"}.ignore(pos, expected)) + { + key_type = KeyType::NONE; + return true; + } + + if (!ParserKeyword{"KEY BY"}.ignore(pos, expected) && !ParserKeyword{"KEYED BY"}.ignore(pos, expected)) return false; - ASTPtr key_type_ast; - if (!ParserStringLiteral().parse(pos, key_type_ast, expected)) + Strings names; + if (!parseIdentifiersOrStringLiterals(pos, expected, names)) return false; - const String & key_type_str = key_type_ast->as().value.safeGet(); + String name = boost::algorithm::join(names, "_or_"); + boost::to_lower(name); + boost::replace_all(name, " ", "_"); + for (auto kt : ext::range(Quota::KeyType::MAX)) - if (boost::iequals(KeyTypeInfo::get(kt).name, key_type_str)) + if (KeyTypeInfo::get(kt).name == name) { key_type = kt; return true; } - String all_key_types_str; + String all_types_str; for (auto kt : ext::range(Quota::KeyType::MAX)) - all_key_types_str += String(all_key_types_str.empty() ? "" : ", ") + "'" + KeyTypeInfo::get(kt).name + "'"; - String msg = "Quota cannot be keyed by '" + key_type_str + "'. Expected one of these literals: " + all_key_types_str; + all_types_str += String(all_types_str.empty() ? "" : ", ") + "'" + KeyTypeInfo::get(kt).name + "'"; + String msg = "Quota cannot be keyed by '" + name + "'. 
Expected one of the following identifiers: " + all_types_str; throw Exception(msg, ErrorCodes::SYNTAX_ERROR); }); } - bool parseLimit(IParserBase::Pos & pos, Expected & expected, bool first, ResourceType & resource_type, ResourceAmount & max) + + bool parseResourceType(IParserBase::Pos & pos, Expected & expected, ResourceType & resource_type) { return IParserBase::wrapParseImpl(pos, [&] { - if (first) - { - if (!ParserKeyword{"MAX"}.ignore(pos, expected)) - return false; - } - else - { - if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) - return false; - - ParserKeyword{"MAX"}.ignore(pos, expected); - } - - std::optional res_resource_type; for (auto rt : ext::range(Quota::MAX_RESOURCE_TYPE)) { if (ParserKeyword{ResourceTypeInfo::get(rt).keyword.c_str()}.ignore(pos, expected)) { - res_resource_type = rt; - break; + resource_type = rt; + return true; } } - if (!res_resource_type) + + ASTPtr ast; + if (!ParserIdentifier{}.parse(pos, ast, expected)) return false; - ResourceAmount res_max; - ASTPtr max_ast; - if (ParserNumber{}.parse(pos, max_ast, expected)) + String name = getIdentifierName(ast); + for (auto rt : ext::range(Quota::MAX_RESOURCE_TYPE)) { - const Field & max_field = max_ast->as().value; - const auto & type_info = ResourceTypeInfo::get(*res_resource_type); - if (type_info.output_denominator == 1) - res_max = applyVisitor(FieldVisitorConvertToNumber(), max_field); - else - res_max = static_cast( - applyVisitor(FieldVisitorConvertToNumber(), max_field) * type_info.output_denominator); + if (ResourceTypeInfo::get(rt).name == name) + { + resource_type = rt; + return true; + } + } + + return false; + }); + } + + + bool parseMaxAmount(IParserBase::Pos & pos, Expected & expected, ResourceType resource_type, ResourceAmount & max) + { + ASTPtr ast; + if (!ParserNumber{}.parse(pos, ast, expected)) + return false; + + const Field & max_field = ast->as().value; + const auto & type_info = ResourceTypeInfo::get(resource_type); + if (type_info.output_denominator == 1) + max = applyVisitor(FieldVisitorConvertToNumber(), max_field); + else + max = static_cast( + applyVisitor(FieldVisitorConvertToNumber(), max_field) * type_info.output_denominator); + return true; + } + + + bool parseLimit(IParserBase::Pos & pos, Expected & expected, bool first, bool & max_prefix_encountered, ResourceType & resource_type, ResourceAmount & max) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (!first && !ParserToken{TokenType::Comma}.ignore(pos, expected)) + return false; + + max_prefix_encountered |= ParserKeyword{"MAX"}.ignore(pos, expected); + + ResourceType res_resource_type; + if (!parseResourceType(pos, expected, res_resource_type)) + return false; + + if (max_prefix_encountered) + { + ParserToken{TokenType::Equals}.ignore(pos, expected); } else + { + if (!ParserKeyword{"MAX"}.ignore(pos, expected)) + return false; + } + + ResourceAmount res_max; + if (!parseMaxAmount(pos, expected, res_resource_type, res_max)) return false; - resource_type = *res_resource_type; + resource_type = res_resource_type; max = res_max; return true; }); } - bool parseLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits) + bool parseIntervalWithLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -126,8 +169,7 @@ namespace new_limits.randomize_interval = ParserKeyword{"RANDOMIZED"}.ignore(pos, expected); - if (!ParserKeyword{"INTERVAL"}.ignore(pos, expected)) - return false; + 
ParserKeyword{"INTERVAL"}.ignore(pos, expected); ASTPtr num_intervals_ast; if (!ParserNumber{}.parse(pos, num_intervals_ast, expected)) @@ -152,11 +194,12 @@ namespace { ResourceType resource_type; ResourceAmount max; - if (!parseLimit(pos, expected, true, resource_type, max)) + bool max_prefix_encountered = false; + if (!parseLimit(pos, expected, true, max_prefix_encountered, resource_type, max)) return false; new_limits.max[resource_type] = max; - while (parseLimit(pos, expected, false, resource_type, max)) + while (parseLimit(pos, expected, false, max_prefix_encountered, resource_type, max)) new_limits.max[resource_type] = max; } @@ -165,7 +208,7 @@ namespace }); } - bool parseAllLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) + bool parseIntervalsWithLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) { return IParserBase::wrapParseImpl(pos, [&] { @@ -173,7 +216,7 @@ namespace do { ASTCreateQuotaQuery::Limits limits; - if (!parseLimits(pos, expected, limits)) + if (!parseIntervalWithLimits(pos, expected, limits)) { all_limits.resize(old_size); return false; @@ -185,6 +228,7 @@ namespace }); } + bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { return IParserBase::wrapParseImpl(pos, [&] @@ -192,7 +236,7 @@ namespace ASTPtr node; ParserRolesOrUsersSet roles_p; roles_p.allowAll().allowRoleNames().allowUserNames().allowCurrentUser().useIDMode(id_mode); - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected) || !roles_p.parse(pos, node, expected)) + if (!ParserKeyword{"TO"}.ignore(pos, expected) || !roles_p.parse(pos, node, expected)) return false; roles = std::static_pointer_cast(node); @@ -256,10 +300,17 @@ bool ParserCreateQuotaQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (alter && new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; - if (!key_type && parseKeyType(pos, expected, key_type)) - continue; + if (!key_type) + { + KeyType new_key_type; + if (parseKeyType(pos, expected, new_key_type)) + { + key_type = new_key_type; + continue; + } + } - if (parseAllLimits(pos, expected, all_limits)) + if (parseIntervalsWithLimits(pos, expected, all_limits)) continue; if (cluster.empty() && parseOnCluster(pos, expected, cluster)) diff --git a/src/Parsers/ParserCreateQuotaQuery.h b/src/Parsers/ParserCreateQuotaQuery.h index e2185d4d5ff..03021985357 100644 --- a/src/Parsers/ParserCreateQuotaQuery.h +++ b/src/Parsers/ParserCreateQuotaQuery.h @@ -7,17 +7,17 @@ namespace DB { /** Parses queries like * CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name - * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} [,...] | + * [KEYED BY {none | user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + * [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day} + * {MAX {{queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number} [,...] | * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] 
| ALL | ALL EXCEPT role [,...]}] * * ALTER QUOTA [IF EXISTS] name * [RENAME TO new_name] - * [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - * [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY} - * {MAX {{QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number} } [,...] | + * [KEYED BY {none | user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + * [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day} + * {MAX {{queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number} [,...] | * NO LIMITS | TRACKING ONLY} [,...]] * [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] */ diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 5d8c0be5861..7ba2d32fe68 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -26,7 +26,11 @@ namespace { DataTypeEnum8::Values enum_values; for (auto key_type : ext::range(KeyType::MAX)) - enum_values.push_back({KeyTypeInfo::get(key_type).name, static_cast(key_type)}); + { + const auto & type_info = KeyTypeInfo::get(key_type); + if ((key_type != KeyType::NONE) && type_info.base_types.empty()) + enum_values.push_back({type_info.name, static_cast(key_type)}); + } return enum_values; } } @@ -37,8 +41,8 @@ NamesAndTypesList StorageSystemQuotas::getNamesAndTypes() NamesAndTypesList names_and_types{ {"name", std::make_shared()}, {"id", std::make_shared()}, - {"source", std::make_shared()}, - {"key_type", std::make_shared(getKeyTypeEnumValues())}, + {"storage", std::make_shared()}, + {"keys", std::make_shared(std::make_shared(getKeyTypeEnumValues()))}, {"durations", std::make_shared(std::make_shared())}, {"apply_to_all", std::make_shared()}, {"apply_to_list", std::make_shared(std::make_shared())}, @@ -58,7 +62,8 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, const Context & auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_storage = assert_cast(*res_columns[column_index++]); - auto & column_key_type = assert_cast(*res_columns[column_index++]).getData(); + auto & column_key_types = assert_cast(assert_cast(*res_columns[column_index]).getData()).getData(); + auto & column_key_types_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_durations = assert_cast(assert_cast(*res_columns[column_index]).getData()).getData(); auto & column_durations_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_apply_to_all = assert_cast(*res_columns[column_index++]).getData(); @@ -77,7 +82,16 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, const Context & column_name.insertData(name.data(), name.length()); column_id.push_back(id); column_storage.insertData(storage_name.data(), storage_name.length()); - column_key_type.push_back(static_cast(key_type)); + + if (key_type != KeyType::NONE) + { + const auto & type_info = KeyTypeInfo::get(key_type); + for (auto base_type : type_info.base_types) + column_key_types.push_back(static_cast(base_type)); + if (type_info.base_types.empty()) + column_key_types.push_back(static_cast(key_type)); + } + column_key_types_offsets.push_back(column_key_types.size()); for (const auto & limits : all_limits) 
column_durations.push_back(std::chrono::duration_cast(limits.duration).count()); diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index a47af5ad5b8..dab6758cbd6 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -41,7 +41,7 @@ def test_create(): assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS PROFILE s1\n" assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 IDENTIFIED WITH sha256_password HOST LOCAL DEFAULT ROLE rx\n" assert instance.query("SHOW CREATE ROW POLICY p ON mydb.mytable") == "CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a < 1000 TO u1, u2\n" - assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx\n" + assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q FOR INTERVAL 1 hour MAX queries = 100 TO ALL EXCEPT rx\n" assert instance.query("SHOW GRANTS FOR u1") == "" assert instance.query("SHOW GRANTS FOR u2") == "GRANT rx TO u2\n" assert instance.query("SHOW CREATE ROLE rx") == "CREATE ROLE rx SETTINGS PROFILE s1\n" diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 6e00bf7241b..ab8077030e6 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -61,7 +61,7 @@ def reset_quotas_and_usage_info(): def test_quota_from_users_xml(): - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", [31556952], 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] assert system_quotas_usage() == [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] @@ -76,7 +76,7 @@ def test_quota_from_users_xml(): def test_simpliest_quota(): # Simpliest quota doesn't even track usage. copy_quota_xml('simpliest.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[]", 0, "['default']", "[]"]] assert system_quota_limits() == "" assert system_quota_usage() == [["myQuota", "default", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N"]] @@ -87,7 +87,7 @@ def test_simpliest_quota(): def test_tracking_quota(): # Now we're tracking usage. 
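
These expectations replace the quoted 'user name' spelling with the identifier user_name. parseKeyType (earlier in this patch) makes the old and the new spellings resolve to the same KeyType by joining the parsed names with "_or_", lowercasing, and turning spaces into underscores. A minimal standalone sketch of that normalization, using plain std helpers instead of the boost calls in the patch:

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    // Canonicalize a KEYED BY spelling the way parseKeyType does:
    // join with "_or_", lowercase, then spaces -> underscores.
    std::string canonicalKeyName(const std::vector<std::string> & names)
    {
        std::string result;
        for (const auto & name : names)
        {
            if (!result.empty())
                result += "_or_";
            result += name;
        }
        std::transform(result.begin(), result.end(), result.begin(),
                       [](unsigned char c) { return std::tolower(c); });
        std::replace(result.begin(), result.end(), ' ', '_');
        return result;
    }

    int main()
    {
        // The legacy literal and the new identifier list collapse to one name.
        std::cout << canonicalKeyName({"client key or user name"}) << '\n'; // client_key_or_user_name
        std::cout << canonicalKeyName({"client_key", "user_name"}) << '\n'; // client_key_or_user_name
    }
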
copy_quota_xml('tracking.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, "\N", "\N", "\N", "\N", "\N", "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, "\N", 0, "\N", 0, "\N", 0, "\N", 0, "\N", 0, "\N", "\N"]] @@ -101,7 +101,7 @@ def test_tracking_quota(): def test_exceed_quota(): # Change quota, now the limits are tiny so we will exceed the quota. copy_quota_xml('tiny_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1, 1, 1, "\N", 1, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, "\N", 0, 1, 0, "\N", "\N"]] @@ -110,7 +110,7 @@ def test_exceed_quota(): # Change quota, now the limits are enough to execute queries. copy_quota_xml('normal_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 1, 1000, 1, "\N", 0, "\N", 0, "\N", 50, 1000, 0, "\N", "\N"]] @@ -119,13 +119,13 @@ def test_exceed_quota(): def test_add_remove_interval(): - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", [31556952], 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] # Add interval. copy_quota_xml('two_intervals.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952,63113904]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952,63113904]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"], ["myQuota", 63113904, 1, "\N", "\N", "\N", 30000, "\N", 20000, 120]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"], @@ -137,7 +137,7 @@ def test_add_remove_interval(): # Remove interval. 
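
The formatted output checked in these tests ("KEYED BY user_name ... MAX queries = 1000, read_rows = 1000") is produced by the comma handling introduced above in formatKeyType and formatIntervalWithLimits: both rely on std::exchange(need_comma, true), which returns the previous flag value while storing the new one, so the separator is skipped exactly once, for the first element. A small sketch of the idiom with placeholder names:

    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Emit "KEYED BY a, b" with a comma before every element except the first.
    void printKeyedBy(const std::vector<std::string> & base_type_names)
    {
        std::cout << "KEYED BY ";
        bool need_comma = false;
        for (const auto & name : base_type_names)
        {
            // std::exchange returns false on the first pass, true afterwards.
            if (std::exchange(need_comma, true))
                std::cout << ", ";
            std::cout << name;
        }
        std::cout << '\n';
    }

    int main()
    {
        printKeyedBy({"client_key", "user_name"}); // KEYED BY client_key, user_name
    }
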
copy_quota_xml('normal_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", [31556952], 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 1, 1000, 0, "\N", 50, "\N", 200, "\N", 50, 1000, 200, "\N", "\N"]] @@ -146,7 +146,7 @@ def test_add_remove_interval(): # Remove all intervals. copy_quota_xml('simpliest.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[]", 0, "['default']", "[]"]] assert system_quota_limits() == "" assert system_quota_usage() == [["myQuota", "default", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N", "\N"]] @@ -155,20 +155,21 @@ def test_add_remove_interval(): # Add one interval back. copy_quota_xml('normal_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", [31556952], 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quota_usage() == [["myQuota", "default", 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] def test_add_remove_quota(): - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", [31556952], 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quotas_usage() == [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] # Add quota. copy_quota_xml('two_quotas.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"], - ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "client key or user name", "[3600,2629746]", 0, "[]", "[]"]] + print system_quotas() + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"], + ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']", "[3600,2629746]", 0, "[]", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"], ["myQuota2", 3600, 1, "\N", "\N", 4000, 400000, 4000, 400000, 60], ["myQuota2", 2629746, 0, "\N", "\N", "\N", "\N", "\N", "\N", 1800]] @@ -176,7 +177,7 @@ def test_add_remove_quota(): # Drop quota. 
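
The keys column asserted in these tests ("['client_key','user_name']") is an Array(Enum8), which StorageSystemQuotas fills through a flat nested column plus per-row offsets, pushing exactly one offset per row even when no key values were appended. A standalone sketch of that storage layout, with plain vectors standing in for ColumnInt8 and ColumnArray and made-up enum codes:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<int8_t> key_types; // nested values, back to back
        std::vector<size_t> offsets;   // cumulative end offset of each row

        auto append_row = [&](const std::vector<int8_t> & row)
        {
            for (int8_t v : row)
                key_types.push_back(v);
            offsets.push_back(key_types.size()); // recorded even for empty rows
        };

        append_row({1});    // keyed by user_name (placeholder code 1)
        append_row({4, 1}); // composite: client_key, user_name
        append_row({});     // NOT KEYED: empty array, offset still pushed

        // Row i spans [offsets[i - 1], offsets[i]) of key_types.
        for (size_t i = 0; i < offsets.size(); ++i)
        {
            size_t begin = i ? offsets[i - 1] : 0;
            std::cout << "row " << i << ": " << offsets[i] - begin << " key(s)\n";
        }
    }
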
copy_quota_xml('normal_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quotas_usage() == [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] @@ -188,25 +189,25 @@ def test_add_remove_quota(): # Add one quota back. copy_quota_xml('normal_limits.xml') - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] assert system_quotas_usage() == [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\N", 0, "\N", 0, "\N", 0, 1000, 0, "\N", "\N"]] def test_reload_users_xml_by_timer(): - assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]] + assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"]] time.sleep(1) # The modification time of the 'quota.xml' file should be different, # because config files are reload by timer only when the modification time is changed. copy_quota_xml('tiny_limits.xml', reload_immediately=False) - assert_eq_with_retry(instance, "SELECT * FROM system.quotas", [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "user name", "[31556952]", 0, "['default']", "[]"]]) + assert_eq_with_retry(instance, "SELECT * FROM system.quotas", [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", ['user_name'], "[31556952]", 0, "['default']", "[]"]]) assert_eq_with_retry(instance, "SELECT * FROM system.quota_limits", [["myQuota", 31556952, 0, 1, 1, 1, "\N", 1, "\N", "\N"]]) def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) @@ -217,7 +218,7 @@ def test_dcl_introspection(): # Add interval. 
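
The lowercase units in the expected output ("1 year", "5 quarter", "30 minute") come from IntervalKind::fromAvgSeconds combined with the new toLowercasedKeyword: the stored duration is rendered in the largest calendar unit that divides it evenly, which is why FOR INTERVAL 15 MONTH (39446190 seconds) prints back as 5 quarter. A rough sketch of that selection under the exact-divisibility assumption (the real fromAvgSeconds may handle edge cases differently):

    #include <cstdint>
    #include <iostream>
    #include <utility>

    // Average lengths in seconds, largest unit first (quarter = 3 avg months).
    constexpr std::pair<const char *, int64_t> kinds[] = {
        {"year", 31556952}, {"quarter", 7889238}, {"month", 2629746},
        {"week", 604800},   {"day", 86400},       {"hour", 3600},
        {"minute", 60},     {"second", 1},
    };

    void printInterval(int64_t duration_seconds)
    {
        for (const auto & [name, avg_seconds] : kinds)
        {
            if (duration_seconds % avg_seconds == 0)
            {
                std::cout << duration_seconds / avg_seconds << ' ' << name << '\n';
                return;
            }
        }
    }

    int main()
    {
        printInterval(39446190); // 15 months -> "5 quarter"
        printInterval(42075936); // 16 months -> "16 month"
        printInterval(1800);     //           -> "30 minute"
    }
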
copy_quota_xml('two_intervals.xml') assert instance.query("SHOW QUOTAS") == "myQuota\n" - assert instance.query("SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000, FOR RANDOMIZED INTERVAL 2 YEAR MAX RESULT BYTES 30000, READ BYTES 20000, EXECUTION TIME 120 TO default\n" + assert instance.query("SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000, FOR RANDOMIZED INTERVAL 2 year MAX result_bytes = 30000, read_bytes = 20000, execution_time = 120 TO default\n" assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n" "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120", instance.query("SHOW QUOTA")) @@ -225,8 +226,8 @@ def test_dcl_introspection(): # Drop interval, add quota. copy_quota_xml('two_quotas.xml') assert instance.query("SHOW QUOTAS") == "myQuota\nmyQuota2\n" - assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY \\'user name\\' FOR INTERVAL 1 YEAR MAX QUERIES 1000, READ ROWS 1000 TO default\n" - assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY \\'client key or user name\\' FOR RANDOMIZED INTERVAL 1 HOUR MAX RESULT ROWS 4000, RESULT BYTES 400000, READ ROWS 4000, READ BYTES 400000, EXECUTION TIME 60, FOR INTERVAL 1 MONTH MAX EXECUTION TIME 1800\n" + assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) @@ -242,7 +243,7 @@ def test_dcl_management(): assert instance.query("SHOW QUOTA") == "" instance.query("CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 5 QUARTER MAX QUERIES 123 TO default\n" + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n" assert re.match("qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) @@ -251,7 +252,7 @@ def test_dcl_management(): instance.query("SHOW QUOTA")) instance.query("ALTER QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 321, MAX ERRORS 10, FOR INTERVAL 0.5 HOUR MAX EXECUTION TIME 0.5") - assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA KEYED BY \\'none\\' FOR INTERVAL 30 MINUTE MAX EXECUTION TIME 0.5, FOR INTERVAL 5 QUARTER MAX QUERIES 321, ERRORS 10 TO default\n" + assert instance.query("SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n" assert re.match("qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" 
"qA\\t\\t.*\\t39446190\\t1\\t321\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) @@ -270,7 +271,7 @@ def test_dcl_management(): instance.query("SHOW QUOTA")) instance.query("ALTER QUOTA qA RENAME TO qB") - assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB KEYED BY \\'none\\' FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY TO default\n" + assert instance.query("SHOW CREATE QUOTA qB") == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n" assert re.match("qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) diff --git a/tests/queries/0_stateless/01033_quota_dcl.reference b/tests/queries/0_stateless/01033_quota_dcl.reference index 7bd2d2923d2..2ad3c3ad784 100644 --- a/tests/queries/0_stateless/01033_quota_dcl.reference +++ b/tests/queries/0_stateless/01033_quota_dcl.reference @@ -1,2 +1,2 @@ default -CREATE QUOTA default KEYED BY \'user name\' FOR INTERVAL 1 HOUR TRACKING ONLY TO default, readonly +CREATE QUOTA default KEYED BY user_name FOR INTERVAL 1 hour TRACKING ONLY TO default, readonly From 5fc41c7eccc9d98d524ee997ae689d5e5333820a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:17:06 +0300 Subject: [PATCH 0737/2229] Move set*Key methods to StorageInMemoryMetadata --- src/Storages/IStorage.cpp | 21 --------------------- src/Storages/IStorage.h | 12 ------------ src/Storages/MergeTree/MergeTreeData.cpp | 3 --- src/Storages/StorageInMemoryMetadata.h | 12 ++++++++++++ 4 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 6c045a6f365..2bbbabbff08 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -403,11 +403,6 @@ const KeyDescription & IStorage::getPartitionKey() const return metadata->partition_key; } -void IStorage::setPartitionKey(const KeyDescription & partition_key_) -{ - metadata->partition_key = partition_key_; -} - bool IStorage::isPartitionKeyDefined() const { return metadata->partition_key.definition_ast != nullptr; @@ -430,11 +425,6 @@ const KeyDescription & IStorage::getSortingKey() const return metadata->sorting_key; } -void IStorage::setSortingKey(const KeyDescription & sorting_key_) -{ - metadata->sorting_key = sorting_key_; -} - bool IStorage::isSortingKeyDefined() const { return metadata->sorting_key.definition_ast != nullptr; @@ -464,11 +454,6 @@ const KeyDescription & IStorage::getPrimaryKey() const return metadata->primary_key; } -void IStorage::setPrimaryKey(const KeyDescription & primary_key_) -{ - metadata->primary_key = primary_key_; -} - bool IStorage::isPrimaryKeyDefined() const { return metadata->primary_key.definition_ast != nullptr; @@ -498,12 +483,6 @@ const KeyDescription & IStorage::getSamplingKey() const return metadata->sampling_key; } -void IStorage::setSamplingKey(const KeyDescription & sampling_key_) -{ - metadata->sampling_key = sampling_key_; -} - - bool IStorage::isSamplingKeyDefined() const { return metadata->sampling_key.definition_ast != nullptr; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 403f5293588..ec7e8fc1795 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -438,9 +438,6 @@ public: /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; - /// Set partition key for storage (methods bellow, are just wrappers for this - /// struct). 
-    void setPartitionKey(const KeyDescription & partition_key_);
     /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none.
     ASTPtr getPartitionKeyAST() const { return metadata->partition_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) partition key.
@@ -453,9 +450,6 @@ public:
     /// Returns structure with sorting key.
     const KeyDescription & getSortingKey() const;
-    /// Set sorting key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setSortingKey(const KeyDescription & sorting_key_);
     /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none.
     ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sorting key.
@@ -470,9 +464,6 @@ public:
     /// Returns structure with primary key.
     const KeyDescription & getPrimaryKey() const;
-    /// Set primary key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setPrimaryKey(const KeyDescription & primary_key_);
     /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none.
     ASTPtr getPrimaryKeyAST() const { return metadata->primary_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sorting key.
@@ -488,9 +479,6 @@ public:
     /// Returns structure with sampling key.
     const KeyDescription & getSamplingKey() const;
-    /// Set sampling key for storage (methods bellow, are just wrappers for this
-    /// struct).
-    void setSamplingKey(const KeyDescription & sampling_key_);
     /// Returns sampling expression AST for storage or nullptr if there is none.
     ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; }
     /// Storage has user-defined (in CREATE query) sampling key.
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 3414143c46b..ab0544c641b 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -154,8 +154,6 @@ MergeTreeData::MergeTreeData(
         if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach
             && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility.
             throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS);
-
-        setSamplingKey(metadata_.sampling_key);
     }

     MergeTreeDataFormatVersion min_format_version(0);
@@ -472,7 +470,6 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key)
             }
         }
     }
-    setPartitionKey(new_partition_key);
 }

diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h
index f4d6e9b38b3..3b3c9d07c89 100644
--- a/src/Storages/StorageInMemoryMetadata.h
+++ b/src/Storages/StorageInMemoryMetadata.h
@@ -57,6 +57,18 @@ struct StorageInMemoryMetadata

     void setConstraints(ConstraintsDescription constraints_);

+    /// Set partition key for storage (methods below are just wrappers for this
+    /// struct).
+    void setPartitionKey(const KeyDescription & partition_key_);
+    /// Set sorting key for storage (methods below are just wrappers for this
+    /// struct).
+    void setSortingKey(const KeyDescription & sorting_key_);
+    /// Set primary key for storage (methods below are just wrappers for this
+    /// struct).
+    void setPrimaryKey(const KeyDescription & primary_key_);
+    /// Set sampling key for storage (methods below are just wrappers for this
+    /// struct).
+ void setSamplingKey(const KeyDescription & sampling_key_); }; using StorageMetadataPtr = std::shared_ptr; From e667eb57b2aeaea856c24ebbb6e869ee508a368d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:24:49 +0300 Subject: [PATCH 0738/2229] Working set*Keys methods --- src/Storages/MergeTree/MergeTreeData.cpp | 36 +++++++++++++----------- src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ab0544c641b..8af38bdf500 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -118,7 +118,7 @@ const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt"; MergeTreeData::MergeTreeData( const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata_, + StorageInMemoryMetadata metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, @@ -142,28 +142,15 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - setProperties(metadata_, attach); - const auto settings = getSettings(); - - /// NOTE: using the same columns list as is read when performing actual merges. - merging_params.check(getColumns().getAllPhysical()); - - if (metadata_.sampling_key.definition_ast != nullptr) - { - const auto & pk_sample_block = getPrimaryKey().sample_block; - if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach - && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. - throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - } - MergeTreeDataFormatVersion min_format_version(0); + /// TODO(alesap) Move to register methods if (!date_column_name.empty()) { try { auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); - auto partition_key = KeyDescription::getKeyFromAST(partition_by_ast, getColumns(), global_context); - initPartitionKey(partition_key); + metadata_.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata_.columns, global_context); + initPartitionKey(metadata_.partition_key); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -182,6 +169,21 @@ MergeTreeData::MergeTreeData( min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } + setProperties(metadata_, attach); + const auto settings = getSettings(); + + /// NOTE: using the same columns list as is read when performing actual merges. + merging_params.check(getColumns().getAllPhysical()); + + if (metadata_.sampling_key.definition_ast != nullptr) + { + const auto & pk_sample_block = getPrimaryKey().sample_block; + if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach + && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
+ throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); + } + + setTTLExpressions(metadata_); /// format_file always contained on any data path diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 007c6898e60..12350b7bd10 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -323,7 +323,7 @@ public: /// attach - whether the existing table is attached or the new table is created. MergeTreeData(const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata_, + StorageInMemoryMetadata metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, From b456a3cc77c639692d545de1c83f45ba18f5b918 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 20:48:04 +0300 Subject: [PATCH 0739/2229] Fix tests. --- src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp | 2 +- src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp | 2 +- .../Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp | 2 +- src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp | 2 +- .../Merges/Algorithms/VersionedCollapsingAlgorithm.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 6a7cca3b87a..34e32cdfe3a 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -303,7 +303,7 @@ IMergingAlgorithm::Status AggregatingSortedAlgorithm::merge() bool key_differs; SortCursor current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// If we skip this row, it's not equals with any key we process. last_key.reset(); diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index a1fe2de61f2..697ac9496b5 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -115,7 +115,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() { auto current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// Get the next block from the corresponding source, if there is one. queue.removeTop(); diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index b4136adbf2d..e364a452797 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -157,7 +157,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() { SortCursor current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// Get the next block from the corresponding source, if there is one. 
queue.removeTop(); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index ada779ea29b..3ee0df0efd8 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -40,7 +40,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() { SortCursor current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// Get the next block from the corresponding source, if there is one. queue.removeTop(); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 2a7514f855c..f09a236c33a 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -647,7 +647,7 @@ IMergingAlgorithm::Status SummingSortedAlgorithm::merge() SortCursor current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// If we skip this row, it's not equals with any key we process. last_key.reset(); diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp index 66f9865c483..5e94415fae9 100644 --- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp @@ -64,7 +64,7 @@ IMergingAlgorithm::Status VersionedCollapsingAlgorithm::merge() { SortCursor current = queue.current(); - if (current->isLast() && skipLastRowFor(current->pos)) + if (current->isLast() && skipLastRowFor(current->order)) { /// Get the next block from the corresponding source, if there is one. queue.removeTop(); From 33a74a3ea05ee7ff405e7255c7faeeae08de144c Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:50:53 +0300 Subject: [PATCH 0740/2229] TTL methods in StorageInMemoryMetadata --- src/Storages/IStorage.cpp | 11 ----------- src/Storages/IStorage.h | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- src/Storages/StorageInMemoryMetadata.cpp | 10 ++++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ src/Storages/StorageMergeTree.cpp | 1 - 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 2bbbabbff08..afe61008553 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -506,11 +506,6 @@ TTLTableDescription IStorage::getTableTTLs() const return metadata->table_ttl; } -void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) -{ - std::lock_guard lock(ttl_mutex); - metadata->table_ttl = table_ttl_; -} bool IStorage::hasAnyTableTTL() const { @@ -523,12 +518,6 @@ TTLColumnsDescription IStorage::getColumnTTLs() const return metadata->column_ttls_by_name; } -void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) -{ - std::lock_guard lock(ttl_mutex); - metadata->column_ttls_by_name = column_ttls_by_name_; -} - bool IStorage::hasAnyColumnTTL() const { std::lock_guard lock(ttl_mutex); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec7e8fc1795..f3081386c76 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -500,12 +500,10 @@ public: /// Common tables TTLs (for rows and moves). 
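
While the TTL setters move out in this patch, the getters above stay on IStorage, take ttl_mutex, and hand the description back by value, so a caller gets a stable snapshot instead of a reference into guarded state. A minimal sketch of that access pattern with a toy TTL type:

    #include <mutex>
    #include <string>

    struct TTLTableDescription { std::string rows_ttl; };

    class Storage
    {
    public:
        // Copy out under the lock: the caller's snapshot stays consistent.
        TTLTableDescription getTableTTLs() const
        {
            std::lock_guard<std::mutex> lock(ttl_mutex);
            return table_ttl;
        }

        bool hasAnyTableTTL() const
        {
            std::lock_guard<std::mutex> lock(ttl_mutex);
            return !table_ttl.rows_ttl.empty();
        }

    private:
        mutable std::mutex ttl_mutex; // mutable so const getters can lock it
        TTLTableDescription table_ttl;
    };

    int main()
    {
        Storage storage;
        return storage.hasAnyTableTTL() ? 1 : 0;
    }
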
TTLTableDescription getTableTTLs() const; - void setTableTTLs(const TTLTableDescription & table_ttl_); bool hasAnyTableTTL() const; /// Separate TTLs for columns. TTLColumnsDescription getColumnTTLs() const; - void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); bool hasAnyColumnTTL() const; /// Just wrapper for table TTLs, return rows part of table TTLs. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8af38bdf500..24c787e7c63 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -520,8 +520,8 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata) { checkTTLExpressions(new_metadata); - setColumnTTLs(new_metadata.column_ttls_by_name); - setTableTTLs(new_metadata.table_ttl); + //setColumnTTLs(new_metadata.column_ttls_by_name); + //setTableTTLs(new_metadata.table_ttl); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index ac2c0417c45..2d29ac433e9 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -66,4 +66,14 @@ void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_ constraints = std::move(constraints_); } +void StorageInMemoryMetadata::setTableTTLs(const TTLTableDescription & table_ttl_) +{ + table_ttl = table_ttl_; +} + +void StorageInMemoryMetadata::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) +{ + column_ttls_by_name = column_ttls_by_name_; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 3b3c9d07c89..b5c1a1997b6 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -69,6 +69,10 @@ struct StorageInMemoryMetadata /// Set sampling key for storage (methods bellow, are just wrappers for this /// struct). void setSamplingKey(const KeyDescription & sampling_key_); + + void setTableTTLs(const TTLTableDescription & table_ttl_); + + void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2a7efa164d4..7007a544eac 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -281,7 +281,6 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. 
setProperties(new_metadata); - setTTLExpressions(new_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); From b47a7327fdbd5f2753b84aed98595c1a7d4df5e3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 21:08:05 +0300 Subject: [PATCH 0741/2229] All set methods in metadata --- src/Storages/IStorage.cpp | 13 ------------- src/Storages/IStorage.h | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 4 +++- src/Storages/StorageInMemoryMetadata.cpp | 13 +++++++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ src/Storages/StorageMaterializedView.cpp | 6 +++--- src/Storages/StorageReplicatedMergeTree.cpp | 1 - src/Storages/StorageView.cpp | 4 ++-- 8 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index afe61008553..e5ab14e046e 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -616,24 +616,11 @@ ASTPtr IStorage::getSettingsChanges() const return nullptr; } -void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) -{ - if (settings_changes_) - metadata->settings_changes = settings_changes_->clone(); - else - metadata->settings_changes = nullptr; -} - const SelectQueryDescription & IStorage::getSelectQuery() const { return metadata->select; } -void IStorage::setSelectQuery(const SelectQueryDescription & select_) -{ - metadata->select = select_; -} - bool IStorage::hasSelectQuery() const { return metadata->select.select_query != nullptr; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f3081386c76..0f48f3bf63c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -148,12 +148,10 @@ public: /// thread-unsafe part. lockStructure must be acquired /// Storage settings ASTPtr getSettingsChanges() const; - void setSettingsChanges(const ASTPtr & settings_changes_); bool hasSettingsChanges() const { return metadata->settings_changes != nullptr; } /// Select query for *View storages. 
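
With the setter gone from IStorage, callers follow the copy-modify-install pattern visible in MergeTreeData::changeSettings below: take getInMemoryMetadata(), mutate the copy, then setInMemoryMetadata. A toy sketch of why that keeps concurrent readers on a complete snapshot (stand-in types; the real code additionally guards the swap with locks):

    #include <memory>
    #include <string>

    struct Metadata { std::string settings_changes; };

    class Storage
    {
    public:
        Storage() : metadata(std::make_shared<Metadata>()) {}

        Metadata getInMemoryMetadata() const { return *metadata; }

        void setInMemoryMetadata(const Metadata & m)
        {
            // Readers still holding the old shared_ptr keep a full snapshot.
            metadata = std::make_shared<Metadata>(m);
        }

    private:
        std::shared_ptr<Metadata> metadata;
    };

    int main()
    {
        Storage storage;
        Metadata copy = storage.getInMemoryMetadata();         // 1. copy
        copy.settings_changes = "parts_to_throw_insert = 300"; // 2. mutate the copy
        storage.setInMemoryMetadata(copy);                     // 3. install
        return 0;
    }
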
const SelectQueryDescription & getSelectQuery() const; - void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 24c787e7c63..8971b50a0fd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1474,7 +1474,9 @@ void MergeTreeData::changeSettings( MergeTreeSettings copy = *getSettings(); copy.applyChanges(new_changes); storage_settings.set(std::make_unique(copy)); - setSettingsChanges(new_settings); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + new_metadata.setSettingsChanges(new_settings); + setInMemoryMetadata(new_metadata); } } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2d29ac433e9..b6dd2f38c4e 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -76,4 +76,17 @@ void StorageInMemoryMetadata::setColumnTTLs(const TTLColumnsDescription & column column_ttls_by_name = column_ttls_by_name_; } +void StorageInMemoryMetadata::setSettingsChanges(const ASTPtr & settings_changes_) +{ + if (settings_changes_) + settings_changes = settings_changes_; + else + settings_changes = nullptr; +} + +void StorageInMemoryMetadata::setSelectQuery(const SelectQueryDescription & select_) +{ + select = select_; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index b5c1a1997b6..b129cdc7756 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -73,6 +73,10 @@ struct StorageInMemoryMetadata void setTableTTLs(const TTLTableDescription & table_ttl_); void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); + + void setSettingsChanges(const ASTPtr & settings_changes_); + + void setSelectQuery(const SelectQueryDescription & select_); }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 34d5e1d4374..638a13612f2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -52,7 +52,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata metadata_; metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -68,7 +67,8 @@ StorageMaterializedView::StorageMaterializedView( throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); - setSelectQuery(select); + metadata_.setSelectQuery(select); + setInMemoryMetadata(metadata_); if (!has_inner_table) target_table_id = query.to_table_id; @@ -206,7 +206,7 @@ void StorageMaterializedView::alter( DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); - setSelectQuery(new_select); + new_metadata.setSelectQuery(new_select); } /// end modify query diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d8e45b97438..cb5e5aaf701 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ 
b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3648,7 +3648,6 @@ void StorageReplicatedMergeTree::alter( StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); params.apply(future_metadata, query_context); - changeSettings(future_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 60ae681e002..d8392b2edd8 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -40,7 +40,6 @@ StorageView::StorageView( { StorageInMemoryMetadata metadata_; metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -48,7 +47,8 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); - setSelectQuery(description); + metadata_.setSelectQuery(description); + setInMemoryMetadata(metadata_); } From a9599d0a37c4e28ef853963901c6d09c8e6d52e1 Mon Sep 17 00:00:00 2001 From: Tom Bombadil <565258751@qq.com> Date: Tue, 16 Jun 2020 02:40:20 +0800 Subject: [PATCH 0742/2229] Update index.md (#11674) * Update index.md optimize chinese-doc translation * Update index.md * Update index.md Co-authored-by: Ivan Blinkov --- docs/zh/sql-reference/index.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/zh/sql-reference/index.md b/docs/zh/sql-reference/index.md index aed96c4b34f..c47c20b9cf9 100644 --- a/docs/zh/sql-reference/index.md +++ b/docs/zh/sql-reference/index.md @@ -1,15 +1,13 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "SQL\u53C2\u8003" +toc_folder_title: SQL参考 toc_hidden: true toc_priority: 28 -toc_title: "\u9690\u85CF" +toc_title: hidden --- # SQL参考 {#sql-reference} -ClickHouse支持以下类型的查询: +ClickHouse支持以下形式的查询: - [SELECT](statements/select/index.md) - [INSERT INTO](statements/insert-into.md) @@ -17,4 +15,4 @@ ClickHouse支持以下类型的查询: - [ALTER](statements/alter.md#query_language_queries_alter) - [其他类型的查询](statements/misc.md) -[原始文章](https://clickhouse.tech/docs/en/sql-reference/) +[原始文档](https://clickhouse.tech/docs/zh/sql-reference/) From 3f8d72c3724b2d80d698aaef643ae036831118a7 Mon Sep 17 00:00:00 2001 From: Yuntao Wu Date: Tue, 16 Jun 2020 02:41:31 +0800 Subject: [PATCH 0743/2229] =?UTF-8?q?merge=20translates=20into=20"?= =?UTF-8?q?=E5=90=88=E5=B9=B6"=20better=20(#11659)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * merge translates into "合并" better * Update index.md Co-authored-by: Ivan Blinkov --- docs/zh/engines/table-engines/mergetree-family/index.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/index.md b/docs/zh/engines/table-engines/mergetree-family/index.md index 746d9f03281..c24dd02bb72 100644 --- a/docs/zh/engines/table-engines/mergetree-family/index.md +++ b/docs/zh/engines/table-engines/mergetree-family/index.md @@ -1,7 +1,5 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd -toc_folder_title: "\u6885\u6811\u5BB6\u65CF" +toc_folder_title: "合并树家族" toc_priority: 28 --- From 68f8372c129527fdd7e1246e309751b676b71885 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 21:41:47 +0300 Subject: [PATCH 0744/2229] Fix build. 
--- src/Processors/QueryPlan/ReadFromStorageStep.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.h b/src/Processors/QueryPlan/ReadFromStorageStep.h index 180cf47e6d7..ac662c4b06f 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromStorageStep.h @@ -9,9 +9,9 @@ namespace DB class IStorage; using StoragePtr = std::shared_ptr; -class SelectQueryInfo; +struct SelectQueryInfo; -class PrewhereInfo; +struct PrewhereInfo; /// Reads from storage. class ReadFromStorageStep : public IQueryPlanStep @@ -28,7 +28,7 @@ public: size_t max_block_size, size_t max_streams); - virtual ~ReadFromStorageStep(); + ~ReadFromStorageStep() override; String getName() const override { return "ReadFromStorage"; } From 4f8c7bcf78483f86c088185147391686c61c30f7 Mon Sep 17 00:00:00 2001 From: Yuntao Wu Date: Tue, 16 Jun 2020 02:42:35 +0800 Subject: [PATCH 0745/2229] update some errors (#11656) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit collapse means “折叠” in Chinese engine means “引擎” in Chinese when we are developing --- .../mergetree-family/versionedcollapsingmergetree.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 19caae5e1a1..3ee35e7cdf7 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -33,7 +33,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 有关查询参数的说明,请参阅 [查询说明](../../../sql-reference/statements/create.md). -**发动机参数** +**引擎参数** ``` sql VersionedCollapsingMergeTree(sign, version) @@ -79,7 +79,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] -## 崩溃 {#table_engines_versionedcollapsingmergetree} +## 折叠 {#table_engines_versionedcollapsingmergetree} ### 数据 {#data} From 42ff73eb00401ee1609ca65108582dd28f91686c Mon Sep 17 00:00:00 2001 From: bluebirddm Date: Tue, 16 Jun 2020 02:43:06 +0800 Subject: [PATCH 0746/2229] Update versionedcollapsingmergetree.md (#11654) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update versionedcollapsingmergetree.md 简单翻译 * Update versionedcollapsingmergetree.md Co-authored-by: Ivan Blinkov --- .../versionedcollapsingmergetree.md | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 3ee35e7cdf7..257bc2ad203 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -1,6 +1,4 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 37 toc_title: "\u7248\u672C\u96C6\u5408\u5728\u65B0\u6811" --- @@ -39,17 +37,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — Name of the column with the type of row: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 列数据类型应为 `Int8`. -- `version` — Name of the column with the version of the object state. 
+- `version` — 指定对象状态版本的列名。 列数据类型应为 `UInt*`. -**查询子句** +**查询 Clauses** -当创建一个 `VersionedCollapsingMergeTree` 表,相同 [条款](mergetree.md) 需要创建一个时 `MergeTree` 桌子 +当创建一个 `VersionedCollapsingMergeTree` 表时,跟创建一个 `MergeTree`表的时候需要相同 [Clause](mergetree.md)
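
For illustration, a minimal sketch of such a table, consistent with the `UAct` example used later on this page:

``` sql
CREATE TABLE UAct
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8,     -- 1 marks a "state" row, -1 marks a "cancel" row
    Version UInt8  -- version of the object state
)
ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID;
```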
    @@ -69,11 +67,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 所有的参数,除了 `sign` 和 `version` 具有相同的含义 `MergeTree`. -- `sign` — Name of the column with the type of row: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 Column Data Type — `Int8`. -- `version` — Name of the column with the version of the object state. +- `version` — 指定对象状态版本的列名。 列数据类型应为 `UInt*`. @@ -125,23 +123,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 1. 写入数据的程序应该记住对象的状态以取消它。 该 “cancel” 字符串应该是 “state” 与相反的字符串 `Sign`. 这增加了存储的初始大小,但允许快速写入数据。 2. 列中长时间增长的数组由于写入负载而降低了引擎的效率。 数据越简单,效率就越高。 -3. `SELECT` 结果很大程度上取决于对象变化历史的一致性。 准备插入数据时要准确。 您可以通过不一致的数据获得不可预测的结果,例如会话深度等非负指标的负值。 +3. `SELECT` 结果很大程度上取决于对象变化历史的一致性。 准备插入数据时要准确。 不一致的数据将导致不可预测的结果,例如会话深度等非负指标的负值。 ### 算法 {#table_engines-versionedcollapsingmergetree-algorithm} -当ClickHouse合并数据部分时,它会删除具有相同主键和版本且不同主键和版本的每对行 `Sign`. 行的顺序并不重要。 +当ClickHouse合并数据部分时,它会删除具有相同主键和版本但 `Sign`值不同的一对行. 行的顺序并不重要。 当ClickHouse插入数据时,它会按主键对行进行排序。 如果 `Version` 列不在主键中,ClickHouse将其隐式添加到主键作为最后一个字段并使用它进行排序。 ## 选择数据 {#selecting-data} -ClickHouse不保证具有相同主键的所有行都将位于相同的结果数据部分中,甚至位于相同的物理服务器上。 对于写入数据和随后合并数据部分都是如此。 此外,ClickHouse流程 `SELECT` 具有多个线程的查询,并且无法预测结果中的行顺序。 这意味着聚合是必需的,如果有必要得到完全 “collapsed” 从数据 `VersionedCollapsingMergeTree` 桌子 +ClickHouse不保证具有相同主键的所有行都将位于相同的结果数据部分中,甚至位于相同的物理服务器上。 对于写入数据和随后合并数据部分都是如此。 此外,ClickHouse流程 `SELECT` 具有多个线程的查询,并且无法预测结果中的行顺序。 这意味着,如果有必要从`VersionedCollapsingMergeTree` 表中得到完全 “collapsed” 的数据,聚合是必需的。 要完成折叠,请使用 `GROUP BY` 考虑符号的子句和聚合函数。 例如,要计算数量,请使用 `sum(Sign)` 而不是 `count()`. 要计算的东西的总和,使用 `sum(Sign * x)` 而不是 `sum(x)`,并添加 `HAVING sum(Sign) > 0`. 聚合 `count`, `sum` 和 `avg` 可以这样计算。 聚合 `uniq` 如果对象至少具有一个非折叠状态,则可以计算。 聚合 `min` 和 `max` 无法计算是因为 `VersionedCollapsingMergeTree` 不保存折叠状态值的历史记录。 -如果您需要提取数据 “collapsing” 但是,如果没有聚合(例如,要检查是否存在其最新值与某些条件匹配的行),则可以使用 `FINAL` 修饰符 `FROM` 条款 这种方法效率低下,不应与大型表一起使用。 +如果您需要提取数据 “collapsing” 但是,如果没有聚合(例如,要检查是否存在其最新值与某些条件匹配的行),则可以使用 `FINAL` 修饰 `FROM` 条件这种方法效率低下,不应与大型表一起使用。 ## 使用示例 {#example-of-use} @@ -233,6 +231,6 @@ SELECT * FROM UAct FINAL └─────────────────────┴───────────┴──────────┴──────┴─────────┘ ``` -这是一个非常低效的方式来选择数据。 不要把它用于大桌子。 +这是一个非常低效的方式来选择数据。 不要把它用于数据量大的表。 [原始文章](https://clickhouse.tech/docs/en/operations/table_engines/versionedcollapsingmergetree/) From 1474dd6d690c97a185dab462689e28ad0fb0ff93 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 15 Jun 2020 21:44:05 +0300 Subject: [PATCH 0747/2229] DOCS-646: randomString (#11610) * [CLICKHOUSEDOCS] Document the "randomString" function (#121) * Add description of randomString function * Add description for randomString * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: BayoNet * Changed example * Add russian version * Fixed links * Fixed links Co-authored-by: Anna Devyatova Co-authored-by: BayoNet * CLICKHOUSEDOCS-646: Updated text. Fixed links. * CLICKHOUSEDOCS-646: Fixed more links. 
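For reference, a quick way to exercise the documented behavior (the `scratch` table here is made up for illustration):

``` sql
CREATE TABLE scratch (s String) ENGINE = Memory;
INSERT INTO scratch SELECT randomString(16) FROM numbers(1000);
-- Every generated value is 16 bytes long, so this returns a single row: (16, 1000).
SELECT length(s) AS len, count() FROM scratch GROUP BY len;
DROP TABLE scratch;
```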
Co-authored-by: Sergei Shtykov Co-authored-by: Anna <42538400+adevyatova@users.noreply.github.com> Co-authored-by: Anna Devyatova --- .../functions/other-functions.md | 48 +++++++++++++++++++ .../settings.md | 4 +- .../functions/other-functions.md | 48 +++++++++++++++++++ docs/ru/sql-reference/statements/system.md | 2 +- .../sql-reference/table-functions/generate.md | 2 +- 5 files changed, 100 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 9aa26f32b18..18641614bef 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1200,4 +1200,52 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers └────────┴────────────────────────────────┴──────────────────────────────────┘ ``` +## randomString {#randomstring} + +Generates a binary string of the specified length filled with random bytes (including zero bytes). + +**Syntax** + +``` sql +randomString(length) +``` + +**Parameters** + +- `length` — String length. Positive integer. + +**Returned value** + +- String filled with random bytes. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +str: 3 G : pT ?w тi k aV f6 +len: 30 + +Row 2: +────── +str: 9 ,] ^ ) ]?? 8 +len: 30 +``` + +**See Also** + +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) + + [Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index e3c1629a46a..5bfedf4c520 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -78,7 +78,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat default ``` -## dictionaries\_config {#dictionaries-config} +## dictionaries\_config {#server_configuration_parameters-dictionaries_config} Путь к конфигурации внешних словарей. @@ -95,7 +95,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat *_dictionary.xml ``` -## dictionaries\_lazy\_load {#dictionaries-lazy-load} +## dictionaries\_lazy\_load {#server_configuration_parameters-dictionaries_lazy_load} Отложенная загрузка словарей. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 2c715cd15a5..7161b1a2468 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1153,4 +1153,52 @@ SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers └────────┴────────────────────────────────┴──────────────────────────────────┘ ``` +## randomString {#randomstring} + +Генерирует бинарную строку заданной длины, заполненную случайными байтами (в том числе нулевыми). + +**Синтаксис** + +``` sql +randomString(length) +``` + +**Параметры** + +- `length` — длина строки. Положительное целое число. + +**Возвращаемое значение** + +- Строка, заполненная случайными байтами. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; +``` + +Ответ: + +``` text +Row 1: +────── +str: 3 G : pT ?w тi k aV f6 +len: 30 + +Row 2: +────── +str: 9 ,] ^ ) ]?? 8 +len: 30 +``` + +**Смотрите также** + +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) + + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/other_functions/) diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index 1b66fa039d9..9a6dccd7d89 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -38,7 +38,7 @@ ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Перегружает все словари, которые были успешно загружены до этого. -По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../../sql-reference/statements/system.md#dictionaries-lazy-load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`. +По умолчанию включена ленивая загрузка [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load), поэтому словари не загружаются автоматически при старте, а только при первом обращении через dictGet или SELECT к ENGINE=Dictionary. После этого такие словари (LOADED) будут перегружаться командой `system reload dictionaries`. Всегда возвращает `Ok.`, вне зависимости от результата обновления словарей. ## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary} diff --git a/docs/ru/sql-reference/table-functions/generate.md b/docs/ru/sql-reference/table-functions/generate.md index b1abdbf1d63..9e6d36b2a4b 100644 --- a/docs/ru/sql-reference/table-functions/generate.md +++ b/docs/ru/sql-reference/table-functions/generate.md @@ -1,4 +1,4 @@ -# generateRandom {#generateRandom} +# generateRandom {#generaterandom} Генерирует случайные данные с заданной схемой. Позволяет заполнять тестовые таблицы данными. From 012fb819dbaf1005946040f0b5dbc17a63e398f7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 21:49:41 +0300 Subject: [PATCH 0748/2229] Fix build. 
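QueryPlan.cpp apparently relied on a transitive include that was no longer pulled in; the fix adds the missing include directly.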
--- src/Processors/QueryPlan/QueryPlan.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 82bc4210825..6e3535f27d4 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -1,5 +1,6 @@ #include #include +#include #include namespace DB From 7cc54fd4f1f447abf7e8309d45a27fcc7aa547ca Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 15 Jun 2020 21:53:54 +0300 Subject: [PATCH 0749/2229] renames + perf test --- .../AggregateFunctionSumMap.h | 26 +++++++++---------- tests/performance/sum_map.xml | 2 ++ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index 0c4b407b8a8..f9985a3c603 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -28,7 +28,7 @@ namespace ErrorCodes } template -struct AggregateFunctionXxxMapData +struct AggregateFunctionMapData { // Map needs to be ordered to maintain function properties std::map merged_maps; @@ -54,18 +54,18 @@ struct AggregateFunctionXxxMapData */ template -class AggregateFunctionMapOpBase : public IAggregateFunctionDataHelper< - AggregateFunctionXxxMapData>, Derived> +class AggregateFunctionMapBase : public IAggregateFunctionDataHelper< + AggregateFunctionMapData>, Derived> { private: DataTypePtr keys_type; DataTypes values_types; public: - AggregateFunctionMapOpBase( + AggregateFunctionMapBase( const DataTypePtr & keys_type_, const DataTypes & values_types_, const DataTypes & argument_types_, const Array & params_) - : IAggregateFunctionDataHelper>, Derived>(argument_types_, params_) + : IAggregateFunctionDataHelper>, Derived>(argument_types_, params_) , keys_type(keys_type_), values_types(values_types_) {} DataTypePtr getReturnType() const override @@ -305,11 +305,11 @@ public: template class AggregateFunctionSumMap final : - public AggregateFunctionMapOpBase, FieldVisitorSum, overflow, tuple_argument> + public AggregateFunctionMapBase, FieldVisitorSum, overflow, tuple_argument> { private: using Self = AggregateFunctionSumMap; - using Base = AggregateFunctionMapOpBase; + using Base = AggregateFunctionMapBase; public: AggregateFunctionSumMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) @@ -323,7 +323,7 @@ public: template class AggregateFunctionSumMapFiltered final : - public AggregateFunctionMapOpBase, FieldVisitorSum, overflow, @@ -331,7 +331,7 @@ class AggregateFunctionSumMapFiltered final : { private: using Self = AggregateFunctionSumMapFiltered; - using Base = AggregateFunctionMapOpBase; + using Base = AggregateFunctionMapBase; std::unordered_set keys_to_keep; @@ -355,11 +355,11 @@ public: template class AggregateFunctionMinMap final : - public AggregateFunctionMapOpBase, FieldVisitorMin, true, tuple_argument> + public AggregateFunctionMapBase, FieldVisitorMin, true, tuple_argument> { private: using Self = AggregateFunctionMinMap; - using Base = AggregateFunctionMapOpBase; + using Base = AggregateFunctionMapBase; public: AggregateFunctionMinMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) @@ -373,11 +373,11 @@ public: template class AggregateFunctionMaxMap final : - public AggregateFunctionMapOpBase, FieldVisitorMax, true, tuple_argument> + public AggregateFunctionMapBase, FieldVisitorMax, true, tuple_argument> { private: using Self = 
AggregateFunctionMaxMap; - using Base = AggregateFunctionMapOpBase; + using Base = AggregateFunctionMapBase; public: AggregateFunctionMaxMap(const DataTypePtr & keys_type_, DataTypes & values_types_, const DataTypes & argument_types_) diff --git a/tests/performance/sum_map.xml b/tests/performance/sum_map.xml index cb1a4cb5bc6..cfad530652c 100644 --- a/tests/performance/sum_map.xml +++ b/tests/performance/sum_map.xml @@ -13,6 +13,8 @@ func + minMap + maxMap sumMap sumMapWithOverflow From f0eafed5206e0cbe8f89fd078c733a5889f6b0af Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 15 Jun 2020 21:55:04 +0300 Subject: [PATCH 0750/2229] DOCS-678: Updated ASOF Join description (#11676) * CLICKHOUSEDOCS-678: Updated ASOF Join Usage. * CLICKHOUSEDOCS-678: Updated templates. * Update docs/ru/sql-reference/statements/select/join.md Co-authored-by: Ivan Blinkov * CLICKHOUSEDOCS-678: Update by comments. Co-authored-by: Sergei Shtykov Co-authored-by: emironyuk Co-authored-by: Ivan Blinkov --- docs/_description_templates/template-function.md | 2 +- docs/_description_templates/template-setting.md | 2 +- docs/en/sql-reference/statements/select/join.md | 6 +++++- docs/ru/sql-reference/statements/select/join.md | 6 +++++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index 1acf92cb501..b69d7ed5309 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -1,4 +1,4 @@ -## function-name {#function-name-in-lower-case} +## functionName {#functionname-in-lower-case} Short description. diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md index 5a33716f899..fc912aba3e1 100644 --- a/docs/_description_templates/template-setting.md +++ b/docs/_description_templates/template-setting.md @@ -1,4 +1,4 @@ -## setting-name {#setting-name-in-lower-case} +## setting_name {#setting_name} Description. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 5ac3f4a0e25..87bc542dbdc 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -51,7 +51,11 @@ Modifies how matching by "join keys" is performed `ASOF JOIN` is useful when you need to join records that have no exact match. -Tables for `ASOF JOIN` must have an ordered sequence column. This column cannot be alone in a table, and should be one of the data types: `UInt32`, `UInt64`, `Float32`, `Float64`, `Date`, and `DateTime`. +Algorithm requires the special column in tables. This column: + +- Must contain an ordered sequence. +- Can be one of the following types: [Int*, UInt*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal*](../../data-types/decimal.md). +- Can't be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 60f391d888b..26e7ae8257e 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -45,7 +45,11 @@ FROM `ASOF JOIN` применим в том случае, когда необходимо объединять записи, которые не имеют точного совпадения. -Таблицы для `ASOF JOIN` должны иметь столбец с отсортированной последовательностью. 
Этот столбец не может быть единственным в таблице и должен быть одного из типов: `UInt32`, `UInt64`, `Float32`, `Float64`, `Date` и `DateTime`. +Для работы алгоритма необходим специальный столбец в таблицах. Этот столбец: + +- Должен содержать упорядоченную последовательность. +- Может быть одного из следующих типов: [Int*, UInt*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Date](../../data-types/date.md), [DateTime](../../data-types/datetime.md), [Decimal*](../../data-types/decimal.md). +- Не может быть единственным столбцом в секции `JOIN`. Синтаксис `ASOF JOIN ... ON`: From d6e69211b1ba074f5e51fd467e51b892a7e091a6 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 15 Jun 2020 22:05:36 +0300 Subject: [PATCH 0751/2229] fix test --- tests/queries/0_stateless/01269_create_with_null.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01269_create_with_null.reference b/tests/queries/0_stateless/01269_create_with_null.reference index 739063af67f..e4945eed114 100644 --- a/tests/queries/0_stateless/01269_create_with_null.reference +++ b/tests/queries/0_stateless/01269_create_with_null.reference @@ -1,4 +1,4 @@ Nullable(Int32) Int32 Nullable(Int32) Int32 -CREATE TABLE default.data_null\n(\n `a` Nullable(Int32), \n `b` Int32, \n `c` Nullable(Int32), \n `d` Int32\n)\nENGINE = Memory() +CREATE TABLE default.data_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Int32\n)\nENGINE = Memory() Nullable(Int32) Int32 Nullable(Int32) Nullable(Int32) -CREATE TABLE default.set_null\n(\n `a` Nullable(Int32), \n `b` Int32, \n `c` Nullable(Int32), \n `d` Nullable(Int32)\n)\nENGINE = Memory() +CREATE TABLE default.set_null\n(\n `a` Nullable(Int32),\n `b` Int32,\n `c` Nullable(Int32),\n `d` Nullable(Int32)\n)\nENGINE = Memory() From 15132d47c9b1f4c7a0ef7a41dfccc81e53c11b57 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 5 Jun 2020 21:56:33 +0300 Subject: [PATCH 0752/2229] Slightly improve syntax of CREATE POLICY. Also rename column source=>storage in table system.row_policies. --- .../InterpreterCreateRowPolicyQuery.cpp | 8 +- ...InterpreterShowCreateAccessEntityQuery.cpp | 2 +- src/Parsers/ASTCreateRowPolicyQuery.cpp | 39 ++--- src/Parsers/ASTCreateRowPolicyQuery.h | 7 +- src/Parsers/ParserCreateRowPolicyQuery.cpp | 128 ++++++++------ src/Parsers/ParserCreateRowPolicyQuery.h | 4 +- src/Parsers/ParserRowPolicyName.cpp | 163 +++++++++++------- .../System/StorageSystemRowPolicies.cpp | 2 +- 8 files changed, 202 insertions(+), 151 deletions(-) diff --git a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp index 3a0ee3f16a1..9dacc9d1bf4 100644 --- a/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp +++ b/src/Interpreters/InterpreterCreateRowPolicyQuery.cpp @@ -30,12 +30,8 @@ namespace if (query.is_restrictive) policy.setRestrictive(*query.is_restrictive); - for (auto condition_type : ext::range(RowPolicy::MAX_CONDITION_TYPE)) - { - const auto & condition = query.conditions[condition_type]; - if (condition) - policy.conditions[condition_type] = *condition ? serializeAST(**condition) : String{}; - } + for (const auto & [condition_type, condition] : query.conditions) + policy.conditions[condition_type] = condition ? 
serializeAST(*condition) : String{}; if (override_to_roles) policy.to_roles = *override_to_roles; diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 18a08d21e93..5c4173d7aa3 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -179,7 +179,7 @@ namespace { ParserExpression parser; ASTPtr expr = parseQuery(parser, condition, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - query->conditions[static_cast(type)] = expr; + query->conditions.emplace_back(type, std::move(expr)); } } diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/ASTCreateRowPolicyQuery.cpp index 580642f2da5..640b030b6cf 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/ASTCreateRowPolicyQuery.cpp @@ -14,7 +14,6 @@ namespace { using ConditionType = RowPolicy::ConditionType; using ConditionTypeInfo = RowPolicy::ConditionTypeInfo; - constexpr auto MAX_CONDITION_TYPE = RowPolicy::MAX_CONDITION_TYPE; void formatRenameTo(const String & new_short_name, const IAST::FormatSettings & settings) @@ -26,21 +25,22 @@ namespace void formatAsRestrictiveOrPermissive(bool is_restrictive, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (is_restrictive ? "RESTRICTIVE" : "PERMISSIVE") - << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " AS " << (settings.hilite ? IAST::hilite_none : "") + << (is_restrictive ? "restrictive" : "permissive"); } void formatConditionalExpression(const ASTPtr & expr, const IAST::FormatSettings & settings) { + settings.ostr << " "; if (expr) expr->format(settings); else - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NONE" << (settings.hilite ? IAST::hilite_none : ""); + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "NONE" << (settings.hilite ? IAST::hilite_none : ""); } - void formatCondition(const boost::container::flat_set & commands, const String & filter, const String & check, bool alter, const IAST::FormatSettings & settings) + void formatForClause(const boost::container::flat_set & commands, const String & filter, const String & check, bool alter, const IAST::FormatSettings & settings) { settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " FOR " << (settings.hilite ? IAST::hilite_none : ""); bool need_comma = false; @@ -52,27 +52,23 @@ namespace } if (!filter.empty()) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " USING " << (settings.hilite ? IAST::hilite_none : "") << filter; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " USING" << (settings.hilite ? IAST::hilite_none : "") << filter; if (!check.empty() && (alter || (check != filter))) - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " WITH CHECK " << (settings.hilite ? IAST::hilite_none : "") << check; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " WITH CHECK" << (settings.hilite ? 
IAST::hilite_none : "") << check; } - void formatMultipleConditions(const std::array, MAX_CONDITION_TYPE> & conditions, bool alter, const IAST::FormatSettings & settings) + void formatForClauses(const std::vector> & conditions, bool alter, const IAST::FormatSettings & settings) { - std::array conditions_as_strings; + std::vector> conditions_as_strings; std::stringstream temp_sstream; IAST::FormatSettings temp_settings(temp_sstream, settings); - for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) + for (const auto & [condition_type, condition] : conditions) { - const auto & condition = conditions[condition_type]; - if (condition) - { - formatConditionalExpression(*condition, temp_settings); - conditions_as_strings[condition_type] = temp_sstream.str(); - temp_sstream.str(""); - } + formatConditionalExpression(condition, temp_settings); + conditions_as_strings.emplace_back(condition_type, temp_sstream.str()); + temp_sstream.str(""); } boost::container::flat_set commands; @@ -85,9 +81,8 @@ namespace check.clear(); /// Collect commands using the same filter and check conditions. - for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) + for (auto & [condition_type, condition] : conditions_as_strings) { - const String & condition = conditions_as_strings[condition_type]; if (condition.empty()) continue; const auto & type_info = ConditionTypeInfo::get(condition_type); @@ -106,11 +101,11 @@ namespace continue; } commands.emplace(type_info.command); - conditions_as_strings[condition_type].clear(); /// Skip this condition on the next iteration. + condition.clear(); /// Skip this condition on the next iteration. } if (!filter.empty() || !check.empty()) - formatCondition(commands, filter, check, alter, settings); + formatForClause(commands, filter, check, alter, settings); } while (!filter.empty() || !check.empty()); } @@ -167,7 +162,7 @@ void ASTCreateRowPolicyQuery::formatImpl(const FormatSettings & settings, Format if (is_restrictive) formatAsRestrictiveOrPermissive(*is_restrictive, settings); - formatMultipleConditions(conditions, alter, settings); + formatForClauses(conditions, alter, settings); if (roles && (!roles->empty() || alter)) formatToRoles(*roles, settings); diff --git a/src/Parsers/ASTCreateRowPolicyQuery.h b/src/Parsers/ASTCreateRowPolicyQuery.h index 4a7572eaefd..9d0e2fcce7b 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.h +++ b/src/Parsers/ASTCreateRowPolicyQuery.h @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -13,7 +12,7 @@ class ASTRowPolicyNames; class ASTRolesOrUsersSet; /** CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] name ON [database.]table - * [AS {PERMISSIVE | RESTRICTIVE}] + * [AS {permissive | restrictive}] * [FOR {SELECT | INSERT | UPDATE | DELETE | ALL}] * [USING condition] * [WITH CHECK condition] [,...] @@ -21,7 +20,7 @@ class ASTRolesOrUsersSet; * * ALTER [ROW] POLICY [IF EXISTS] name ON [database.]table * [RENAME TO new_name] - * [AS {PERMISSIVE | RESTRICTIVE}] + * [AS {permissive | restrictive}] * [FOR {SELECT | INSERT | UPDATE | DELETE | ALL}] * [USING {condition | NONE}] * [WITH CHECK {condition | NONE}] [,...] @@ -41,7 +40,7 @@ public: String new_short_name; std::optional is_restrictive; - std::array, RowPolicy::MAX_CONDITION_TYPE> conditions; /// `nullopt` means "not set", `nullptr` means set to NONE. + std::vector> conditions; /// `nullptr` means set to NONE. 
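    /// Target of the TO clause: the roles or users the policy is applied to.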
std::shared_ptr roles; diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index 061cca4ce63..c9fe15d391f 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -11,6 +11,7 @@ #include #include #include +#include namespace DB @@ -33,7 +34,7 @@ namespace }); } - bool parseAsRestrictiveOrPermissive(IParserBase::Pos & pos, Expected & expected, std::optional & is_restrictive) + bool parseAsRestrictiveOrPermissive(IParserBase::Pos & pos, Expected & expected, bool & is_restrictive) { return IParserBase::wrapParseImpl(pos, [&] { @@ -54,7 +55,7 @@ namespace }); } - bool parseConditionalExpression(IParserBase::Pos & pos, Expected & expected, std::optional & expr) + bool parseConditionalExpression(IParserBase::Pos & pos, Expected & expected, ASTPtr & expr) { return IParserBase::wrapParseImpl(pos, [&] { @@ -74,68 +75,80 @@ namespace }); } - bool parseConditions( - IParserBase::Pos & pos, Expected & expected, bool alter, std::array, MAX_CONDITION_TYPE> & conditions) + void addAllCommands(boost::container::flat_set & commands) + { + for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) + { + const std::string_view & command = ConditionTypeInfo::get(condition_type).command; + commands.emplace(command); + } + } + + bool parseCommands(IParserBase::Pos & pos, Expected & expected, boost::container::flat_set & commands) + { + return IParserBase::wrapParseImpl(pos, [&] + { + if (ParserKeyword{"ALL"}.ignore(pos, expected)) + { + addAllCommands(commands); + return true; + } + + boost::container::flat_set res_commands; + do + { + bool found_keyword = false; + for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) + { + const std::string_view & command = ConditionTypeInfo::get(condition_type).command; + if (ParserKeyword{command.data()}.ignore(pos, expected)) + { + res_commands.emplace(command); + found_keyword = true; + break; + } + } + + if (!found_keyword) + return false; + } + while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + commands = std::move(res_commands); + return true; + }); + } + + bool parseForClause(IParserBase::Pos & pos, Expected & expected, bool alter, std::vector> & conditions) { return IParserBase::wrapParseImpl(pos, [&] { boost::container::flat_set commands; - auto add_all_commands = [&] - { - for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) - { - const std::string_view & command = ConditionTypeInfo::get(condition_type).command; - commands.emplace(command); - } - }; - if (ParserKeyword{"FOR"}.ignore(pos, expected)) { - do - { - size_t old_size = commands.size(); - if (ParserKeyword{"ALL"}.ignore(pos, expected)) - { - add_all_commands(); - } - else - { - for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) - { - const std::string_view & command = ConditionTypeInfo::get(condition_type).command; - if (ParserKeyword{command.data()}.ignore(pos, expected)) - { - commands.emplace(command); - break; - } - } - } - if (commands.size() == old_size) - return false; - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + if (!parseCommands(pos, expected, commands)) + return false; } + else + addAllCommands(commands); std::optional filter; std::optional check; if (ParserKeyword{"USING"}.ignore(pos, expected)) { - if (!parseConditionalExpression(pos, expected, filter)) + if (!parseConditionalExpression(pos, expected, filter.emplace())) return false; } if (ParserKeyword{"WITH CHECK"}.ignore(pos, expected)) { - if (!parseConditionalExpression(pos, expected, 
check)) + if (!parseConditionalExpression(pos, expected, check.emplace())) return false; } if (!filter && !check) return false; - if (commands.empty()) - add_all_commands(); - if (!check && !alter) check = filter; @@ -145,9 +158,9 @@ namespace if (commands.count(type_info.command)) { if (type_info.is_check && check) - conditions[condition_type] = check; + conditions.emplace_back(condition_type, *check); else if (filter) - conditions[condition_type] = filter; + conditions.emplace_back(condition_type, *filter); } } @@ -155,15 +168,15 @@ namespace }); } - bool parseMultipleConditions( - IParserBase::Pos & pos, Expected & expected, bool alter, std::array, MAX_CONDITION_TYPE> & conditions) + bool parseForClauses( + IParserBase::Pos & pos, Expected & expected, bool alter, std::vector> & conditions) { return IParserBase::wrapParseImpl(pos, [&] { - std::array, MAX_CONDITION_TYPE> res_conditions; + std::vector> res_conditions; do { - if (!parseConditions(pos, expected, alter, res_conditions)) + if (!parseForClause(pos, expected, alter, res_conditions)) return false; } while (ParserToken{TokenType::Comma}.ignore(pos, expected)); @@ -178,7 +191,7 @@ namespace return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected)) + if (!ParserKeyword{"TO"}.ignore(pos, expected)) return false; ParserRolesOrUsersSet roles_p; @@ -244,18 +257,29 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & String new_short_name; std::optional is_restrictive; - std::array, MAX_CONDITION_TYPE> conditions; + std::vector> conditions; while (true) { if (alter && new_short_name.empty() && (names->name_parts.size() == 1) && parseRenameTo(pos, expected, new_short_name)) continue; - if (!is_restrictive && parseAsRestrictiveOrPermissive(pos, expected, is_restrictive)) - continue; + if (!is_restrictive) + { + bool new_is_restrictive; + if (parseAsRestrictiveOrPermissive(pos, expected, new_is_restrictive)) + { + is_restrictive = new_is_restrictive; + continue; + } + } - if (parseMultipleConditions(pos, expected, alter, conditions)) + std::vector> new_conditions; + if (parseForClauses(pos, expected, alter, new_conditions)) + { + boost::range::push_back(conditions, std::move(new_conditions)); continue; + } if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; diff --git a/src/Parsers/ParserCreateRowPolicyQuery.h b/src/Parsers/ParserCreateRowPolicyQuery.h index 1a64f2e893c..f05dca8179c 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.h +++ b/src/Parsers/ParserCreateRowPolicyQuery.h @@ -7,7 +7,7 @@ namespace DB { /** Parses queries like * CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] name ON [database.]table - * [AS {PERMISSIVE | RESTRICTIVE}] + * [AS {permissive | restrictive}] * [FOR {SELECT | INSERT | UPDATE | DELETE | ALL}] * [USING condition] * [WITH CHECK condition] [,...] @@ -15,7 +15,7 @@ namespace DB * * ALTER [ROW] POLICY [IF EXISTS] name ON [database.]table * [RENAME TO new_name] - * [AS {PERMISSIVE | RESTRICTIVE}] + * [AS {permissive | restrictive}] * [FOR {SELECT | INSERT | UPDATE | DELETE | ALL}] * [USING {condition | NONE}] * [WITH CHECK {condition | NONE}] [,...] 
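As a sketch, the grammar above admits statements such as the following (policy, table, column, and role names are illustrative):

``` sql
CREATE ROW POLICY pol1 ON mydb.table1
    AS permissive
    FOR SELECT
    USING user = currentUser()
    TO role1;

ALTER ROW POLICY pol1 ON mydb.table1
    AS restrictive
    USING NONE;
```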
diff --git a/src/Parsers/ParserRowPolicyName.cpp b/src/Parsers/ParserRowPolicyName.cpp index 8f1ef91f7c1..a74132cdaca 100644 --- a/src/Parsers/ParserRowPolicyName.cpp +++ b/src/Parsers/ParserRowPolicyName.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -19,25 +20,46 @@ namespace } - bool parseOnDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database, String & table_name) + bool parseDBAndTableName(IParser::Pos & pos, Expected & expected, String & database, String & table_name) { return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserKeyword{"ON"}.ignore(pos, expected)) + String res_database, res_table_name; + if (!parseDatabaseAndTableName(pos, expected, res_database, res_table_name)) return false; - return parseDatabaseAndTableName(pos, expected, database, table_name); + /// If table is specified without DB it cannot be followed by "ON" + /// (but can be followed by "ON CLUSTER"). + /// The following code is necessary to figure out while parsing something like + /// policy1 ON table1, policy2 ON table2 + /// that policy2 is another policy, not another table. + auto end_pos = pos; + if (res_database.empty() && ParserKeyword{"ON"}.ignore(pos, expected)) + { + String unused; + if (ASTQueryWithOnCluster::parse(pos, unused, expected)) + pos = end_pos; + else + return false; + } + + database = std::move(res_database); + table_name = std::move(res_table_name); + return true; }); } - bool parseOnDatabaseAndTableName(IParser::Pos & pos, Expected & expected, std::pair & database_and_table_name) + bool parseOnDBAndTableName(IParser::Pos & pos, Expected & expected, String & database, String & table_name) { - return parseOnDatabaseAndTableName(pos, expected, database_and_table_name.first, database_and_table_name.second); + return IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) && parseDBAndTableName(pos, expected, database, table_name); + }); } - bool parseOnDatabaseAndTableNames(IParser::Pos & pos, Expected & expected, std::vector> & database_and_table_names) + bool parseOnDBAndTableNames(IParser::Pos & pos, Expected & expected, std::vector> & database_and_table_names) { return IParserBase::wrapParseImpl(pos, [&] { @@ -49,23 +71,76 @@ namespace do { String database, table_name; - if (!parseDatabaseAndTableName(pos, expected, database, table_name)) - return false; - - String unused; - if (pos_before_comma && database.empty() && ParserKeyword{"ON"}.ignore(pos, expected) - && !ASTQueryWithOnCluster::parse(pos, unused, expected)) + if (!parseDBAndTableName(pos, expected, database, table_name)) { + if (!pos_before_comma) + return false; pos = *pos_before_comma; break; } - res.emplace_back(std::move(database), std::move(table_name)); pos_before_comma = pos; } while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + database_and_table_names = std::move(res); return true; + }); + } + + + bool parseRowPolicyNamesAroundON(IParser::Pos & pos, Expected & expected, + bool allow_multiple_short_names, + bool allow_multiple_tables, + bool allow_on_cluster, + std::vector & name_parts, + String & cluster) + { + return IParserBase::wrapParseImpl(pos, [&] + { + std::vector short_names; + if (allow_multiple_short_names) + { + if (!parseIdentifiersOrStringLiterals(pos, expected, short_names)) + return false; + } + else + { + if (!parseIdentifierOrStringLiteral(pos, expected, short_names.emplace_back())) + return false; + } + + String res_cluster; + if (allow_on_cluster) + parseOnCluster(pos, expected, res_cluster); + + 
std::vector> database_and_table_names; + if (allow_multiple_tables && (short_names.size() == 1)) + { + if (!parseOnDBAndTableNames(pos, expected, database_and_table_names)) + return false; + } + else + { + String database, table_name; + if (!parseOnDBAndTableName(pos, expected, database, table_name)) + return false; + database_and_table_names.emplace_back(std::move(database), std::move(table_name)); + } + + + if (allow_on_cluster && res_cluster.empty()) + parseOnCluster(pos, expected, res_cluster); + + assert(!short_names.empty()); + assert(!database_and_table_names.empty()); + name_parts.clear(); + for (const String & short_name : short_names) + for (const auto & [database, table_name] : database_and_table_names) + name_parts.push_back({short_name, database, table_name}); + + cluster = std::move(res_cluster); + return true; }); } } @@ -73,21 +148,14 @@ namespace bool ParserRowPolicyName::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - RowPolicy::NameParts name_parts; - if (!parseIdentifierOrStringLiteral(pos, expected, name_parts.short_name)) - return false; - + std::vector name_parts; String cluster; - parseOnCluster(pos, expected, cluster); - - if (!parseOnDatabaseAndTableName(pos, expected, name_parts.database, name_parts.table_name)) + if (!parseRowPolicyNamesAroundON(pos, expected, false, false, allow_on_cluster, name_parts, cluster)) return false; - if (cluster.empty()) - parseOnCluster(pos, expected, cluster); - + assert(name_parts.size() == 1); auto result = std::make_shared(); - result->name_parts = std::move(name_parts); + result->name_parts = std::move(name_parts.front()); result->cluster = std::move(cluster); node = result; return true; @@ -101,46 +169,15 @@ bool ParserRowPolicyNames::parseImpl(Pos & pos, ASTPtr & node, Expected & expect do { - std::vector short_names; - bool allowed_multiple_short_names = name_parts.empty(); - if (allowed_multiple_short_names) - { - if (!parseIdentifiersOrStringLiterals(pos, expected, short_names)) - return false; - } - else - { - if (!parseIdentifierOrStringLiteral(pos, expected, short_names.emplace_back())) - return false; - } + std::vector new_name_parts; + if (!parseRowPolicyNamesAroundON(pos, expected, name_parts.empty(), name_parts.empty(), allow_on_cluster, new_name_parts, cluster)) + return false; - bool allowed_on_cluster = allow_on_cluster && name_parts.empty(); - if (allowed_on_cluster) - parseOnCluster(pos, expected, cluster); - - std::vector> database_and_table_names; - bool allowed_multiple_db_and_table_names = ((name_parts.empty()) && (short_names.size() == 1)); - if (allowed_multiple_db_and_table_names) - { - if (!parseOnDatabaseAndTableNames(pos, expected, database_and_table_names)) - return false; - } - else - { - if (!parseOnDatabaseAndTableName(pos, expected, database_and_table_names.emplace_back())) - return false; - } - - allowed_on_cluster &= cluster.empty(); - if (allowed_on_cluster) - parseOnCluster(pos, expected, cluster); - - for (const String & short_name : short_names) - for (const auto & [database, table_name] : database_and_table_names) - name_parts.push_back({short_name, database, table_name}); - - if ((short_names.size() != 1) || (database_and_table_names.size() != 1) || !cluster.empty()) - break; + size_t num_new_name_parts = new_name_parts.size(); + assert(num_new_name_parts >= 1); + boost::range::push_back(name_parts, std::move(new_name_parts)); + if ((num_new_name_parts != 1) || !cluster.empty()) + break; } while (ParserToken{TokenType::Comma}.ignore(pos, expected)); diff --git 
a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 9f5267b3a9b..8999fa8bb47 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -31,7 +31,7 @@ NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() {"database", std::make_shared()}, {"table", std::make_shared()}, {"id", std::make_shared()}, - {"source", std::make_shared()}, + {"storage", std::make_shared()}, }; for (auto type : ext::range(MAX_CONDITION_TYPE)) From a5b70fbdda5957d9ab4ad9c45e872813f1a033af Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 2 Jun 2020 00:43:16 +0300 Subject: [PATCH 0753/2229] Add tests. --- .../0_stateless/01292_create_user.reference | 108 +++++++++ .../queries/0_stateless/01292_create_user.sql | 211 ++++++++++++++++++ .../0_stateless/01293_create_role.reference | 38 ++++ .../queries/0_stateless/01293_create_role.sql | 74 ++++++ .../01294_create_settings_profile.reference | 56 +++++ .../01294_create_settings_profile.sql | 103 +++++++++ .../01295_create_row_policy.reference | 35 +++ .../0_stateless/01295_create_row_policy.sql | 79 +++++++ ...e_row_policy_in_current_database.reference | 20 ++ ..._create_row_policy_in_current_database.sql | 51 +++++ .../0_stateless/01297_create_quota.reference | 63 ++++++ .../0_stateless/01297_create_quota.sql | 129 +++++++++++ 12 files changed, 967 insertions(+) create mode 100644 tests/queries/0_stateless/01292_create_user.reference create mode 100644 tests/queries/0_stateless/01292_create_user.sql create mode 100644 tests/queries/0_stateless/01293_create_role.reference create mode 100644 tests/queries/0_stateless/01293_create_role.sql create mode 100644 tests/queries/0_stateless/01294_create_settings_profile.reference create mode 100644 tests/queries/0_stateless/01294_create_settings_profile.sql create mode 100644 tests/queries/0_stateless/01295_create_row_policy.reference create mode 100644 tests/queries/0_stateless/01295_create_row_policy.sql create mode 100644 tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference create mode 100644 tests/queries/0_stateless/01296_create_row_policy_in_current_database.sql create mode 100644 tests/queries/0_stateless/01297_create_quota.reference create mode 100644 tests/queries/0_stateless/01297_create_quota.sql diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference new file mode 100644 index 00000000000..555bd99bd94 --- /dev/null +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -0,0 +1,108 @@ +-- default +CREATE USER u1_01292 +-- same as default +CREATE USER u2_01292 +CREATE USER u3_01292 +-- rename +CREATE USER u2_01292_renamed +-- authentication +CREATE USER u1_01292 +CREATE USER u2_01292 IDENTIFIED WITH plaintext_password +CREATE USER u3_01292 IDENTIFIED WITH sha256_password +CREATE USER u4_01292 IDENTIFIED WITH sha256_password +CREATE USER u5_01292 IDENTIFIED WITH sha256_password +CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password +CREATE USER u7_01292 IDENTIFIED WITH double_sha1_password +CREATE USER u1_01292 IDENTIFIED WITH sha256_password +CREATE USER u2_01292 IDENTIFIED WITH sha256_password +CREATE USER u3_01292 IDENTIFIED WITH sha256_password +CREATE USER u4_01292 IDENTIFIED WITH plaintext_password +CREATE USER u5_01292 +-- host +CREATE USER u1_01292 +CREATE USER u2_01292 HOST NONE +CREATE USER u3_01292 HOST LOCAL +CREATE USER u4_01292 HOST NAME \'myhost.com\' +CREATE USER u5_01292 
HOST LOCAL, NAME \'myhost.com\' +CREATE USER u6_01292 HOST LOCAL, NAME \'myhost.com\' +CREATE USER u7_01292 HOST REGEXP \'.*\\\\.myhost\\\\.com\' +CREATE USER u8_01292 +CREATE USER u9_01292 HOST LIKE \'%.myhost.com\' +CREATE USER u10_01292 HOST LIKE \'%.myhost.com\' +CREATE USER u11_01292 HOST LOCAL +CREATE USER u12_01292 HOST IP \'192.168.1.1\' +CREATE USER u13_01292 HOST IP \'192.168.0.0/16\' +CREATE USER u14_01292 HOST LOCAL +CREATE USER u15_01292 HOST IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765d\' +CREATE USER u16_01292 HOST LOCAL, IP \'65:ff0c::/96\' +CREATE USER u1_01292 HOST NONE +CREATE USER u2_01292 HOST NAME \'myhost.com\' +CREATE USER u3_01292 HOST LOCAL, NAME \'myhost.com\' +CREATE USER u4_01292 HOST NONE +-- host after @ +CREATE USER u1_01292 +CREATE USER u1_01292 +CREATE USER `u2_01292@%.myhost.com` HOST LIKE \'%.myhost.com\' +CREATE USER `u2_01292@%.myhost.com` HOST LIKE \'%.myhost.com\' +CREATE USER `u3_01292@192.168.%.%` HOST LIKE \'192.168.%.%\' +CREATE USER `u3_01292@192.168.%.%` HOST LIKE \'192.168.%.%\' +CREATE USER `u4_01292@::1` HOST LOCAL +CREATE USER `u4_01292@::1` HOST LOCAL +CREATE USER `u5_01292@65:ff0c::/96` HOST LIKE \'65:ff0c::/96\' +CREATE USER `u5_01292@65:ff0c::/96` HOST LIKE \'65:ff0c::/96\' +CREATE USER u1_01292 HOST LOCAL +CREATE USER `u2_01292@%.myhost.com` +-- settings +CREATE USER u1_01292 +CREATE USER u2_01292 SETTINGS PROFILE default +CREATE USER u3_01292 SETTINGS max_memory_usage = 5000000 +CREATE USER u4_01292 SETTINGS max_memory_usage MIN 5000000 +CREATE USER u5_01292 SETTINGS max_memory_usage MAX 5000000 +CREATE USER u6_01292 SETTINGS max_memory_usage READONLY +CREATE USER u7_01292 SETTINGS max_memory_usage WRITABLE +CREATE USER u8_01292 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 READONLY +CREATE USER u9_01292 SETTINGS PROFILE default, max_memory_usage = 5000000 WRITABLE +CREATE USER u1_01292 SETTINGS readonly = 1 +CREATE USER u2_01292 SETTINGS readonly = 1 +CREATE USER u3_01292 +-- default role +CREATE USER u1_01292 +CREATE USER u2_01292 DEFAULT ROLE NONE +CREATE USER u3_01292 DEFAULT ROLE r1_01292 +CREATE USER u4_01292 DEFAULT ROLE r1_01292, r2_01292 +CREATE USER u5_01292 DEFAULT ROLE ALL EXCEPT r2_01292 +CREATE USER u6_01292 DEFAULT ROLE ALL EXCEPT r1_01292, r2_01292 +CREATE USER u1_01292 DEFAULT ROLE r1_01292 +CREATE USER u2_01292 DEFAULT ROLE ALL EXCEPT r2_01292 +CREATE USER u3_01292 DEFAULT ROLE r2_01292 +CREATE USER u4_01292 +CREATE USER u5_01292 DEFAULT ROLE ALL EXCEPT r1_01292 +CREATE USER u6_01292 DEFAULT ROLE NONE +-- complex +CREATE USER u1_01292 IDENTIFIED WITH plaintext_password HOST LOCAL SETTINGS readonly = 1 +CREATE USER u1_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE NONE SETTINGS PROFILE default +-- multiple users in one command +CREATE USER u1_01292 DEFAULT ROLE NONE +CREATE USER u2_01292 DEFAULT ROLE NONE +CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' +CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' +CREATE USER `u5_01292@%.host.com` HOST LIKE \'%.host.com\' +CREATE USER `u6_01292@%.host.com` HOST LIKE \'%.host.com\' +CREATE USER `u7_01292@%.host.com` HOST LIKE \'%.host.com\' +CREATE USER `u8_01292@%.otherhost.com` HOST LIKE \'%.otherhost.com\' +CREATE USER u1_01292 DEFAULT ROLE NONE SETTINGS readonly = 1 +CREATE USER u2_01292 DEFAULT ROLE r1_01292, r2_01292 SETTINGS readonly = 1 +CREATE USER u3_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 +CREATE USER u4_01292 HOST LIKE \'%.%.myhost.com\' DEFAULT ROLE r1_01292, r2_01292 +-- system.users +u1_01292 disk plaintext_password [] [] 
['localhost'] [] [] 1 [] [] +u2_01292 disk no_password [] [] [] [] ['%.%.myhost.com'] 0 [] [] +u3_01292 disk sha256_password [] ['192.169.1.1','192.168.0.0/16'] ['localhost'] [] [] 0 ['r1_01292'] [] +u4_01292 disk double_sha1_password [] ['::/0'] [] [] [] 1 [] ['r1_01292'] +-- system.settings_profile_elements +\N u1_01292 \N 0 readonly 1 \N \N \N \N +\N u2_01292 \N 0 \N \N \N \N \N default +\N u3_01292 \N 0 max_memory_usage 5000000 4000000 6000000 0 \N +\N u4_01292 \N 0 \N \N \N \N \N default +\N u4_01292 \N 1 max_memory_usage 5000000 \N \N \N \N +\N u4_01292 \N 2 readonly 1 \N \N \N \N diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql new file mode 100644 index 00000000000..5ae7f3921e6 --- /dev/null +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -0,0 +1,211 @@ +DROP USER IF EXISTS u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; +DROP USER IF EXISTS u10_01292, u11_01292, u12_01292, u13_01292, u14_01292, u15_01292, u16_01292; +DROP USER IF EXISTS u2_01292_renamed; +DROP USER IF EXISTS u1_01292@'%', 'u2_01292@%.myhost.com', u3_01292@'192.168.%.%', 'u4_01292@::1', u5_01292@'65:ff0c::/96'; +DROP USER IF EXISTS u5_01292@'%.host.com', u6_01292@'%.host.com', u7_01292@'%.host.com', u8_01292@'%.otherhost.com'; +DROP ROLE IF EXISTS r1_01292, r2_01292; + +SELECT '-- default'; +CREATE USER u1_01292; +SHOW CREATE USER u1_01292; + +SELECT '-- same as default'; +CREATE USER u2_01292 NOT IDENTIFIED HOST ANY SETTINGS NONE DEFAULT ROLE ALL; +CREATE USER u3_01292 DEFAULT ROLE ALL IDENTIFIED WITH no_password SETTINGS NONE HOST ANY; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; + +SELECT '-- rename'; +ALTER USER u2_01292 RENAME TO 'u2_01292_renamed'; +SHOW CREATE USER u2_01292; -- { serverError 192 } -- User not found +SHOW CREATE USER u2_01292_renamed; +DROP USER u1_01292, u2_01292_renamed, u3_01292; + +SELECT '-- authentication'; +CREATE USER u1_01292 NOT IDENTIFIED; +CREATE USER u2_01292 IDENTIFIED WITH plaintext_password BY 'qwe123'; +CREATE USER u3_01292 IDENTIFIED BY 'qwe123'; +CREATE USER u4_01292 IDENTIFIED WITH sha256_password BY 'qwe123'; +CREATE USER u5_01292 IDENTIFIED WITH sha256_hash BY '18138372FAD4B94533CD4881F03DC6C69296DD897234E0CEE83F727E2E6B1F63'; +CREATE USER u6_01292 IDENTIFIED WITH double_sha1_password BY 'qwe123'; +CREATE USER u7_01292 IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E'; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +SHOW CREATE USER u6_01292; +SHOW CREATE USER u7_01292; +ALTER USER u1_01292 IDENTIFIED BY '123qwe'; +ALTER USER u2_01292 IDENTIFIED BY '123qwe'; +ALTER USER u3_01292 IDENTIFIED BY '123qwe'; +ALTER USER u4_01292 IDENTIFIED WITH plaintext_password BY '123qwe'; +ALTER USER u5_01292 NOT IDENTIFIED; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292; + +SELECT '-- host'; +CREATE USER u1_01292 HOST ANY; +CREATE USER u2_01292 HOST NONE; +CREATE USER u3_01292 HOST LOCAL; +CREATE USER u4_01292 HOST NAME 'myhost.com'; +CREATE USER u5_01292 HOST NAME 'myhost.com', LOCAL; +CREATE USER u6_01292 HOST LOCAL, NAME 'myhost.com'; +CREATE USER u7_01292 HOST REGEXP '.*\\.myhost\\.com'; +CREATE USER u8_01292 HOST LIKE '%'; +CREATE USER u9_01292 HOST LIKE '%.myhost.com'; 
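+-- HOST clause semantics exercised in this block: NAME matches the exact client host name,
+-- REGEXP matches it against a regular expression, LIKE matches it against a LIKE pattern,
+-- IP matches the client address or subnet, and LOCAL allows local connections only.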
+CREATE USER u10_01292 HOST LIKE '%.myhost.com', '%.myhost2.com'; +CREATE USER u11_01292 HOST IP '127.0.0.1'; +CREATE USER u12_01292 HOST IP '192.168.1.1'; +CREATE USER u13_01292 HOST IP '192.168.0.0/16'; +CREATE USER u14_01292 HOST IP '::1'; +CREATE USER u15_01292 HOST IP '2001:0db8:11a3:09d7:1f34:8a2e:07a0:765d'; +CREATE USER u16_01292 HOST IP '65:ff0c::/96', '::1'; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +SHOW CREATE USER u6_01292; +SHOW CREATE USER u7_01292; +SHOW CREATE USER u8_01292; +SHOW CREATE USER u9_01292; +SHOW CREATE USER u10_01292; +SHOW CREATE USER u11_01292; +SHOW CREATE USER u12_01292; +SHOW CREATE USER u13_01292; +SHOW CREATE USER u14_01292; +SHOW CREATE USER u15_01292; +SHOW CREATE USER u16_01292; +ALTER USER u1_01292 HOST NONE; +ALTER USER u2_01292 HOST NAME 'myhost.com'; +ALTER USER u3_01292 ADD HOST NAME 'myhost.com'; +ALTER USER u4_01292 DROP HOST NAME 'myhost.com'; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; +DROP USER u10_01292, u11_01292, u12_01292, u13_01292, u14_01292, u15_01292, u16_01292; + +SELECT '-- host after @'; +CREATE USER u1_01292@'%'; +CREATE USER u2_01292@'%.myhost.com'; +CREATE USER u3_01292@'192.168.%.%'; +CREATE USER u4_01292@'::1'; +CREATE USER u5_01292@'65:ff0c::/96'; +SHOW CREATE USER u1_01292@'%'; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292@'%.myhost.com'; +SHOW CREATE USER 'u2_01292@%.myhost.com'; +SHOW CREATE USER u3_01292@'192.168.%.%'; +SHOW CREATE USER 'u3_01292@192.168.%.%'; +SHOW CREATE USER u4_01292@'::1'; +SHOW CREATE USER 'u4_01292@::1'; +SHOW CREATE USER u5_01292@'65:ff0c::/96'; +SHOW CREATE USER 'u5_01292@65:ff0c::/96'; +ALTER USER u1_01292@'%' HOST LOCAL; +ALTER USER u2_01292@'%.myhost.com' HOST ANY; +SHOW CREATE USER u1_01292@'%'; +SHOW CREATE USER u2_01292@'%.myhost.com'; +DROP USER u1_01292@'%', 'u2_01292@%.myhost.com', u3_01292@'192.168.%.%', 'u4_01292@::1', u5_01292@'65:ff0c::/96'; + +SELECT '-- settings'; +CREATE USER u1_01292 SETTINGS NONE; +CREATE USER u2_01292 SETTINGS PROFILE 'default'; +CREATE USER u3_01292 SETTINGS max_memory_usage=5000000; +CREATE USER u4_01292 SETTINGS max_memory_usage MIN=5000000; +CREATE USER u5_01292 SETTINGS max_memory_usage MAX=5000000; +CREATE USER u6_01292 SETTINGS max_memory_usage READONLY; +CREATE USER u7_01292 SETTINGS max_memory_usage WRITABLE; +CREATE USER u8_01292 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 READONLY; +CREATE USER u9_01292 SETTINGS PROFILE 'default', max_memory_usage=5000000 WRITABLE; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +SHOW CREATE USER u6_01292; +SHOW CREATE USER u7_01292; +SHOW CREATE USER u8_01292; +SHOW CREATE USER u9_01292; +ALTER USER u1_01292 SETTINGS readonly=1; +ALTER USER u2_01292 SETTINGS readonly=1; +ALTER USER u3_01292 SETTINGS NONE; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292, u7_01292, u8_01292, u9_01292; + +SELECT '-- default role'; +CREATE ROLE r1_01292, r2_01292; +CREATE USER u1_01292 DEFAULT ROLE ALL; +CREATE USER u2_01292 DEFAULT ROLE NONE; +CREATE USER u3_01292 DEFAULT ROLE r1_01292; +CREATE USER u4_01292 DEFAULT ROLE r1_01292, r2_01292; +CREATE USER 
u5_01292 DEFAULT ROLE ALL EXCEPT r2_01292; +CREATE USER u6_01292 DEFAULT ROLE ALL EXCEPT r1_01292, r2_01292; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +SHOW CREATE USER u6_01292; +GRANT r1_01292, r2_01292 TO u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292; +ALTER USER u1_01292 DEFAULT ROLE r1_01292; +ALTER USER u2_01292 DEFAULT ROLE ALL EXCEPT r2_01292; +SET DEFAULT ROLE r2_01292 TO u3_01292; +SET DEFAULT ROLE ALL TO u4_01292; +SET DEFAULT ROLE ALL EXCEPT r1_01292 TO u5_01292; +SET DEFAULT ROLE NONE TO u6_01292; +SHOW CREATE USER u1_01292; +SHOW CREATE USER u2_01292; +SHOW CREATE USER u3_01292; +SHOW CREATE USER u4_01292; +SHOW CREATE USER u5_01292; +SHOW CREATE USER u6_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292, u6_01292; + +SELECT '-- complex'; +CREATE USER u1_01292 IDENTIFIED WITH plaintext_password BY 'qwe123' HOST LOCAL SETTINGS readonly=1; +SHOW CREATE USER u1_01292; +ALTER USER u1_01292 NOT IDENTIFIED HOST LIKE '%.%.myhost.com' DEFAULT ROLE NONE SETTINGS PROFILE 'default'; +SHOW CREATE USER u1_01292; +DROP USER u1_01292; + +SELECT '-- multiple users in one command'; +CREATE USER u1_01292, u2_01292 DEFAULT ROLE NONE; +CREATE USER u3_01292, u4_01292 HOST LIKE '%.%.myhost.com'; +CREATE USER u5_01292@'%.host.com', u6_01292@'%.host.com'; +CREATE USER u7_01292@'%.host.com', u8_01292@'%.otherhost.com'; +SHOW CREATE USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292@'%.host.com', u6_01292@'%.host.com'; +SHOW CREATE USER u7_01292@'%.host.com', u8_01292@'%.otherhost.com'; +ALTER USER u1_01292, u2_01292 SETTINGS readonly=1; +GRANT r1_01292, r2_01292 TO u2_01292, u3_01292, u4_01292; +SET DEFAULT ROLE r1_01292, r2_01292 TO u2_01292, u3_01292, u4_01292; +SHOW CREATE USER u1_01292, u2_01292, u3_01292, u4_01292; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292@'%.host.com', u6_01292@'%.host.com'; +DROP USER u7_01292@'%.host.com', u8_01292@'%.otherhost.com'; + +SELECT '-- system.users'; +CREATE USER u1_01292 IDENTIFIED WITH plaintext_password BY 'qwe123' HOST LOCAL; +CREATE USER u2_01292 NOT IDENTIFIED HOST LIKE '%.%.myhost.com' DEFAULT ROLE NONE; +CREATE USER u3_01292 IDENTIFIED BY 'qwe123' HOST IP '192.168.0.0/16', '192.169.1.1', '::1' DEFAULT ROLE r1_01292; +CREATE USER u4_01292 IDENTIFIED WITH double_sha1_password BY 'qwe123' HOST ANY DEFAULT ROLE ALL EXCEPT r1_01292; +SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except FROM system.users WHERE name LIKE 'u%\_01292' ORDER BY name; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292; + +SELECT '-- system.settings_profile_elements'; +CREATE USER u1_01292 SETTINGS readonly=1; +CREATE USER u2_01292 SETTINGS PROFILE 'default'; +CREATE USER u3_01292 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 WRITABLE; +CREATE USER u4_01292 SETTINGS PROFILE 'default', max_memory_usage=5000000, readonly=1; +CREATE USER u5_01292 SETTINGS NONE; +SELECT * FROM system.settings_profile_elements WHERE user_name LIKE 'u%\_01292' ORDER BY user_name, index; +DROP USER u1_01292, u2_01292, u3_01292, u4_01292, u5_01292; + +DROP ROLE r1_01292, r2_01292; diff --git a/tests/queries/0_stateless/01293_create_role.reference b/tests/queries/0_stateless/01293_create_role.reference new file mode 100644 index 00000000000..0cba719af66 --- /dev/null +++ b/tests/queries/0_stateless/01293_create_role.reference @@ -0,0 +1,38 @@ +-- 
default +CREATE ROLE r1_01293 +-- same as default +CREATE ROLE r2_01293 +-- rename +CREATE ROLE r2_01293_renamed +-- host after @ +CREATE ROLE r1_01293 +CREATE ROLE r1_01293 +CREATE ROLE `r2_01293@%.myhost.com` +CREATE ROLE `r2_01293@%.myhost.com` +-- settings +CREATE ROLE r1_01293 +CREATE ROLE r2_01293 SETTINGS PROFILE default +CREATE ROLE r3_01293 SETTINGS max_memory_usage = 5000000 +CREATE ROLE r4_01293 SETTINGS max_memory_usage MIN 5000000 +CREATE ROLE r5_01293 SETTINGS max_memory_usage MAX 5000000 +CREATE ROLE r6_01293 SETTINGS max_memory_usage READONLY +CREATE ROLE r7_01293 SETTINGS max_memory_usage WRITABLE +CREATE ROLE r8_01293 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 READONLY +CREATE ROLE r9_01293 SETTINGS PROFILE default, max_memory_usage = 5000000 WRITABLE +CREATE ROLE r1_01293 SETTINGS readonly = 1 +CREATE ROLE r2_01293 SETTINGS readonly = 1 +CREATE ROLE r3_01293 +-- multiple roles in one command +CREATE ROLE r1_01293 +CREATE ROLE r2_01293 +CREATE ROLE r1_01293 SETTINGS readonly = 1 +CREATE ROLE r2_01293 SETTINGS readonly = 1 +-- system.roles +r1_01293 disk +-- system.settings_profile_elements +\N \N r1_01293 0 readonly 1 \N \N \N \N +\N \N r2_01293 0 \N \N \N \N \N default +\N \N r3_01293 0 max_memory_usage 5000000 4000000 6000000 0 \N +\N \N r4_01293 0 \N \N \N \N \N default +\N \N r4_01293 1 max_memory_usage 5000000 \N \N \N \N +\N \N r4_01293 2 readonly 1 \N \N \N \N diff --git a/tests/queries/0_stateless/01293_create_role.sql b/tests/queries/0_stateless/01293_create_role.sql new file mode 100644 index 00000000000..963a1020e3f --- /dev/null +++ b/tests/queries/0_stateless/01293_create_role.sql @@ -0,0 +1,74 @@ +DROP ROLE IF EXISTS r1_01293, r2_01293, r3_01293, r4_01293, r5_01293, r6_01293, r7_01293, r8_01293, r9_01293; +DROP ROLE IF EXISTS r2_01293_renamed; +DROP ROLE IF EXISTS r1_01293@'%', 'r2_01293@%.myhost.com'; + +SELECT '-- default'; +CREATE ROLE r1_01293; +SHOW CREATE ROLE r1_01293; + +SELECT '-- same as default'; +CREATE ROLE r2_01293 SETTINGS NONE; +SHOW CREATE ROLE r2_01293; + +SELECT '-- rename'; +ALTER ROLE r2_01293 RENAME TO 'r2_01293_renamed'; +SHOW CREATE ROLE r2_01293; -- { serverError 511 } -- Role not found +SHOW CREATE ROLE r2_01293_renamed; +DROP ROLE r1_01293, r2_01293_renamed; + +SELECT '-- host after @'; +CREATE ROLE r1_01293@'%'; +CREATE ROLE r2_01293@'%.myhost.com'; +SHOW CREATE ROLE r1_01293@'%'; +SHOW CREATE ROLE r1_01293; +SHOW CREATE ROLE r2_01293@'%.myhost.com'; +SHOW CREATE ROLE 'r2_01293@%.myhost.com'; +DROP ROLE r1_01293@'%', 'r2_01293@%.myhost.com'; + +SELECT '-- settings'; +CREATE ROLE r1_01293 SETTINGS NONE; +CREATE ROLE r2_01293 SETTINGS PROFILE 'default'; +CREATE ROLE r3_01293 SETTINGS max_memory_usage=5000000; +CREATE ROLE r4_01293 SETTINGS max_memory_usage MIN=5000000; +CREATE ROLE r5_01293 SETTINGS max_memory_usage MAX=5000000; +CREATE ROLE r6_01293 SETTINGS max_memory_usage READONLY; +CREATE ROLE r7_01293 SETTINGS max_memory_usage WRITABLE; +CREATE ROLE r8_01293 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 READONLY; +CREATE ROLE r9_01293 SETTINGS PROFILE 'default', max_memory_usage=5000000 WRITABLE; +SHOW CREATE ROLE r1_01293; +SHOW CREATE ROLE r2_01293; +SHOW CREATE ROLE r3_01293; +SHOW CREATE ROLE r4_01293; +SHOW CREATE ROLE r5_01293; +SHOW CREATE ROLE r6_01293; +SHOW CREATE ROLE r7_01293; +SHOW CREATE ROLE r8_01293; +SHOW CREATE ROLE r9_01293; +ALTER ROLE r1_01293 SETTINGS readonly=1; +ALTER ROLE r2_01293 SETTINGS readonly=1; +ALTER ROLE r3_01293 SETTINGS NONE; +SHOW CREATE ROLE r1_01293; +SHOW 
CREATE ROLE r2_01293; +SHOW CREATE ROLE r3_01293; +DROP ROLE r1_01293, r2_01293, r3_01293, r4_01293, r5_01293, r6_01293, r7_01293, r8_01293, r9_01293; + +SELECT '-- multiple roles in one command'; +CREATE ROLE r1_01293, r2_01293; +SHOW CREATE ROLE r1_01293, r2_01293; +ALTER ROLE r1_01293, r2_01293 SETTINGS readonly=1; +SHOW CREATE ROLE r1_01293, r2_01293; +DROP ROLE r1_01293, r2_01293; + +SELECT '-- system.roles'; +CREATE ROLE r1_01293; +SELECT name, storage from system.roles WHERE name='r1_01293'; +DROP ROLE r1_01293; + +SELECT '-- system.settings_profile_elements'; +CREATE ROLE r1_01293 SETTINGS readonly=1; +CREATE ROLE r2_01293 SETTINGS PROFILE 'default'; +CREATE ROLE r3_01293 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 WRITABLE; +CREATE ROLE r4_01293 SETTINGS PROFILE 'default', max_memory_usage=5000000, readonly=1; +CREATE ROLE r5_01293 SETTINGS NONE; +SELECT * FROM system.settings_profile_elements WHERE role_name LIKE 'r%\_01293' ORDER BY role_name, index; +DROP ROLE r1_01293, r2_01293, r3_01293, r4_01293, r5_01293; diff --git a/tests/queries/0_stateless/01294_create_settings_profile.reference b/tests/queries/0_stateless/01294_create_settings_profile.reference new file mode 100644 index 00000000000..527ceea3dd7 --- /dev/null +++ b/tests/queries/0_stateless/01294_create_settings_profile.reference @@ -0,0 +1,56 @@ +-- default +CREATE SETTINGS PROFILE s1_01294 +-- same as default +CREATE SETTINGS PROFILE s2_01294 +CREATE SETTINGS PROFILE s3_01294 +-- rename +CREATE SETTINGS PROFILE s2_01294_renamed +-- settings +CREATE SETTINGS PROFILE s1_01294 +CREATE SETTINGS PROFILE s2_01294 SETTINGS INHERIT default +CREATE SETTINGS PROFILE s3_01294 SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE s4_01294 SETTINGS max_memory_usage MIN 5000000 +CREATE SETTINGS PROFILE s5_01294 SETTINGS max_memory_usage MAX 5000000 +CREATE SETTINGS PROFILE s6_01294 SETTINGS max_memory_usage READONLY +CREATE SETTINGS PROFILE s7_01294 SETTINGS max_memory_usage WRITABLE +CREATE SETTINGS PROFILE s8_01294 SETTINGS max_memory_usage = 5000000 MIN 4000000 MAX 6000000 READONLY +CREATE SETTINGS PROFILE s9_01294 SETTINGS INHERIT default, max_memory_usage = 5000000 WRITABLE +CREATE SETTINGS PROFILE s10_01294 SETTINGS INHERIT s1_01294, INHERIT s3_01294, INHERIT default, readonly = 0, max_memory_usage MAX 6000000 +CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly = 0 +CREATE SETTINGS PROFILE s2_01294 SETTINGS readonly = 1 +CREATE SETTINGS PROFILE s3_01294 +-- to roles +CREATE SETTINGS PROFILE s1_01294 +CREATE SETTINGS PROFILE s2_01294 TO ALL +CREATE SETTINGS PROFILE s3_01294 TO r1_01294 +CREATE SETTINGS PROFILE s4_01294 TO u1_01294 +CREATE SETTINGS PROFILE s5_01294 TO r1_01294, u1_01294 +CREATE SETTINGS PROFILE s6_01294 TO ALL EXCEPT r1_01294 +CREATE SETTINGS PROFILE s7_01294 TO ALL EXCEPT r1_01294, u1_01294 +CREATE SETTINGS PROFILE s1_01294 TO u1_01294 +CREATE SETTINGS PROFILE s2_01294 +-- complex +CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly = 0 TO r1_01294 +CREATE SETTINGS PROFILE s1_01294 SETTINGS INHERIT default +-- multiple profiles in one command +CREATE SETTINGS PROFILE s1_01294 SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 5000000 +CREATE SETTINGS PROFILE s3_01294 TO ALL +CREATE SETTINGS PROFILE s4_01294 TO ALL +CREATE SETTINGS PROFILE s1_01294 SETTINGS max_memory_usage = 6000000 +CREATE SETTINGS PROFILE s2_01294 SETTINGS max_memory_usage = 6000000 TO r1_01294 +CREATE SETTINGS PROFILE s3_01294 SETTINGS max_memory_usage = 6000000 TO r1_01294 
+CREATE SETTINGS PROFILE s4_01294 TO r1_01294 +-- system.settings_profiles +s1_01294 disk 0 0 [] [] +s2_01294 disk 1 0 ['r1_01294'] [] +s3_01294 disk 1 0 ['r1_01294'] [] +s4_01294 disk 1 0 ['r1_01294'] [] +s5_01294 disk 3 1 [] ['r1_01294'] +-- system.settings_profile_elements +s2_01294 \N \N 0 readonly 0 \N \N \N \N +s3_01294 \N \N 0 max_memory_usage 5000000 4000000 6000000 1 \N +s4_01294 \N \N 0 max_memory_usage 5000000 \N \N \N \N +s5_01294 \N \N 0 \N \N \N \N \N default +s5_01294 \N \N 1 readonly 0 \N \N \N \N +s5_01294 \N \N 2 max_memory_usage \N \N 6000000 0 \N diff --git a/tests/queries/0_stateless/01294_create_settings_profile.sql b/tests/queries/0_stateless/01294_create_settings_profile.sql new file mode 100644 index 00000000000..2d34042f2b4 --- /dev/null +++ b/tests/queries/0_stateless/01294_create_settings_profile.sql @@ -0,0 +1,103 @@ +DROP SETTINGS PROFILE IF EXISTS s1_01294, s2_01294, s3_01294, s4_01294, s5_01294, s6_01294, s7_01294, s8_01294, s9_01294, s10_01294; +DROP SETTINGS PROFILE IF EXISTS s2_01294_renamed; +DROP USER IF EXISTS u1_01294; +DROP ROLE IF EXISTS r1_01294; + +SELECT '-- default'; +CREATE SETTINGS PROFILE s1_01294; +SHOW CREATE SETTINGS PROFILE s1_01294; + +SELECT '-- same as default'; +CREATE SETTINGS PROFILE s2_01294 SETTINGS NONE TO NONE; +CREATE PROFILE s3_01294; +SHOW CREATE PROFILE s2_01294; +SHOW CREATE SETTINGS PROFILE s3_01294; + +SELECT '-- rename'; +ALTER SETTINGS PROFILE s2_01294 RENAME TO 's2_01294_renamed'; +SHOW CREATE SETTINGS PROFILE s2_01294; -- { serverError 180 } -- Profile not found +SHOW CREATE SETTINGS PROFILE s2_01294_renamed; +DROP SETTINGS PROFILE s1_01294, s2_01294_renamed, s3_01294; + +SELECT '-- settings'; +CREATE PROFILE s1_01294 SETTINGS NONE; +CREATE PROFILE s2_01294 SETTINGS INHERIT 'default'; +CREATE PROFILE s3_01294 SETTINGS max_memory_usage=5000000; +CREATE PROFILE s4_01294 SETTINGS max_memory_usage MIN=5000000; +CREATE PROFILE s5_01294 SETTINGS max_memory_usage MAX=5000000; +CREATE PROFILE s6_01294 SETTINGS max_memory_usage READONLY; +CREATE PROFILE s7_01294 SETTINGS max_memory_usage WRITABLE; +CREATE PROFILE s8_01294 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 READONLY; +CREATE PROFILE s9_01294 SETTINGS INHERIT 'default', max_memory_usage=5000000 WRITABLE; +CREATE PROFILE s10_01294 SETTINGS INHERIT s1_01294, s3_01294, INHERIT default, readonly=0, max_memory_usage MAX 6000000; +SHOW CREATE PROFILE s1_01294; +SHOW CREATE PROFILE s2_01294; +SHOW CREATE PROFILE s3_01294; +SHOW CREATE PROFILE s4_01294; +SHOW CREATE PROFILE s5_01294; +SHOW CREATE PROFILE s6_01294; +SHOW CREATE PROFILE s7_01294; +SHOW CREATE PROFILE s8_01294; +SHOW CREATE PROFILE s9_01294; +SHOW CREATE PROFILE s10_01294; +ALTER PROFILE s1_01294 SETTINGS readonly=0; +ALTER PROFILE s2_01294 SETTINGS readonly=1; +ALTER PROFILE s3_01294 SETTINGS NONE; +SHOW CREATE PROFILE s1_01294; +SHOW CREATE PROFILE s2_01294; +SHOW CREATE PROFILE s3_01294; +DROP PROFILE s1_01294, s2_01294, s3_01294, s4_01294, s5_01294, s6_01294, s7_01294, s8_01294, s9_01294, s10_01294; + +SELECT '-- to roles'; +CREATE ROLE r1_01294; +CREATE USER u1_01294; +CREATE PROFILE s1_01294 TO NONE; +CREATE PROFILE s2_01294 TO ALL; +CREATE PROFILE s3_01294 TO r1_01294; +CREATE PROFILE s4_01294 TO u1_01294; +CREATE PROFILE s5_01294 TO r1_01294, u1_01294; +CREATE PROFILE s6_01294 TO ALL EXCEPT r1_01294; +CREATE PROFILE s7_01294 TO ALL EXCEPT r1_01294, u1_01294; +SHOW CREATE PROFILE s1_01294; +SHOW CREATE PROFILE s2_01294; +SHOW CREATE PROFILE s3_01294; +SHOW CREATE PROFILE s4_01294; +SHOW CREATE 
PROFILE s5_01294; +SHOW CREATE PROFILE s6_01294; +SHOW CREATE PROFILE s7_01294; +ALTER PROFILE s1_01294 TO u1_01294; +ALTER PROFILE s2_01294 TO NONE; +SHOW CREATE PROFILE s1_01294; +SHOW CREATE PROFILE s2_01294; +DROP PROFILE s1_01294, s2_01294, s3_01294, s4_01294, s5_01294, s6_01294, s7_01294; + +SELECT '-- complex'; +CREATE SETTINGS PROFILE s1_01294 SETTINGS readonly=0 TO r1_01294; +SHOW CREATE SETTINGS PROFILE s1_01294; +ALTER SETTINGS PROFILE s1_01294 SETTINGS INHERIT 'default' TO NONE; +SHOW CREATE SETTINGS PROFILE s1_01294; +DROP SETTINGS PROFILE s1_01294; + +SELECT '-- multiple profiles in one command'; +CREATE PROFILE s1_01294, s2_01294 SETTINGS max_memory_usage=5000000; +CREATE PROFILE s3_01294, s4_01294 TO ALL; +SHOW CREATE PROFILE s1_01294, s2_01294, s3_01294, s4_01294; +ALTER PROFILE s1_01294, s2_01294, s3_01294 SETTINGS max_memory_usage=6000000; +ALTER PROFILE s2_01294, s3_01294, s4_01294 TO r1_01294; +SHOW CREATE PROFILE s1_01294, s2_01294, s3_01294, s4_01294; +DROP PROFILE s1_01294, s2_01294, s3_01294, s4_01294; + +SELECT '-- system.settings_profiles'; +CREATE PROFILE s1_01294; +CREATE PROFILE s2_01294 SETTINGS readonly=0 TO r1_01294; +CREATE PROFILE s3_01294 SETTINGS max_memory_usage=5000000 MIN 4000000 MAX 6000000 READONLY TO r1_01294; +CREATE PROFILE s4_01294 SETTINGS max_memory_usage=5000000 TO r1_01294; +CREATE PROFILE s5_01294 SETTINGS INHERIT default, readonly=0, max_memory_usage MAX 6000000 WRITABLE TO ALL EXCEPT r1_01294; +SELECT name, storage, num_elements, apply_to_all, apply_to_list, apply_to_except FROM system.settings_profiles WHERE name LIKE 's%\_01294' ORDER BY name; + +SELECT '-- system.settings_profile_elements'; +SELECT * FROM system.settings_profile_elements WHERE profile_name LIKE 's%\_01294' ORDER BY profile_name, index; +DROP PROFILE s1_01294, s2_01294, s3_01294, s4_01294, s5_01294; + +DROP ROLE r1_01294; +DROP USER u1_01294; diff --git a/tests/queries/0_stateless/01295_create_row_policy.reference b/tests/queries/0_stateless/01295_create_row_policy.reference new file mode 100644 index 00000000000..2ed894c923e --- /dev/null +++ b/tests/queries/0_stateless/01295_create_row_policy.reference @@ -0,0 +1,35 @@ +-- default +CREATE ROW POLICY p1_01295 ON db.table +-- same as default +CREATE ROW POLICY p2_01295 ON db.table +CREATE ROW POLICY p3_01295 ON db.table +-- rename +CREATE ROW POLICY p2_01295_renamed ON db.table +-- filter +CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING (a < b) AND (c > d) +CREATE ROW POLICY p2_01295 ON db.table AS restrictive FOR SELECT USING id = currentUser() +CREATE ROW POLICY p3_01295 ON db.table FOR SELECT USING 1 +CREATE ROW POLICY p1_01295 ON db.table AS restrictive FOR SELECT USING 0 +-- to roles +CREATE ROW POLICY p1_01295 ON db.table +CREATE ROW POLICY p2_01295 ON db.table TO ALL +CREATE ROW POLICY p3_01295 ON db.table TO r1_01295 +CREATE ROW POLICY p4_01295 ON db.table TO u1_01295 +CREATE ROW POLICY p5_01295 ON db.table TO r1_01295, u1_01295 +CREATE ROW POLICY p6_01295 ON db.table TO ALL EXCEPT r1_01295 +CREATE ROW POLICY p7_01295 ON db.table TO ALL EXCEPT r1_01295, u1_01295 +CREATE ROW POLICY p1_01295 ON db.table TO u1_01295 +CREATE ROW POLICY p2_01295 ON db.table +-- multiple policies in one command +CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 +CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 +CREATE ROW POLICY p3_01295 ON db.table TO u1_01295 +CREATE ROW POLICY p3_01295 ON db2.table2 TO u1_01295 +CREATE ROW POLICY p4_01295 ON db.table FOR SELECT USING a = b +CREATE ROW POLICY p5_01295 ON 
db2.table2 FOR SELECT USING a = b +CREATE ROW POLICY p1_01295 ON db.table FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p2_01295 ON db.table FOR SELECT USING 1 TO ALL +-- system.row_policies +p1_01295 ON db.table p1_01295 db table disk (a < b) AND (c > d) 0 0 [] [] +p2_01295 ON db.table p2_01295 db table disk id = currentUser() 1 0 ['u1_01295'] [] +p3_01295 ON db.table p3_01295 db table disk 1 0 1 [] ['r1_01295'] diff --git a/tests/queries/0_stateless/01295_create_row_policy.sql b/tests/queries/0_stateless/01295_create_row_policy.sql new file mode 100644 index 00000000000..b484d0ea0f3 --- /dev/null +++ b/tests/queries/0_stateless/01295_create_row_policy.sql @@ -0,0 +1,79 @@ +DROP ROW POLICY IF EXISTS p1_01295, p2_01295, p3_01295, p4_01295, p5_01295, p6_01295, p7_01295, p8_01295, p9_01295, p10_01295 ON db.table; +DROP ROW POLICY IF EXISTS p2_01295_renamed ON db.table; +DROP ROW POLICY IF EXISTS p3_01295 ON db.table, db2.table2; +DROP ROW POLICY IF EXISTS p4_01295 ON db.table, p5_01295 ON db2.table2; +DROP USER IF EXISTS u1_01295; +DROP ROLE IF EXISTS r1_01295; + +SELECT '-- default'; +CREATE ROW POLICY p1_01295 ON db.table; +SHOW CREATE ROW POLICY p1_01295 ON db.table; + +SELECT '-- same as default'; +CREATE ROW POLICY p2_01295 ON db.table USING NONE TO NONE; +CREATE POLICY p3_01295 ON db.table; +SHOW CREATE POLICY p2_01295 ON db.table; +SHOW CREATE ROW POLICY p3_01295 ON db.table; + +SELECT '-- rename'; +ALTER ROW POLICY p2_01295 ON db.table RENAME TO 'p2_01295_renamed'; +SHOW CREATE ROW POLICY p2_01295 ON db.table; -- { serverError 523 } -- Policy not found +SHOW CREATE ROW POLICY p2_01295_renamed ON db.table; +DROP ROW POLICY p1_01295, p2_01295_renamed, p3_01295 ON db.table; + +SELECT '-- filter'; +CREATE ROW POLICY p1_01295 ON db.table USING a<b AND c>d; +CREATE ROW POLICY p2_01295 ON db.table USING id=currentUser() AS RESTRICTIVE; +CREATE ROW POLICY p3_01295 ON db.table USING 1 AS PERMISSIVE; +SHOW CREATE POLICY p1_01295 ON db.table; +SHOW CREATE POLICY p2_01295 ON db.table; +SHOW CREATE POLICY p3_01295 ON db.table; +ALTER ROW POLICY p1_01295 ON db.table FOR SELECT USING 0 AS RESTRICTIVE; +SHOW CREATE POLICY p1_01295 ON db.table; +DROP ROW POLICY p1_01295, p2_01295, p3_01295 ON db.table; + +SELECT '-- to roles'; +CREATE ROLE r1_01295; +CREATE USER u1_01295; +CREATE POLICY p1_01295 ON db.table TO NONE; +CREATE POLICY p2_01295 ON db.table TO ALL; +CREATE POLICY p3_01295 ON db.table TO r1_01295; +CREATE POLICY p4_01295 ON db.table TO u1_01295; +CREATE POLICY p5_01295 ON db.table TO r1_01295, u1_01295; +CREATE POLICY p6_01295 ON db.table TO ALL EXCEPT r1_01295; +CREATE POLICY p7_01295 ON db.table TO ALL EXCEPT r1_01295, u1_01295; +SHOW CREATE POLICY p1_01295 ON db.table; +SHOW CREATE POLICY p2_01295 ON db.table; +SHOW CREATE POLICY p3_01295 ON db.table; +SHOW CREATE POLICY p4_01295 ON db.table; +SHOW CREATE POLICY p5_01295 ON db.table; +SHOW CREATE POLICY p6_01295 ON db.table; +SHOW CREATE POLICY p7_01295 ON db.table; +ALTER POLICY p1_01295 ON db.table TO u1_01295; +ALTER POLICY p2_01295 ON db.table TO NONE; +SHOW CREATE POLICY p1_01295 ON db.table; +SHOW CREATE POLICY p2_01295 ON db.table; +DROP POLICY p1_01295, p2_01295, p3_01295, p4_01295, p5_01295, p6_01295, p7_01295 ON db.table; + +SELECT '-- multiple policies in one command'; +CREATE ROW POLICY p1_01295, p2_01295 ON db.table USING 1; +CREATE ROW POLICY p3_01295 ON db.table, db2.table2 TO u1_01295; +CREATE ROW POLICY p4_01295 ON db.table, p5_01295 ON db2.table2 USING a=b; +SHOW CREATE POLICY p1_01295, p2_01295 ON db.table; +SHOW CREATE POLICY 
p3_01295 ON db.table, db2.table2; +SHOW CREATE POLICY p4_01295 ON db.table, p5_01295 ON db2.table2; +ALTER POLICY p1_01295, p2_01295 ON db.table TO ALL; +SHOW CREATE POLICY p1_01295, p2_01295 ON db.table; +DROP POLICY p1_01295, p2_01295 ON db.table; +DROP POLICY p3_01295 ON db.table, db2.table2; +DROP POLICY p4_01295 ON db.table, p5_01295 ON db2.table2; + +SELECT '-- system.row_policies'; +CREATE ROW POLICY p1_01295 ON db.table USING a<b AND c>d; +CREATE ROW POLICY p2_01295 ON db.table USING id=currentUser() AS RESTRICTIVE TO u1_01295; +CREATE ROW POLICY p3_01295 ON db.table USING 1 AS PERMISSIVE TO ALL EXCEPT r1_01295; +SELECT name, short_name, database, table, storage, select_filter, is_restrictive, apply_to_all, apply_to_list, apply_to_except from system.row_policies WHERE short_name LIKE 'p%\_01295' ORDER BY name; +DROP ROW POLICY p1_01295, p2_01295, p3_01295 ON db.table; + +DROP ROLE r1_01295; +DROP USER u1_01295; diff --git a/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference new file mode 100644 index 00000000000..fa9c2f73021 --- /dev/null +++ b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.reference @@ -0,0 +1,20 @@ +-- one policy +CREATE ROW POLICY p1_01296 ON db_01296.table +CREATE ROW POLICY p1_01296 ON db_01296.table +CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 +-- multiple policies +CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p3_01296 ON db_01296.table TO u1_01296 +CREATE ROW POLICY p3_01296 ON db_01296.table2 TO u1_01296 +CREATE ROW POLICY p4_01296 ON db_01296.table FOR SELECT USING a = b +CREATE ROW POLICY p5_01296 ON db_01296.table2 FOR SELECT USING a = b +CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 +CREATE ROW POLICY p3_01296 ON db_01296.table TO u1_01296 +CREATE ROW POLICY p3_01296 ON db_01296.table2 TO u1_01296 +CREATE ROW POLICY p4_01296 ON db_01296.table FOR SELECT USING a = b +CREATE ROW POLICY p5_01296 ON db_01296.table2 FOR SELECT USING a = b +CREATE ROW POLICY p1_01296 ON db_01296.table FOR SELECT USING 1 TO ALL +CREATE ROW POLICY p2_01296 ON db_01296.table FOR SELECT USING 1 TO ALL diff --git a/tests/queries/0_stateless/01296_create_row_policy_in_current_database.sql b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.sql new file mode 100644 index 00000000000..fca570b5651 --- /dev/null +++ b/tests/queries/0_stateless/01296_create_row_policy_in_current_database.sql @@ -0,0 +1,51 @@ +DROP ROW POLICY IF EXISTS p1_01296, p2_01296, p3_01296, p4_01296, p5_01296 ON db_01296.table; +DROP ROW POLICY IF EXISTS p3_01296, p5_01296 ON db_01296.table2; +DROP DATABASE IF EXISTS db_01296; +DROP USER IF EXISTS u1_01296; + +CREATE DATABASE db_01296; +USE db_01296; + +SELECT '-- one policy'; +CREATE POLICY p1_01296 ON table; +SHOW CREATE POLICY p1_01296 ON db_01296.table; +SHOW CREATE POLICY p1_01296 ON table; +ALTER POLICY p1_01296 ON table USING 1; +SHOW CREATE POLICY p1_01296 ON db_01296.table; +SHOW CREATE POLICY p1_01296 ON table; +DROP POLICY p1_01296 ON table; +DROP POLICY p1_01296 ON db_01296.table; -- { serverError 523 } -- Policy not found + +SELECT '-- multiple policies'; +CREATE ROW POLICY p1_01296, p2_01296 ON table USING 1; +CREATE USER u1_01296; +CREATE 
ROW POLICY p3_01296 ON table, table2 TO u1_01296; +CREATE ROW POLICY p4_01296 ON table, p5_01296 ON table2 USING a=b; +SHOW CREATE POLICY p1_01296 ON table; +SHOW CREATE POLICY p2_01296 ON table; +SHOW CREATE POLICY p3_01296 ON table; +SHOW CREATE POLICY p3_01296 ON table2; +SHOW CREATE POLICY p4_01296 ON table; +SHOW CREATE POLICY p5_01296 ON table2; +SHOW CREATE POLICY p1_01296 ON db_01296.table; +SHOW CREATE POLICY p2_01296 ON db_01296.table; +SHOW CREATE POLICY p3_01296 ON db_01296.table; +SHOW CREATE POLICY p3_01296 ON db_01296.table2; +SHOW CREATE POLICY p4_01296 ON db_01296.table; +SHOW CREATE POLICY p5_01296 ON db_01296.table2; +ALTER POLICY p1_01296, p2_01296 ON table TO ALL; +SHOW CREATE POLICY p1_01296 ON table; +SHOW CREATE POLICY p2_01296 ON table; +DROP POLICY p1_01296, p2_01296 ON table; +DROP POLICY p3_01296 ON table, table2; +DROP POLICY p4_01296 ON table, p5_01296 ON table2; +DROP POLICY p1_01296 ON db_01296.table; -- { serverError 523 } -- Policy not found +DROP POLICY p2_01296 ON db_01296.table; -- { serverError 523 } -- Policy not found +DROP POLICY p3_01296 ON db_01296.table; -- { serverError 523 } -- Policy not found +DROP POLICY p3_01296 ON db_01296.table2; -- { serverError 523 } -- Policy not found +DROP POLICY p4_01296 ON db_01296.table; -- { serverError 523 } -- Policy not found +DROP POLICY p5_01296 ON db_01296.table2; -- { serverError 523 } -- Policy not found + +USE default; +DROP DATABASE db_01296; +DROP USER u1_01296; diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference new file mode 100644 index 00000000000..b58d3f0f390 --- /dev/null +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -0,0 +1,63 @@ +-- default +CREATE QUOTA q1_01297 +-- same as default +CREATE QUOTA q2_01297 +CREATE QUOTA q3_01297 +CREATE QUOTA q4_01297 +-- rename +CREATE QUOTA q2_01297_renamed +-- key +CREATE QUOTA q1_01297 +CREATE QUOTA q2_01297 KEYED BY user_name +CREATE QUOTA q3_01297 KEYED BY ip_address +CREATE QUOTA q4_01297 KEYED BY client_key +CREATE QUOTA q5_01297 KEYED BY client_key, user_name +CREATE QUOTA q6_01297 KEYED BY client_key, ip_address +CREATE QUOTA q7_01297 +CREATE QUOTA q8_01297 KEYED BY user_name +CREATE QUOTA q9_01297 KEYED BY ip_address +CREATE QUOTA q10_01297 KEYED BY client_key +CREATE QUOTA q11_01297 KEYED BY client_key, user_name +CREATE QUOTA q12_01297 KEYED BY client_key, ip_address +CREATE QUOTA q1_01297 KEYED BY user_name +CREATE QUOTA q2_01297 KEYED BY client_key, user_name +CREATE QUOTA q3_01297 +-- intervals +CREATE QUOTA q1_01297 FOR INTERVAL 5 day MAX errors = 3 +CREATE QUOTA q2_01297 FOR INTERVAL 30 minute MAX errors = 4 +CREATE QUOTA q3_01297 FOR INTERVAL 1 hour MAX errors = 5 +CREATE QUOTA q4_01297 FOR INTERVAL 2000 second MAX errors = 5 +CREATE QUOTA q5_01297 FOR RANDOMIZED INTERVAL 1 year MAX queries = 100, errors = 11 +CREATE QUOTA q6_01297 FOR INTERVAL 2 month MAX queries = 100, errors = 11, result_rows = 1000, result_bytes = 10000, read_rows = 1001, read_bytes = 10001, execution_time = 2.5 +CREATE QUOTA q7_01297 FOR INTERVAL 1 quarter MAX queries = 100, errors = 11 +CREATE QUOTA q8_01297 FOR INTERVAL 2 month MAX result_rows = 1002, FOR INTERVAL 2 quarter MAX queries = 100, errors = 11 +CREATE QUOTA q1_01297 +CREATE QUOTA q2_01297 FOR INTERVAL 30 minute TRACKING ONLY +CREATE QUOTA q3_01297 FOR INTERVAL 1 hour MAX queries = 70, FOR INTERVAL 2 hour MAX errors = 10 +CREATE QUOTA q4_01297 FOR RANDOMIZED INTERVAL 2000 second MAX errors = 5 +CREATE QUOTA q5_01297 FOR 
INTERVAL 1 year MAX errors = 111 +-- to roles +CREATE QUOTA q1_01297 +CREATE QUOTA q2_01297 TO ALL +CREATE QUOTA q3_01297 TO r1_01297 +CREATE QUOTA q4_01297 TO u1_01297 +CREATE QUOTA q5_01297 TO r1_01297, u1_01297 +CREATE QUOTA q6_01297 TO ALL EXCEPT r1_01297 +CREATE QUOTA q7_01297 TO ALL EXCEPT r1_01297, u1_01297 +CREATE QUOTA q1_01297 TO u1_01297 +CREATE QUOTA q2_01297 +-- multiple quotas in one command +CREATE QUOTA q1_01297 FOR INTERVAL 1 day MAX errors = 5 +CREATE QUOTA q2_01297 FOR INTERVAL 1 day MAX errors = 5 +CREATE QUOTA q1_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 +CREATE QUOTA q2_01297 FOR INTERVAL 1 day TRACKING ONLY TO r1_01297 +-- system.quotas +q1_01297 disk ['user_name'] [] 0 ['r1_01297'] [] +q2_01297 disk [] [5259492] 0 ['r1_01297','u1_01297'] [] +q3_01297 disk ['client_key','user_name'] [5259492,15778476] 0 [] [] +q4_01297 disk [] [604800] 1 [] ['u1_01297'] +-- system.quota_limits +q2_01297 5259492 0 100 11 1000 10000 1001 10001 2.5 +q3_01297 5259492 0 \N \N 1002 \N \N \N \N +q3_01297 15778476 0 100 11 \N \N \N \N \N +q4_01297 604800 0 \N \N \N \N \N \N \N diff --git a/tests/queries/0_stateless/01297_create_quota.sql b/tests/queries/0_stateless/01297_create_quota.sql new file mode 100644 index 00000000000..a3fb8331e16 --- /dev/null +++ b/tests/queries/0_stateless/01297_create_quota.sql @@ -0,0 +1,129 @@ +DROP QUOTA IF EXISTS q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297, q8_01297, q9_01297, q10_01297; +DROP QUOTA IF EXISTS q11_01297, q12_01297; +DROP QUOTA IF EXISTS q2_01297_renamed; +DROP USER IF EXISTS u1_01297; +DROP ROLE IF EXISTS r1_01297; + +SELECT '-- default'; +CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q1_01297; + +SELECT '-- same as default'; +CREATE QUOTA q2_01297 TO NONE; +CREATE QUOTA q3_01297 FOR INTERVAL 1 HOUR NO LIMITS NOT KEYED TO NONE; +CREATE QUOTA q4_01297 KEYED BY none FOR 1 hour NO LIMITS; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; + +SELECT '-- rename'; +ALTER QUOTA q2_01297 RENAME TO 'q2_01297_renamed'; +SHOW CREATE QUOTA q2_01297; -- { serverError 199 } -- Quota not found +SHOW CREATE QUOTA q2_01297_renamed; +DROP QUOTA q1_01297, q2_01297_renamed, q3_01297, q4_01297; + +SELECT '-- key'; +CREATE QUOTA q1_01297 NOT KEYED; +CREATE QUOTA q2_01297 KEY BY user_name; +CREATE QUOTA q3_01297 KEY BY ip_address; +CREATE QUOTA q4_01297 KEY BY client_key; +CREATE QUOTA q5_01297 KEY BY client_key, user_name; +CREATE QUOTA q6_01297 KEY BY client_key, ip_address; +CREATE QUOTA q7_01297 KEYED BY 'none'; +CREATE QUOTA q8_01297 KEYED BY 'user name'; +CREATE QUOTA q9_01297 KEYED BY 'IP_ADDRESS'; +CREATE QUOTA q10_01297 KEYED BY CLIENT_KEY; +CREATE QUOTA q11_01297 KEYED BY 'client key or user name'; +CREATE QUOTA q12_01297 KEYED BY 'client key or ip address'; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +SHOW CREATE QUOTA q6_01297; +SHOW CREATE QUOTA q7_01297; +SHOW CREATE QUOTA q8_01297; +SHOW CREATE QUOTA q9_01297; +SHOW CREATE QUOTA q10_01297; +SHOW CREATE QUOTA q11_01297; +SHOW CREATE QUOTA q12_01297; +ALTER QUOTA q1_01297 KEY BY user_name; +ALTER QUOTA q2_01297 KEY BY client_key, user_name; +ALTER QUOTA q3_01297 NOT KEYED; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297, q8_01297, q9_01297, q10_01297, q11_01297, q12_01297; + +SELECT '-- intervals'; +CREATE QUOTA q1_01297 
FOR INTERVAL 5 DAY MAX ERRORS = 3; +CREATE QUOTA q2_01297 FOR INTERVAL 30 minute MAX ERRORS 4; +CREATE QUOTA q3_01297 FOR 1 HOUR errors MAX 5; +CREATE QUOTA q4_01297 FOR 2000 SECOND errors MAX 5; +CREATE QUOTA q5_01297 FOR RANDOMIZED INTERVAL 1 YEAR MAX errors = 11, MAX queries = 100; +CREATE QUOTA q6_01297 FOR 2 MONTH MAX errors = 11, queries = 100, result_rows = 1000, result_bytes = 10000, read_rows = 1001, read_bytes = 10001, execution_time=2.5; +CREATE QUOTA q7_01297 FOR 1 QUARTER MAX errors 11, queries 100; +CREATE QUOTA q8_01297 FOR 0.5 year ERRORS MAX 11, QUERIES MAX 100, FOR 2 MONTH RESULT ROWS MAX 1002; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +SHOW CREATE QUOTA q6_01297; +SHOW CREATE QUOTA q7_01297; +SHOW CREATE QUOTA q8_01297; +ALTER QUOTA q1_01297 FOR INTERVAL 5 DAY NO LIMITS; +ALTER QUOTA q2_01297 FOR INTERVAL 30 MINUTE TRACKING ONLY; +ALTER QUOTA q3_01297 FOR INTERVAL 2 HOUR MAX errors = 10, FOR INTERVAL 1 HOUR MAX queries = 70; +ALTER QUOTA q4_01297 FOR RANDOMIZED INTERVAL 2000 SECOND errors MAX 5; +ALTER QUOTA q5_01297 FOR 1 YEAR MAX errors = 111; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297, q8_01297; + +SELECT '-- to roles'; +CREATE ROLE r1_01297; +CREATE USER u1_01297; +CREATE QUOTA q1_01297 TO NONE; +CREATE QUOTA q2_01297 TO ALL; +CREATE QUOTA q3_01297 TO r1_01297; +CREATE QUOTA q4_01297 TO u1_01297; +CREATE QUOTA q5_01297 TO r1_01297, u1_01297; +CREATE QUOTA q6_01297 TO ALL EXCEPT r1_01297; +CREATE QUOTA q7_01297 TO ALL EXCEPT r1_01297, u1_01297; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +SHOW CREATE QUOTA q3_01297; +SHOW CREATE QUOTA q4_01297; +SHOW CREATE QUOTA q5_01297; +SHOW CREATE QUOTA q6_01297; +SHOW CREATE QUOTA q7_01297; +ALTER QUOTA q1_01297 TO u1_01297; +ALTER QUOTA q2_01297 TO NONE; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297, q5_01297, q6_01297, q7_01297; + +SELECT '-- multiple quotas in one command'; +CREATE QUOTA q1_01297, q2_01297 FOR 1 day MAX errors=5; +SHOW CREATE QUOTA q1_01297, q2_01297; +ALTER QUOTA q1_01297, q2_01297 FOR 1 day TRACKING ONLY TO r1_01297; +SHOW CREATE QUOTA q1_01297, q2_01297; +DROP QUOTA q1_01297, q2_01297; + +SELECT '-- system.quotas'; +CREATE QUOTA q1_01297 KEYED BY user_name TO r1_01297; +CREATE QUOTA q2_01297 FOR 2 MONTH MAX errors = 11, queries = 100, result_rows = 1000, result_bytes = 10000, read_rows = 1001, read_bytes = 10001, execution_time=2.5 TO r1_01297, u1_01297; +CREATE QUOTA q3_01297 KEYED BY client_key, user_name FOR 0.5 YEAR ERRORS MAX 11, QUERIES MAX 100, FOR 2 MONTH RESULT ROWS MAX 1002; +CREATE QUOTA q4_01297 FOR 1 WEEK TRACKING ONLY TO ALL EXCEPT u1_01297; +SELECT name, storage, keys, durations, apply_to_all, apply_to_list, apply_to_except FROM system.quotas WHERE name LIKE 'q%\_01297' ORDER BY name; + +SELECT '-- system.quota_limits'; +SELECT * FROM system.quota_limits WHERE quota_name LIKE 'q%\_01297' ORDER BY quota_name, duration; +DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297; + +DROP ROLE r1_01297; +DROP USER u1_01297; From 6146766465486724c1baace82c1bdf786d7ffbd6 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 6 Jun 2020 00:31:37 +0300 Subject: [PATCH 0754/2229] Use function ParserList::parseUtil() to parse lists more accurately. 
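The motivation is visible in the diffs below: the old parsers each repeated the same do/while pattern (parse an element, try to consume a comma, rewind on failure), and several of them wrote into their output arguments from inside the loop, so a failed parse could leave the result half-filled. parseUtil() centralizes the separator and rewind logic; callers now accumulate into a local variable inside a lambda and move it into the output only once the whole list has parsed. For reference, a condensed sketch of the new calling convention (the wrapper parseNames is hypothetical and only illustrative; ParserList::parseUtil, parseIdentifierOrStringLiteral, IParser::Pos, Expected and Strings all appear in the changes below):

    #include <Parsers/ExpressionListParsers.h>
    #include <Parsers/parseIdentifierOrStringLiteral.h>

    /// Parses a comma-separated list of identifiers or string literals,
    /// e.g. "r1_01293, 'r2_01293@%.myhost.com'".
    bool parseNames(IParser::Pos & pos, Expected & expected, Strings & names)
    {
        Strings res_names;      // accumulate locally first

        auto parse_name = [&]   // parses exactly one list element
        {
            String name;
            if (!parseIdentifierOrStringLiteral(pos, expected, name))
                return false;
            res_names.emplace_back(std::move(name));
            return true;
        };

        /// Comma separator, at least one element (allow_empty = false).
        if (!ParserList::parseUtil(pos, expected, parse_name, false))
            return false;

        names = std::move(res_names);   // commit only on full success
        return true;
    }

When an element after a separator fails to parse, parseUtil() rewinds pos to just before that separator, so a trailing comma no longer swallows input that a later clause (HOST, SETTINGS, TO, ...) still needs.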
--- src/Parsers/ExpressionListParsers.cpp | 48 ++-- src/Parsers/ExpressionListParsers.h | 37 +++ src/Parsers/ParserCreateQuotaQuery.cpp | 91 ++++---- src/Parsers/ParserCreateRoleQuery.cpp | 16 +- src/Parsers/ParserCreateRowPolicyQuery.cpp | 76 +++---- .../ParserCreateSettingsProfileQuery.cpp | 18 +- src/Parsers/ParserCreateUserQuery.cpp | 210 +++++++++++------- src/Parsers/ParserGrantQuery.cpp | 66 ++++-- src/Parsers/ParserRolesOrUsersSet.cpp | 91 ++++---- src/Parsers/ParserRowPolicyName.cpp | 49 ++-- src/Parsers/ParserSettingsProfileElement.cpp | 160 ++++++++----- src/Parsers/ParserUserNameWithHost.cpp | 71 +++--- .../parseIdentifierOrStringLiteral.cpp | 53 +++-- 13 files changed, 578 insertions(+), 408 deletions(-) diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index a967ae19691..e33e80f1f18 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -76,41 +76,25 @@ const char * ParserTupleElementExpression::operators[] = bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - bool first = true; + ASTs elements; + + auto parse_element = [&] + { + ASTPtr element; + if (!elem_parser->parse(pos, element, expected)) + return false; + + elements.push_back(element); + return true; + }; + + if (!parseUtil(pos, expected, parse_element, *separator_parser, allow_empty)) + return false; auto list = std::make_shared<ASTExpressionList>(result_separator); + list->children = std::move(elements); node = list; - - while (true) - { - if (first) - { - ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - break; - - list->children.push_back(elem); - first = false; - } - else - { - auto prev_pos = pos; - - if (!separator_parser->ignore(pos, expected)) - break; - - ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - { - pos = prev_pos; - break; - } - - list->children.push_back(elem); - } - } - - return allow_empty || !first; + return true; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 0cef29b6d67..93a47648a0b 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -26,6 +26,43 @@ public: , result_separator(result_separator_) { } + + template <typename F> + static bool parseUtil(Pos & pos, Expected & expected, const F & parse_element, IParser & separator_parser_, bool allow_empty_ = true) + { + Pos begin = pos; + if (!parse_element()) + { + pos = begin; + return allow_empty_; + } + + while (true) + { + begin = pos; + if (!separator_parser_.ignore(pos, expected) || !parse_element()) + { + pos = begin; + return true; + } + } + + return false; + } + + template <typename F> + static bool parseUtil(Pos & pos, Expected & expected, const F & parse_element, TokenType separator, bool allow_empty_ = true) + { + ParserToken sep_parser{separator}; + return parseUtil(pos, expected, parse_element, sep_parser, allow_empty_); + } + + template <typename F> + static bool parseUtil(Pos & pos, Expected & expected, const F & parse_element, bool allow_empty_ = true) + { + return parseUtil(pos, expected, parse_element, TokenType::Comma, allow_empty_); + } + protected: const char * getName() const override { return "list of elements"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserCreateQuotaQuery.cpp b/src/Parsers/ParserCreateQuotaQuery.cpp index f83bac975b0..324519b9c01 100644 --- a/src/Parsers/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/ParserCreateQuotaQuery.cpp @@ -8,6 +8,7 @@ #include #include #include +#include 
#include #include @@ -126,17 +127,17 @@ namespace } - bool parseLimit(IParserBase::Pos & pos, Expected & expected, bool first, bool & max_prefix_encountered, ResourceType & resource_type, ResourceAmount & max) + bool parseLimits(IParserBase::Pos & pos, Expected & expected, std::vector> & limits) { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!first && !ParserToken{TokenType::Comma}.ignore(pos, expected)) - return false; + std::vector> res_limits; + bool max_prefix_encountered = false; + auto parse_limit = [&] + { max_prefix_encountered |= ParserKeyword{"MAX"}.ignore(pos, expected); - ResourceType res_resource_type; - if (!parseResourceType(pos, expected, res_resource_type)) + ResourceType resource_type; + if (!parseResourceType(pos, expected, resource_type)) return false; if (max_prefix_encountered) @@ -149,25 +150,32 @@ namespace return false; } - ResourceAmount res_max; - if (!parseMaxAmount(pos, expected, res_resource_type, res_max)) + ResourceAmount max; + if (!parseMaxAmount(pos, expected, resource_type, max)) return false; - resource_type = res_resource_type; - max = res_max; + res_limits.emplace_back(resource_type, max); return true; - }); + }; + + if (!ParserList::parseUtil(pos, expected, parse_limit, false)) + return false; + + limits = std::move(res_limits); + return true; } - bool parseIntervalWithLimits(IParserBase::Pos & pos, Expected & expected, ASTCreateQuotaQuery::Limits & limits) + bool parseIntervalsWithLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) { - return IParserBase::wrapParseImpl(pos, [&] + std::vector res_all_limits; + + auto parse_interval_with_limits = [&] { - ASTCreateQuotaQuery::Limits new_limits; if (!ParserKeyword{"FOR"}.ignore(pos, expected)) return false; - new_limits.randomize_interval = ParserKeyword{"RANDOMIZED"}.ignore(pos, expected); + ASTCreateQuotaQuery::Limits limits; + limits.randomize_interval = ParserKeyword{"RANDOMIZED"}.ignore(pos, expected); ParserKeyword{"INTERVAL"}.ignore(pos, expected); @@ -181,53 +189,34 @@ namespace if (!parseIntervalKind(pos, expected, interval_kind)) return false; - new_limits.duration = std::chrono::seconds(static_cast(num_intervals * interval_kind.toAvgSeconds())); + limits.duration = std::chrono::seconds(static_cast(num_intervals * interval_kind.toAvgSeconds())); + std::vector> maxs; if (ParserKeyword{"NO LIMITS"}.ignore(pos, expected)) { - new_limits.drop = true; + limits.drop = true; } else if (ParserKeyword{"TRACKING ONLY"}.ignore(pos, expected)) { } + else if (parseLimits(pos, expected, maxs)) + { + for (const auto & [resource_type, max] : maxs) + limits.max[resource_type] = max; + } else - { - ResourceType resource_type; - ResourceAmount max; - bool max_prefix_encountered = false; - if (!parseLimit(pos, expected, true, max_prefix_encountered, resource_type, max)) - return false; + return false; - new_limits.max[resource_type] = max; - while (parseLimit(pos, expected, false, max_prefix_encountered, resource_type, max)) - new_limits.max[resource_type] = max; - } - - limits = new_limits; + res_all_limits.emplace_back(std::move(limits)); return true; - }); - } + }; - bool parseIntervalsWithLimits(IParserBase::Pos & pos, Expected & expected, std::vector & all_limits) - { - return IParserBase::wrapParseImpl(pos, [&] - { - size_t old_size = all_limits.size(); - do - { - ASTCreateQuotaQuery::Limits limits; - if (!parseIntervalWithLimits(pos, expected, limits)) - { - all_limits.resize(old_size); - return false; - } - all_limits.push_back(limits); - } - while 
(ParserToken{TokenType::Comma}.ignore(pos, expected)); - return true; - }); - } + if (!ParserList::parseUtil(pos, expected, parse_interval_with_limits, false)) + return false; + all_limits = std::move(res_all_limits); + return true; + } bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) { diff --git a/src/Parsers/ParserCreateRoleQuery.cpp b/src/Parsers/ParserCreateRoleQuery.cpp index 6feeefa4657..5863136750f 100644 --- a/src/Parsers/ParserCreateRoleQuery.cpp +++ b/src/Parsers/ParserCreateRoleQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -23,7 +24,7 @@ namespace }); } - bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & settings) + bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::vector> & settings) { return IParserBase::wrapParseImpl(pos, [&] { @@ -36,10 +37,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - if (!settings) - settings = std::make_shared(); - const auto & new_settings = new_settings_ast->as(); - settings->elements.insert(settings->elements.end(), new_settings.elements.begin(), new_settings.elements.end()); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -99,8 +97,14 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (alter && new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; - if (parseSettings(pos, expected, attach_mode, settings)) + std::vector> new_settings; + if (parseSettings(pos, expected, attach_mode, new_settings)) + { + if (!settings) + settings = std::make_shared(); + boost::range::push_back(settings->elements, std::move(new_settings)); continue; + } if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; diff --git a/src/Parsers/ParserCreateRowPolicyQuery.cpp b/src/Parsers/ParserCreateRowPolicyQuery.cpp index c9fe15d391f..fae5bd35b43 100644 --- a/src/Parsers/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/ParserCreateRowPolicyQuery.cpp @@ -75,6 +75,7 @@ namespace }); } + void addAllCommands(boost::container::flat_set & commands) { for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) @@ -84,44 +85,47 @@ namespace } } - bool parseCommands(IParserBase::Pos & pos, Expected & expected, boost::container::flat_set & commands) + + bool parseCommands(IParserBase::Pos & pos, Expected & expected, + boost::container::flat_set & commands) { - return IParserBase::wrapParseImpl(pos, [&] + boost::container::flat_set res_commands; + + auto parse_command = [&] { if (ParserKeyword{"ALL"}.ignore(pos, expected)) { - addAllCommands(commands); + addAllCommands(res_commands); return true; } - boost::container::flat_set res_commands; - do + for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) { - bool found_keyword = false; - for (auto condition_type : ext::range(MAX_CONDITION_TYPE)) + const std::string_view & command = ConditionTypeInfo::get(condition_type).command; + if (ParserKeyword{command.data()}.ignore(pos, expected)) { - const std::string_view & command = ConditionTypeInfo::get(condition_type).command; - if (ParserKeyword{command.data()}.ignore(pos, expected)) - { - res_commands.emplace(command); - found_keyword = true; - break; - } + res_commands.emplace(command); + return true; } - - if (!found_keyword) - return false; } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - commands = std::move(res_commands); - return true; - }); + 
return false; + }; + + if (!ParserList::parseUtil(pos, expected, parse_command, false)) + return false; + + commands = std::move(res_commands); + return true; } - bool parseForClause(IParserBase::Pos & pos, Expected & expected, bool alter, std::vector> & conditions) + + bool + parseForClauses(IParserBase::Pos & pos, Expected & expected, bool alter, std::vector> & conditions) { - return IParserBase::wrapParseImpl(pos, [&] + std::vector> res_conditions; + + auto parse_for_clause = [&] { boost::container::flat_set commands; @@ -158,32 +162,20 @@ namespace if (commands.count(type_info.command)) { if (type_info.is_check && check) - conditions.emplace_back(condition_type, *check); + res_conditions.emplace_back(condition_type, *check); else if (filter) - conditions.emplace_back(condition_type, *filter); + res_conditions.emplace_back(condition_type, *filter); } } return true; - }); - } + }; - bool parseForClauses( - IParserBase::Pos & pos, Expected & expected, bool alter, std::vector> & conditions) - { - return IParserBase::wrapParseImpl(pos, [&] - { - std::vector> res_conditions; - do - { - if (!parseForClause(pos, expected, alter, res_conditions)) - return false; - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + if (!ParserList::parseUtil(pos, expected, parse_for_clause, false)) + return false; - conditions = std::move(res_conditions); - return true; - }); + conditions = std::move(res_conditions); + return true; } bool parseToRoles(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & roles) diff --git a/src/Parsers/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/ParserCreateSettingsProfileQuery.cpp index 56bd39b9230..797379509e4 100644 --- a/src/Parsers/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/ParserCreateSettingsProfileQuery.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB @@ -25,7 +26,7 @@ namespace }); } - bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & settings) + bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::vector> & settings) { return IParserBase::wrapParseImpl(pos, [&] { @@ -38,10 +39,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - if (!settings) - settings = std::make_shared(); - const auto & new_settings = new_settings_ast->as(); - settings->elements.insert(settings->elements.end(), new_settings.elements.begin(), new_settings.elements.end()); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -51,7 +49,7 @@ namespace return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (roles || !ParserKeyword{"TO"}.ignore(pos, expected)) + if (!ParserKeyword{"TO"}.ignore(pos, expected)) return false; ParserRolesOrUsersSet roles_p; @@ -119,8 +117,14 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec if (alter && new_name.empty() && (names.size() == 1) && parseRenameTo(pos, expected, new_name)) continue; - if (parseSettings(pos, expected, attach_mode, settings)) + std::vector> new_settings; + if (parseSettings(pos, expected, attach_mode, new_settings)) + { + if (!settings) + settings = std::make_shared(); + boost::range::push_back(settings->elements, std::move(new_settings)); continue; + } if (cluster.empty() && parseOnCluster(pos, expected, cluster)) continue; diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index ff7f2dc8790..4641c94d592 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp 
+++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -32,7 +33,7 @@ namespace } - bool parseAuthentication(IParserBase::Pos & pos, Expected & expected, std::optional & authentication) + bool parseAuthentication(IParserBase::Pos & pos, Expected & expected, Authentication & authentication) { return IParserBase::wrapParseImpl(pos, [&] { @@ -96,91 +97,109 @@ namespace authentication = Authentication{*type}; if (expect_password) - authentication->setPassword(password); + authentication.setPassword(password); else if (expect_hash) - authentication->setPasswordHashHex(password); + authentication.setPasswordHashHex(password); return true; }); } - bool parseHosts(IParserBase::Pos & pos, Expected & expected, const char * prefix, std::optional & hosts) + bool parseHostsWithoutPrefix(IParserBase::Pos & pos, Expected & expected, AllowedClientHosts & hosts) + { + AllowedClientHosts res_hosts; + + auto parse_host = [&] + { + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + return true; + + if (ParserKeyword{"ANY"}.ignore(pos, expected)) + { + res_hosts.addAnyHost(); + return true; + } + + if (ParserKeyword{"LOCAL"}.ignore(pos, expected)) + { + res_hosts.addLocalHost(); + return true; + } + + if (ParserKeyword{"REGEXP"}.ignore(pos, expected)) + { + ASTPtr ast; + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) + return false; + + for (const auto & name_regexp_ast : ast->children) + res_hosts.addNameRegexp(name_regexp_ast->as().value.safeGet()); + return true; + } + + if (ParserKeyword{"NAME"}.ignore(pos, expected)) + { + ASTPtr ast; + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) + return false; + + for (const auto & name_ast : ast->children) + res_hosts.addName(name_ast->as().value.safeGet()); + + return true; + } + + if (ParserKeyword{"IP"}.ignore(pos, expected)) + { + ASTPtr ast; + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) + return false; + + for (const auto & subnet_ast : ast->children) + res_hosts.addSubnet(subnet_ast->as().value.safeGet()); + + return true; + } + + if (ParserKeyword{"LIKE"}.ignore(pos, expected)) + { + ASTPtr ast; + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) + return false; + + for (const auto & pattern_ast : ast->children) + res_hosts.addLikePattern(pattern_ast->as().value.safeGet()); + + return true; + } + + return false; + }; + + if (!ParserList::parseUtil(pos, expected, parse_host, false)) + return false; + + hosts = std::move(res_hosts); + return true; + } + + + bool parseHosts(IParserBase::Pos & pos, Expected & expected, const String & prefix, AllowedClientHosts & hosts) { return IParserBase::wrapParseImpl(pos, [&] { - if (prefix && !ParserKeyword{prefix}.ignore(pos, expected)) + if (!prefix.empty() && !ParserKeyword{prefix.c_str()}.ignore(pos, expected)) return false; if (!ParserKeyword{"HOST"}.ignore(pos, expected)) return false; - if (ParserKeyword{"ANY"}.ignore(pos, expected)) - { - if (!hosts) - hosts.emplace(); - hosts->addAnyHost(); - return true; - } + AllowedClientHosts res_hosts; + if (!parseHostsWithoutPrefix(pos, expected, res_hosts)) + return false; - if (ParserKeyword{"NONE"}.ignore(pos, expected)) - { - if (!hosts) - hosts.emplace(); - return true; - } - - AllowedClientHosts new_hosts; - do - { - if (ParserKeyword{"LOCAL"}.ignore(pos, expected)) - { - 
new_hosts.addLocalHost(); - } - else if (ParserKeyword{"REGEXP"}.ignore(pos, expected)) - { - ASTPtr ast; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) - return false; - - for (const auto & name_regexp_ast : ast->children) - new_hosts.addNameRegexp(name_regexp_ast->as().value.safeGet()); - } - else if (ParserKeyword{"NAME"}.ignore(pos, expected)) - { - ASTPtr ast; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) - return false; - - for (const auto & name_ast : ast->children) - new_hosts.addName(name_ast->as().value.safeGet()); - } - else if (ParserKeyword{"IP"}.ignore(pos, expected)) - { - ASTPtr ast; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) - return false; - - for (const auto & subnet_ast : ast->children) - new_hosts.addSubnet(subnet_ast->as().value.safeGet()); - } - else if (ParserKeyword{"LIKE"}.ignore(pos, expected)) - { - ASTPtr ast; - if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) - return false; - - for (const auto & pattern_ast : ast->children) - new_hosts.addLikePattern(pattern_ast->as().value.safeGet()); - } - else - return false; - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - - if (!hosts) - hosts.emplace(); - hosts->add(new_hosts); + hosts.add(std::move(res_hosts)); return true; }); } @@ -206,7 +225,7 @@ namespace } - bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::shared_ptr & settings) + bool parseSettings(IParserBase::Pos & pos, Expected & expected, bool id_mode, std::vector> & settings) { return IParserBase::wrapParseImpl(pos, [&] { @@ -215,14 +234,11 @@ namespace ASTPtr new_settings_ast; ParserSettingsProfileElements elements_p; - elements_p.useInheritKeyword(true).useIDMode(id_mode); + elements_p.useIDMode(id_mode); if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - if (!settings) - settings = std::make_shared(); - const auto & new_settings = new_settings_ast->as(); - settings->elements.insert(settings->elements.end(), new_settings.elements.begin(), new_settings.elements.end()); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -286,14 +302,33 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec while (true) { - if (!authentication && parseAuthentication(pos, expected, authentication)) - continue; + if (!authentication) + { + Authentication new_authentication; + if (parseAuthentication(pos, expected, new_authentication)) + { + authentication = std::move(new_authentication); + continue; + } + } - if (parseHosts(pos, expected, nullptr, hosts)) + AllowedClientHosts new_hosts; + if (parseHosts(pos, expected, "", new_hosts)) + { + if (!hosts) + hosts.emplace(); + hosts->add(new_hosts); continue; + } - if (parseSettings(pos, expected, attach_mode, settings)) + std::vector> new_settings; + if (parseSettings(pos, expected, attach_mode, new_settings)) + { + if (!settings) + settings = std::make_shared(); + boost::range::push_back(settings->elements, std::move(new_settings)); continue; + } if (!default_roles && parseDefaultRoles(pos, expected, attach_mode, default_roles)) continue; @@ -306,8 +341,21 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (new_name.empty() && (names->size() == 1) && parseRenameTo(pos, expected, new_name)) continue; - if (parseHosts(pos, 
expected, "ADD", add_hosts) || parseHosts(pos, expected, "DROP", remove_hosts)) + if (parseHosts(pos, expected, "ADD", new_hosts)) + { + if (!add_hosts) + add_hosts.emplace(); + add_hosts->add(new_hosts); continue; + } + + if (parseHosts(pos, expected, "DROP", new_hosts)) + { + if (!remove_hosts) + remove_hosts.emplace(); + remove_hosts->add(new_hosts); + continue; + } } break; diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 03c0daa08a3..5eb5353f2ee 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -2,8 +2,11 @@ #include #include #include +#include +#include #include #include +#include #include #include @@ -66,15 +69,13 @@ namespace if (!ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) return false; + ASTPtr ast; + if (!ParserList{std::make_unique(), std::make_unique(TokenType::Comma), false}.parse(pos, ast, expected)) + return false; + Strings res_columns; - do - { - ASTPtr column_ast; - if (!ParserIdentifier().parse(pos, column_ast, expected)) - return false; - res_columns.emplace_back(getIdentifierName(column_ast)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + for (const auto & child : ast->children) + res_columns.emplace_back(getIdentifierName(child)); if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected)) return false; @@ -150,25 +151,42 @@ namespace } + bool parseAccessTypesWithColumns(IParser::Pos & pos, Expected & expected, + std::vector> & access_and_columns) + { + std::vector> res; + + auto parse_access_and_columns = [&] + { + AccessFlags access_flags; + if (!parseAccessFlags(pos, expected, access_flags)) + return false; + + Strings columns; + parseColumnNames(pos, expected, columns); + res.emplace_back(access_flags, std::move(columns)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_access_and_columns, false)) + return false; + + access_and_columns = std::move(res); + return true; + } + + bool parseAccessRightsElements(IParser::Pos & pos, Expected & expected, AccessRightsElements & elements) { return IParserBase::wrapParseImpl(pos, [&] { AccessRightsElements res_elements; - do + + auto parse_around_on = [&] { std::vector> access_and_columns; - do - { - AccessFlags access_flags; - if (!parseAccessFlags(pos, expected, access_flags)) - return false; - - Strings columns; - parseColumnNames(pos, expected, columns); - access_and_columns.emplace_back(access_flags, std::move(columns)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + if (!parseAccessTypesWithColumns(pos, expected, access_and_columns)) + return false; if (!ParserKeyword{"ON"}.ignore(pos, expected)) return false; @@ -190,8 +208,12 @@ namespace element.table = table_name; res_elements.emplace_back(std::move(element)); } - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_around_on, false)) + return false; elements = std::move(res_elements); return true; diff --git a/src/Parsers/ParserRolesOrUsersSet.cpp b/src/Parsers/ParserRolesOrUsersSet.cpp index 1ba2c05f671..0f3ba3f0f84 100644 --- a/src/Parsers/ParserRolesOrUsersSet.cpp +++ b/src/Parsers/ParserRolesOrUsersSet.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -11,11 +12,15 @@ namespace DB { namespace { - bool parseRoleNameOrID(IParserBase::Pos & pos, Expected & expected, bool parse_id, String & res) + bool parseRoleNameOrID( + IParserBase::Pos & pos, + Expected & expected, + bool 
id_mode, + String & res) { return IParserBase::wrapParseImpl(pos, [&] { - if (!parse_id) + if (!id_mode) return parseRoleName(pos, expected, res); if (!ParserKeyword{"ID"}.ignore(pos, expected)) @@ -40,60 +45,56 @@ namespace Expected & expected, bool id_mode, bool allow_all, - bool allow_current_user_tag, + bool allow_current_user, Strings & names, bool & all, bool & current_user) { - return IParserBase::wrapParseImpl(pos, [&] - { - bool res_all = false; - bool res_current_user = false; - Strings res_names; - while (true) - { - if (ParserKeyword{"NONE"}.ignore(pos, expected)) - { - } - else if ( - allow_current_user_tag - && (ParserKeyword{"CURRENT_USER"}.ignore(pos, expected) || ParserKeyword{"currentUser"}.ignore(pos, expected))) - { - if (ParserToken{TokenType::OpeningRoundBracket}.ignore(pos, expected)) - { - if (!ParserToken{TokenType::ClosingRoundBracket}.ignore(pos, expected)) - return false; - } - res_current_user = true; - } - else if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected)) - { - res_all = true; - } - else - { - String name; - if (!parseRoleNameOrID(pos, expected, id_mode, name)) - return false; - res_names.push_back(name); - } + bool res_all = false; + bool res_current_user = false; + Strings res_names; - if (!ParserToken{TokenType::Comma}.ignore(pos, expected)) - break; + auto parse_element = [&] + { + if (ParserKeyword{"NONE"}.ignore(pos, expected)) + return true; + + if (allow_all && ParserKeyword{"ALL"}.ignore(pos, expected)) + { + res_all = true; + return true; } - all = res_all; - current_user = res_current_user; - names = std::move(res_names); - return true; - }); + if (allow_current_user && parseCurrentUserTag(pos, expected)) + { + res_current_user = true; + return true; + } + + String name; + if (parseRoleNameOrID(pos, expected, id_mode, name)) + { + res_names.emplace_back(std::move(name)); + return true; + } + + return false; + }; + + if (!ParserList::parseUtil(pos, expected, parse_element, false)) + return false; + + names = std::move(res_names); + all = res_all; + current_user = res_current_user; + return true; } bool parseExceptAndAfterExcept( IParserBase::Pos & pos, Expected & expected, bool id_mode, - bool allow_current_user_tag, + bool allow_current_user, Strings & except_names, bool & except_current_user) { @@ -102,8 +103,8 @@ namespace if (!ParserKeyword{"EXCEPT"}.ignore(pos, expected)) return false; - bool dummy; - return parseBeforeExcept(pos, expected, id_mode, false, allow_current_user_tag, except_names, dummy, except_current_user); + bool unused; + return parseBeforeExcept(pos, expected, id_mode, false, allow_current_user, except_names, unused, except_current_user); }); } } diff --git a/src/Parsers/ParserRowPolicyName.cpp b/src/Parsers/ParserRowPolicyName.cpp index a74132cdaca..a3e12009c9a 100644 --- a/src/Parsers/ParserRowPolicyName.cpp +++ b/src/Parsers/ParserRowPolicyName.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -67,21 +68,18 @@ namespace return false; std::vector> res; - std::optional pos_before_comma; - do + + auto parse_db_and_table_name = [&] { String database, table_name; if (!parseDBAndTableName(pos, expected, database, table_name)) - { - if (!pos_before_comma) - return false; - pos = *pos_before_comma; - break; - } + return false; res.emplace_back(std::move(database), std::move(table_name)); - pos_before_comma = pos; - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_db_and_table_name, false)) + return false; 
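// The hunks above and below all apply one refactoring: a hand-rolled
// `do { ... } while (comma)` loop becomes a callback handed to
// ParserList::parseUtil, which owns the separator handling. A minimal sketch
// of the pattern, assuming ClickHouse's IParser interfaces (the element
// parser chosen here is illustrative only):
//
//     auto parse_element = [&]
//     {
//         String name;
//         if (!parseIdentifierOrStringLiteral(pos, expected, name))
//             return false;
//         res_names.emplace_back(std::move(name));   // collect one list item
//         return true;
//     };
//     // the trailing `false` appears to forbid an empty list
//     if (!ParserList::parseUtil(pos, expected, parse_element, false))
//         return false;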
database_and_table_names = std::move(res); return true; @@ -165,21 +163,28 @@ bool ParserRowPolicyName::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte bool ParserRowPolicyNames::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::vector name_parts; + size_t num_added_names_last_time = 0; String cluster; - do + auto parse_around_on = [&] { - std::vector new_name_parts; - if (!parseRowPolicyNamesAroundON(pos, expected, name_parts.empty(), name_parts.empty(), allow_on_cluster, new_name_parts, cluster)) - return false; + if (!name_parts.empty()) + { + if ((num_added_names_last_time != 1) || !cluster.empty()) + return false; + } - size_t num_new_name_parts = new_name_parts.size(); - assert(num_new_name_parts >= 1); - boost::range::push_back(name_parts, std::move(new_name_parts)); - if ((num_new_name_parts != 1) || !cluster.empty()) - break; - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + std::vector new_name_parts; + if (!parseRowPolicyNamesAroundON(pos, expected, name_parts.empty(), name_parts.empty(), allow_on_cluster, new_name_parts, cluster)) + return false; + + num_added_names_last_time = new_name_parts.size(); + boost::range::push_back(name_parts, std::move(new_name_parts)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_around_on, false)) + return false; auto result = std::make_shared(); result->name_parts = std::move(name_parts); diff --git a/src/Parsers/ParserSettingsProfileElement.cpp b/src/Parsers/ParserSettingsProfileElement.cpp index 2dd65e6ae7b..39e1f2d3594 100644 --- a/src/Parsers/ParserSettingsProfileElement.cpp +++ b/src/Parsers/ParserSettingsProfileElement.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -11,12 +12,19 @@ namespace DB { namespace { - bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool parse_id, String & res) + bool parseProfileKeyword(IParserBase::Pos & pos, Expected & expected, bool use_inherit_keyword) + { + return ParserKeyword{"PROFILE"}.ignore(pos, expected) || + (use_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected)); + } + + + bool parseProfileNameOrID(IParserBase::Pos & pos, Expected & expected, bool id_mode, String & res) { return IParserBase::wrapParseImpl(pos, [&] { ASTPtr ast; - if (!parse_id) + if (!id_mode) return parseIdentifierOrStringLiteral(pos, expected, res); if (!ParserKeyword{"ID"}.ignore(pos, expected)) @@ -96,52 +104,98 @@ namespace return false; }); } + + + bool parseSettingNameWithValueOrConstraints( + IParserBase::Pos & pos, + Expected & expected, + String & setting_name, + Field & value, + Field & min_value, + Field & max_value, + std::optional & readonly) + { + return IParserBase::wrapParseImpl(pos, [&] + { + ASTPtr name_ast; + if (!ParserIdentifier{}.parse(pos, name_ast, expected)) + return false; + + String res_setting_name = getIdentifierName(name_ast); + Field res_value; + Field res_min_value; + Field res_max_value; + std::optional res_readonly; + + bool has_value_or_constraint = false; + while (parseValue(pos, expected, res_value) || parseMinMaxValue(pos, expected, res_min_value, res_max_value) + || parseReadonlyOrWritableKeyword(pos, expected, res_readonly)) + { + has_value_or_constraint = true; + } + + if (!has_value_or_constraint) + return false; + + setting_name = std::move(res_setting_name); + value = std::move(res_value); + min_value = std::move(res_min_value); + max_value = std::move(res_max_value); + readonly = res_readonly; + return true; + }); + } + + + bool 
parseSettingsProfileElement(IParserBase::Pos & pos, + Expected & expected, + bool id_mode, + bool use_inherit_keyword, + bool previous_element_was_parent_profile, + std::shared_ptr & result) + { + return IParserBase::wrapParseImpl(pos, [&] + { + String parent_profile; + String setting_name; + Field value; + Field min_value; + Field max_value; + std::optional readonly; + + if (parseSettingNameWithValueOrConstraints(pos, expected, setting_name, value, min_value, max_value, readonly)) + { + } + else if (parseProfileKeyword(pos, expected, use_inherit_keyword) || previous_element_was_parent_profile) + { + if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile)) + return false; + } + else + return false; + + result = std::make_shared(); + result->parent_profile = std::move(parent_profile); + result->setting_name = std::move(setting_name); + result->value = std::move(value); + result->min_value = std::move(min_value); + result->max_value = std::move(max_value); + result->readonly = readonly; + result->id_mode = id_mode; + result->use_inherit_keyword = use_inherit_keyword; + return true; + }); + } } bool ParserSettingsProfileElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - String parent_profile; - String setting_name; - Field value; - Field min_value; - Field max_value; - std::optional readonly; + std::shared_ptr res; + if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, false, res)) + return false; - if (ParserKeyword{"PROFILE"}.ignore(pos, expected) || - (use_inherit_keyword && ParserKeyword{"INHERIT"}.ignore(pos, expected))) - { - if (!parseProfileNameOrID(pos, expected, id_mode, parent_profile)) - return false; - } - else - { - ASTPtr name_ast; - if (!ParserIdentifier{}.parse(pos, name_ast, expected)) - return false; - setting_name = getIdentifierName(name_ast); - - bool has_value_or_constraint = false; - while (parseValue(pos, expected, value) || parseMinMaxValue(pos, expected, min_value, max_value) - || parseReadonlyOrWritableKeyword(pos, expected, readonly)) - { - has_value_or_constraint = true; - } - - if (!has_value_or_constraint) - return false; - } - - auto result = std::make_shared(); - result->parent_profile = std::move(parent_profile); - result->setting_name = std::move(setting_name); - result->value = std::move(value); - result->min_value = std::move(min_value); - result->max_value = std::move(max_value); - result->readonly = readonly; - result->id_mode = id_mode; - result->use_inherit_keyword = use_inherit_keyword; - node = result; + node = res; return true; } @@ -155,17 +209,21 @@ bool ParserSettingsProfileElements::parseImpl(Pos & pos, ASTPtr & node, Expected } else { - ParserSettingsProfileElement element_p; - element_p.useIDMode(id_mode).useInheritKeyword(use_inherit_keyword); - do + bool previous_element_was_parent_profile = false; + + auto parse_element = [&] { - ASTPtr ast; - if (!element_p.parse(pos, ast, expected)) + std::shared_ptr element; + if (!parseSettingsProfileElement(pos, expected, id_mode, use_inherit_keyword, previous_element_was_parent_profile, element)) return false; - auto element = typeid_cast>(ast); - elements.push_back(std::move(element)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + + elements.push_back(element); + previous_element_was_parent_profile = !element->parent_profile.empty(); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_element, false)) + return false; } auto result = std::make_shared(); diff --git a/src/Parsers/ParserUserNameWithHost.cpp 
b/src/Parsers/ParserUserNameWithHost.cpp index 19ec7a9bbd1..9cb4bb6fc97 100644 --- a/src/Parsers/ParserUserNameWithHost.cpp +++ b/src/Parsers/ParserUserNameWithHost.cpp @@ -1,35 +1,52 @@ #include #include #include +#include #include #include namespace DB { +namespace +{ + bool parseUserNameWithHost(IParserBase::Pos & pos, Expected & expected, std::shared_ptr & ast) + { + return IParserBase::wrapParseImpl(pos, [&] + { + String base_name; + if (!parseIdentifierOrStringLiteral(pos, expected, base_name)) + return false; + + boost::algorithm::trim(base_name); + + String host_pattern; + if (ParserToken{TokenType::At}.ignore(pos, expected)) + { + if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern)) + return false; + + boost::algorithm::trim(host_pattern); + if (host_pattern == "%") + host_pattern.clear(); + } + + ast = std::make_shared(); + ast->base_name = std::move(base_name); + ast->host_pattern = std::move(host_pattern); + return true; + }); + } +} + + bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - String base_name; - if (!parseIdentifierOrStringLiteral(pos, expected, base_name)) + std::shared_ptr res; + if (!parseUserNameWithHost(pos, expected, res)) return false; - boost::algorithm::trim(base_name); - - String host_pattern; - if (ParserToken{TokenType::At}.ignore(pos, expected)) - { - if (!parseIdentifierOrStringLiteral(pos, expected, host_pattern)) - return false; - - boost::algorithm::trim(host_pattern); - if (host_pattern == "%") - host_pattern.clear(); - } - - auto result = std::make_shared(); - result->base_name = std::move(base_name); - result->host_pattern = std::move(host_pattern); - node = result; + node = res; return true; } @@ -37,15 +54,19 @@ bool ParserUserNameWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expe bool ParserUserNamesWithHost::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { std::vector> names; - do + + auto parse_single_name = [&] { - ASTPtr ast; - if (!ParserUserNameWithHost{}.parse(pos, ast, expected)) + std::shared_ptr ast; + if (!parseUserNameWithHost(pos, expected, ast)) return false; - names.emplace_back(typeid_cast>(ast)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); + names.emplace_back(std::move(ast)); + return true; + }; + + if (!ParserList::parseUtil(pos, expected, parse_single_name, false)) + return false; auto result = std::make_shared(); result->names = std::move(names); diff --git a/src/Parsers/parseIdentifierOrStringLiteral.cpp b/src/Parsers/parseIdentifierOrStringLiteral.cpp index 22c77af0b09..e1444eb2d5a 100644 --- a/src/Parsers/parseIdentifierOrStringLiteral.cpp +++ b/src/Parsers/parseIdentifierOrStringLiteral.cpp @@ -4,47 +4,52 @@ #include "ASTLiteral.h" #include "ASTIdentifier.h" #include +#include #include namespace DB { - bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, String & result) { - ASTPtr res; - - if (!ParserIdentifier().parse(pos, res, expected)) + return IParserBase::wrapParseImpl(pos, [&] { - if (!ParserStringLiteral().parse(pos, res, expected)) - return false; + ASTPtr ast; + if (ParserIdentifier().parse(pos, ast, expected)) + { + result = getIdentifierName(ast); + return true; + } - result = res->as().value.safeGet(); - } - else - result = getIdentifierName(res); + if (ParserStringLiteral().parse(pos, ast, expected)) + { + result = ast->as().value.safeGet(); + return true; + } - return true; + return false; + }); } bool parseIdentifiersOrStringLiterals(IParser::Pos & pos, Expected & expected, Strings & 
result) { - return IParserBase::wrapParseImpl(pos, [&] + Strings res; + + auto parse_single_id_or_literal = [&] { - Strings strs; - do - { - String str; - if (!parseIdentifierOrStringLiteral(pos, expected, str)) - return false; + String str; + if (!parseIdentifierOrStringLiteral(pos, expected, str)) + return false; - strs.push_back(std::move(str)); - } - while (ParserToken{TokenType::Comma}.ignore(pos, expected)); - - result = std::move(strs); + res.emplace_back(std::move(str)); return true; - }); + }; + + if (!ParserList::parseUtil(pos, expected, parse_single_id_or_literal, false)) + return false; + + result = std::move(res); + return true; } } From 4187edd9f958b95b37b7a048d119691c4f275411 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 15 Jun 2020 01:07:52 +0300 Subject: [PATCH 0755/2229] Split integration test 'test_grant_and_revoke' into two tests. --- .../__init__.py | 0 .../test_create_user_and_login/test.py | 85 +++++++++++++++++++ tests/integration/test_role/__init__.py | 0 .../test.py | 40 +-------- 4 files changed, 86 insertions(+), 39 deletions(-) rename tests/integration/{test_grant_and_revoke => test_create_user_and_login}/__init__.py (100%) create mode 100644 tests/integration/test_create_user_and_login/test.py create mode 100644 tests/integration/test_role/__init__.py rename tests/integration/{test_grant_and_revoke => test_role}/test.py (77%) diff --git a/tests/integration/test_grant_and_revoke/__init__.py b/tests/integration/test_create_user_and_login/__init__.py similarity index 100% rename from tests/integration/test_grant_and_revoke/__init__.py rename to tests/integration/test_create_user_and_login/__init__.py diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py new file mode 100644 index 00000000000..32bf0af6bb6 --- /dev/null +++ b/tests/integration/test_create_user_and_login/test.py @@ -0,0 +1,85 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +import re + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance') + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + instance.query("CREATE TABLE test_table(x UInt32, y UInt32) ENGINE = MergeTree ORDER BY tuple()") + instance.query("INSERT INTO test_table VALUES (1,5), (2,10)") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def cleanup_after_test(): + try: + yield + finally: + instance.query("DROP USER IF EXISTS A, B") + + +def test_login(): + instance.query("CREATE USER A") + instance.query("CREATE USER B") + assert instance.query("SELECT 1", user='A') == "1\n" + assert instance.query("SELECT 1", user='B') == "1\n" + + +def test_grant_and_revoke(): + instance.query("CREATE USER A") + assert "Not enough privileges" in instance.query_and_get_error("SELECT * FROM test_table", user='A') + + instance.query('GRANT SELECT ON test_table TO A') + assert instance.query("SELECT * FROM test_table", user='A') == "1\t5\n2\t10\n" + + instance.query('REVOKE SELECT ON test_table FROM A') + assert "Not enough privileges" in instance.query_and_get_error("SELECT * FROM test_table", user='A') + + +def test_grant_option(): + instance.query("CREATE USER A") + instance.query("CREATE USER B") + + instance.query('GRANT SELECT ON test_table TO A') + assert instance.query("SELECT * FROM test_table", user='A') == "1\t5\n2\t10\n" + assert "Not enough privileges" in instance.query_and_get_error("GRANT SELECT 
ON test_table TO B", user='A') + + instance.query('GRANT SELECT ON test_table TO A WITH GRANT OPTION') + instance.query("GRANT SELECT ON test_table TO B", user='A') + assert instance.query("SELECT * FROM test_table", user='B') == "1\t5\n2\t10\n" + + instance.query('REVOKE SELECT ON test_table FROM A, B') + + +def test_introspection(): + instance.query("CREATE USER A") + instance.query("CREATE USER B") + instance.query('GRANT SELECT ON test.table TO A') + instance.query('GRANT CREATE ON *.* TO B WITH GRANT OPTION') + + assert instance.query("SHOW USERS") == TSV([ "A", "B", "default" ]) + assert instance.query("SHOW GRANTS FOR A") == TSV([ "GRANT SELECT ON test.table TO A" ]) + assert instance.query("SHOW GRANTS FOR B") == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + + assert instance.query("SHOW GRANTS", user='A') == TSV([ "GRANT SELECT ON test.table TO A" ]) + assert instance.query("SHOW GRANTS", user='B') == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + + assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ + TSV([[ "A", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], + [ "B", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) + + assert instance.query("SELECT * from system.grants WHERE user_name IN ('A', 'B') ORDER BY user_name, access_type, grant_option") ==\ + TSV([[ "A", "\N", "SELECT", "test", "table", "\N", 0, 0 ], + [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 0 ], + [ "B", "\N", "CREATE", "\N", "\N", "\N", 0, 1 ]]) diff --git a/tests/integration/test_role/__init__.py b/tests/integration/test_role/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_role/test.py similarity index 77% rename from tests/integration/test_grant_and_revoke/test.py rename to tests/integration/test_role/test.py index 7054ce28e59..92e9f00d326 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_role/test.py @@ -22,7 +22,7 @@ def started_cluster(): @pytest.fixture(autouse=True) -def reset_users_and_roles(): +def cleanup_after_test(): try: yield finally: @@ -30,39 +30,6 @@ def reset_users_and_roles(): instance.query("DROP ROLE IF EXISTS R1, R2") -def test_login(): - instance.query("CREATE USER A") - instance.query("CREATE USER B") - assert instance.query("SELECT 1", user='A') == "1\n" - assert instance.query("SELECT 1", user='B') == "1\n" - - -def test_grant_and_revoke(): - instance.query("CREATE USER A") - assert "Not enough privileges" in instance.query_and_get_error("SELECT * FROM test_table", user='A') - - instance.query('GRANT SELECT ON test_table TO A') - assert instance.query("SELECT * FROM test_table", user='A') == "1\t5\n2\t10\n" - - instance.query('REVOKE SELECT ON test_table FROM A') - assert "Not enough privileges" in instance.query_and_get_error("SELECT * FROM test_table", user='A') - - -def test_grant_option(): - instance.query("CREATE USER A") - instance.query("CREATE USER B") - - instance.query('GRANT SELECT ON test_table TO A') - assert instance.query("SELECT * FROM test_table", user='A') == "1\t5\n2\t10\n" - assert "Not enough privileges" in instance.query_and_get_error("GRANT SELECT ON test_table TO B", user='A') - - instance.query('GRANT SELECT ON test_table TO A WITH GRANT OPTION') - 
instance.query("GRANT SELECT ON test_table TO B", user='A') - assert instance.query("SELECT * FROM test_table", user='B') == "1\t5\n2\t10\n" - - instance.query('REVOKE SELECT ON test_table FROM A, B') - - def test_create_role(): instance.query("CREATE USER A") instance.query('CREATE ROLE R1') @@ -141,7 +108,6 @@ def test_introspection(): instance.query('GRANT CREATE ON *.* TO B WITH GRANT OPTION') instance.query('REVOKE SELECT(x) ON test.table FROM R2') - assert instance.query("SHOW USERS") == TSV([ "A", "B", "default" ]) assert instance.query("SHOW ROLES") == TSV([ "R1", "R2" ]) assert instance.query("SHOW GRANTS FOR A") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT R1 TO A" ]) assert instance.query("SHOW GRANTS FOR B") == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT R2 TO B WITH ADMIN OPTION" ]) @@ -155,10 +121,6 @@ def test_introspection(): assert instance.query("SHOW ENABLED ROLES", user='A') == TSV([[ "R1", 0, 1, 1 ]]) assert instance.query("SHOW ENABLED ROLES", user='B') == TSV([[ "R2", 1, 1, 1 ]]) - assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ - TSV([[ "A", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], - [ "B", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) - assert instance.query("SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name") ==\ TSV([[ "R1", "disk" ], [ "R2", "disk" ]]) From 9fe47df2e8bf2cc2535e2515d4a35cad09c36ae3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 6 Jun 2020 10:21:02 +0300 Subject: [PATCH 0756/2229] Support multiple users/roles in SHOW CREATE USER(ROLE, etc.) and SHOW GRANTS FOR commands. Support syntax "SHOW CREATE USER ALL" and "SHOW GRANTS FOR ALL". 
--- src/Access/DiskAccessStorage.cpp | 4 +- src/Access/IAccessEntity.h | 49 +++-- .../InterpreterShowAccessEntitiesQuery.cpp | 21 ++- ...InterpreterShowCreateAccessEntityQuery.cpp | 94 +++++++--- .../InterpreterShowCreateAccessEntityQuery.h | 7 +- .../InterpreterShowGrantsQuery.cpp | 46 +++-- src/Interpreters/InterpreterShowGrantsQuery.h | 6 +- src/Parsers/ASTShowAccessEntitiesQuery.cpp | 58 +++--- src/Parsers/ASTShowAccessEntitiesQuery.h | 19 +- .../ASTShowCreateAccessEntityQuery.cpp | 45 ++++- src/Parsers/ASTShowCreateAccessEntityQuery.h | 19 +- src/Parsers/ASTShowGrantsQuery.cpp | 14 +- src/Parsers/ASTShowGrantsQuery.h | 5 +- src/Parsers/ParserDropAccessEntityQuery.cpp | 32 ++-- src/Parsers/ParserGrantQuery.cpp | 71 +------ src/Parsers/ParserShowAccessEntitiesQuery.cpp | 77 +++++--- src/Parsers/ParserShowAccessEntitiesQuery.h | 10 +- .../ParserShowCreateAccessEntityQuery.cpp | 174 +++++++++++++----- .../ParserShowCreateAccessEntityQuery.h | 9 + src/Parsers/ParserShowGrantsQuery.cpp | 23 ++- src/Parsers/parseDatabaseAndTableName.cpp | 70 +++++++ src/Parsers/parseDatabaseAndTableName.h | 5 +- .../test_create_user_and_login/test.py | 8 + tests/integration/test_quota/test.py | 4 +- tests/integration/test_role/test.py | 5 + tests/integration/test_row_policy/test.py | 12 +- .../integration/test_settings_profile/test.py | 5 + 27 files changed, 603 insertions(+), 289 deletions(-) diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 4dc91cd8937..8b249813f7c 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -265,7 +265,9 @@ namespace /// Calculates the path for storing a map of name of access entity to UUID for access entities of some type. std::filesystem::path getListFilePath(const String & directory_path, EntityType type) { - std::string_view file_name = EntityTypeInfo::get(type).list_filename; + String file_name = EntityTypeInfo::get(type).plural_raw_name; + boost::to_lower(file_name); + file_name += ".list"; return std::filesystem::path(directory_path).append(file_name); } diff --git a/src/Access/IAccessEntity.h b/src/Access/IAccessEntity.h index 39a5cefa7d7..68e14c99982 100644 --- a/src/Access/IAccessEntity.h +++ b/src/Access/IAccessEntity.h @@ -45,11 +45,13 @@ struct IAccessEntity struct TypeInfo { const char * const raw_name; + const char * const plural_raw_name; const String name; /// Uppercased with spaces instead of underscores, e.g. "SETTINGS PROFILE". const String alias; /// Alias of the keyword or empty string, e.g. "PROFILE". + const String plural_name; /// Uppercased with spaces plural name, e.g. "SETTINGS PROFILES". + const String plural_alias; /// Uppercased with spaces plural name alias, e.g. "PROFILES". const String name_for_output_with_entity_name; /// Lowercased with spaces instead of underscores, e.g. "settings profile". const char unique_char; /// Unique character for this type. E.g. 'P' for SETTINGS_PROFILE. - const String list_filename; /// Name of the file containing list of objects of this type, including the file extension ".list". 
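// list_filename could be dropped because DiskAccessStorage (the first hunk of
// this commit) now derives the file name from plural_raw_name on demand,
// roughly:
//
//     String file_name = EntityTypeInfo::get(type).plural_raw_name;  // e.g. "USERS"
//     boost::to_lower(file_name);                                    // -> "users"
//     file_name += ".list";                                          // -> "users.list"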
const int not_found_error_code; static const TypeInfo & get(Type type_); @@ -69,6 +71,18 @@ struct IAccessEntity friend bool operator ==(const IAccessEntity & lhs, const IAccessEntity & rhs) { return lhs.equal(rhs); } friend bool operator !=(const IAccessEntity & lhs, const IAccessEntity & rhs) { return !(lhs == rhs); } + struct LessByName + { + bool operator()(const IAccessEntity & lhs, const IAccessEntity & rhs) const { return (lhs.getName() < rhs.getName()); } + bool operator()(const std::shared_ptr & lhs, const std::shared_ptr & rhs) const { return operator()(*lhs, *rhs); } + }; + + struct LessByTypeAndName + { + bool operator()(const IAccessEntity & lhs, const IAccessEntity & rhs) const { return (lhs.getType() < rhs.getType()) || ((lhs.getType() == rhs.getType()) && (lhs.getName() < rhs.getName())); } + bool operator()(const std::shared_ptr & lhs, const std::shared_ptr & rhs) const { return operator()(*lhs, *rhs); } + }; + protected: String name; @@ -87,44 +101,49 @@ using AccessEntityPtr = std::shared_ptr; inline const IAccessEntity::TypeInfo & IAccessEntity::TypeInfo::get(Type type_) { - static constexpr auto make_info = [](const char * raw_name_, char unique_char_, const char * list_filename_, int not_found_error_code_) + static constexpr auto make_info = [](const char * raw_name_, const char * plural_raw_name_, char unique_char_, int not_found_error_code_) { - String init_name = raw_name_; - boost::to_upper(init_name); - boost::replace_all(init_name, "_", " "); - String init_alias; - if (auto underscore_pos = init_name.find_first_of(" "); underscore_pos != String::npos) - init_alias = init_name.substr(underscore_pos + 1); - String init_name_for_output_with_entity_name = init_name; + String init_names[2] = {raw_name_, plural_raw_name_}; + String init_aliases[2]; + for (size_t i = 0; i != std::size(init_names); ++i) + { + String & init_name = init_names[i]; + String & init_alias = init_aliases[i]; + boost::to_upper(init_name); + boost::replace_all(init_name, "_", " "); + if (auto underscore_pos = init_name.find_first_of(" "); underscore_pos != String::npos) + init_alias = init_name.substr(underscore_pos + 1); + } + String init_name_for_output_with_entity_name = init_names[0]; boost::to_lower(init_name_for_output_with_entity_name); - return TypeInfo{raw_name_, std::move(init_name), std::move(init_alias), std::move(init_name_for_output_with_entity_name), unique_char_, list_filename_, not_found_error_code_}; + return TypeInfo{raw_name_, plural_raw_name_, std::move(init_names[0]), std::move(init_aliases[0]), std::move(init_names[1]), std::move(init_aliases[1]), std::move(init_name_for_output_with_entity_name), unique_char_, not_found_error_code_}; }; switch (type_) { case Type::USER: { - static const auto info = make_info("USER", 'U', "users.list", ErrorCodes::UNKNOWN_USER); + static const auto info = make_info("USER", "USERS", 'U', ErrorCodes::UNKNOWN_USER); return info; } case Type::ROLE: { - static const auto info = make_info("ROLE", 'R', "roles.list", ErrorCodes::UNKNOWN_ROLE); + static const auto info = make_info("ROLE", "ROLES", 'R', ErrorCodes::UNKNOWN_ROLE); return info; } case Type::SETTINGS_PROFILE: { - static const auto info = make_info("SETTINGS_PROFILE", 'S', "settings_profiles.list", ErrorCodes::THERE_IS_NO_PROFILE); + static const auto info = make_info("SETTINGS_PROFILE", "SETTINGS_PROFILES", 'S', ErrorCodes::THERE_IS_NO_PROFILE); return info; } case Type::ROW_POLICY: { - static const auto info = make_info("ROW_POLICY", 'P', "row_policies.list", 
ErrorCodes::UNKNOWN_ROW_POLICY); + static const auto info = make_info("ROW_POLICY", "ROW_POLICIES", 'P', ErrorCodes::UNKNOWN_ROW_POLICY); return info; } case Type::QUOTA: { - static const auto info = make_info("QUOTA", 'Q', "quotas.list", ErrorCodes::UNKNOWN_QUOTA); + static const auto info = make_info("QUOTA", "QUOTAS", 'Q', ErrorCodes::UNKNOWN_QUOTA); return info; } case Type::MAX: break; diff --git a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp index 8d54da790f8..379580fe58a 100644 --- a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp @@ -31,7 +31,8 @@ BlockIO InterpreterShowAccessEntitiesQuery::execute() String InterpreterShowAccessEntitiesQuery::getRewrittenQuery() const { - const auto & query = query_ptr->as(); + auto & query = query_ptr->as(); + query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); String origin; String expr = "*"; String filter, order; @@ -42,14 +43,18 @@ String InterpreterShowAccessEntitiesQuery::getRewrittenQuery() const { origin = "row_policies"; expr = "name"; - const String & table_name = query.table_name; - if (!table_name.empty()) + if (!query.short_name.empty()) + filter += String{filter.empty() ? "" : " AND "} + "short_name = " + quoteString(query.short_name); + if (query.database_and_table_name) { - String database = query.database; - if (database.empty()) - database = context.getCurrentDatabase(); - filter = "database = " + quoteString(database) + " AND table = " + quoteString(table_name); - expr = "short_name"; + const String & database = query.database_and_table_name->first; + const String & table_name = query.database_and_table_name->second; + if (!database.empty()) + filter += String{filter.empty() ? "" : " AND "} + "database = " + quoteString(database); + if (!table_name.empty()) + filter += String{filter.empty() ? "" : " AND "} + "table = " + quoteString(table_name); + if (!database.empty() && !table_name.empty()) + expr = "short_name"; } break; } diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 5c4173d7aa3..55c5d961ad8 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -232,10 +233,8 @@ BlockIO InterpreterShowCreateAccessEntityQuery::execute() BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() { - auto & show_query = query_ptr->as(); - /// Build a create queries. - ASTs create_queries = getCreateQueries(show_query); + ASTs create_queries = getCreateQueries(); /// Build the result column. MutableColumnPtr column = ColumnString::create(); @@ -249,6 +248,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() /// Prepare description of the result column. 
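// The lines below format the SHOW CREATE statement back into a string; the
// remainder of the function (elided by the hunk context) evidently strips the
// leading "SHOW " so that, e.g., a hypothetical "SHOW CREATE USERS mira, ivan"
// yields a result column named "CREATE USERS mira, ivan".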
std::stringstream desc_ss; + const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); String prefix = "SHOW "; @@ -259,52 +259,94 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() } -ASTs InterpreterShowCreateAccessEntityQuery::getCreateQueries(ASTShowCreateAccessEntityQuery & show_query) const +std::vector InterpreterShowCreateAccessEntityQuery::getEntities() const { + auto & show_query = query_ptr->as(); const auto & access_control = context.getAccessControlManager(); context.checkAccess(getRequiredAccess()); show_query.replaceEmptyDatabaseWithCurrent(context.getCurrentDatabase()); + std::vector entities; - if (show_query.current_user) + if (show_query.all) { - auto user = context.getUser(); - if (!user) - return {}; - return {getCreateQueryImpl(*user, &access_control, false)}; + auto ids = access_control.findAll(show_query.type); + for (const auto & id : ids) + { + if (auto entity = access_control.tryRead(id)) + entities.push_back(entity); + } } - - if (show_query.current_quota) + else if (show_query.current_user) + { + if (auto user = context.getUser()) + entities.push_back(user); + } + else if (show_query.current_quota) { auto usage = context.getQuotaUsage(); - if (!usage) - return {}; - auto quota = access_control.read(usage->quota_id); - return {getCreateQueryImpl(*quota, &access_control, false)}; + if (usage) + entities.push_back(access_control.read(usage->quota_id)); } - - ASTs list; - - if (show_query.type == EntityType::ROW_POLICY) + else if (show_query.type == EntityType::ROW_POLICY) { - for (const String & name : show_query.row_policy_names->toStrings()) + auto ids = access_control.findAll(); + if (show_query.row_policy_names) { - RowPolicyPtr policy = access_control.read(name); - list.push_back(getCreateQueryImpl(*policy, &access_control, false)); + for (const String & name : show_query.row_policy_names->toStrings()) + entities.push_back(access_control.read(name)); + } + else + { + for (const auto & id : ids) + { + auto policy = access_control.tryRead(id); + if (!policy) + continue; + if (!show_query.short_name.empty() && (policy->getShortName() != show_query.short_name)) + continue; + if (show_query.database_and_table_name) + { + const String & database = show_query.database_and_table_name->first; + const String & table_name = show_query.database_and_table_name->second; + if (!database.empty() && (policy->getDatabase() != database)) + continue; + if (!table_name.empty() && (policy->getTableName() != table_name)) + continue; + } + entities.push_back(policy); + } } } else { for (const String & name : show_query.names) - { - auto entity = access_control.read(access_control.getID(show_query.type, name)); - list.push_back(getCreateQueryImpl(*entity, &access_control, false)); - } + entities.push_back(access_control.read(access_control.getID(show_query.type, name))); } + boost::range::sort(entities, IAccessEntity::LessByName{}); + return entities; +} + + +ASTs InterpreterShowCreateAccessEntityQuery::getCreateQueries() const +{ + auto entities = getEntities(); + + ASTs list; + const auto & access_control = context.getAccessControlManager(); + for (const auto & entity : entities) + list.push_back(getCreateQuery(*entity, access_control)); + return list; } +ASTPtr InterpreterShowCreateAccessEntityQuery::getCreateQuery(const IAccessEntity & entity, const AccessControlManager & access_control) +{ + return getCreateQueryImpl(entity, &access_control, false); +} + + ASTPtr 
InterpreterShowCreateAccessEntityQuery::getAttachQuery(const IAccessEntity & entity) { return getCreateQueryImpl(entity, nullptr, true); diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h index 0d2978cff6c..12f427b5eb0 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h @@ -7,10 +7,11 @@ namespace DB { +class AccessControlManager; class Context; -class ASTShowCreateAccessEntityQuery; class AccessRightsElements; struct IAccessEntity; +using AccessEntityPtr = std::shared_ptr; /** Returns a single item containing a statement which could be used to create a specified role. @@ -25,11 +26,13 @@ public: bool ignoreQuota() const override { return ignore_quota; } bool ignoreLimits() const override { return ignore_quota; } + static ASTPtr getCreateQuery(const IAccessEntity & entity, const AccessControlManager & access_control); static ASTPtr getAttachQuery(const IAccessEntity & entity); private: BlockInputStreamPtr executeImpl(); - ASTs getCreateQueries(ASTShowCreateAccessEntityQuery & show_query) const; + std::vector getEntities() const; + ASTs getCreateQueries() const; AccessRightsElements getRequiredAccess() const; ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index c6e3ccce7c7..ebb0d871c8b 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -10,6 +10,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -121,10 +124,8 @@ BlockIO InterpreterShowGrantsQuery::execute() BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() { - const auto & show_query = query_ptr->as(); - /// Build a create query. - ASTs grant_queries = getGrantQueries(show_query); + ASTs grant_queries = getGrantQueries(); /// Build the result column. MutableColumnPtr column = ColumnString::create(); @@ -138,6 +139,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() /// Prepare description of the result column. 
std::stringstream desc_ss; + const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); String prefix = "SHOW "; @@ -148,21 +150,41 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() } -ASTs InterpreterShowGrantsQuery::getGrantQueries(const ASTShowGrantsQuery & show_query) const +std::vector InterpreterShowGrantsQuery::getEntities() const { + const auto & show_query = query_ptr->as(); const auto & access_control = context.getAccessControlManager(); + auto ids = RolesOrUsersSet{*show_query.for_roles, access_control, context.getUserID()}.getMatchingIDs(access_control); - AccessEntityPtr user_or_role; - if (show_query.current_user) - user_or_role = context.getUser(); - else + std::vector entities; + for (const auto & id : ids) { - user_or_role = access_control.tryRead(show_query.name); - if (!user_or_role) - user_or_role = access_control.read(show_query.name); + auto entity = access_control.tryRead(id); + if (entity) + entities.push_back(entity); } - return getGrantQueriesImpl(*user_or_role, &access_control); + boost::range::sort(entities, IAccessEntity::LessByTypeAndName{}); + return entities; +} + + +ASTs InterpreterShowGrantsQuery::getGrantQueries() const +{ + auto entities = getEntities(); + const auto & access_control = context.getAccessControlManager(); + + ASTs grant_queries; + for (const auto & entity : entities) + boost::range::push_back(grant_queries, getGrantQueries(*entity, access_control)); + + return grant_queries; +} + + +ASTs InterpreterShowGrantsQuery::getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control) +{ + return getGrantQueriesImpl(user_or_role, &access_control, false); } diff --git a/src/Interpreters/InterpreterShowGrantsQuery.h b/src/Interpreters/InterpreterShowGrantsQuery.h index b400130a292..af6c3065192 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.h +++ b/src/Interpreters/InterpreterShowGrantsQuery.h @@ -7,8 +7,10 @@ namespace DB { +class AccessControlManager; class ASTShowGrantsQuery; struct IAccessEntity; +using AccessEntityPtr = std::shared_ptr; class InterpreterShowGrantsQuery : public IInterpreter @@ -18,11 +20,13 @@ public: BlockIO execute() override; + static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control); static ASTs getAttachGrantQueries(const IAccessEntity & user_or_role); private: BlockInputStreamPtr executeImpl(); - ASTs getGrantQueries(const ASTShowGrantsQuery & show_query) const; + ASTs getGrantQueries() const; + std::vector getEntities() const; ASTPtr query_ptr; Context & context; diff --git a/src/Parsers/ASTShowAccessEntitiesQuery.cpp b/src/Parsers/ASTShowAccessEntitiesQuery.cpp index cb1ccff5273..e87baebba33 100644 --- a/src/Parsers/ASTShowAccessEntitiesQuery.cpp +++ b/src/Parsers/ASTShowAccessEntitiesQuery.cpp @@ -4,49 +4,51 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} +using EntityTypeInfo = IAccessEntity::TypeInfo; -const char * ASTShowAccessEntitiesQuery::getKeyword() const +String ASTShowAccessEntitiesQuery::getKeyword() const { - switch (type) - { - case EntityType::ROW_POLICY: - return "SHOW ROW POLICIES"; - case EntityType::QUOTA: - return current_quota ? "SHOW CURRENT QUOTA" : "SHOW QUOTAS"; - case EntityType::SETTINGS_PROFILE: - return "SHOW SETTINGS PROFILES"; - case EntityType::USER: - return "SHOW USERS"; - case EntityType::ROLE: - return current_roles ? "SHOW CURRENT ROLES" : (enabled_roles ? 
"SHOW ENABLED ROLES" : "SHOW ROLES"); - case EntityType::MAX: - break; - } - throw Exception(toString(type) + ": type is not supported by SHOW query", ErrorCodes::NOT_IMPLEMENTED); + if (current_quota) + return "CURRENT QUOTA"; + if (current_roles) + return "CURRENT ROLES"; + if (enabled_roles) + return "ENABLED ROLES"; + return EntityTypeInfo::get(type).plural_name; } String ASTShowAccessEntitiesQuery::getID(char) const { - return String(getKeyword()) + " query"; + return "SHOW " + String(getKeyword()) + " query"; } void ASTShowAccessEntitiesQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - const char * keyword = getKeyword(); - settings.ostr << (settings.hilite ? hilite_keyword : "") << keyword << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW " << getKeyword() << (settings.hilite ? hilite_none : ""); - if ((type == EntityType::ROW_POLICY) && !table_name.empty()) + if (!short_name.empty()) + settings.ostr << " " << backQuoteIfNeed(short_name); + + if (database_and_table_name) { + const String & database = database_and_table_name->first; + const String & table_name = database_and_table_name->second; settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); - if (!database.empty()) - settings.ostr << backQuoteIfNeed(database) << "."; - settings.ostr << backQuoteIfNeed(table_name); + settings.ostr << (database.empty() ? "" : backQuoteIfNeed(database) + "."); + settings.ostr << (table_name.empty() ? "*" : backQuoteIfNeed(table_name)); + } +} + + +void ASTShowAccessEntitiesQuery::replaceEmptyDatabaseWithCurrent(const String & current_database) +{ + if (database_and_table_name) + { + String & database = database_and_table_name->first; + if (database.empty()) + database = current_database; } } diff --git a/src/Parsers/ASTShowAccessEntitiesQuery.h b/src/Parsers/ASTShowAccessEntitiesQuery.h index 3bf16ad6abd..7ccd76bfe5e 100644 --- a/src/Parsers/ASTShowAccessEntitiesQuery.h +++ b/src/Parsers/ASTShowAccessEntitiesQuery.h @@ -7,32 +7,37 @@ namespace DB { -/// SHOW [ROW] POLICIES [ON [database.]table] -/// SHOW QUOTAS -/// SHOW [CURRENT] QUOTA -/// SHOW [SETTINGS] PROFILES /// SHOW USERS /// SHOW [CURRENT|ENABLED] ROLES +/// SHOW [SETTINGS] PROFILES +/// SHOW [ROW] POLICIES [name | ON [database.]table] +/// SHOW QUOTAS +/// SHOW [CURRENT] QUOTA class ASTShowAccessEntitiesQuery : public ASTQueryWithOutput { public: using EntityType = IAccessEntity::Type; EntityType type; - String database; - String table_name; + + bool all = false; bool current_quota = false; bool current_roles = false; bool enabled_roles = false; + String short_name; + std::optional> database_and_table_name; + String getID(char) const override; ASTPtr clone() const override { return std::make_shared(*this); } + void replaceEmptyDatabaseWithCurrent(const String & current_database); + protected: void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; private: - const char * getKeyword() const; + String getKeyword() const; }; } diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp b/src/Parsers/ASTShowCreateAccessEntityQuery.cpp index cbd31d0d53c..bc309ab5c44 100644 --- a/src/Parsers/ASTShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/ASTShowCreateAccessEntityQuery.cpp @@ -7,6 +7,7 @@ namespace DB { namespace { + using EntityType = IAccessEntity::Type; using EntityTypeInfo = IAccessEntity::TypeInfo; void formatNames(const 
Strings & names, const IAST::FormatSettings & settings) @@ -22,9 +23,18 @@ namespace } +String ASTShowCreateAccessEntityQuery::getKeyword() const +{ + size_t total_count = (names.size()) + (row_policy_names ? row_policy_names->size() : 0) + current_user + current_quota; + bool multiple = (total_count != 1) || all || !short_name.empty() || database_and_table_name; + const auto & type_info = EntityTypeInfo::get(type); + return multiple ? type_info.plural_name : type_info.name; +} + + String ASTShowCreateAccessEntityQuery::getID(char) const { - return String("SHOW CREATE ") + toString(type) + " query"; + return String("SHOW CREATE ") + getKeyword() + " query"; } @@ -36,20 +46,28 @@ ASTPtr ASTShowCreateAccessEntityQuery::clone() const void ASTShowCreateAccessEntityQuery::formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") - << "SHOW CREATE " << EntityTypeInfo::get(type).name - << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW CREATE " << getKeyword() << (settings.hilite ? hilite_none : ""); - if (current_user || current_quota) - { - } - else if (type == EntityType::ROW_POLICY) + if (!names.empty()) + formatNames(names, settings); + + if (row_policy_names) { settings.ostr << " "; row_policy_names->format(settings); } - else - formatNames(names, settings); + + if (!short_name.empty()) + settings.ostr << " " << backQuoteIfNeed(short_name); + + if (database_and_table_name) + { + const String & database = database_and_table_name->first; + const String & table_name = database_and_table_name->second; + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ON " << (settings.hilite ? hilite_none : ""); + settings.ostr << (database.empty() ? "" : backQuoteIfNeed(database) + "."); + settings.ostr << (table_name.empty() ? "*" : backQuoteIfNeed(table_name)); + } } @@ -57,6 +75,13 @@ void ASTShowCreateAccessEntityQuery::replaceEmptyDatabaseWithCurrent(const Strin { if (row_policy_names) row_policy_names->replaceEmptyDatabaseWithCurrent(current_database); + + if (database_and_table_name) + { + String & database = database_and_table_name->first; + if (database.empty()) + database = current_database; + } } } diff --git a/src/Parsers/ASTShowCreateAccessEntityQuery.h b/src/Parsers/ASTShowCreateAccessEntityQuery.h index 5fd16d622f7..f112e9211fe 100644 --- a/src/Parsers/ASTShowCreateAccessEntityQuery.h +++ b/src/Parsers/ASTShowCreateAccessEntityQuery.h @@ -8,11 +8,16 @@ namespace DB { class ASTRowPolicyNames; -/** SHOW CREATE QUOTA [name] - * SHOW CREATE [ROW] POLICY name ON [database.]table - * SHOW CREATE USER [name | CURRENT_USER] +/** SHOW CREATE USER [name | CURRENT_USER] + * SHOW CREATE USERS [name [, name2 ...] * SHOW CREATE ROLE name + * SHOW CREATE ROLES [name [, name2 ...]] * SHOW CREATE [SETTINGS] PROFILE name + * SHOW CREATE [SETTINGS] PROFILES [name [, name2 ...]] + * SHOW CREATE [ROW] POLICY name ON [database.]table + * SHOW CREATE [ROW] POLICIES [name ON [database.]table [, name2 ON database2.table2 ...] 
| name | ON database.table] + * SHOW CREATE QUOTA [name] + * SHOW CREATE QUOTAS [name [, name2 ...]] */ class ASTShowCreateAccessEntityQuery : public ASTQueryWithOutput { @@ -21,9 +26,14 @@ public: EntityType type; Strings names; + std::shared_ptr row_policy_names; + bool current_quota = false; bool current_user = false; - std::shared_ptr row_policy_names; + bool all = false; + + String short_name; + std::optional> database_and_table_name; String getID(char) const override; ASTPtr clone() const override; @@ -31,6 +41,7 @@ public: void replaceEmptyDatabaseWithCurrent(const String & current_database); protected: + String getKeyword() const; void formatQueryImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; }; diff --git a/src/Parsers/ASTShowGrantsQuery.cpp b/src/Parsers/ASTShowGrantsQuery.cpp index b3cc0cbd386..26ae506d7d4 100644 --- a/src/Parsers/ASTShowGrantsQuery.cpp +++ b/src/Parsers/ASTShowGrantsQuery.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -21,8 +22,15 @@ void ASTShowGrantsQuery::formatQueryImpl(const FormatSettings & settings, Format settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW GRANTS" << (settings.hilite ? hilite_none : ""); - if (!current_user) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " << (settings.hilite ? hilite_none : "") - << backQuoteIfNeed(name); + if (for_roles->current_user && !for_roles->all && for_roles->names.empty() && for_roles->except_names.empty() + && !for_roles->except_current_user) + { + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FOR " + << (settings.hilite ? hilite_none : ""); + for_roles->format(settings); + } } } diff --git a/src/Parsers/ASTShowGrantsQuery.h b/src/Parsers/ASTShowGrantsQuery.h index 1afb5e5ac0d..04764fe3502 100644 --- a/src/Parsers/ASTShowGrantsQuery.h +++ b/src/Parsers/ASTShowGrantsQuery.h @@ -5,13 +5,14 @@ namespace DB { +class ASTRolesOrUsersSet; + /** SHOW GRANTS [FOR user_name] */ class ASTShowGrantsQuery : public ASTQueryWithOutput { public: - String name; - bool current_user = false; + std::shared_ptr for_roles; String getID(char) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ParserDropAccessEntityQuery.cpp b/src/Parsers/ParserDropAccessEntityQuery.cpp index d5eac710631..b1f3ce5f93d 100644 --- a/src/Parsers/ParserDropAccessEntityQuery.cpp +++ b/src/Parsers/ParserDropAccessEntityQuery.cpp @@ -15,6 +15,23 @@ namespace using EntityType = IAccessEntity::Type; using EntityTypeInfo = IAccessEntity::TypeInfo; + + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, EntityType & type) + { + for (auto i : ext::range(EntityType::MAX)) + { + const auto & type_info = EntityTypeInfo::get(i); + if (ParserKeyword{type_info.name.c_str()}.ignore(pos, expected) + || (!type_info.alias.empty() && ParserKeyword{type_info.alias.c_str()}.ignore(pos, expected))) + { + type = i; + return true; + } + } + return false; + } + + bool parseOnCluster(IParserBase::Pos & pos, Expected & expected, String & cluster) { return IParserBase::wrapParseImpl(pos, [&] @@ -30,17 +47,8 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & if (!ParserKeyword{"DROP"}.ignore(pos, expected)) return false; - std::optional type; - for (auto type_i : ext::range(EntityType::MAX)) - { - const auto & type_info = EntityTypeInfo::get(type_i); - if (ParserKeyword{type_info.name.c_str()}.ignore(pos, expected) - || (!type_info.alias.empty() && ParserKeyword{type_info.alias.c_str()}.ignore(pos, 
expected))) - { - type = type_i; - } - } - if (!type) + EntityType type; + if (!parseEntityType(pos, expected, type)) return false; bool if_exists = false; @@ -78,7 +86,7 @@ bool ParserDropAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & auto query = std::make_shared(); node = query; - query->type = *type; + query->type = type; query->if_exists = if_exists; query->cluster = std::move(cluster); query->names = std::move(names); diff --git a/src/Parsers/ParserGrantQuery.cpp b/src/Parsers/ParserGrantQuery.cpp index 5eb5353f2ee..62efd5314ac 100644 --- a/src/Parsers/ParserGrantQuery.cpp +++ b/src/Parsers/ParserGrantQuery.cpp @@ -1,13 +1,12 @@ #include #include -#include #include #include #include -#include #include #include #include +#include #include @@ -85,72 +84,6 @@ namespace }); } - - bool parseDatabaseAndTableNameOrMaybeAsterisks( - IParser::Pos & pos, Expected & expected, String & database_name, bool & any_database, String & table_name, bool & any_table) - { - return IParserBase::wrapParseImpl(pos, [&] - { - ASTPtr ast[2]; - if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - { - if (ParserToken{TokenType::Dot}.ignore(pos, expected)) - { - if (!ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - return false; - - /// *.* (any table in any database) - any_database = true; - database_name.clear(); - any_table = true; - table_name.clear(); - return true; - } - - /// * (any table in the current database) - any_database = false; - database_name.clear(); - any_table = true; - table_name.clear(); - return true; - } - - if (!ParserIdentifier().parse(pos, ast[0], expected)) - return false; - - if (ParserToken{TokenType::Dot}.ignore(pos, expected)) - { - if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) - { - /// .* - any_database = false; - database_name = getIdentifierName(ast[0]); - any_table = true; - table_name.clear(); - return true; - } - - if (!ParserIdentifier().parse(pos, ast[1], expected)) - return false; - - /// . 
- any_database = false; - database_name = getIdentifierName(ast[0]); - any_table = false; - table_name = getIdentifierName(ast[1]); - return true; - } - - /// - the current database, specified table - any_database = false; - database_name.clear(); - any_table = false; - table_name = getIdentifierName(ast[0]); - return true; - }); - } - - bool parseAccessTypesWithColumns(IParser::Pos & pos, Expected & expected, std::vector> & access_and_columns) { @@ -193,7 +126,7 @@ namespace String database_name, table_name; bool any_database = false, any_table = false; - if (!parseDatabaseAndTableNameOrMaybeAsterisks(pos, expected, database_name, any_database, table_name, any_table)) + if (!parseDatabaseAndTableNameOrAsterisks(pos, expected, database_name, any_database, table_name, any_table)) return false; for (auto & [access_flags, columns] : access_and_columns) diff --git a/src/Parsers/ParserShowAccessEntitiesQuery.cpp b/src/Parsers/ParserShowAccessEntitiesQuery.cpp index c50bd5b402c..f070641cbd2 100644 --- a/src/Parsers/ParserShowAccessEntitiesQuery.cpp +++ b/src/Parsers/ParserShowAccessEntitiesQuery.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include namespace DB @@ -9,14 +11,29 @@ namespace DB namespace { using EntityType = IAccessEntity::Type; + using EntityTypeInfo = IAccessEntity::TypeInfo; - bool parseONDatabaseAndTableName(IParserBase::Pos & pos, Expected & expected, String & database, String & table_name) + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, EntityType & type) + { + for (auto i : ext::range(EntityType::MAX)) + { + const auto & type_info = EntityTypeInfo::get(i); + if (ParserKeyword{type_info.plural_name.c_str()}.ignore(pos, expected) + || (!type_info.plural_alias.empty() && ParserKeyword{type_info.plural_alias.c_str()}.ignore(pos, expected))) + { + type = i; + return true; + } + } + return false; + } + + bool parseOnDBAndTableName(IParserBase::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table) { return IParserBase::wrapParseImpl(pos, [&] { - database.clear(); - table_name.clear(); - return ParserKeyword{"ON"}.ignore(pos, expected) && parseDatabaseAndTableName(pos, expected, database, table_name); + return ParserKeyword{"ON"}.ignore(pos, expected) + && parseDatabaseAndTableNameOrAsterisks(pos, expected, database, any_database, table, any_table); }); } } @@ -27,18 +44,15 @@ bool ParserShowAccessEntitiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected if (!ParserKeyword{"SHOW"}.ignore(pos, expected)) return false; - std::optional type; + EntityType type; + bool all = false; bool current_quota = false; bool current_roles = false; bool enabled_roles = false; - if (ParserKeyword{"USERS"}.ignore(pos, expected)) + if (parseEntityType(pos, expected, type)) { - type = EntityType::USER; - } - else if (ParserKeyword{"ROLES"}.ignore(pos, expected)) - { - type = EntityType::ROLE; + all = true; } else if (ParserKeyword{"CURRENT ROLES"}.ignore(pos, expected)) { @@ -50,39 +64,44 @@ bool ParserShowAccessEntitiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected type = EntityType::ROLE; enabled_roles = true; } - else if (ParserKeyword{"POLICIES"}.ignore(pos, expected) || ParserKeyword{"ROW POLICIES"}.ignore(pos, expected)) - { - type = EntityType::ROW_POLICY; - } - else if (ParserKeyword{"QUOTAS"}.ignore(pos, expected)) - { - type = EntityType::QUOTA; - } - else if (ParserKeyword{"QUOTA"}.ignore(pos, expected) || ParserKeyword{"CURRENT QUOTA"}.ignore(pos, expected)) + else if (ParserKeyword{"CURRENT 
QUOTA"}.ignore(pos, expected) || ParserKeyword{"QUOTA"}.ignore(pos, expected)) { type = EntityType::QUOTA; current_quota = true; } - else if (ParserKeyword{"PROFILES"}.ignore(pos, expected) || ParserKeyword{"SETTINGS PROFILES"}.ignore(pos, expected)) - { - type = EntityType::SETTINGS_PROFILE; - } else return false; - String database, table_name; + String short_name; + std::optional> database_and_table_name; if (type == EntityType::ROW_POLICY) - parseONDatabaseAndTableName(pos, expected, database, table_name); + { + String database, table_name; + bool any_database, any_table; + if (parseOnDBAndTableName(pos, expected, database, any_database, table_name, any_table)) + { + if (any_database) + all = true; + else + database_and_table_name.emplace(database, table_name); + } + else if (parseIdentifierOrStringLiteral(pos, expected, short_name)) + { + } + else + all = true; + } auto query = std::make_shared(); node = query; - query->type = *type; + query->type = type; + query->all = all; query->current_quota = current_quota; query->current_roles = current_roles; query->enabled_roles = enabled_roles; - query->database = std::move(database); - query->table_name = std::move(table_name); + query->short_name = std::move(short_name); + query->database_and_table_name = std::move(database_and_table_name); return true; } diff --git a/src/Parsers/ParserShowAccessEntitiesQuery.h b/src/Parsers/ParserShowAccessEntitiesQuery.h index bb8b37f40e8..fcee3b09868 100644 --- a/src/Parsers/ParserShowAccessEntitiesQuery.h +++ b/src/Parsers/ParserShowAccessEntitiesQuery.h @@ -6,10 +6,12 @@ namespace DB { /** Parses queries like - * SHOW [ROW] POLICIES [ON [database.]table] - SHOW QUOTAS - SHOW [CURRENT] QUOTA - SHOW [SETTINGS] PROFILES + * SHOW USERS + * SHOW [CURRENT|ENABLED] ROLES + * SHOW [SETTINGS] PROFILES + * SHOW [ROW] POLICIES [name | ON [database.]table] + * SHOW QUOTAS + * SHOW [CURRENT] QUOTA */ class ParserShowAccessEntitiesQuery : public IParserBase { diff --git a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp b/src/Parsers/ParserShowCreateAccessEntityQuery.cpp index 465a6c380b1..55d00572e48 100644 --- a/src/Parsers/ParserShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/ParserShowCreateAccessEntityQuery.cpp @@ -5,14 +5,62 @@ #include #include #include +#include #include #include namespace DB { -using EntityType = IAccessEntity::Type; -using EntityTypeInfo = IAccessEntity::TypeInfo; +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + +namespace +{ + using EntityType = IAccessEntity::Type; + using EntityTypeInfo = IAccessEntity::TypeInfo; + + bool parseEntityType(IParserBase::Pos & pos, Expected & expected, EntityType & type, bool & plural) + { + for (auto i : ext::range(EntityType::MAX)) + { + const auto & type_info = EntityTypeInfo::get(i); + if (ParserKeyword{type_info.name.c_str()}.ignore(pos, expected) + || (!type_info.alias.empty() && ParserKeyword{type_info.alias.c_str()}.ignore(pos, expected))) + { + type = i; + plural = false; + return true; + } + } + + for (auto i : ext::range(EntityType::MAX)) + { + const auto & type_info = EntityTypeInfo::get(i); + if (ParserKeyword{type_info.plural_name.c_str()}.ignore(pos, expected) + || (!type_info.plural_alias.empty() && ParserKeyword{type_info.plural_alias.c_str()}.ignore(pos, expected))) + { + type = i; + plural = true; + return true; + } + } + + return false; + } + + bool parseOnDBAndTableName(IParserBase::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table) + { + return 
IParserBase::wrapParseImpl(pos, [&] + { + return ParserKeyword{"ON"}.ignore(pos, expected) + && parseDatabaseAndTableNameOrAsterisks(pos, expected, database, any_database, table, any_table); + }); + } +} bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -20,65 +68,105 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe if (!ParserKeyword{"SHOW CREATE"}.ignore(pos, expected)) return false; - std::optional type; - for (auto type_i : ext::range(EntityType::MAX)) - { - const auto & type_info = EntityTypeInfo::get(type_i); - if (ParserKeyword{type_info.name.c_str()}.ignore(pos, expected) - || (!type_info.alias.empty() && ParserKeyword{type_info.alias.c_str()}.ignore(pos, expected))) - { - type = type_i; - } - } - if (!type) + EntityType type; + bool plural; + if (!parseEntityType(pos, expected, type, plural)) return false; Strings names; + std::shared_ptr row_policy_names; + bool all = false; bool current_quota = false; bool current_user = false; - std::shared_ptr row_policy_names; + String short_name; + std::optional> database_and_table_name; - if (type == EntityType::USER) + switch (type) { - if (parseCurrentUserTag(pos, expected)) - current_user = true; - else if (!parseUserNames(pos, expected, names)) - return false; - } - else if (type == EntityType::ROLE) - { - if (!parseRoleNames(pos, expected, names)) - return false; - } - else if (type == EntityType::ROW_POLICY) - { - ASTPtr ast; - if (!ParserRowPolicyNames{}.parse(pos, ast, expected)) - return false; - row_policy_names = typeid_cast>(ast); - } - else if (type == EntityType::QUOTA) - { - if (!parseIdentifiersOrStringLiterals(pos, expected, names)) + case EntityType::USER: { - /// SHOW CREATE QUOTA - current_quota = true; + if (parseCurrentUserTag(pos, expected)) + current_user = true; + else if (parseUserNames(pos, expected, names)) + { + } + else if (plural) + all = true; + else + current_user = true; + break; } - } - else if (type == EntityType::SETTINGS_PROFILE) - { - if (!parseIdentifiersOrStringLiterals(pos, expected, names)) - return false; + case EntityType::ROLE: + { + if (parseRoleNames(pos, expected, names)) + { + } + else if (plural) + all = true; + else + return false; + break; + } + case EntityType::ROW_POLICY: + { + ASTPtr ast; + String database, table_name; + bool any_database, any_table; + if (ParserRowPolicyNames{}.parse(pos, ast, expected)) + row_policy_names = typeid_cast>(ast); + else if (parseOnDBAndTableName(pos, expected, database, any_database, table_name, any_table)) + { + if (any_database) + all = true; + else + database_and_table_name.emplace(database, table_name); + } + else if (parseIdentifierOrStringLiteral(pos, expected, short_name)) + { + } + else if (plural) + all = true; + else + return false; + break; + } + case EntityType::SETTINGS_PROFILE: + { + if (parseIdentifiersOrStringLiterals(pos, expected, names)) + { + } + else if (plural) + all = true; + else + return false; + break; + } + case EntityType::QUOTA: + { + if (parseIdentifiersOrStringLiterals(pos, expected, names)) + { + } + else if (plural) + all = true; + else + current_quota = true; + break; + } + case EntityType::MAX: + throw Exception("Type " + toString(type) + " is not implemented in SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); } auto query = std::make_shared(); node = query; - query->type = *type; + query->type = type; query->names = std::move(names); query->current_quota = current_quota; query->current_user = current_user; query->row_policy_names = 
std::move(row_policy_names);
+    query->all = all;
+    query->short_name = std::move(short_name);
+    query->database_and_table_name = std::move(database_and_table_name);
 
     return true;
 }
diff --git a/src/Parsers/ParserShowCreateAccessEntityQuery.h b/src/Parsers/ParserShowCreateAccessEntityQuery.h
index 025949d7fca..1a3148727e0 100644
--- a/src/Parsers/ParserShowCreateAccessEntityQuery.h
+++ b/src/Parsers/ParserShowCreateAccessEntityQuery.h
@@ -6,7 +6,16 @@ namespace DB
 {
 /** Parses queries like
+  * SHOW CREATE USER [name | CURRENT_USER]
+  * SHOW CREATE USERS [name [, name2 ...]]
+  * SHOW CREATE ROLE name
+  * SHOW CREATE ROLES [name [, name2 ...]]
+  * SHOW CREATE [SETTINGS] PROFILE name
+  * SHOW CREATE [SETTINGS] PROFILES [name [, name2 ...]]
+  * SHOW CREATE [ROW] POLICY name ON [database.]table
+  * SHOW CREATE [ROW] POLICIES [name ON [database.]table [, name2 ON database2.table2 ...] | name | ON database.table]
  * SHOW CREATE QUOTA [name]
+  * SHOW CREATE QUOTAS [name [, name2 ...]]
  */
 class ParserShowCreateAccessEntityQuery : public IParserBase
 {
diff --git a/src/Parsers/ParserShowGrantsQuery.cpp b/src/Parsers/ParserShowGrantsQuery.cpp
index 993346d2eeb..d25527754be 100644
--- a/src/Parsers/ParserShowGrantsQuery.cpp
+++ b/src/Parsers/ParserShowGrantsQuery.cpp
@@ -1,4 +1,6 @@
 #include
+#include
+#include
 #include
 #include
 #include
@@ -11,25 +13,28 @@ bool ParserShowGrantsQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     if (!ParserKeyword{"SHOW GRANTS"}.ignore(pos, expected))
         return false;
 
-    String name;
-    bool current_user = false;
+    std::shared_ptr<ASTRolesOrUsersSet> for_roles;
 
     if (ParserKeyword{"FOR"}.ignore(pos, expected))
     {
-        if (parseCurrentUserTag(pos, expected))
-            current_user = true;
-        else if (!parseUserName(pos, expected, name))
+        ASTPtr for_roles_ast;
+        ParserRolesOrUsersSet for_roles_p;
+        for_roles_p.allowUserNames().allowRoleNames().allowAll().allowCurrentUser();
+        if (!for_roles_p.parse(pos, for_roles_ast, expected))
             return false;
+
+        for_roles = typeid_cast<std::shared_ptr<ASTRolesOrUsersSet>>(for_roles_ast);
     }
     else
-        current_user = true;
+    {
+        for_roles = std::make_shared<ASTRolesOrUsersSet>();
+        for_roles->current_user = true;
+    }
 
     auto query = std::make_shared<ASTShowGrantsQuery>();
+    query->for_roles = std::move(for_roles);
     node = query;
 
-    query->name = name;
-    query->current_user = current_user;
-
     return true;
 }
 }
diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp
index 018fee10731..13429df5b4d 100644
--- a/src/Parsers/parseDatabaseAndTableName.cpp
+++ b/src/Parsers/parseDatabaseAndTableName.cpp
@@ -41,4 +41,74 @@ bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String &
     return true;
 }
 
+
+bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table)
+{
+    return IParserBase::wrapParseImpl(pos, [&]
+    {
+        if (ParserToken{TokenType::Asterisk}.ignore(pos, expected))
+        {
+            auto pos_before_dot = pos;
+            if (ParserToken{TokenType::Dot}.ignore(pos, expected)
+                && ParserToken{TokenType::Asterisk}.ignore(pos, expected))
+            {
+                /// *.*
+                any_database = true;
+                database.clear();
+                any_table = true;
+                table.clear();
+                return true;
+            }
+
+            /// *
+            pos = pos_before_dot;
+            any_database = false;
+            database.clear();
+            any_table = true;
+            table.clear();
+            return true;
+        }
+
+        ASTPtr ast;
+        ParserIdentifier identifier_parser;
+        if (identifier_parser.parse(pos, ast, expected))
+        {
+            String first_identifier = getIdentifierName(ast);
+            auto pos_before_dot = pos;
+
+            if (ParserToken{TokenType::Dot}.ignore(pos, expected))
+            {
if (ParserToken{TokenType::Asterisk}.ignore(pos, expected)) + { + /// db.* + any_database = false; + database = std::move(first_identifier); + any_table = true; + table.clear(); + return true; + } + else if (identifier_parser.parse(pos, ast, expected)) + { + /// db.table + any_database = false; + database = std::move(first_identifier); + any_table = false; + table = getIdentifierName(ast); + return true; + } + } + + /// table + pos = pos_before_dot; + any_database = false; + database.clear(); + any_table = false; + table = std::move(first_identifier); + return true; + } + + return false; + }); +} + } diff --git a/src/Parsers/parseDatabaseAndTableName.h b/src/Parsers/parseDatabaseAndTableName.h index aae78a2da20..e4699c8ad91 100644 --- a/src/Parsers/parseDatabaseAndTableName.h +++ b/src/Parsers/parseDatabaseAndTableName.h @@ -4,7 +4,10 @@ namespace DB { -/// Parses [db].name +/// Parses [db.]name bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & database_str, String & table_str); +/// Parses [db.]name or [db.]* or [*.]* +bool parseDatabaseAndTableNameOrAsterisks(IParser::Pos & pos, Expected & expected, String & database, bool & any_database, String & table, bool & any_table); + } diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 32bf0af6bb6..8cfc1c2c9dd 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -69,8 +69,16 @@ def test_introspection(): instance.query('GRANT CREATE ON *.* TO B WITH GRANT OPTION') assert instance.query("SHOW USERS") == TSV([ "A", "B", "default" ]) + assert instance.query("SHOW CREATE USERS A") == TSV([ "CREATE USER A" ]) + assert instance.query("SHOW CREATE USERS B") == TSV([ "CREATE USER B" ]) + assert instance.query("SHOW CREATE USERS A,B") == TSV([ "CREATE USER A", "CREATE USER B" ]) + assert instance.query("SHOW CREATE USERS") == TSV([ "CREATE USER A", "CREATE USER B", "CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE default" ]) + assert instance.query("SHOW GRANTS FOR A") == TSV([ "GRANT SELECT ON test.table TO A" ]) assert instance.query("SHOW GRANTS FOR B") == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + assert instance.query("SHOW GRANTS FOR A,B") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + assert instance.query("SHOW GRANTS FOR B,A") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + assert instance.query("SHOW GRANTS FOR ALL") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT ALL ON *.* TO default WITH GRANT OPTION" ]) assert instance.query("SHOW GRANTS", user='A') == TSV([ "GRANT SELECT ON test.table TO A" ]) assert instance.query("SHOW GRANTS", user='B') == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index ab8077030e6..b9a6d80e54e 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -167,7 +167,6 @@ def test_add_remove_quota(): # Add quota. 
copy_quota_xml('two_quotas.xml') - print system_quotas() assert system_quotas() == [["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"], ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']", "[3600,2629746]", 0, "[]", "[]"]] assert system_quota_limits() == [["myQuota", 31556952, 0, 1000, "\N", "\N", "\N", 1000, "\N", "\N"], @@ -208,6 +207,7 @@ def test_reload_users_xml_by_timer(): def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" assert instance.query("SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + assert instance.query("SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) @@ -228,6 +228,8 @@ def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\nmyQuota2\n" assert instance.query("SHOW CREATE QUOTA myQuota") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" assert instance.query("SHOW CREATE QUOTA myQuota2") == "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" + assert instance.query("SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n"\ + "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 92e9f00d326..6bae3baafee 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -109,6 +109,11 @@ def test_introspection(): instance.query('REVOKE SELECT(x) ON test.table FROM R2') assert instance.query("SHOW ROLES") == TSV([ "R1", "R2" ]) + assert instance.query("SHOW CREATE ROLE R1") == TSV([ "CREATE ROLE R1" ]) + assert instance.query("SHOW CREATE ROLE R2") == TSV([ "CREATE ROLE R2" ]) + assert instance.query("SHOW CREATE ROLES R1, R2") == TSV([ "CREATE ROLE R1", "CREATE ROLE R2" ]) + assert instance.query("SHOW CREATE ROLES") == TSV([ "CREATE ROLE R1", "CREATE ROLE R2" ]) + assert instance.query("SHOW GRANTS FOR A") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT R1 TO A" ]) assert instance.query("SHOW GRANTS FOR B") == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT R2 TO B WITH ADMIN OPTION" ]) assert instance.query("SHOW GRANTS FOR R1") == "" diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index 71496c6dbf2..d5fbae654a8 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -225,13 +225,21 @@ def test_introspection(): def test_dcl_introspection(): assert 
node.query("SHOW POLICIES") == TSV(["another ON mydb.filtered_table1", "another ON mydb.filtered_table2", "another ON mydb.filtered_table3", "another ON mydb.local", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", "default ON mydb.local"]) - assert node.query("SHOW POLICIES ON mydb.filtered_table1") == TSV(["another", "default"]) - assert node.query("SHOW POLICIES ON mydb.local") == TSV(["another", "default"]) + + assert node.query("SHOW POLICIES ON mydb.filtered_table1") == TSV([ "another", "default" ]) + assert node.query("SHOW POLICIES ON mydb.local") == TSV([ "another", "default" ]) + assert node.query("SHOW POLICIES ON mydb.*") == TSV([ "another ON mydb.filtered_table1", "another ON mydb.filtered_table2", "another ON mydb.filtered_table3", "another ON mydb.local", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", "default ON mydb.local" ]) + assert node.query("SHOW POLICIES default") == TSV([ "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3", "default ON mydb.local" ]) assert node.query("SHOW CREATE POLICY default ON mydb.filtered_table1") == "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default\n" assert node.query("SHOW CREATE POLICY default ON mydb.filtered_table2") == "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default\n" assert node.query("SHOW CREATE POLICY default ON mydb.filtered_table3") == "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n" assert node.query("SHOW CREATE POLICY default ON mydb.local") == "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" + + assert node.query("SHOW CREATE POLICY default") == TSV([ "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default" ]) + assert node.query("SHOW CREATE POLICIES ON mydb.filtered_table1") == TSV([ "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default" ]) + assert node.query("SHOW CREATE POLICIES ON mydb.*") == TSV([ "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default" ]) + assert node.query("SHOW CREATE POLICIES") == TSV([ "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY 
another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default" ]) copy_policy_xml('all_rows.xml') assert node.query("SHOW POLICIES") == TSV(["another ON mydb.filtered_table1", "another ON mydb.filtered_table2", "another ON mydb.filtered_table3", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3"]) diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 9c8b116e6d2..939c7d93e2d 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -151,6 +151,11 @@ def test_show_profiles(): instance.query("CREATE SETTINGS PROFILE xyz") assert instance.query("SHOW SETTINGS PROFILES") == "default\nreadonly\nxyz\n" assert instance.query("SHOW PROFILES") == "default\nreadonly\nxyz\n" + assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n" + assert instance.query("SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" + assert instance.query("SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n"\ + "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"\ + "CREATE SETTINGS PROFILE xyz\n" def test_allow_introspection(): From a40885fa8469dc2aba457f3beb9e88710308942c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 11 Jun 2020 02:08:37 +0300 Subject: [PATCH 0757/2229] Add new command SHOW ACCESS. 
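
The new command returns everything the server knows about access control in a
single String column: first the CREATE queries for all access entities (users,
roles, settings profiles, row policies, quotas), then the GRANT queries for
every user and role, sorted by entity type and name. With the entities from
the integration tests below, the result contains lines such as:

    CREATE USER A
    CREATE ROLE R1
    GRANT SELECT ON test.table TO A
    GRANT R1 TO A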
--- src/Interpreters/InterpreterFactory.cpp | 6 ++ .../InterpreterShowAccessEntitiesQuery.cpp | 2 +- .../InterpreterShowAccessEntitiesQuery.h | 5 +- .../InterpreterShowAccessQuery.cpp | 89 +++++++++++++++++++ src/Interpreters/InterpreterShowAccessQuery.h | 36 ++++++++ ...InterpreterShowCreateAccessEntityQuery.cpp | 2 +- .../InterpreterShowCreateAccessEntityQuery.h | 5 +- src/Interpreters/InterpreterShowGrantsQuery.h | 3 + src/Interpreters/ya.make | 1 + src/Parsers/ASTShowAccessQuery.h | 17 ++++ src/Parsers/ParserQueryWithOutput.cpp | 3 + src/Parsers/ParserShowAccessQuery.h | 32 +++++++ .../test_create_user_and_login/test.py | 11 ++- tests/integration/test_quota/test.py | 3 + tests/integration/test_role/test.py | 10 ++- tests/integration/test_row_policy/test.py | 10 +++ .../integration/test_settings_profile/test.py | 6 ++ 17 files changed, 231 insertions(+), 10 deletions(-) create mode 100644 src/Interpreters/InterpreterShowAccessQuery.cpp create mode 100644 src/Interpreters/InterpreterShowAccessQuery.h create mode 100644 src/Parsers/ASTShowAccessQuery.h create mode 100644 src/Parsers/ParserShowAccessQuery.h diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index ccaa8fa4067..6d631c37428 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -231,6 +233,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); diff --git a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp index 379580fe58a..009b9c580d3 100644 --- a/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessEntitiesQuery.cpp @@ -18,7 +18,7 @@ using EntityType = IAccessEntity::Type; InterpreterShowAccessEntitiesQuery::InterpreterShowAccessEntitiesQuery(const ASTPtr & query_ptr_, Context & context_) - : query_ptr(query_ptr_), context(context_), ignore_quota(query_ptr->as().type == EntityType::QUOTA) + : query_ptr(query_ptr_), context(context_) { } diff --git a/src/Interpreters/InterpreterShowAccessEntitiesQuery.h b/src/Interpreters/InterpreterShowAccessEntitiesQuery.h index 5e20bdfa231..8fcd70919ba 100644 --- a/src/Interpreters/InterpreterShowAccessEntitiesQuery.h +++ b/src/Interpreters/InterpreterShowAccessEntitiesQuery.h @@ -15,15 +15,14 @@ public: BlockIO execute() override; - bool ignoreQuota() const override { return ignore_quota; } - bool ignoreLimits() const override { return ignore_quota; } + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } private: String getRewrittenQuery() const; ASTPtr query_ptr; Context & context; - bool ignore_quota = false; }; } diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp new file mode 100644 index 00000000000..c9541b4f5bf --- /dev/null +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -0,0 +1,89 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +using EntityType = IAccessEntity::Type; + + +BlockIO 
InterpreterShowAccessQuery::execute()
+{
+    BlockIO res;
+    res.in = executeImpl();
+    return res;
+}
+
+
+BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const
+{
+    /// Build the list of CREATE and GRANT queries.
+    ASTs queries = getCreateAndGrantQueries();
+
+    /// Build the result column.
+    MutableColumnPtr column = ColumnString::create();
+    std::stringstream ss;
+    for (const auto & query : queries)
+    {
+        ss.str("");
+        formatAST(*query, ss, false, true);
+        column->insert(ss.str());
+    }
+
+    String desc = "ACCESS";
+    return std::make_shared<OneBlockInputStream>(Block{{std::move(column), std::make_shared<DataTypeString>(), desc}});
+}
+
+
+std::vector<AccessEntityPtr> InterpreterShowAccessQuery::getEntities() const
+{
+    const auto & access_control = context.getAccessControlManager();
+    context.checkAccess(AccessType::SHOW_ACCESS);
+
+    std::vector<AccessEntityPtr> entities;
+    for (auto type : ext::range(EntityType::MAX))
+    {
+        auto ids = access_control.findAll(type);
+        for (const auto & id : ids)
+        {
+            if (auto entity = access_control.tryRead(id))
+                entities.push_back(entity);
+        }
+    }
+
+    boost::range::sort(entities, IAccessEntity::LessByTypeAndName{});
+    return entities;
+}
+
+
+ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const
+{
+    auto entities = getEntities();
+    const auto & access_control = context.getAccessControlManager();
+
+    ASTs create_queries, grant_queries;
+    for (const auto & entity : entities)
+    {
+        create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control));
+        if (entity->isTypeOf(EntityType::USER) || entity->isTypeOf(EntityType::ROLE))
+            boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control));
+    }
+
+    ASTs result = std::move(create_queries);
+    boost::range::push_back(result, std::move(grant_queries));
+    return result;
+}
+
+}
diff --git a/src/Interpreters/InterpreterShowAccessQuery.h b/src/Interpreters/InterpreterShowAccessQuery.h
new file mode 100644
index 00000000000..eb548c56241
--- /dev/null
+++ b/src/Interpreters/InterpreterShowAccessQuery.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include
+#include
+
+
+namespace DB
+{
+class Context;
+struct IAccessEntity;
+using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
+
+/** Return all queries for creating access entities and grants.
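+  *
+  * The result is a single String column: first the CREATE queries for all
+  * access entities (sorted by type and name), then the GRANT queries for
+  * every user and role. Illustrative lines (entity names as in the
+  * integration tests of this patch):
+  *     CREATE USER A
+  *     GRANT SELECT ON test.table TO A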
+  */
+class InterpreterShowAccessQuery : public IInterpreter
+{
+public:
+    InterpreterShowAccessQuery(const ASTPtr & query_ptr_, Context & context_)
+        : query_ptr(query_ptr_), context(context_) {}
+
+    BlockIO execute() override;
+
+    bool ignoreQuota() const override { return true; }
+    bool ignoreLimits() const override { return true; }
+
+private:
+    BlockInputStreamPtr executeImpl() const;
+    ASTs getCreateAndGrantQueries() const;
+    std::vector<AccessEntityPtr> getEntities() const;
+
+    ASTPtr query_ptr;
+    Context & context;
+};
+
+
+}
diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp
index 55c5d961ad8..8d5f27e116d 100644
--- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp
+++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp
@@ -218,7 +218,7 @@ namespace
 
 InterpreterShowCreateAccessEntityQuery::InterpreterShowCreateAccessEntityQuery(const ASTPtr & query_ptr_, const Context & context_)
-    : query_ptr(query_ptr_), context(context_), ignore_quota(query_ptr->as().type == EntityType::QUOTA)
+    : query_ptr(query_ptr_), context(context_)
 {
 }
 
diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h
index 12f427b5eb0..5bacbd42988 100644
--- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h
+++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.h
@@ -23,8 +23,8 @@ public:
 
     BlockIO execute() override;
 
-    bool ignoreQuota() const override { return ignore_quota; }
-    bool ignoreLimits() const override { return ignore_quota; }
+    bool ignoreQuota() const override { return true; }
+    bool ignoreLimits() const override { return true; }
 
     static ASTPtr getCreateQuery(const IAccessEntity & entity, const AccessControlManager & access_control);
     static ASTPtr getAttachQuery(const IAccessEntity & entity);
@@ -37,7 +37,6 @@ private:
 
     ASTPtr query_ptr;
     const Context & context;
-    bool ignore_quota = false;
 };
 
diff --git a/src/Interpreters/InterpreterShowGrantsQuery.h b/src/Interpreters/InterpreterShowGrantsQuery.h
index af6c3065192..f5dbd110fd0 100644
--- a/src/Interpreters/InterpreterShowGrantsQuery.h
+++ b/src/Interpreters/InterpreterShowGrantsQuery.h
@@ -23,6 +23,9 @@ public:
     static ASTs getGrantQueries(const IAccessEntity & user_or_role, const AccessControlManager & access_control);
     static ASTs getAttachGrantQueries(const IAccessEntity & user_or_role);
 
+    bool ignoreQuota() const override { return true; }
+    bool ignoreLimits() const override { return true; }
+
 private:
     BlockInputStreamPtr executeImpl();
     ASTs getGrantQueries() const;
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make
index 29be5d3c216..2fd540ab609 100644
--- a/src/Interpreters/ya.make
+++ b/src/Interpreters/ya.make
@@ -85,6 +85,7 @@ SRCS(
     InterpreterSelectWithUnionQuery.cpp
     InterpreterSetQuery.cpp
     InterpreterSetRoleQuery.cpp
+    InterpreterShowAccessQuery.cpp
    InterpreterShowAccessEntitiesQuery.cpp
     InterpreterShowCreateAccessEntityQuery.cpp
     InterpreterShowCreateQuery.cpp
diff --git a/src/Parsers/ASTShowAccessQuery.h b/src/Parsers/ASTShowAccessQuery.h
new file mode 100644
index 00000000000..dffd7ff2403
--- /dev/null
+++ b/src/Parsers/ASTShowAccessQuery.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include
+
+
+namespace DB
+{
+
+struct ASTShowAccessQueryNames
+{
+    static constexpr auto ID = "ShowAccessQuery";
+    static constexpr auto Query = "SHOW ACCESS";
+};
+
+using ASTShowAccessQuery = ASTQueryWithOutputImpl<ASTShowAccessQueryNames>;
+
+}
diff --git a/src/Parsers/ParserQueryWithOutput.cpp
b/src/Parsers/ParserQueryWithOutput.cpp index a81305c0557..c7a42b5bdad 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ParserOptimizeQuery optimize_p; ParserKillQueryQuery kill_query_p; ParserWatchQuery watch_p; + ParserShowAccessQuery show_access_p; ParserShowAccessEntitiesQuery show_access_entities_p; ParserShowCreateAccessEntityQuery show_create_access_entity_p; ParserShowGrantsQuery show_grants_p; @@ -70,6 +72,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || kill_query_p.parse(pos, query, expected) || optimize_p.parse(pos, query, expected) || watch_p.parse(pos, query, expected) + || show_access_p.parse(pos, query, expected) || show_access_entities_p.parse(pos, query, expected) || show_grants_p.parse(pos, query, expected) || show_privileges_p.parse(pos, query, expected); diff --git a/src/Parsers/ParserShowAccessQuery.h b/src/Parsers/ParserShowAccessQuery.h new file mode 100644 index 00000000000..b6483aa3d43 --- /dev/null +++ b/src/Parsers/ParserShowAccessQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** Query SHOW ACCESS + */ +class ParserShowAccessQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW ACCESS query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + auto query = std::make_shared(); + + if (!ParserKeyword("SHOW ACCESS").ignore(pos, expected)) + return false; + + node = query; + + return true; + } +}; + +} diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 8cfc1c2c9dd..ddb3e57c63b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -79,10 +79,19 @@ def test_introspection(): assert instance.query("SHOW GRANTS FOR A,B") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) assert instance.query("SHOW GRANTS FOR B,A") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) assert instance.query("SHOW GRANTS FOR ALL") == TSV([ "GRANT SELECT ON test.table TO A", "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT ALL ON *.* TO default WITH GRANT OPTION" ]) - + assert instance.query("SHOW GRANTS", user='A') == TSV([ "GRANT SELECT ON test.table TO A" ]) assert instance.query("SHOW GRANTS", user='B') == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION" ]) + expected_access1 = "CREATE USER A\n"\ + "CREATE USER B\n"\ + "CREATE USER default IDENTIFIED WITH plaintext_password SETTINGS PROFILE default" + expected_access2 = "GRANT SELECT ON test.table TO A\n"\ + "GRANT CREATE ON *.* TO B WITH GRANT OPTION\n"\ + "GRANT ALL ON *.* TO default WITH GRANT OPTION\n" + assert expected_access1 in instance.query("SHOW ACCESS") + assert expected_access2 in instance.query("SHOW ACCESS") + assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ TSV([[ "A", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], [ "B", "disk", "no_password", "[]", "['::/0']", "[]", "[]", 
"[]", 1, "[]", "[]" ]]) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index b9a6d80e54e..27aa353b9b1 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -215,6 +215,9 @@ def test_dcl_introspection(): assert re.match("myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) + expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + assert expected_access in instance.query("SHOW ACCESS") + # Add interval. copy_quota_xml('two_intervals.xml') assert instance.query("SHOW QUOTAS") == "myQuota\n" diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 6bae3baafee..e668b461389 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -118,7 +118,7 @@ def test_introspection(): assert instance.query("SHOW GRANTS FOR B") == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT R2 TO B WITH ADMIN OPTION" ]) assert instance.query("SHOW GRANTS FOR R1") == "" assert instance.query("SHOW GRANTS FOR R2") == TSV([ "GRANT SELECT ON test.table TO R2", "REVOKE SELECT(x) ON test.table FROM R2" ]) - + assert instance.query("SHOW GRANTS", user='A') == TSV([ "GRANT SELECT ON test.table TO A", "GRANT R1 TO A" ]) assert instance.query("SHOW GRANTS", user='B') == TSV([ "GRANT CREATE ON *.* TO B WITH GRANT OPTION", "GRANT R2 TO B WITH ADMIN OPTION" ]) assert instance.query("SHOW CURRENT ROLES", user='A') == TSV([[ "R1", 0, 1 ]]) @@ -126,6 +126,14 @@ def test_introspection(): assert instance.query("SHOW ENABLED ROLES", user='A') == TSV([[ "R1", 0, 1, 1 ]]) assert instance.query("SHOW ENABLED ROLES", user='B') == TSV([[ "R2", 1, 1, 1 ]]) + expected_access1 = "CREATE ROLE R1\n"\ + "CREATE ROLE R2\n" + expected_access2 = "GRANT R1 TO A\n" + expected_access3 = "GRANT R2 TO B WITH ADMIN OPTION" + assert expected_access1 in instance.query("SHOW ACCESS") + assert expected_access2 in instance.query("SHOW ACCESS") + assert expected_access3 in instance.query("SHOW ACCESS") + assert instance.query("SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name") ==\ TSV([[ "R1", "disk" ], [ "R2", "disk" ]]) diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index d5fbae654a8..aca3100196b 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -241,6 +241,16 @@ def test_dcl_introspection(): assert node.query("SHOW CREATE POLICIES ON mydb.*") == TSV([ "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default" ]) assert node.query("SHOW CREATE POLICIES") == TSV([ "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON 
mydb.filtered_table2 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another", "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another", "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default", "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default", "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default", "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default" ]) + expected_access = "CREATE ROW POLICY another ON mydb.filtered_table1 FOR SELECT USING 1 TO another\n"\ + "CREATE ROW POLICY another ON mydb.filtered_table2 FOR SELECT USING 1 TO another\n"\ + "CREATE ROW POLICY another ON mydb.filtered_table3 FOR SELECT USING 1 TO another\n"\ + "CREATE ROW POLICY another ON mydb.local FOR SELECT USING a = 1 TO another\n"\ + "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING a = 1 TO default\n"\ + "CREATE ROW POLICY default ON mydb.filtered_table2 FOR SELECT USING ((a + b) < 1) OR ((c - d) > 5) TO default\n"\ + "CREATE ROW POLICY default ON mydb.filtered_table3 FOR SELECT USING c = 1 TO default\n"\ + "CREATE ROW POLICY default ON mydb.local FOR SELECT USING 1 TO default\n" + assert expected_access in node.query("SHOW ACCESS") + copy_policy_xml('all_rows.xml') assert node.query("SHOW POLICIES") == TSV(["another ON mydb.filtered_table1", "another ON mydb.filtered_table2", "another ON mydb.filtered_table3", "default ON mydb.filtered_table1", "default ON mydb.filtered_table2", "default ON mydb.filtered_table3"]) assert node.query("SHOW CREATE POLICY default ON mydb.filtered_table1") == "CREATE ROW POLICY default ON mydb.filtered_table1 FOR SELECT USING 1 TO default\n" diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 939c7d93e2d..d722717f2a7 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -151,11 +151,17 @@ def test_show_profiles(): instance.query("CREATE SETTINGS PROFILE xyz") assert instance.query("SHOW SETTINGS PROFILES") == "default\nreadonly\nxyz\n" assert instance.query("SHOW PROFILES") == "default\nreadonly\nxyz\n" + assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n" assert instance.query("SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" assert instance.query("SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n"\ "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"\ "CREATE SETTINGS PROFILE xyz\n" + + expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n"\ + "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n"\ + "CREATE SETTINGS PROFILE xyz\n" + assert expected_access in instance.query("SHOW ACCESS") def test_allow_introspection(): From 36ba0192df07424d5c5b7c1ca8a197648238c38a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 22:08:58 +0300 Subject: [PATCH 0758/2229] Metadata in read and write methods of IStorage --- src/Core/ExternalTable.cpp | 2 +- .../CreatingSetsBlockInputStream.cpp | 2 +- .../PushingToViewsBlockOutputStream.cpp | 2 +- 
src/DataStreams/RemoteQueryExecutor.cpp | 7 +++++-- src/Interpreters/InterpreterInsertQuery.cpp | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Interpreters/InterpreterSelectQuery.h | 1 + .../Transforms/CreatingSetsTransform.cpp | 2 +- src/Server/TCPHandler.cpp | 3 ++- src/Storages/IStorage.h | 3 +++ src/Storages/Kafka/StorageKafka.cpp | 3 ++- src/Storages/Kafka/StorageKafka.h | 2 ++ src/Storages/LiveView/StorageBlocks.h | 1 + src/Storages/LiveView/StorageLiveView.cpp | 1 + src/Storages/LiveView/StorageLiveView.h | 1 + .../MergeTree/StorageFromMergeTreeDataPart.h | 1 + src/Storages/StorageBuffer.cpp | 13 ++++++++++--- src/Storages/StorageBuffer.h | 3 ++- src/Storages/StorageDictionary.cpp | 1 + src/Storages/StorageDictionary.h | 4 +++- src/Storages/StorageDistributed.cpp | 3 ++- src/Storages/StorageDistributed.h | 3 ++- src/Storages/StorageFile.cpp | 2 ++ src/Storages/StorageFile.h | 2 ++ src/Storages/StorageGenerateRandom.cpp | 1 + src/Storages/StorageGenerateRandom.h | 1 + src/Storages/StorageHDFS.cpp | 5 +++-- src/Storages/StorageHDFS.h | 6 ++++-- src/Storages/StorageInput.cpp | 4 +++- src/Storages/StorageInput.h | 1 + src/Storages/StorageJoin.cpp | 1 + src/Storages/StorageJoin.h | 1 + src/Storages/StorageLog.cpp | 4 ++-- src/Storages/StorageLog.h | 3 ++- src/Storages/StorageMaterializedView.cpp | 11 ++++++++--- src/Storages/StorageMaterializedView.h | 3 ++- src/Storages/StorageMemory.cpp | 4 ++-- src/Storages/StorageMemory.h | 3 ++- src/Storages/StorageMerge.cpp | 4 +++- src/Storages/StorageMerge.h | 1 + src/Storages/StorageMergeTree.cpp | 3 ++- src/Storages/StorageMergeTree.h | 3 ++- src/Storages/StorageMySQL.cpp | 4 ++-- src/Storages/StorageMySQL.h | 3 ++- src/Storages/StorageNull.h | 3 ++- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.h | 3 ++- src/Storages/StorageS3.cpp | 3 ++- src/Storages/StorageS3.h | 3 ++- src/Storages/StorageSet.cpp | 2 +- src/Storages/StorageSet.h | 2 +- src/Storages/StorageStripeLog.cpp | 4 ++-- src/Storages/StorageStripeLog.h | 3 ++- src/Storages/StorageTinyLog.cpp | 4 ++-- src/Storages/StorageTinyLog.h | 3 ++- src/Storages/StorageURL.cpp | 6 ++++-- src/Storages/StorageURL.h | 3 ++- src/Storages/StorageValues.cpp | 1 + src/Storages/StorageValues.h | 1 + src/Storages/StorageView.cpp | 1 + src/Storages/StorageView.h | 1 + src/Storages/StorageXDBC.cpp | 8 +++++--- src/Storages/StorageXDBC.h | 17 +++++++++-------- src/Storages/System/IStorageSystemOneBlock.h | 4 +++- src/Storages/System/StorageSystemColumns.cpp | 1 + src/Storages/System/StorageSystemColumns.h | 1 + .../System/StorageSystemDetachedParts.cpp | 13 +++++++------ src/Storages/System/StorageSystemDisks.cpp | 1 + src/Storages/System/StorageSystemDisks.h | 1 + src/Storages/System/StorageSystemNumbers.cpp | 1 + src/Storages/System/StorageSystemNumbers.h | 1 + src/Storages/System/StorageSystemOne.cpp | 1 + src/Storages/System/StorageSystemOne.h | 1 + src/Storages/System/StorageSystemPartsBase.cpp | 13 +++++++------ src/Storages/System/StorageSystemPartsBase.h | 13 +++++++------ src/Storages/System/StorageSystemReplicas.cpp | 1 + src/Storages/System/StorageSystemReplicas.h | 1 + .../System/StorageSystemStoragePolicies.cpp | 13 +++++++------ .../System/StorageSystemStoragePolicies.h | 13 +++++++------ src/Storages/System/StorageSystemTables.cpp | 1 + src/Storages/System/StorageSystemTables.h | 1 + src/Storages/System/StorageSystemZeros.cpp | 13 +++++++------ src/Storages/System/StorageSystemZeros.h | 13 +++++++------ 
src/Storages/tests/gtest_storage_log.cpp | 6 ++++-- 84 files changed, 208 insertions(+), 111 deletions(-) diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 5ec6980dbfa..3639a109b42 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -167,7 +167,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, auto temporary_table = TemporaryTableHolder(context, ColumnsDescription{columns}, {}); auto storage = temporary_table.getTable(); context.addExternalTable(data->table_name, std::move(temporary_table)); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context); + BlockOutputStreamPtr output = storage->write(ASTPtr(), storage->getInMemoryMetadataPtr(), context); /// Write data auto sink = std::make_shared(std::move(output)); diff --git a/src/DataStreams/CreatingSetsBlockInputStream.cpp b/src/DataStreams/CreatingSetsBlockInputStream.cpp index e40b5979b05..2a2275a4e89 100644 --- a/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -101,7 +101,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) BlockOutputStreamPtr table_out; if (subquery.table) - table_out = subquery.table->write({}, context); + table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context); bool done_with_set = !subquery.set; bool done_with_join = !subquery.join; diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 2c2e6972158..fa213b054df 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -107,7 +107,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( /// Do not push to destination table if the flag is set if (!no_destination) { - output = storage->write(query_ptr, context); + output = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), context); replicated_output = dynamic_cast(output.get()); } } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index cf3b2c4abcd..071cb6e9aba 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -319,12 +319,15 @@ void RemoteQueryExecutor::sendExternalTables() for (const auto & table : external_tables) { StoragePtr cur = table.second; + auto metadata_snapshot = cur->getInMemoryMetadataPtr(); QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context); Pipes pipes; - pipes = cur->read(cur->getColumns().getNamesOfPhysical(), {}, context, - read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); + pipes = cur->read( + cur->getColumns().getNamesOfPhysical(), + metadata_snapshot, {}, context, + read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); auto data = std::make_unique(); data->table_name = table.first; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 7deed262eda..1841c82b710 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -117,6 +117,7 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); auto table_lock = table->lockStructureForShare( true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table); if (!query.table_function) @@ -226,7 +227,7 @@ 
BlockIO InterpreterInsertQuery::execute() /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. if (table->noPushingToViews() && !no_destination) - out = table->write(query_ptr, context); + out = table->write(query_ptr, metadata_snapshot, context); else out = std::make_shared(table, context, query_ptr, no_destination); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ac17a3042d8..c22296cfb26 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -255,6 +255,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( table_lock = storage->lockStructureForShare( false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); table_id = storage->getStorageID(); + metadata_snapshot = storage->getInMemoryMetadataPtr(); } if (has_input || !joined_tables.resolveTables()) @@ -1293,7 +1294,6 @@ void InterpreterSelectQuery::executeFetchColumns( else if (storage) { /// Table. - if (max_streams == 0) throw Exception("Logical error: zero number of streams requested", ErrorCodes::LOGICAL_ERROR); @@ -1324,7 +1324,7 @@ void InterpreterSelectQuery::executeFetchColumns( query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); } - Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams); + Pipes pipes = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams); if (pipes.empty()) { diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 8ed775f60ae..8f7237ffd7e 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -184,6 +184,7 @@ private: StoragePtr storage; StorageID table_id = StorageID::createEmpty(); /// Will be initialized if storage is not nullptr TableStructureReadLockHolder table_lock; + StorageMetadataPtr metadata_snapshot; /// Used when we read from prepared input, not table or subquery. BlockInputStreamPtr input; diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index af8fa4097df..c0e34d9fbd4 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -66,7 +66,7 @@ void CreatingSetsTransform::startSubquery(SubqueryForSet & subquery) elapsed_nanoseconds = 0; if (subquery.table) - table_out = subquery.table->write({}, context); + table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context); done_with_set = !subquery.set; done_with_join = !subquery.join; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 7e17604c4c7..a01cc4fa0aa 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -952,8 +952,9 @@ bool TCPHandler::receiveData(bool scalar) storage = temporary_table.getTable(); query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table)); } + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); /// The data will be written directly to the table. 
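+        /// Note: the snapshot is taken once, next to where the table is resolved,
+        /// so the write below works with one consistent view of the table structure.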
- state.io.out = storage->write(ASTPtr(), *query_context); + state.io.out = storage->write(ASTPtr(), metadata_snapshot, *query_context); } if (state.need_receive_data_for_input) state.block_for_input = block; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 0f48f3bf63c..28ad7b0ea8b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -155,6 +155,7 @@ public: /// thread-unsafe part. lockStructure must be acquired bool hasSelectQuery() const; StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } + StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. @@ -292,6 +293,7 @@ public: */ virtual Pipes read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -309,6 +311,7 @@ public: */ virtual BlockOutputStreamPtr write( const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 2109afed932..190397bc675 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -201,6 +201,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) Pipes StorageKafka::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -231,7 +232,7 @@ Pipes StorageKafka::read( } -BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const Context & context) +BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto modified_context = std::make_shared(context); modified_context->applySettingsChanges(settings_adjustments); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index be3f89687fe..6f479ba2089 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -39,6 +39,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -47,6 +48,7 @@ public: BlockOutputStreamPtr write( const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void pushReadBuffer(ConsumerBufferPtr buf); diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 78d60163d5e..56fd0c620c2 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -37,6 +37,7 @@ public: Pipes read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index ade2d1c967d..cb4964f3c55 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ 
-520,6 +520,7 @@ void StorageLiveView::refresh(const Context & context) Pipes StorageLiveView::read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 458e74eb506..13386c7a4e6 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -126,6 +126,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 342a89c38ea..826af505b12 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -21,6 +21,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 007625790f4..3e419921115 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -145,6 +145,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context Pipes StorageBuffer::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -157,6 +158,7 @@ Pipes StorageBuffer::read( { auto destination = DatabaseCatalog::instance().getTable(destination_id, context); + auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); @@ -177,7 +179,9 @@ Pipes StorageBuffer::read( query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. 
- pipes_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + pipes_from_dst = destination->read( + column_names, destination_metadata_snapshot, query_info, + context, processed_stage, max_block_size, num_streams); } else { @@ -210,7 +214,10 @@ Pipes StorageBuffer::read( } else { - pipes_from_dst = destination->read(columns_intersection, query_info, context, processed_stage, max_block_size, num_streams); + pipes_from_dst = destination->read( + columns_intersection, destination_metadata_snapshot, query_info, + context, processed_stage, max_block_size, num_streams); + for (auto & pipe : pipes_from_dst) { pipe.addSimpleTransform(std::make_shared( @@ -425,7 +432,7 @@ private: }; -BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 02fd35136bf..7cd73dc556c 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -57,13 +57,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void startup() override; /// Flush all buffers into the subordinate table and stop background thread. 
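Every hunk in this stretch repeats one calling convention: take a single immutable StorageMetadataPtr via getInMemoryMetadataPtr(), then thread that same snapshot through IStorage::read or IStorage::write, so that a concurrent ALTER (which swaps the shared pointer held inside IStorage) cannot change the observed table structure mid-query. Below is a minimal sketch of that convention, assuming the ClickHouse types visible in the surrounding hunks (StoragePtr, Names, SelectQueryInfo, Context, Pipes); the helper name and the literal arguments are hypothetical, and the fragment is illustrative only, not part of any patch in this series.

Pipes readWithConsistentStructure(
    const StoragePtr & storage,
    const Names & column_names,
    const SelectQueryInfo & query_info,
    const Context & context)
{
    /// One shared_ptr copy: a concurrent ALTER replaces the pointer held
    /// inside IStorage, but this snapshot stays alive and unchanged for
    /// the whole duration of the read.
    auto metadata_snapshot = storage->getInMemoryMetadataPtr();

    /// Pass the same snapshot to read(), matching the new signature
    /// read(column_names, metadata_snapshot, query_info, context,
    ///      processed_stage, max_block_size, num_streams).
    return storage->read(
        column_names, metadata_snapshot, query_info, context,
        QueryProcessingStage::FetchColumns,
        /*max_block_size=*/ 65536, /*num_streams=*/ 1);
}

StorageBuffer::read (destination_metadata_snapshot) and StorageMaterializedView::write in the hunks above are real call sites of exactly this pattern.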
diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 4348973ec60..25126ad951d 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -113,6 +113,7 @@ void StorageDictionary::checkTableCanBeDropped() const Pipes StorageDictionary::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 7bb6fc22480..6175902381b 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -16,7 +16,9 @@ public: void checkTableCanBeDropped() const override; - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bf5f729ed19..719811bbc6b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -464,6 +464,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con Pipes StorageDistributed::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -510,7 +511,7 @@ Pipes StorageDistributed::read( } -BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & context) +BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto cluster = getCluster(); const auto & settings = context.getSettingsRef(); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 63021e0a169..3f148cfff01 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -70,13 +70,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /// Removes temporary data in local filesystem. 
void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f94a7b71e56..07df2b4ec8a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -377,6 +377,7 @@ private: Pipes StorageFile::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -479,6 +480,7 @@ private: BlockOutputStreamPtr StorageFile::write( const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { if (format_name == "Distributed") diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index fa5034d946c..65589d245b9 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -26,6 +26,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -34,6 +35,7 @@ public: BlockOutputStreamPtr write( const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) override; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index f69478a4bdd..f1d97a4e5c4 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -429,6 +429,7 @@ void registerStorageGenerateRandom(StorageFactory & factory) Pipes StorageGenerateRandom::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index 955b8bd671d..0d068eb951e 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -17,6 +17,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 082e40f6d6d..77afc4c47c8 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -264,9 +264,10 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c Pipes StorageHDFS::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context_, - QueryProcessingStage::Enum /*processed_stage*/, + QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { @@ -300,7 +301,7 @@ Pipes StorageHDFS::read( return pipes; } -BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(uri, format_name, diff --git a/src/Storages/StorageHDFS.h b/src/Storages/StorageHDFS.h index 5b250247b84..62425cc518f 100644 --- a/src/Storages/StorageHDFS.h +++ b/src/Storages/StorageHDFS.h @@ -19,14 +19,16 @@ class StorageHDFS final : public 
ext::shared_ptr_helper, public ISt public: String getName() const override { return "HDFS"; } - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 92287051bf3..4117a6b3a37 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -58,7 +58,9 @@ void StorageInput::setInputStream(BlockInputStreamPtr input_stream_) } -Pipes StorageInput::read(const Names & /*column_names*/, +Pipes StorageInput::read( + const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index 60bda222c2e..f4425ee8cd5 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -19,6 +19,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 5cceefe907b..7ed4c1c110b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -435,6 +435,7 @@ private: // TODO: multiple stream read and index read Pipes StorageJoin::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index f956abb4d3b..40dbf1b44dd 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -38,6 +38,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 09be868bcfa..a09a99b30e1 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -577,6 +577,7 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount() const Pipes StorageLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -620,8 +621,7 @@ Pipes StorageLog::read( return pipes; } -BlockOutputStreamPtr StorageLog::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { loadMarks(); return std::make_shared(*this); diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 2c2abdb0275..60f885ce45c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -26,13 +26,14 @@ public: Pipes read( const Names & 
column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 638a13612f2..a387eadabe0 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -108,6 +108,7 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(cons Pipes StorageMaterializedView::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -117,11 +118,12 @@ Pipes StorageMaterializedView::read( auto storage = getTargetTable(); auto lock = storage->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); - Pipes pipes = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + Pipes pipes = storage->read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); for (auto & pipe : pipes) pipe.addTableLock(lock); @@ -129,12 +131,15 @@ Pipes StorageMaterializedView::read( return pipes; } -BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const Context & context) +BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto storage = getTargetTable(); auto lock = storage->lockStructureForShare( true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto stream = storage->write(query, context); + + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto stream = storage->write(query, metadata_snapshot, context); + stream->addTableLock(lock); return stream; } diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 480c75aa114..42fe186a068 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -31,7 +31,7 @@ public: return getTargetTable()->mayBenefitFromIndexForIn(left_in_operand, query_context); } - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void drop() override; @@ -63,6 +63,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index bb89bdb5c48..f9c39d78a05 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -87,6 +87,7 @@ StorageMemory::StorageMemory(const StorageID & 
table_id_, ColumnsDescription col Pipes StorageMemory::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -119,8 +120,7 @@ Pipes StorageMemory::read( } -BlockOutputStreamPtr StorageMemory::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 5a79358d76d..3c583533462 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -30,13 +30,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void drop() override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 3685a777bf0..6656e91189c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -129,6 +129,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & Pipes StorageMerge::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -243,6 +244,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer return pipes; } + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, query_info.query); if (processed_stage <= storage_stage) { @@ -250,7 +252,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical())); - pipes = storage->read(real_column_names, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); + pipes = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); } else if (processed_stage > storage_stage) { diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index adf4a40e675..a5d3b8d2667 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -31,6 +31,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7007a544eac..e3f48a05d6e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -177,6 +177,7 @@ StorageMergeTree::~StorageMergeTree() Pipes StorageMergeTree::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, 
QueryProcessingStage::Enum /*processed_stage*/, @@ -196,7 +197,7 @@ std::optional StorageMergeTree::totalBytes() const return getTotalActiveSizeInBytes(); } -BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { return std::make_shared(*this, context.getSettingsRef().max_partitions_per_insert_block); } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index c6c8f99a62a..679726826d4 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -39,6 +39,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -48,7 +49,7 @@ public: std::optional totalRows() const override; std::optional totalBytes() const override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /** Perform the next step in combining the parts. */ diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index f9aad8a58a7..dce9e0f38ec 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -65,6 +65,7 @@ StorageMySQL::StorageMySQL( Pipes StorageMySQL::read( const Names & column_names_, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info_, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, @@ -198,8 +199,7 @@ private: }; -BlockOutputStreamPtr StorageMySQL::write( - const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { return std::make_shared(*this, remote_database_name, remote_table_name, pool.get(), context.getSettingsRef().mysql_max_rows_to_insert); } diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index 8b98536e4d7..287c65db6f3 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -39,13 +39,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; private: friend class StorageMySQLBlockOutputStream; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index fe8bd05d53a..72934d185c7 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -24,6 +24,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processing_stage*/, @@ -35,7 +36,7 @@ public: return pipes; } - BlockOutputStreamPtr write(const ASTPtr &, const Context &) override + BlockOutputStreamPtr write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context &) override { return 
std::make_shared(getSampleBlock()); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cb5e5aaf701..650578d7560 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3381,6 +3381,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg Pipes StorageReplicatedMergeTree::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -3442,7 +3443,7 @@ void StorageReplicatedMergeTree::assertNotReadonly() const } -BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { const auto storage_settings_ptr = getSettings(); assertNotReadonly(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index aae0b9c81b8..5fcfd98e71d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -89,6 +89,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -98,7 +99,7 @@ public: std::optional totalRows() const override; std::optional totalBytes() const override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index acaa2bcc7d6..093f4450ecb 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -285,6 +285,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & Pipes StorageS3::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -320,7 +321,7 @@ Pipes StorageS3::read( return narrowPipes(std::move(pipes), num_streams); } -BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared( format_name, min_upload_part_size, getSampleBlock(), context_global, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index fc19fe06da0..665c00b8033 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -48,13 +48,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; 
NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 38b4d30c25b..cddd4657cd1 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -81,7 +81,7 @@ void SetOrJoinBlockOutputStream::writeSuffix() } -BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { UInt64 id = ++increment; return std::make_shared(*this, path, path + "tmp/", toString(id) + ".bin"); diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index cf85dfd5d5b..b7785aadc6a 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -21,7 +21,7 @@ class StorageSetOrJoinBase : public IStorage public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; Strings getDataPaths() const override { return {path}; } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index b68505fa147..c320d0afb42 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -253,6 +253,7 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora Pipes StorageStripeLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -299,8 +300,7 @@ Pipes StorageStripeLog::read( } -BlockOutputStreamPtr StorageStripeLog::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index ed8e5da081e..d06758a60e8 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -27,13 +27,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 5bca6072da0..42b70f716f4 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -394,6 +394,7 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage Pipes StorageTinyLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -413,8 +414,7 @@ Pipes StorageTinyLog::read( } -BlockOutputStreamPtr StorageTinyLog::write( - const ASTPtr & 
/*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 102ec76fda3..a55bf6d0dcf 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -26,13 +26,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0301412e029..0361718c616 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -155,7 +155,9 @@ std::function IStorageURLBase::getReadPOSTDataCallback(con } -Pipes IStorageURLBase::read(const Names & column_names, +Pipes IStorageURLBase::read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -183,7 +185,7 @@ Pipes IStorageURLBase::read(const Names & column_names, return pipes; } -BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared( uri, format_name, getSampleBlock(), context_global, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5a6584f0301..ecd57024a44 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -21,13 +21,14 @@ class IStorageURLBase : public IStorage public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; protected: IStorageURLBase( diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 5ba36a936e2..bb29b4a0932 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -23,6 +23,7 @@ StorageValues::StorageValues( Pipes StorageValues::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 254f3bfa8aa..88fb023fb2b 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -17,6 +17,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum 
processed_stage, diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index d8392b2edd8..52b7e8764d9 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -54,6 +54,7 @@ StorageView::StorageView( Pipes StorageView::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index c6b48d4d357..143ed3c06c4 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -23,6 +23,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 08538798389..c7fa8a88251 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -82,7 +82,9 @@ std::function StorageXDBC::getReadPOSTDataCallback(const N return [query](std::ostream & os) { os << "query=" << query; }; } -Pipes StorageXDBC::read(const Names & column_names, +Pipes StorageXDBC::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -92,10 +94,10 @@ Pipes StorageXDBC::read(const Names & column_names, check(column_names); bridge_helper->startBridgeSync(); - return IStorageURLBase::read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + return IStorageURLBase::read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } -BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { bridge_helper->startBridgeSync(); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index afc61dac5cd..4488122656d 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -15,13 +15,14 @@ namespace DB class StorageXDBC : public IStorageURLBase { public: - - Pipes read(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; StorageXDBC(const StorageID & table_id_, const std::string & remote_database_name, @@ -29,7 +30,7 @@ public: const ColumnsDescription & columns_, const Context & context_, BridgeHelperPtr bridge_helper_); - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; private: diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 1ceff26ba83..7644f62b96d 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -28,7 +28,9 @@ public: 
setInMemoryMetadata(metadata_); } - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6359e367106..646a5434b64 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -242,6 +242,7 @@ private: Pipes StorageSystemColumns::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 66b423efb96..7336b406183 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -19,6 +19,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index ef88c3ca058..3d24d90bbef 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -46,12 +46,13 @@ protected: } Pipes read( - const Names & /* column_names */, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) override + const Names & /* column_names */, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) override { StoragesInfoStream stream(query_info, context); diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 5905080539e..36fde616bd4 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -28,6 +28,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) Pipes StorageSystemDisks::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index b136a217508..714a0a5428c 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -22,6 +22,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 0fa7b71555e..fd7e04cfb1f 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -125,6 +125,7 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipes StorageSystemNumbers::read( const Names & column_names, + const StorageMetadataPtr & 
/*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 4d205728496..88d3651c7f9 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -31,6 +31,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index e7c8c446847..af736c215b5 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -22,6 +22,7 @@ StorageSystemOne::StorageSystemOne(const std::string & name_) Pipes StorageSystemOne::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 9fb3a670949..3b3e531dc86 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -23,6 +23,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 42a432489f4..e599bbb19e3 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -224,12 +224,13 @@ StoragesInfo StoragesInfoStream::next() } Pipes StorageSystemPartsBase::read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { bool has_state_column = hasStateColumn(column_names); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 7b9ce7cbae2..a46cecec9dd 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -56,12 +56,13 @@ class StorageSystemPartsBase : public IStorage { public: Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & metadata_, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index ca71e7e5f74..24861fcbd6a 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -59,6 +59,7 @@ 
StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) Pipes StorageSystemReplicas::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index 94b0d6c9d06..b068ebc8b0a 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -20,6 +20,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index dbb47dc771a..a80747c1fa1 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -31,12 +31,13 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n } Pipes StorageSystemStoragePolicies::read( - const Names & column_names, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & /*query_info*/, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { check(column_names); diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 79e89863bf0..a1427da8559 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -21,12 +21,13 @@ public: std::string getName() const override { return "SystemStoragePolicies"; } Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; protected: StorageSystemStoragePolicies(const std::string & name_); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 84d441a8c6e..f04b3ea20c9 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -448,6 +448,7 @@ private: Pipes StorageSystemTables::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index bab3aef6e15..54551205684 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -20,6 +20,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git 
a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 438d31e7e02..2bc53b5093e 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -91,12 +91,13 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith } Pipes StorageSystemZeros::read( - const Names & column_names, - const SelectQueryInfo &, - const Context & /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - unsigned num_streams) + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo &, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned num_streams) { check(column_names); diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 3768885d03d..f169861122a 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -21,12 +21,13 @@ public: std::string getName() const override { return "SystemZeros"; } Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index fff352210e7..618d524987b 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -71,6 +71,7 @@ TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & context) { using namespace DB; + auto metadata_snapshot = table->getInMemoryMetadataPtr(); std::string data; @@ -97,7 +98,7 @@ std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & cont block.insert(column); } - BlockOutputStreamPtr out = table->write({}, context); + BlockOutputStreamPtr out = table->write({}, metadata_snapshot, context); out->write(block); return data; @@ -107,13 +108,14 @@ std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & cont std::string readData(DB::StoragePtr & table, const DB::Context & context) { using namespace DB; + auto metadata_snapshot = table->getInMemoryMetadataPtr(); Names column_names; column_names.push_back("a"); QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0])); + BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, metadata_snapshot, {}, context, stage, 8192, 1)[0])); Block sample; { From bd17aa8a0eb5dcca23034194672e57b1f85df048 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 22:11:17 +0300 Subject: [PATCH 0759/2229] Increase timeout in test --- .../0_stateless/01076_parallel_alter_replicated_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh index 
15bb851fc77..4bb8da3d43c 100755 --- a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh @@ -105,7 +105,7 @@ sleep 1 counter=0 while [[ $($CLICKHOUSE_CLIENT --query "select * from system.mutations where table like 'concurrent_mutate_mt_%' and is_done=0" 2>&1) ]]; do - if [ "$counter" -gt 40 ] + if [ "$counter" -gt 120 ] then break fi From d10109dc38d6f2caa7b1e1a897e0a73082412bde Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 22:18:33 +0300 Subject: [PATCH 0760/2229] Remove excessive statement #11131 --- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 52eace30657..f3da98f0ba3 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -139,7 +139,6 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block return result; data.check(block, true); - block.checkNumberOfRows(); if (!data.hasPartitionKey()) /// Table is not partitioned. { From dbef88e073feed3a8531b7dea5297af9d7c81f12 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 22:33:24 +0300 Subject: [PATCH 0761/2229] Fix tests. --- .../QueryPlan/ReadFromStorageStep.cpp | 39 ++++++------------- .../QueryPlan/ReadFromStorageStep.h | 2 + 2 files changed, 14 insertions(+), 27 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index 191f501e9fa..bc108286ba1 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -32,32 +32,9 @@ ReadFromStorageStep::ReadFromStorageStep( , max_block_size(max_block_size_) , max_streams(max_streams_) { - Block header = storage->getSampleBlockForColumns(required_columns); + /// Note: we read from storage in constructor of step because we don't know real header before reading. + /// It will be fixed when storage return QueryPlanStep itself. - if (query_info.prewhere_info) - { - if (query_info.prewhere_info->alias_actions) - header = ExpressionTransform::transformHeader(std::move(header), query_info.prewhere_info->alias_actions); - - header = FilterTransform::transformHeader( - std::move(header), - query_info.prewhere_info->prewhere_actions, - query_info.prewhere_info->prewhere_column_name, - query_info.prewhere_info->remove_prewhere_column); - - if (query_info.prewhere_info->remove_columns_actions) - header = ExpressionTransform::transformHeader( - std::move(header), - query_info.prewhere_info->remove_columns_actions); - } - - input_streams.emplace_back(DataStream{.header = std::move(header)}); -} - -ReadFromStorageStep::~ReadFromStorageStep() = default; - -QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines) -{ Pipes pipes = storage->read(required_columns, query_info, context, processing_stage, max_block_size, max_streams); if (pipes.empty()) @@ -95,7 +72,7 @@ QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines) pipe.getHeader(), input_streams.front().header, ConvertingTransform::MatchColumnsMode::Name)); } - auto pipeline = std::make_unique(); + pipeline = std::make_unique(); /// Table lock is stored inside pipeline here. 
pipeline->addTableLock(table_lock); @@ -148,7 +125,15 @@ QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines) pipe.enableQuota(); pipeline->init(std::move(pipes)); - return pipeline; + + input_streams.emplace_back(DataStream{.header = pipeline->getHeader()}); +} + +ReadFromStorageStep::~ReadFromStorageStep() = default; + +QueryPipelinePtr ReadFromStorageStep::updatePipeline(QueryPipelines) +{ + return std::move(pipeline); } } diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.h b/src/Processors/QueryPlan/ReadFromStorageStep.h index ac662c4b06f..13d6b9e255d 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromStorageStep.h @@ -45,6 +45,8 @@ private: QueryProcessingStage::Enum processing_stage; size_t max_block_size; size_t max_streams; + + QueryPipelinePtr pipeline; }; } From a3ac224ae4e6427c30b3fd6d6af55c91d10079e8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 15 Jun 2020 20:41:44 +0300 Subject: [PATCH 0762/2229] in-memory parts: sync insert to replicated --- .../MergeTree/MergeTreeBlockOutputStream.cpp | 24 ++++++----- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++---- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataPartInMemory.cpp | 11 +++-- .../MergeTree/MergeTreeDataPartInMemory.h | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 3 +- .../ReplicatedMergeTreeBlockOutputStream.cpp | 19 +++++++-- .../ReplicatedMergeTreeBlockOutputStream.h | 3 +- src/Storages/StorageReplicatedMergeTree.cpp | 36 +++++++++------- .../test_polymorphic_parts/test.py | 42 +++++++++++++++++++ 10 files changed, 118 insertions(+), 45 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index e5ee8b2be5e..f730f51879e 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -23,12 +23,14 @@ void MergeTreeBlockOutputStream::write(const Block & block) storage.delayInsertOrThrowIfNeeded(); auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block); + MergeTreeData::DataPartsVector inserted_parts; for (auto & current_block : part_blocks) { Stopwatch watch; MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block); storage.renameTempPartAndAdd(part, &storage.increment); + inserted_parts.push_back(part); PartLog::addNewPart(storage.global_context, part, watch.elapsed()); @@ -36,20 +38,11 @@ void MergeTreeBlockOutputStream::write(const Block & block) { storage.in_memory_merges_throttler.add(part_in_memory->block.bytes(), part_in_memory->rows_count); - auto settings = storage.getSettings(); - if (settings->in_memory_parts_insert_sync) - { - if (!part_in_memory->waitUntilMerged(in_memory_parts_timeout)) - throw Exception("Timeout exceeded while waiting to write part " - + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); - } - else if (storage.merging_mutating_task_handle && !storage.in_memory_merges_throttler.needDelayMerge()) + if (storage.merging_mutating_task_handle && !storage.in_memory_merges_throttler.needDelayMerge()) { storage.in_memory_merges_throttler.reset(); storage.merging_mutating_task_handle->wake(); } - - continue; } else if (storage.merging_mutating_task_handle) { @@ -57,6 +50,17 @@ void MergeTreeBlockOutputStream::write(const Block & block) storage.merging_mutating_task_handle->wake(); } } + + if (storage.getSettings()->in_memory_parts_insert_sync) + { + for (const auto & part : 
inserted_parts) + { + auto part_in_memory = asInMemoryPart(part); + if (!part_in_memory->waitUntilMerged(in_memory_parts_timeout)) + throw Exception("Timeout exceeded while waiting to write part " + + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); + } + } } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 098416e87ed..aa52d4fdb37 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -245,7 +245,8 @@ MergeTreeData::MergeTreeData( String reason; if (!canUsePolymorphicParts(*settings, &reason) && !reason.empty()) - LOG_WARNING(log, "{} Settings 'min_bytes_for_wide_part' and 'min_bytes_for_wide_part' will be ignored.", reason); + LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part', 'min_bytes_for_wide_part', " + "'min_rows_for_compact_part' and 'min_bytes_for_compact_part' will be ignored.", reason); } @@ -1592,10 +1593,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, size_t rows_count) const { - if (!canUseAdaptiveGranularity()) + const auto settings = getSettings(); + if (!canUsePolymorphicParts(*settings)) return MergeTreeDataPartType::WIDE; - const auto settings = getSettings(); if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part) return MergeTreeDataPartType::IN_MEMORY; @@ -1607,10 +1608,10 @@ MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, s MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const { - if (!canUseAdaptiveGranularity()) + const auto settings = getSettings(); + if (!canUsePolymorphicParts(*settings)) return MergeTreeDataPartType::WIDE; - const auto settings = getSettings(); if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) return MergeTreeDataPartType::COMPACT; @@ -3605,11 +3606,15 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S { if (!canUseAdaptiveGranularity()) { - if ((settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0) && out_reason) + if (out_reason && (settings.min_rows_for_wide_part != 0 || settings.min_bytes_for_wide_part != 0 + || settings.min_rows_for_compact_part != 0 || settings.min_bytes_for_compact_part != 0)) { std::ostringstream message; - message << "Table can't create parts with adaptive granularity, but settings min_rows_for_wide_part = " - << settings.min_rows_for_wide_part << ", min_bytes_for_wide_part = " << settings.min_bytes_for_wide_part + message << "Table can't create parts with adaptive granularity, but settings" + << "min_rows_for_wide_part = " << settings.min_rows_for_wide_part + << ", min_bytes_for_wide_part = " << settings.min_bytes_for_wide_part + << ", min_rows_for_compact_part = " << settings.min_rows_for_compact_part + << ", min_bytes_for_compact_part = " << settings.min_bytes_for_compact_part << ". Parts with non-adaptive granularity can be stored only in Wide (default) format."; *out_reason = message.str(); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4a0ce0945a5..e3f25d561cd 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -901,7 +901,7 @@ private: /// Check selected parts for movements. Used by ALTER ... MOVE queries. 
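/// (Descriptive note: the returned tagger keeps the parts marked as moving until it is destroyed.)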
CurrentlyMovingPartsTagger checkPartsForMove(const DataPartsVector & parts, SpacePtr space); - bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const; + bool canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason = nullptr) const; WriteAheadLogPtr write_ahead_log; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index bec9d16209d..e56e069d91a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -95,16 +95,19 @@ void MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix) const flushToDisk(storage.getRelativeDataPath(), detached_path); } -bool MergeTreeDataPartInMemory::waitUntilMerged(size_t timeout) const +bool MergeTreeDataPartInMemory::waitUntilMerged(size_t timeout_ms) const { auto lock = storage.lockParts(); - return is_merged.wait_for(lock, std::chrono::milliseconds(timeout), - [this]() { return state == State::Outdated; }); + return is_merged.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [this]() { return state != State::Committed; }); } void MergeTreeDataPartInMemory::notifyMerged() const { - is_merged.notify_one(); + LOG_DEBUG(&Poco::Logger::get("InMemPart"), "notifiedMerged"); + LOG_DEBUG(&Poco::Logger::get("InMemPart"), "state {}", stateString()); + + is_merged.notify_all(); } void MergeTreeDataPartInMemory::renameTo(const String & new_relative_path, bool /* remove_new_dir_if_exists */) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index e48d9b8e201..cd44fefdb22 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -45,7 +45,7 @@ public: void flushToDisk(const String & base_path, const String & new_relative_path) const; - bool waitUntilMerged(size_t timeout) const; + bool waitUntilMerged(size_t timeout_ms) const; void notifyMerged() const; mutable Block block; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ad70a541611..9bf61d028cf 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -121,7 +121,8 @@ struct MergeTreeSettings : public SettingsCollection static bool isPartFormatSetting(const String & name) { - return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part"; + return name == "min_bytes_for_wide_part" || name == "min_rows_for_wide_part" + || name == "min_bytes_for_compact_part" || name == "min_rows_for_compact_part"; } }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 8d8ab831e66..c17a44c2b61 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -31,9 +31,14 @@ namespace ErrorCodes ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream( - StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block_, bool deduplicate_) - : storage(storage_), quorum(quorum_), quorum_timeout_ms(quorum_timeout_ms_), max_parts_per_block(max_parts_per_block_), deduplicate(deduplicate_), - log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) + StorageReplicatedMergeTree & storage_, size_t quorum_, size_t 
quorum_timeout_ms_, + size_t max_parts_per_block_, size_t insert_in_memory_parts_timeout_ms_, bool deduplicate_) + : storage(storage_), quorum(quorum_) + , quorum_timeout_ms(quorum_timeout_ms_) + , max_parts_per_block(max_parts_per_block_) + , insert_in_memory_parts_timeout_ms(insert_in_memory_parts_timeout_ms_) + , deduplicate(deduplicate_) + , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) { /// The quorum value `1` has the same meaning as if it is disabled. if (quorum == 1) @@ -365,6 +370,14 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo + zkutil::ZooKeeper::error2string(multi_code), ErrorCodes::UNEXPECTED_ZOOKEEPER_ERROR); } + auto part_in_memory = asInMemoryPart(part); + if (part_in_memory && storage.getSettings()->in_memory_parts_insert_sync) + { + if (!part_in_memory->waitUntilMerged(insert_in_memory_parts_timeout_ms)) + throw Exception("Timeout exceeded while waiting to write part " + + part->name + " on disk", ErrorCodes::TIMEOUT_EXCEEDED); + } + if (quorum) { /// We are waiting for quorum to be satisfied. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h index b8650c25c7d..9591c5791c5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h @@ -24,7 +24,7 @@ class ReplicatedMergeTreeBlockOutputStream : public IBlockOutputStream public: ReplicatedMergeTreeBlockOutputStream(StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block_, - bool deduplicate_); + size_t insert_in_memory_parts_timeout_ms_, bool deduplicate_); Block getHeader() const override; void writePrefix() override; @@ -58,6 +58,7 @@ private: size_t quorum; size_t quorum_timeout_ms; size_t max_parts_per_block; + size_t insert_in_memory_parts_timeout_ms; bool deduplicate = true; bool last_block_is_duplicate = false; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 882b5593c76..d3877a0ff69 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1085,20 +1085,6 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) try { checkPartChecksumsAndCommit(transaction, part); - - DataPartsVector parts_to_remove_immediatly; - for (const auto & part_ptr : parts) - { - if (auto part_in_memory = asInMemoryPart(part_ptr)) - { - part_in_memory->notifyMerged(); - modifyPartState(part_in_memory, DataPartState::Deleting); - parts_to_remove_immediatly.push_back(part_in_memory); - } - } - - tryRemovePartsFromZooKeeperWithRetries(parts_to_remove_immediatly); - removePartsFinally(parts_to_remove_immediatly); } catch (const Exception & e) { @@ -1122,6 +1108,20 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) throw; } + DataPartsVector parts_to_remove_immediatly; + for (const auto & part_ptr : parts) + { + if (auto part_in_memory = asInMemoryPart(part_ptr)) + { + modifyPartState(part_in_memory, DataPartState::Deleting); + part_in_memory->notifyMerged(); + parts_to_remove_immediatly.push_back(part_in_memory); + } + } + + tryRemovePartsFromZooKeeperWithRetries(parts_to_remove_immediatly); + removePartsFinally(parts_to_remove_immediatly); + /** Removing old parts from ZK and from the disk is delayed - see ReplicatedMergeTreeCleanupThread, clearOldParts. 
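 * (In-memory source parts are the exception: they were already removed immediately above, see parts_to_remove_immediatly.)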
*/ @@ -3138,7 +3138,11 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, bool deduplicate = storage_settings_ptr->replicated_deduplication_window != 0 && query_settings.insert_deduplicate; return std::make_shared(*this, - query_settings.insert_quorum, query_settings.insert_quorum_timeout.totalMilliseconds(), query_settings.max_partitions_per_insert_block, deduplicate); + query_settings.insert_quorum, + query_settings.insert_quorum_timeout.totalMilliseconds(), + query_settings.max_partitions_per_insert_block, + query_settings.insert_in_memory_parts_timeout.totalMilliseconds(), + deduplicate); } @@ -3662,7 +3666,7 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool PartsTemporaryRename renamed_parts(*this, "detached/"); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); - ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, false); /// TODO Allow to use quorum here. + ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, 0, false); /// TODO Allow to use quorum here. for (size_t i = 0; i < loaded_parts.size(); ++i) { String old_name = loaded_parts[i]->name; diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 362204c307d..145d6aedb19 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -9,6 +9,7 @@ from helpers.test_tools import TSV from helpers.test_tools import assert_eq_with_retry from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager +from multiprocessing.dummy import Pool cluster = ClickHouseCluster(__file__) @@ -90,6 +91,8 @@ def start_cluster(): create_tables('in_memory_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard4") create_tables('wal_table', [node11, node12], [settings_in_memory, settings_in_memory], "shard4") create_tables('restore_table', [node11, node12], [settings_in_memory, settings_in_memory], "shard5") + create_tables('deduplication_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard5") + create_tables('sync_table', [node9, node10], [settings_in_memory, settings_in_memory], "shard5") yield cluster @@ -422,6 +425,45 @@ def test_in_memory_wal_rotate(start_cluster): assert os.path.exists(wal_file) assert os.path.getsize(wal_file) == 0 +def test_in_memory_deduplication(start_cluster): + for i in range(3): + node9.query("INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") + node10.query("INSERT INTO deduplication_table (date, id, s) VALUES (toDate('2020-03-03'), 1, 'foo')") + + node9.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20) + node10.query("SYSTEM SYNC REPLICA deduplication_table", timeout=20) + + assert node9.query("SELECT date, id, s FROM deduplication_table") == "2020-03-03\t1\tfoo\n" + assert node10.query("SELECT date, id, s FROM deduplication_table") == "2020-03-03\t1\tfoo\n" + +def test_in_memory_sync_insert(start_cluster): + node9.query("ALTER TABLE sync_table MODIFY SETTING in_memory_parts_insert_sync = 1") + node10.query("ALTER TABLE sync_table MODIFY SETTING in_memory_parts_insert_sync = 1") + node9.query("SYSTEM STOP MERGES sync_table") + node10.query("SYSTEM STOP MERGES sync_table") + + pool = Pool(5) + tasks = [] + for i in range(5): + tasks.append(pool.apply_async(insert_random_data, ('sync_table', node9, 50))) + + time.sleep(5) + assert node9.query("SELECT 
count() FROM sync_table") == "250\n" + assert node9.query("SELECT part_type, count() FROM system.parts WHERE table = 'sync_table' AND active GROUP BY part_type") == "InMemory\t5\n" + + for task in tasks: + assert not task.ready() + + node9.query("SYSTEM START MERGES sync_table") + node10.query("SYSTEM START MERGES sync_table") + assert_eq_with_retry(node9, "OPTIMIZE TABLE sync_table FINAL SETTINGS optimize_throw_if_noop = 1", "") + + for task in tasks: + task.get() + + assert node9.query("SELECT count() FROM sync_table") == "250\n" + assert node9.query("SELECT part_type, count() FROM system.parts WHERE table = 'sync_table' AND active GROUP BY part_type") == "Compact\t1\n" + def test_polymorphic_parts_index(start_cluster): node1.query(''' CREATE TABLE index_compact(a UInt32, s String) From 1976b10ee0c73de553df12daab06aa413c61213c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 23:01:58 +0300 Subject: [PATCH 0763/2229] Proper query formatting in logs #3853 --- src/Interpreters/executeQuery.cpp | 33 +++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e1e2108c0fc..91c72fa04c5 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include @@ -41,7 +43,6 @@ #include #include #include -#include namespace ProfileEvents @@ -70,11 +71,35 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings) ast.checkSize(settings.max_ast_elements); } -/// NOTE This is wrong in case of single-line comments and in case of multiline string literals. + static String joinLines(const String & query) { - String res = query; - std::replace(res.begin(), res.end(), '\n', ' '); + /// Care should be taken. We don't join lines inside non-whitespace tokens (e.g. multiline string literals) + /// and we don't join line after single-line comment. + /// All other whitespaces replaced to a single whitespace. + + String res; + const char * begin = query.data(); + const char * end = begin + query.size(); + + Lexer lexer(begin, end); + Token token = lexer.nextToken(); + for (; !token.isEnd(); token = lexer.nextToken()) + { + if (token.type == TokenType::Whitespace) + { + res += ' '; + } + else if (token.type == TokenType::Comment) + { + res.append(token.begin, token.end); + if (token.end < end && *token.end == '\n') + res += '\n'; + } + else + res.append(token.begin, token.end); + } + return res; } From 511fa106afad60bdf7b06c15a13e23b108a67531 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 23:05:21 +0300 Subject: [PATCH 0764/2229] Better comment --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 91c72fa04c5..cff020e62f6 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -75,7 +75,7 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings) static String joinLines(const String & query) { /// Care should be taken. We don't join lines inside non-whitespace tokens (e.g. multiline string literals) - /// and we don't join line after single-line comment. + /// and we don't join line after comment (because it can be single-line comment). /// All other whitespaces replaced to a single whitespace. 
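/// For example, "SELECT 1 -- comment\n, 2" must keep the newline after the comment, otherwise ", 2" would be swallowed by the comment.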
String res; From 80bcaaacf27b831d2e5e29647832293acf2ad9de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 23:07:39 +0300 Subject: [PATCH 0765/2229] Don't rewrite query in client when there are no query parameters --- programs/client/Client.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 63467c1129d..e396ae9c868 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -986,7 +986,10 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { - /// We will always rewrite query (even if there are no query_parameters) because it will help to find errors in query formatter. + /// Rewrite query only when we have query parameters. + /// Note that if query is rewritten, comments in query are lost. + /// But the user often wants to see comments in server logs, query log, processlist, etc. + if (!query_parameters.empty()) { /// Replace ASTQueryParameter with ASTLiteral for prepared statements. ReplaceQueryParameterVisitor visitor(query_parameters); From ba8244198974856b09fd9100b9a65fcfb1dc009f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 23:13:57 +0300 Subject: [PATCH 0766/2229] Added a test --- .../01319_query_formatting_in_server_log.reference | 4 ++++ .../0_stateless/01319_query_formatting_in_server_log.sql | 6 ++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/01319_query_formatting_in_server_log.reference create mode 100644 tests/queries/0_stateless/01319_query_formatting_in_server_log.sql diff --git a/tests/queries/0_stateless/01319_query_formatting_in_server_log.reference b/tests/queries/0_stateless/01319_query_formatting_in_server_log.reference new file mode 100644 index 00000000000..5fb3cc3a599 --- /dev/null +++ b/tests/queries/0_stateless/01319_query_formatting_in_server_log.reference @@ -0,0 +1,4 @@ +ab\ncd 1 +SeLeCt 'ab +cd' /* hello */ -- world + , 1; diff --git a/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql b/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql new file mode 100644 index 00000000000..dc88d3d48f7 --- /dev/null +++ b/tests/queries/0_stateless/01319_query_formatting_in_server_log.sql @@ -0,0 +1,6 @@ +SeLeCt 'ab +cd' /* hello */ -- world +, 1; + +SYSTEM FLUSH LOGS; +SELECT extract(message, 'SeL.+?;') FROM system.text_log WHERE event_date >= yesterday() AND message LIKE '%SeLeCt \'ab\n%' ORDER BY event_time DESC LIMIT 1 FORMAT TSVRaw; From da111906774d997a8ce86cc8e35d4be02cffcd37 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jun 2020 23:20:45 +0300 Subject: [PATCH 0767/2229] Fix tests. 
--- src/Processors/QueryPlan/ReadFromStorageStep.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index bc108286ba1..14c95cbe2c2 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -65,13 +65,6 @@ ReadFromStorageStep::ReadFromStorageStep( pipes.emplace_back(std::move(pipe)); } - if (!blocksHaveEqualStructure(pipes.front().getHeader(), input_streams.front().header)) - { - for (auto & pipe : pipes) - pipe.addSimpleTransform(std::make_shared( - pipe.getHeader(), input_streams.front().header, ConvertingTransform::MatchColumnsMode::Name)); - } - pipeline = std::make_unique(); /// Table lock is stored inside pipeline here. From bb6c0743fc512a817b4ee53dc47c53822083b10d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jun 2020 23:30:36 +0300 Subject: [PATCH 0768/2229] Change the level of log message about failure to listen, to warning #4406 --- programs/server/Server.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9734bafe30e..25d8e5595b7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -869,7 +869,7 @@ int Server::main(const std::vector & /*args*/) if (listen_try) { - LOG_ERROR(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " + LOG_WARNING(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to " "specify not disabled IPv4 or IPv6 address to listen in element of configuration " "file. Example for disabled IPv6: 0.0.0.0 ." " Example for disabled IPv4: ::", @@ -1013,7 +1013,8 @@ int Server::main(const std::vector & /*args*/) } if (servers.empty()) - throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); global_context->enableNamedSessions(); From bc58e22c5bcf85dc42b97af40357483f62a4ecf6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 01:23:13 +0300 Subject: [PATCH 0769/2229] Whitespace --- src/Common/XDBCBridgeHelper.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index 1609737107e..233c5c83df4 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -272,7 +272,8 @@ struct ODBCBridgeMixin return AccessType::ODBC; } - static std::unique_ptr startBridge(const Poco::Util::AbstractConfiguration & config, Poco::Logger * log, const Poco::Timespan & http_timeout) + static std::unique_ptr startBridge( + const Poco::Util::AbstractConfiguration & config, Poco::Logger * log, const Poco::Timespan & http_timeout) { /// Path to executable folder Poco::Path path{config.getString("application.dir", "/usr/bin")}; From 1e73a56a778d2f2e864f8e932d18e1b44f2beba5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 01:23:56 +0300 Subject: [PATCH 0770/2229] Whitespace --- src/Common/XDBCBridgeHelper.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index 233c5c83df4..9320122d2e5 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -278,7 
+278,6 @@ struct ODBCBridgeMixin /// Path to executable folder Poco::Path path{config.getString("application.dir", "/usr/bin")}; - std::vector cmd_args; path.setFileName("clickhouse-odbc-bridge"); From eabbabed04aba337e94d32b4a869bc84e883fbec Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 16 Jun 2020 01:24:00 +0300 Subject: [PATCH 0771/2229] fix 'LIMIT WITH TIES' with aliases --- src/Interpreters/InterpreterSelectQuery.cpp | 8 ++++++ .../01142_with_ties_and_aliases.reference | 25 +++++++++++++++++++ .../01142_with_ties_and_aliases.sql | 12 +++++++++ 3 files changed, 45 insertions(+) create mode 100644 tests/queries/0_stateless/01142_with_ties_and_aliases.reference create mode 100644 tests/queries/0_stateless/01142_with_ties_and_aliases.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ac17a3042d8..523e467261b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -973,6 +973,14 @@ void InterpreterSelectQuery::executeImpl(QueryPipeline & pipeline, const BlockIn executeWithFill(pipeline); + /// If we have 'WITH TIES', we need execute limit before projection, + /// because in that case columns from 'ORDER BY' are used. + if (query.limit_with_ties) + { + executeLimit(pipeline); + has_prelimit = true; + } + /** We must do projection after DISTINCT because projection may remove some columns. */ executeProjection(pipeline, expressions.final_projection); diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.reference b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference new file mode 100644 index 00000000000..1846e07a908 --- /dev/null +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference @@ -0,0 +1,25 @@ +0 0 +1 0 +2 0 +3 0 +4 0 +1 +1 +1 +1 +1 +0 +1 +2 +3 +4 +0 0 +0 1 +0 2 +0 3 +0 4 +0 0 +0 1 +0 2 +0 3 +0 4 diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql new file mode 100644 index 00000000000..f086cb9d907 --- /dev/null +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql @@ -0,0 +1,12 @@ +select number, intDiv(number,5) value from numbers(20) order by value limit 3 with ties; + +drop table if exists wt; +create table wt (a Int, b Int) engine = Memory; +insert into wt select 0, number from numbers(5); + +select 1 from wt order by a limit 3 with ties; +select b from wt order by a limit 3 with ties; +with a * 2 as c select a, b from wt order by c limit 3 with ties; +select a * 2 as c, b from wt order by c limit 3 with ties; + +drop table if exists wt; From 186d336e3199486f753b334d78d1ddad7c8b6fe6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 01:35:15 +0300 Subject: [PATCH 0772/2229] Use proper timeouts when communicating with xdbc-bridge --- src/Common/XDBCBridgeHelper.h | 6 ++++-- src/IO/ReadWriteBufferFromHTTP.h | 3 ++- src/Storages/StorageXDBC.cpp | 1 - src/TableFunctions/ITableFunctionXDBC.cpp | 6 +++++- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index 9320122d2e5..370a58498a5 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -109,7 +109,8 @@ public: uri.setPath(IDENTIFIER_QUOTE_HANDLER); uri.addQueryParameter("connection_string", getConnectionString()); - ReadWriteBufferFromHTTP buf(uri, Poco::Net::HTTPRequest::HTTP_POST, nullptr); + ReadWriteBufferFromHTTP buf( + uri, Poco::Net::HTTPRequest::HTTP_POST, 
{}, ConnectionTimeouts(http_timeout, http_timeout, http_timeout)); std::string character; readStringBinary(character, buf); if (character.length() > 1) @@ -208,7 +209,8 @@ private: { try { - ReadWriteBufferFromHTTP buf(ping_url, Poco::Net::HTTPRequest::HTTP_GET, nullptr); + ReadWriteBufferFromHTTP buf( + ping_url, Poco::Net::HTTPRequest::HTTP_GET, {}, ConnectionTimeouts(http_timeout, http_timeout, http_timeout)); return checkString(XDBCBridgeHelper::PING_OK_ANSWER, buf); } catch (...) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index edd0b7f1579..2dc053dfa00 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -156,7 +156,8 @@ namespace detail public: using OutStreamCallback = std::function; - explicit ReadWriteBufferFromHTTPBase(UpdatableSessionPtr session_, + explicit ReadWriteBufferFromHTTPBase( + UpdatableSessionPtr session_, Poco::URI uri_, const std::string & method_ = {}, OutStreamCallback out_stream_callback_ = {}, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 08538798389..c090ca44034 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index adf0c9240bc..f25e010ddbc 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -75,7 +75,11 @@ StoragePtr ITableFunctionXDBC::executeImpl(const ASTPtr & ast_function, const Co columns_info_uri.addQueryParameter("external_table_functions_use_nulls", Poco::NumberFormatter::format(use_nulls)); - ReadWriteBufferFromHTTP buf(columns_info_uri, Poco::Net::HTTPRequest::HTTP_POST, nullptr); + ReadWriteBufferFromHTTP buf(columns_info_uri, Poco::Net::HTTPRequest::HTTP_POST, {}, + ConnectionTimeouts( + context.getSettingsRef().http_connection_timeout, + context.getSettingsRef().http_send_timeout, + context.getSettingsRef().http_receive_timeout)); std::string columns_info; readStringBinary(columns_info, buf); From cf0bd501e506ac85a025c948642941be8ccdf5ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 01:36:12 +0300 Subject: [PATCH 0773/2229] Remove harmful default values from code --- src/IO/ReadWriteBufferFromHTTP.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2dc053dfa00..40057e1f80e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -246,9 +246,9 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase Date: Tue, 16 Jun 2020 01:54:19 +0300 Subject: [PATCH 0774/2229] Fix race condition in SYSTEM SYNC REPLICA --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 4 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 13 ++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 8a9dbceba04..6325b1adca4 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -646,7 +646,7 @@ void ReplicatedMergeTreeQueue::updateMutations(zkutil::ZooKeeperPtr zookeeper, C } } - if (some_active_mutations_were_killed) + if (some_active_mutations_were_killed && storage.queue_task_handle) storage.queue_task_handle->signalReadyToRun(); if 
(!entries_to_load.empty()) @@ -759,7 +759,7 @@ ReplicatedMergeTreeMutationEntryPtr ReplicatedMergeTreeQueue::removeMutation( LOG_DEBUG(log, "Removed mutation {} from local state.", entry->znode_name); } - if (mutation_was_active) + if (mutation_was_active && storage.queue_task_handle) storage.queue_task_handle->signalReadyToRun(); return entry; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 5931bca17ea..885db89e5b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5643,9 +5643,16 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI /// Let's fetch new log entries firstly queue.pullLogsToQueue(getZooKeeper()); - /// This is significant, because the execution of this task could be delayed at BackgroundPool. - /// And we force it to be executed. - queue_task_handle->signalReadyToRun(); + + { + auto lock = queue.lockQueue(); + if (!queue_task_handle) + return false; + + /// This is significant, because the execution of this task could be delayed at BackgroundPool. + /// And we force it to be executed. + queue_task_handle->signalReadyToRun(); + } Poco::Event target_size_event; auto callback = [&target_size_event, queue_size] (size_t new_queue_size) From 00224ee94f482573ca996cecc11a8110aba8dc15 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:04:12 +0300 Subject: [PATCH 0775/2229] Added a test --- ...01320_create_sync_race_condition.reference | 0 .../01320_create_sync_race_condition.sh | 28 +++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/01320_create_sync_race_condition.reference create mode 100755 tests/queries/0_stateless/01320_create_sync_race_condition.sh diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition.reference b/tests/queries/0_stateless/01320_create_sync_race_condition.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition.sh b/tests/queries/0_stateless/01320_create_sync_race_condition.sh new file mode 100755 index 00000000000..2e42033644a --- /dev/null +++ b/tests/queries/0_stateless/01320_create_sync_race_condition.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +set -e + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS r;" + +function thread1() +{ + while true; do $CLICKHOUSE_CLIENT -n --query "CREATE TABLE r (x UInt64) ENGINE = ReplicatedMergeTree('/test/table', 'r') ORDER BY x; DROP TABLE r;"; done +} + +function thread2() +{ + while true; do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA r" 2>/dev/null; done +} + +export -f thread1 +export -f thread2 + +timeout 10 bash -c thread1 & +timeout 10 bash -c thread2 & + +wait + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS r;" From e0f4c64acf8f492ed7d62de1a7c65fa520c7a333 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:18:15 +0300 Subject: [PATCH 0776/2229] Fix flaky test --- .../01064_incremental_streaming_from_2_src_with_feedback.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql index 984f897ec2a..c2d0333bf46 100644 --- a/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql +++ b/tests/queries/0_stateless/01064_incremental_streaming_from_2_src_with_feedback.sql @@ -1,7 +1,5 @@ SET joined_subquery_requires_alias = 0; -SYSTEM STOP MERGES; - -- incremental streaming usecase -- that has sense only if data filling order has guarantees of chronological order @@ -77,6 +75,8 @@ AS LEFT JOIN (SELECT id, maxMerge(latest_login_time) as current_latest_login_time FROM target_table WHERE id IN (SELECT id FROM checkouts) GROUP BY id) USING (id) GROUP BY id; +-- This query has effect only for existing tables, so it must be located after CREATE. +SYSTEM STOP MERGES; -- feed with some initial values INSERT INTO logins SELECT number as id, '2000-01-01 08:00:00' from numbers(50000); From a753ba9c6a2ac4502570123e6cb391fd70595a9e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:24:32 +0300 Subject: [PATCH 0777/2229] Checked and corrected all other tests --- .../0_stateless/00446_clear_column_in_partition_zookeeper.sql | 2 +- .../0_stateless/00653_verification_monotonic_data_load.sh | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql index 861c768edc9..996c84903a9 100644 --- a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql +++ b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql @@ -21,12 +21,12 @@ DROP TABLE clear_column; SELECT '===Replicated case==='; -SYSTEM STOP MERGES; DROP TABLE IF EXISTS clear_column1 NO DELAY; DROP TABLE IF EXISTS clear_column2 NO DELAY; SELECT sleep(1) FORMAT Null; CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '1', d, d, 8192); CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '2', d, d, 8192); +SYSTEM STOP MERGES; INSERT INTO clear_column1 (d) VALUES ('2000-01-01'), ('2000-02-01'); SYSTEM SYNC REPLICA clear_column2; diff --git a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh index 69c0db567df..e52610f03ba 100755 --- a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh +++ b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh @@ -13,8 
+13,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES;" - ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS string_test_table;" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS fixed_string_test_table;" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS signed_integer_test_table;" @@ -29,6 +27,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt ${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0;" +${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES;" ${CLICKHOUSE_CLIENT} --query="INSERT INTO string_test_table VALUES ('0'), ('2'), ('2');" ${CLICKHOUSE_CLIENT} --query="INSERT INTO fixed_string_test_table VALUES ('0'), ('2'), ('2');" From d08736f7083bdc948d98b1a3ae7ff418505d8e58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:27:22 +0300 Subject: [PATCH 0778/2229] Added a comment --- src/Interpreters/InterpreterSystemQuery.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index de2291746d9..b55d1bda09b 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -16,6 +16,19 @@ class Context; class AccessRightsElements; class ASTSystemQuery; + +/** Implement various SYSTEM queries. + * Examples: SYSTEM SHUTDOWN, SYSTEM DROP MARK CACHE. + * + * Some commands are intended to stop/start background actions for tables and comes with two variants: + * + * 1. SYSTEM STOP MERGES table, SYSTEM START MERGES table + * - start/stop actions for specific table. + * + * 2. SYSTEM STOP MERGES, SYSTEM START MERGES + * - start/stop actions for all existing tables. + * Note that the actions for tables that will be created after this query will not be affected. + */ class InterpreterSystemQuery : public IInterpreter { public: From 85a7676bf4ed136f8f99ac9ebe2b905c8346ae80 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:32:30 +0300 Subject: [PATCH 0779/2229] Fix flaky unit tests with ZooKeeper. --- .../gtest_zkutil_test_multi_exception.cpp | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp b/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp index 8440b4fe7c9..19104e02e24 100644 --- a/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp +++ b/src/Common/ZooKeeper/tests/gtest_zkutil_test_multi_exception.cpp @@ -14,15 +14,27 @@ using namespace DB; TEST(zkutil, ZookeeperConnected) { - try + /// In our CI infrastructure it is typical that ZooKeeper is unavailable for some amount of time. + size_t i; + for (i = 0; i < 100; ++i) { - auto zookeeper = std::make_unique("localhost:2181"); - zookeeper->exists("/"); - zookeeper->createIfNotExists("/clickhouse_test", "Unit tests of ClickHouse"); + try + { + auto zookeeper = std::make_unique("localhost:2181"); + zookeeper->exists("/"); + zookeeper->createIfNotExists("/clickhouse_test", "Unit tests of ClickHouse"); + } + catch (...) 
+ { + std::cerr << "Zookeeper is unavailable, try " << i << std::endl; + sleep(1); + continue; + } + break; } - catch (...) + if (i == 100) { - std::cerr << "No zookeeper. skip tests." << std::endl; + std::cerr << "No zookeeper after " << i << " tries. skip tests." << std::endl; exit(0); } } From 97d5897d2b5d79ed5d4901735b4f9cae7247d3c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:42:10 +0300 Subject: [PATCH 0780/2229] Fix test --- tests/queries/0_stateless/01091_num_threads.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01091_num_threads.sql b/tests/queries/0_stateless/01091_num_threads.sql index 876a2d15d1a..dc397d62305 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -4,7 +4,7 @@ set log_query_threads=1; SELECT 1; SYSTEM FLUSH LOGS; -WITH +WITH ( SELECT query_id FROM system.query_log @@ -19,11 +19,11 @@ WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master select sum(number) from numbers(1000000); SYSTEM FLUSH LOGS; -WITH +WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') AND (event_date >= (today() - 1)) + WHERE (query LIKE 'select sum(number) from numbers(1000000);%') AND (event_date >= (today() - 1)) ORDER BY event_time DESC LIMIT 1 ) AS id @@ -34,11 +34,11 @@ WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master select sum(number) from numbers_mt(1000000); SYSTEM FLUSH LOGS; -WITH +WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') AND (event_date >= (today() - 1)) + WHERE (query LIKE 'select sum(number) from numbers_mt(1000000);%') AND (event_date >= (today() - 1)) ORDER BY event_time DESC LIMIT 1 ) AS id From c85b9ae151391a9bc8294777164d9e716eeddbd4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 02:43:05 +0300 Subject: [PATCH 0781/2229] Fix test --- .../0_stateless/01070_exception_code_in_query_log_table.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql index 2c99ba54112..b9627a0f8a8 100644 --- a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql +++ b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql @@ -3,5 +3,5 @@ SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; -- { serve CREATE TABLE test_table_for_01070_exception_code_in_query_log_table (value UInt64) ENGINE=Memory(); SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; SYSTEM FLUSH LOGS; -SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' AND event_date >= yesterday() AND event_time > now() - INTERVAL 5 MINUTE ORDER BY exception_code; +SELECT exception_code FROM system.query_log WHERE lower(query) LIKE lower('SELECT * FROM test_table_for_01070_exception_code_in_query_log_table%') AND event_date >= yesterday() AND event_time > now() - INTERVAL 5 MINUTE ORDER BY exception_code; DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table; From 977fd3e44fa08fe8427e04e75386894c785fba1f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 02:45:05 +0300 Subject: [PATCH 0782/2229] Update CMakeLists.txt --- base/CMakeLists.txt | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index a8dedec9269..cfa54fe2ca4 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -10,4 +10,4 @@ add_subdirectory (widechar_width) if (USE_MYSQL) add_subdirectory (mysqlxx) -endif () \ No newline at end of file +endif () From 92c7760c6e87f64dca55b78c12176c4f35b5de6c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 02:51:33 +0300 Subject: [PATCH 0783/2229] Update CMakeLists.txt --- base/daemon/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 36de193bccd..04d2f059b39 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -10,4 +10,4 @@ target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickh if (USE_SENTRY) target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) -endif () \ No newline at end of file +endif () From 9b734ffded4cbf4e929bcfa2bb545c6d3e67938a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 03:21:20 +0300 Subject: [PATCH 0784/2229] Update http_server.py --- tests/integration/test_send_crash_reports/http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_send_crash_reports/http_server.py b/tests/integration/test_send_crash_reports/http_server.py index e3fa2e1cb57..74f0592504f 100644 --- a/tests/integration/test_send_crash_reports/http_server.py +++ b/tests/integration/test_send_crash_reports/http_server.py @@ -40,4 +40,4 @@ if __name__ == "__main__": try: httpd.serve_forever() finally: - httpd.server_close() \ No newline at end of file + httpd.server_close() From edff54e3abb7b7f7d1baa47b8b13d1f02be379d7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 04:08:01 +0300 Subject: [PATCH 0785/2229] Maybe fix max_parser_depth test --- tests/queries/0_stateless/01196_max_parser_depth.sh | 8 +++++--- tests/queries/shell_config.sh | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01196_max_parser_depth.sh b/tests/queries/0_stateless/01196_max_parser_depth.sh index 471c1c22ecb..24c219b2241 100755 --- a/tests/queries/0_stateless/01196_max_parser_depth.sh +++ b/tests/queries/0_stateless/01196_max_parser_depth.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -{ printf "select "; for x in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for x in {1..1000}; do printf ")"; done; } | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 306' -{ printf "select "; for x in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for x in {1..1000}; do printf ")"; done; } | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 306' -{ printf "select "; for x in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for x in {1..1000}; do printf ")"; done; } | $CLICKHOUSE_CURL --data-binary @- -vsS "$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 306' +{ printf "select "; for x in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for x in {1..1000}; do printf ")"; done; } > ${CLICKHOUSE_TMP}/query + +cat ${CLICKHOUSE_TMP}/query | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 306' +cat ${CLICKHOUSE_TMP}/query | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 306' +cat ${CLICKHOUSE_TMP}/query | $CLICKHOUSE_CURL --data-binary @- -vsS "$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 306' diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 8d66a568524..8fe79bd3ccd 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -71,7 +71,7 @@ mkdir -p ${CLICKHOUSE_TMP} function clickhouse_client_removed_host_parameter() { - # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex. - # bash regex magic is arcane, but version dependant and weak; sed or awk are not really portable. - $(echo "$CLICKHOUSE_CLIENT" | python -c "import sys, re; print re.sub('--host(\s+|=)[^\s]+', '', sys.stdin.read())") "$@" + # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex. + # bash regex magic is arcane, but version dependant and weak; sed or awk are not really portable. 
+ $(echo "$CLICKHOUSE_CLIENT" | python -c "import sys, re; print re.sub('--host(\s+|=)[^\s]+', '', sys.stdin.read())") "$@" } From 679c3f8ff335abfa54baae3726486ccf7bb53868 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 04:50:22 +0300 Subject: [PATCH 0786/2229] Update 00446_clear_column_in_partition_zookeeper.sql --- .../0_stateless/00446_clear_column_in_partition_zookeeper.sql | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql index 996c84903a9..8f6fe5340c9 100644 --- a/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql +++ b/tests/queries/0_stateless/00446_clear_column_in_partition_zookeeper.sql @@ -26,7 +26,6 @@ DROP TABLE IF EXISTS clear_column2 NO DELAY; SELECT sleep(1) FORMAT Null; CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '1', d, d, 8192); CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test/tables/clear_column', '2', d, d, 8192); -SYSTEM STOP MERGES; INSERT INTO clear_column1 (d) VALUES ('2000-01-01'), ('2000-02-01'); SYSTEM SYNC REPLICA clear_column2; @@ -63,9 +62,7 @@ SELECT sum(data_uncompressed_bytes) FROM system.columns WHERE database=currentDa ALTER TABLE clear_column1 CLEAR COLUMN s IN PARTITION '200001'; ALTER TABLE clear_column1 CLEAR COLUMN s IN PARTITION '200002'; --- Merges cannot be blocked after all manipulations SET optimize_throw_if_noop = 1; -SYSTEM START MERGES; OPTIMIZE TABLE clear_column1 PARTITION '200001'; OPTIMIZE TABLE clear_column1 PARTITION '200002'; From 22a92faab649479c69bcc3eab7a7f6bfaef30c45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 05:14:53 +0300 Subject: [PATCH 0787/2229] Avoid connection to replica when fetches are cancelled --- src/Storages/MergeTree/DataPartsExchange.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index acc3bf38461..6796e630ff2 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -63,8 +63,10 @@ void Service::processQuery(const Poco::Net::HTMLForm & params, ReadBuffer & /*bo static std::atomic_uint total_sends {0}; - if ((data_settings->replicated_max_parallel_sends && total_sends >= data_settings->replicated_max_parallel_sends) - || (data_settings->replicated_max_parallel_sends_for_table && data.current_table_sends >= data_settings->replicated_max_parallel_sends_for_table)) + if ((data_settings->replicated_max_parallel_sends + && total_sends >= data_settings->replicated_max_parallel_sends) + || (data_settings->replicated_max_parallel_sends_for_table + && data.current_table_sends >= data_settings->replicated_max_parallel_sends_for_table)) { response.setStatus(std::to_string(HTTP_TOO_MANY_REQUESTS)); response.setReason("Too many concurrent fetches, try again later"); @@ -182,6 +184,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchPart( bool to_detached, const String & tmp_prefix_) { + if (blocker.isCancelled()) + throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); + /// Validation of the input that may come from malicious replica. 
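/// (Descriptive note: fromPartName throws for a malformed name, so a crafted part name cannot reach the download paths built from it below.)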
MergeTreePartInfo::fromPartName(part_name, data.format_version); const auto data_settings = data.getSettings(); @@ -294,7 +299,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPart( if (blocker.isCancelled()) { - /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, performing a poll with a not very large timeout. + /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, + /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). disk->removeRecursive(part_download_path); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); From 03058c3c25daf38468736f734c08f95a5b152121 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 16 Jun 2020 05:55:27 +0300 Subject: [PATCH 0788/2229] fix 'ORDER BY WITH FILL' over const columns --- .../Transforms/FillingTransform.cpp | 5 +++-- .../01145_with_fill_const.reference | 20 +++++++++++++++++++ .../0_stateless/01145_with_fill_const.sql | 6 ++++++ 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01145_with_fill_const.reference create mode 100644 tests/queries/0_stateless/01145_with_fill_const.sql diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 50fac121819..e8d56389eac 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -107,8 +107,9 @@ void FillingTransform::transform(Chunk & chunk) { for (size_t pos : positions) { - new_columns.push_back(old_columns[pos]); - new_mutable_columns.push_back(old_columns[pos]->cloneEmpty()->assumeMutable()); + auto old_column = old_columns[pos]->convertToFullColumnIfConst(); + new_columns.push_back(old_column); + new_mutable_columns.push_back(old_column->cloneEmpty()->assumeMutable()); } }; diff --git a/tests/queries/0_stateless/01145_with_fill_const.reference b/tests/queries/0_stateless/01145_with_fill_const.reference new file mode 100644 index 00000000000..fa72c3c5993 --- /dev/null +++ b/tests/queries/0_stateless/01145_with_fill_const.reference @@ -0,0 +1,20 @@ +2020-06-16 00:00:00 +2020-06-16 00:30:00 +2020-06-16 01:00:00 +2020-06-16 01:30:00 +2020-06-16 02:00:00 +2020-06-16 02:30:00 +2020-06-16 03:00:00 +2020-06-16 03:30:00 +2020-06-16 04:00:00 +2020-06-16 04:30:00 +2020-06-16 05:00:00 +2020-06-16 05:30:00 +2020-06-16 06:00:00 +2020-06-16 06:30:00 +2020-06-16 07:00:00 +2020-06-16 07:30:00 +2020-06-16 08:00:00 +2020-06-16 08:30:00 +2020-06-16 09:00:00 +2020-06-16 09:30:00 diff --git a/tests/queries/0_stateless/01145_with_fill_const.sql b/tests/queries/0_stateless/01145_with_fill_const.sql new file mode 100644 index 00000000000..531d202c02a --- /dev/null +++ b/tests/queries/0_stateless/01145_with_fill_const.sql @@ -0,0 +1,6 @@ +WITH toDateTime('2020-06-16 03:00:00') AS date_time +SELECT date_time ORDER BY date_time ASC +WITH FILL + FROM toDateTime('2020-06-16 00:00:00') + TO toDateTime('2020-06-16 10:00:00') + STEP 1800; From bd330cfeb61dafed0c8c09e583235476eba7c279 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jun 2020 05:56:50 +0300 Subject: [PATCH 0789/2229] Update test --- tests/integration/test_quorum_inserts/test.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_quorum_inserts/test.py b/tests/integration/test_quorum_inserts/test.py index 27901842692..607fe93f1ef 100644 --- 
a/tests/integration/test_quorum_inserts/test.py +++ b/tests/integration/test_quorum_inserts/test.py @@ -284,11 +284,14 @@ def test_insert_quorum_with_ttl(started_cluster): zero.query("INSERT INTO test_insert_quorum_with_ttl(a,d) VALUES(1, '2011-01-01')", settings={'insert_quorum_timeout' : 5000}) - - assert TSV("1\t2011-01-01\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 0})) - assert TSV("1\t2011-01-01\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 1})) - print("Inserts should resume.") zero.query("INSERT INTO test_insert_quorum_with_ttl(a, d) VALUES(2, '2012-02-02')") + first.query("OPTIMIZE TABLE test_insert_quorum_with_ttl") + first.query("SYSTEM SYNC REPLICA test_insert_quorum_with_ttl") + zero.query("SYSTEM SYNC REPLICA test_insert_quorum_with_ttl") + + assert TSV("2\t2012-02-02\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 0})) + assert TSV("2\t2012-02-02\n") == TSV(first.query("SELECT * FROM test_insert_quorum_with_ttl", settings={'select_sequential_consistency' : 1})) + execute_on_all_cluster("DROP TABLE IF EXISTS test_insert_quorum_with_ttl") From c43bd228ab6609d5971ff4002240697d817f8a31 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sat, 13 Jun 2020 23:43:01 -0700 Subject: [PATCH 0790/2229] make max global thread pool setting configurable This PR adds a server-level config for overriding the default maximum number of threads allowed in the global thread pool (currently 10,000). This might be useful in scenarios where a large number of distributed queries are executing concurrently and the default limit might not be sufficient. 
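For illustration, a minimal sketch (not part of this patch) of how callers exercise the global pool; it assumes the existing GlobalThreadPool::instance() and scheduleOrThrowOnError() interface from Common/ThreadPool.h:

#include <Common/ThreadPool.h>

void scheduleBackgroundWork()
{
    /// The first call to instance() constructs the pool; with this patch it reads
    /// max_thread_pool_size from the server config, falling back to 10000.
    auto & pool = GlobalThreadPool::instance();

    /// When all threads are busy, new jobs have to wait for a free one - exactly
    /// the contention that raising max_thread_pool_size is meant to relieve.
    pool.scheduleOrThrowOnError([] { /* query-related work */ });
}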
--- programs/server/config.xml | 10 ++++++++++ src/Common/ThreadPool.cpp | 14 +++++++++++++- src/Common/ThreadPool.h | 4 +++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 0ceba85593a..f4c0f5a22fc 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -136,6 +136,16 @@ --> 0 + + + + <max_thread_pool_size>10000</max_thread_pool_size> + 0.9 diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3e6e31ed3fc..edfb52e01ac 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -3,6 +3,10 @@ #include +#include <Poco/Util/Application.h> +#include <Poco/Util/LayeredConfiguration.h> + + namespace DB { @@ -264,6 +268,14 @@ template class ThreadPoolImpl; GlobalThreadPool & GlobalThreadPool::instance() { - static GlobalThreadPool ret; + const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config(); + + UInt64 max_threads = config.getUInt64("max_thread_pool_size", 10000); + size_t max_free_threads = 1000; + size_t max_queue_size = 10000; + const bool shutdown_on_exception = false; + + static GlobalThreadPool ret(max_threads, max_free_threads, max_queue_size, shutdown_on_exception); + return ret; } diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 9d5582db50c..3d1169d618d 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -129,7 +129,9 @@ using FreeThreadPool = ThreadPoolImpl<std::thread>; class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable { public: - GlobalThreadPool() : FreeThreadPool(10000, 1000, 10000, false) {} + GlobalThreadPool(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, + const bool shutdown_on_exception_) : + FreeThreadPool(max_threads_, max_free_threads_, queue_size_, shutdown_on_exception_) {} static GlobalThreadPool & instance(); };
From aa2d724ea13804ddd5b3cfc223ba005757b992d0 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 13:35:09 -0700 Subject: [PATCH 0791/2229] add max_thread_pool_size setting to tests This adds the `max_thread_pool_size` config to the tests/server-test.xml file. --- tests/server-test.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/server-test.xml b/tests/server-test.xml index 7f792479065..721d62ef301 100644 --- a/tests/server-test.xml +++ b/tests/server-test.xml @@ -17,6 +17,7 @@ 58443 59440 59009 + <max_thread_pool_size>10000</max_thread_pool_size>
From 09e3975b9778d5849a1cd9b8cd4f156b10311cb9 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 13:44:39 -0700 Subject: [PATCH 0792/2229] docs for max_thread_pool_size This adds the docs for the new server-level setting `max_thread_pool_size`. --- .../server-configuration-parameters/settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index b90b432da6c..b43d6bf847a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -426,6 +426,18 @@ The value 0 means that you can delete all tables without any restrictions. 0 ``` +## max\_thread\_pool\_size {#max-thread-pool-size} + +The maximum number of threads in the Global Thread pool. + +Default value: 10000. + +**Example** + +``` xml +<max_thread_pool_size>12000</max_thread_pool_size> +``` + ## merge\_tree {#server_configuration_parameters-merge_tree} Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
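A side note on the `GlobalThreadPool::instance()` change above: because the pool is a function-local static, the configuration is read exactly once, when the first caller arrives, and later changes to the underlying value are not picked up. The stand-alone sketch below — not ClickHouse code; the names are invented for illustration — demonstrates the same behaviour with plain C++ "magic statics".

```cpp
#include <cstddef>
#include <iostream>

/// Stand-in for the value that would come from the server configuration.
static std::size_t configured_max_threads = 10000;

struct Pool
{
    std::size_t max_threads;
    explicit Pool(std::size_t max_threads_) : max_threads(max_threads_) {}
};

Pool & instance()
{
    /// Constructed thread-safely on the first call (C++11 "magic statics");
    /// the configuration value is captured at that moment and never re-read.
    static Pool pool(configured_max_threads);
    return pool;
}

int main()
{
    std::cout << instance().max_threads << '\n'; /// 10000
    configured_max_threads = 20000;              /// too late: the singleton already exists
    std::cout << instance().max_threads << '\n'; /// still 10000
}
```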
From 334c5abe9b37a314f3a7ad4d7a783ad08bfa724c Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 17:09:59 -0700 Subject: [PATCH 0793/2229] remove extra vertical space --- src/Common/ThreadPool.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index edfb52e01ac..3a669056f21 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -6,8 +6,6 @@ #include #include - - namespace DB { namespace ErrorCodes From 9a26d48ad0e595468225566aafb8c78d41a20ae0 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 16 Jun 2020 09:31:00 +0300 Subject: [PATCH 0794/2229] Basic blog similar to docs (#11609) * Basic blog similar to docs * rename post * no post content in post_meta * update readme and template * more "en" content * complete "en" content * build blog redirects * redirects for migration * link sitemaps * update po * add "ru" content * ru redirects * remove old domain mentions * adjust styles * content improvements * +1 alt * use main images from CDN * use re-hosted in-content images * extra vertical margin around embedded youtube * minor improvements * adjust post page * adjust html meta * adjust post page * improve blog rendering --- docs/en/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/en/whats-new/changelog/2017.md | 2 +- docs/es/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/es/whats-new/changelog/2017.md | 2 +- docs/fa/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/fa/whats-new/changelog/2017.md | 2 +- docs/fr/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/fr/whats-new/changelog/2017.md | 2 +- docs/ja/introduction/adopters.md | 2 +- docs/ja/introduction/distinctive-features.md | 2 +- docs/ja/introduction/history.md | 2 +- docs/ja/introduction/performance.md | 6 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/ja/whats-new/changelog/2017.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 4 +- docs/tools/blog.py | 107 +++++++++++ docs/tools/build.py | 38 ++-- docs/tools/mdx_clickhouse.py | 28 ++- docs/tools/nav.py | 48 ++++- docs/tools/redirects.py | 38 ++-- docs/tools/website.py | 39 ++++ docs/tr/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- .../functions/array-functions.md | 6 +- docs/tr/whats-new/changelog/2017.md | 2 +- docs/zh/introduction/adopters.md | 2 +- .../sql-reference/data-types/domains/ipv4.md | 2 +- .../sql-reference/data-types/domains/ipv6.md | 2 +- docs/zh/whats-new/changelog/2017.md | 2 +- website/blog/README.md | 47 +++++ ...on-of-data-structures-in-yandex-metrica.md | 108 +++++++++++ .../2016/how-to-update-data-in-clickhouse.md | 169 ++++++++++++++++++ .../en/2016/yandex-opensources-clickhouse.md | 12 ++ .../en/2017/clickhouse-at-data-scale-2017.md | 10 ++ 
.../2017/clickhouse-at-percona-live-2017.md | 22 +++ ...ckhouse-meetup-in-berlin-october-5-2017.md | 10 ++ ...khouse-meetup-in-santa-clara-may-4-2017.md | 8 + .../join-the-clickhouse-meetup-in-berlin.md | 13 ++ ...ouse-meetup-in-amsterdam-on-november-15.md | 8 + .../2018/clickhouse-at-analysys-a10-2018.md | 27 +++ .../clickhouse-at-percona-live-europe-2018.md | 25 +++ ...ty-meetup-in-beijing-on-january-27-2018.md | 68 +++++++ ...ty-meetup-in-beijing-on-october-28-2018.md | 54 ++++++ ...mmunity-meetup-in-berlin-on-july-3-2018.md | 39 ++++ ...se-community-meetup-in-berlin-on-july-3.md | 8 + ...unity-meetup-in-paris-on-october-2-2018.md | 20 +++ ...meetup-in-amsterdam-on-november-15-2018.md | 27 +++ .../en/2018/concept-cloud-mergetree-tables.md | 120 +++++++++++++ .../2019/clickhouse-at-percona-live-2019.md | 38 ++++ ...nese-academy-of-science-on-june-11-2019.md | 17 ++ ...khouse-meetup-in-beijing-on-june-8-2019.md | 35 ++++ ...khouse-meetup-in-limassol-on-may-7-2019.md | 41 +++++ ...khouse-meetup-in-madrid-on-april-2-2019.md | 28 +++ ...-meetup-in-san-francisco-on-june-4-2019.md | 10 ++ ...peed-up-lz4-decompression-in-clickhouse.md | 12 ++ ...of-clickhouse-meetups-in-china-for-2019.md | 14 ++ .../five-methods-for-database-obfuscation.md | 10 ++ website/blog/en/index.md | 3 + website/blog/en/redirects.txt | 32 ++++ ...khouse-meetup-v-moskve-21-noyabrya-2016.md | 8 + .../ru/2016/clickhouse-na-highload-2016.md | 14 ++ ...raneniya-i-obrabotki-dannykh-v-yandekse.md | 10 ++ .../ru/2016/yandeks-otkryvaet-clickhouse.md | 10 ++ .../ru/2017/clickhouse-meetup-edet-v-minsk.md | 14 ++ ...use-meetup-v-ekaterinburge-16-maya-2017.md | 8 + .../2017/clickhouse-meetup-v-minske-itogi.md | 16 ++ ...se-meetup-v-novosibirske-3-aprelya-2017.md | 10 ++ ...tup-v-sankt-peterburge-28-fevralya-2017.md | 8 + .../blog/ru/2017/clickhouse-na-uwdc-2017.md | 10 ++ ...ickhouse-meetup-v-limassole-7-maya-2019.md | 38 ++++ ...house-meetup-v-moskve-5-sentyabrya-2019.md | 10 ++ ...se-meetup-v-novosibirske-26-iyunya-2019.md | 12 ++ ...eetup-v-sankt-peterburge-27-iyulya-2019.md | 10 ++ ...ickrouse-meetup-v-minske-11-iyulya-2019.md | 12 ++ website/blog/ru/index.md | 3 + website/blog/ru/redirects.txt | 15 ++ website/css/blog.css | 8 + website/locale/en/LC_MESSAGES/messages.po | 22 ++- website/locale/es/LC_MESSAGES/messages.mo | Bin 6641 -> 6739 bytes website/locale/es/LC_MESSAGES/messages.po | 22 ++- website/locale/fa/LC_MESSAGES/messages.mo | Bin 7453 -> 7553 bytes website/locale/fa/LC_MESSAGES/messages.po | 22 ++- website/locale/fr/LC_MESSAGES/messages.mo | Bin 6490 -> 6585 bytes website/locale/fr/LC_MESSAGES/messages.po | 22 ++- website/locale/ja/LC_MESSAGES/messages.mo | Bin 6613 -> 6691 bytes website/locale/ja/LC_MESSAGES/messages.po | 22 ++- website/locale/messages.pot | 22 ++- website/locale/ru/LC_MESSAGES/messages.mo | Bin 8556 -> 8672 bytes website/locale/ru/LC_MESSAGES/messages.po | 22 ++- website/locale/tr/LC_MESSAGES/messages.mo | Bin 6378 -> 6473 bytes website/locale/tr/LC_MESSAGES/messages.po | 22 ++- website/locale/zh/LC_MESSAGES/messages.mo | Bin 5926 -> 6007 bytes website/locale/zh/LC_MESSAGES/messages.po | 22 ++- website/main.html | 41 +++-- website/sitemap-index.xml | 6 + website/templates/blog/content.html | 43 +++++ website/templates/blog/footer.html | 9 + website/templates/blog/nav.html | 45 +++++ website/templates/common_meta.html | 11 +- website/templates/docs/ld_json.html | 13 +- website/templates/index/community.html | 4 +- website/templates/index/nav.html | 3 +- 119 files changed, 1848 insertions(+), 184 
deletions(-) create mode 100644 docs/tools/blog.py create mode 100644 website/blog/README.md create mode 100644 website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md create mode 100644 website/blog/en/2016/how-to-update-data-in-clickhouse.md create mode 100644 website/blog/en/2016/yandex-opensources-clickhouse.md create mode 100644 website/blog/en/2017/clickhouse-at-data-scale-2017.md create mode 100644 website/blog/en/2017/clickhouse-at-percona-live-2017.md create mode 100644 website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md create mode 100644 website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md create mode 100644 website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md create mode 100644 website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md create mode 100644 website/blog/en/2018/clickhouse-at-analysys-a10-2018.md create mode 100644 website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md create mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md create mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md create mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md create mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md create mode 100644 website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md create mode 100644 website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md create mode 100644 website/blog/en/2018/concept-cloud-mergetree-tables.md create mode 100644 website/blog/en/2019/clickhouse-at-percona-live-2019.md create mode 100644 website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md create mode 100644 website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md create mode 100644 website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md create mode 100644 website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md create mode 100644 website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md create mode 100644 website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md create mode 100644 website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md create mode 100644 website/blog/en/2020/five-methods-for-database-obfuscation.md create mode 100644 website/blog/en/index.md create mode 100644 website/blog/en/redirects.txt create mode 100644 website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md create mode 100644 website/blog/ru/2016/clickhouse-na-highload-2016.md create mode 100644 website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md create mode 100644 website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md create mode 100644 website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md create mode 100644 website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md create mode 100644 website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md create mode 100644 website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md create mode 100644 website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md create mode 100644 website/blog/ru/2017/clickhouse-na-uwdc-2017.md create mode 100644 website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md create mode 100644 
website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md create mode 100644 website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md create mode 100644 website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md create mode 100644 website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md create mode 100644 website/blog/ru/index.md create mode 100644 website/blog/ru/redirects.txt create mode 100644 website/css/blog.css create mode 100644 website/templates/blog/content.html create mode 100644 website/templates/blog/footer.html create mode 100644 website/templates/blog/nav.html diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 081f963f74f..df9cdfa6430 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -35,7 +35,7 @@ toc_title: Adopters | [Exness](https://www.exness.com){.favicon} | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [Geniee](https://geniee.co.jp){.favicon} | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | [HUYA](https://www.huya.com/){.favicon} | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | [Infovista](https://www.infovista.com/){.favicon} | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | [InnoGames](https://www.innogames.com){.favicon} | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/en/sql-reference/data-types/domains/ipv4.md b/docs/en/sql-reference/data-types/domains/ipv4.md index d8735d70b29..1237514b9e7 100644 --- a/docs/en/sql-reference/data-types/domains/ipv4.md +++ b/docs/en/sql-reference/data-types/domains/ipv4.md @@ -31,7 +31,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` domain supports custom input format as IPv4-strings: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/en/sql-reference/data-types/domains/ipv6.md b/docs/en/sql-reference/data-types/domains/ipv6.md index 7fd88887acc..bc57202bf66 100644 --- a/docs/en/sql-reference/data-types/domains/ipv6.md +++ 
b/docs/en/sql-reference/data-types/domains/ipv6.md @@ -31,7 +31,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` domain supports custom input as IPv6-strings: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 4f449eea516..1468b48695b 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -701,13 +701,13 @@ arrayDifference(array) **Parameters** -- `array` – [Array](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). **Returned values** Returns an array of differences between adjacent elements. -Type: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.yandex/docs/en/data_types/float/). +Type: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.tech/docs/en/data_types/float/). **Example** @@ -753,7 +753,7 @@ arrayDistinct(array) **Parameters** -- `array` – [Array](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). **Returned values** diff --git a/docs/en/whats-new/changelog/2017.md b/docs/en/whats-new/changelog/2017.md index d819324b07a..3b48e23233f 100644 --- a/docs/en/whats-new/changelog/2017.md +++ b/docs/en/whats-new/changelog/2017.md @@ -24,7 +24,7 @@ This release contains bug fixes for the previous release 1.1.54310: #### New Features: {#new-features} - Custom partitioning key for the MergeTree family of table engines. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) table engine. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) table engine. - Added support for loading [CatBoost](https://catboost.yandex/) models and applying them to data stored in ClickHouse. - Added support for time zones with non-integer offsets from UTC. - Added support for arithmetic operations with time intervals. diff --git a/docs/es/introduction/adopters.md b/docs/es/introduction/adopters.md index e41e8005cc7..4c0aa78d57b 100644 --- a/docs/es/introduction/adopters.md +++ b/docs/es/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Adoptante | Exness | Comercio | Métricas, Registro | — | — | [Charla en ruso, mayo 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Sistema abierto. 
| Red Ad | Producto principal | — | — | [Publicación de blog en japonés, julio 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Streaming | Analítica | — | — | [Diapositivas en chino, octubre 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Inmobiliario | Analítica | — | — | [Blog Post en Inglés, Abril 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Inmobiliario | Analítica | — | — | [Blog Post en Inglés, Abril 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Red | Analítica | — | — | [Diapositivas en español, octubre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Juego | Métricas, Registro | — | — | [Diapositivas en ruso, septiembre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Plataforma para servicios de video | Analítica | — | — | [Diapositivas en ruso, mayo 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/es/sql-reference/data-types/domains/ipv4.md b/docs/es/sql-reference/data-types/domains/ipv4.md index c97229610d3..6e271f10fd2 100644 --- a/docs/es/sql-reference/data-types/domains/ipv4.md +++ b/docs/es/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` domain admite formato de entrada personalizado como cadenas IPv4: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/es/sql-reference/data-types/domains/ipv6.md b/docs/es/sql-reference/data-types/domains/ipv6.md index bee82ff2898..2f45a353053 100644 --- a/docs/es/sql-reference/data-types/domains/ipv6.md +++ b/docs/es/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` domain admite entradas personalizadas como cadenas IPv6: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/es/sql-reference/functions/array-functions.md b/docs/es/sql-reference/functions/array-functions.md index 3a0ad14b24e..677996efabd 100644 --- a/docs/es/sql-reference/functions/array-functions.md +++ b/docs/es/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Parámetros** -- `array` – [Matriz](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Matriz](https://clickhouse.tech/docs/en/data_types/array/). **Valores devueltos** Devuelve una matriz de diferencias entre los elementos adyacentes. 
-Tipo: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [En\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Flotante\*](https://clickhouse.yandex/docs/en/data_types/float/). +Tipo: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [En\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Flotante\*](https://clickhouse.tech/docs/en/data_types/float/). **Ejemplo** @@ -754,7 +754,7 @@ arrayDistinct(array) **Parámetros** -- `array` – [Matriz](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Matriz](https://clickhouse.tech/docs/en/data_types/array/). **Valores devueltos** diff --git a/docs/es/whats-new/changelog/2017.md b/docs/es/whats-new/changelog/2017.md index 97b2cafd198..33e48b0409f 100644 --- a/docs/es/whats-new/changelog/2017.md +++ b/docs/es/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Esta versión contiene correcciones de errores para la versión anterior 1.1.543 #### Novedad: {#new-features} - Clave de partición personalizada para la familia MergeTree de motores de tabla. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) motor de mesa. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) motor de mesa. - Se agregó soporte para cargar [CatBoost](https://catboost.yandex/) modelos y aplicarlos a los datos almacenados en ClickHouse. - Se agregó soporte para zonas horarias con desplazamientos no enteros de UTC. - Se agregó soporte para operaciones aritméticas con intervalos de tiempo. diff --git a/docs/fa/introduction/adopters.md b/docs/fa/introduction/adopters.md index a4ad16faf6c..654f3a24736 100644 --- a/docs/fa/introduction/adopters.md +++ b/docs/fa/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: "\u067E\u0630\u06CC\u0631\u0627" | اعمال | بازرگانی | معیارهای ورود به سیستم | — | — | [بحث در روسیه, بیشتر 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | ژنی | شبکه تبلیغاتی | محصول اصلی | — | — | [پست وبلاگ در ژاپن, جولای 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | جریان ویدیو | تجزیه و تحلیل | — | — | [اسلاید در چین, اکتبر 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | املاک و مستغلات | تجزیه و تحلیل | — | — | [پست وبلاگ به زبان انگلیسی, مارس 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | املاک و مستغلات | تجزیه و تحلیل | — | — | [پست وبلاگ به زبان انگلیسی, مارس 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | اینفویستا | شبکه ها | تجزیه و تحلیل | — | — | [اسلاید به زبان انگلیسی, اکتبر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | نام | بازی ها | معیارهای ورود به سیستم | — | — | [اسلاید در روسیه, سپتامبر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | پوششی | بستر های نرم افزاری برای خدمات تصویری | تجزیه و تحلیل | — | — | [اسلاید در روسیه, بیشتر 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/fa/sql-reference/data-types/domains/ipv4.md b/docs/fa/sql-reference/data-types/domains/ipv4.md index 645e839f6d8..a010409d58b 100644 --- a/docs/fa/sql-reference/data-types/domains/ipv4.md +++ b/docs/fa/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY 
from; `IPv4` دامنه پشتیبانی از فرمت ورودی سفارشی به عنوان ایپو4 رشته: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/fa/sql-reference/data-types/domains/ipv6.md b/docs/fa/sql-reference/data-types/domains/ipv6.md index 6677916c49b..64a9487cb07 100644 --- a/docs/fa/sql-reference/data-types/domains/ipv6.md +++ b/docs/fa/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` دامنه پشتیبانی از ورودی های سفارشی به عنوان ایپو6 رشته: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/fa/sql-reference/functions/array-functions.md b/docs/fa/sql-reference/functions/array-functions.md index 1988ed4266e..6f4e8326557 100644 --- a/docs/fa/sql-reference/functions/array-functions.md +++ b/docs/fa/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **پارامترها** -- `array` – [& حذف](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [& حذف](https://clickhouse.tech/docs/en/data_types/array/). **مقادیر بازگشتی** بازگرداندن مجموعه ای از تفاوت بین عناصر مجاور. -نوع: [اینترنت\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [شناور\*](https://clickhouse.yandex/docs/en/data_types/float/). +نوع: [اینترنت\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [شناور\*](https://clickhouse.tech/docs/en/data_types/float/). **مثال** @@ -754,7 +754,7 @@ arrayDistinct(array) **پارامترها** -- `array` – [& حذف](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [& حذف](https://clickhouse.tech/docs/en/data_types/array/). **مقادیر بازگشتی** diff --git a/docs/fa/whats-new/changelog/2017.md b/docs/fa/whats-new/changelog/2017.md index 939ed966c22..ea4946cf185 100644 --- a/docs/fa/whats-new/changelog/2017.md +++ b/docs/fa/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### ویژگی های جدید: {#new-features} - کلید پارتیشن بندی سفارشی برای خانواده ادغام موتورهای جدول. -- [کافکا](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) موتور جدول. +- [کافکا](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) موتور جدول. - اضافه شدن پشتیبانی برای بارگذاری [مانتو](https://catboost.yandex/) مدل ها و استفاده از داده های ذخیره شده در کلیک. - اضافه شدن پشتیبانی برای مناطق زمانی با شیپور خاموشی غیر عدد صحیح از مجموعه مقالات. - اضافه شدن پشتیبانی برای عملیات ریاضی با فواصل زمانی. 
diff --git a/docs/fr/introduction/adopters.md b/docs/fr/introduction/adopters.md index 833fc111fbe..e970c61955c 100644 --- a/docs/fr/introduction/adopters.md +++ b/docs/fr/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Adoptant | Exness | Trading | Métriques, Journalisation | — | — | [Parler en russe, mai 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Geniee | Réseau publicitaire | Produit principal | — | — | [Billet de Blog en japonais, juillet 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Le Streaming Vidéo | Analytics | — | — | [Diapositives en chinois, octobre 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Immobilier | Analytics | — | — | [Billet de Blog en anglais, avril 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Immobilier | Analytics | — | — | [Billet de Blog en anglais, avril 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Réseau | Analytics | — | — | [Diapositives en anglais, octobre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Jeu | Métriques, Journalisation | — | — | [Diapositives en russe, septembre 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Plate-forme pour les services vidéo | Analytics | — | — | [Diapositives en russe, mai 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/fr/sql-reference/data-types/domains/ipv4.md b/docs/fr/sql-reference/data-types/domains/ipv4.md index 7cf36c0aaef..12895992e77 100644 --- a/docs/fr/sql-reference/data-types/domains/ipv4.md +++ b/docs/fr/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` le domaine prend en charge le format d'entrée personnalisé en tant que chaînes IPv4: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/fr/sql-reference/data-types/domains/ipv6.md b/docs/fr/sql-reference/data-types/domains/ipv6.md index 1d0f3cd47fd..77510a950cb 100644 --- a/docs/fr/sql-reference/data-types/domains/ipv6.md +++ b/docs/fr/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` le domaine prend en charge l'entrée personnalisée en tant que chaînes IPv6: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/fr/sql-reference/functions/array-functions.md b/docs/fr/sql-reference/functions/array-functions.md index 5590774732d..ef09800614f 100644 --- 
a/docs/fr/sql-reference/functions/array-functions.md +++ b/docs/fr/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Paramètre** -- `array` – [Tableau](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Tableau](https://clickhouse.tech/docs/en/data_types/array/). **Valeurs renvoyées** Renvoie un tableau de différences entre les éléments adjacents. -Type: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Flottant\*](https://clickhouse.yandex/docs/en/data_types/float/). +Type: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Flottant\*](https://clickhouse.tech/docs/en/data_types/float/). **Exemple** @@ -754,7 +754,7 @@ arrayDistinct(array) **Paramètre** -- `array` – [Tableau](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Tableau](https://clickhouse.tech/docs/en/data_types/array/). **Valeurs renvoyées** diff --git a/docs/fr/whats-new/changelog/2017.md b/docs/fr/whats-new/changelog/2017.md index be2cb7de9f4..c812f345fdd 100644 --- a/docs/fr/whats-new/changelog/2017.md +++ b/docs/fr/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Cette version contient des corrections de bugs pour la version précédente 1.1. #### Nouveauté: {#new-features} - Clé de partitionnement personnalisée pour la famille MergeTree des moteurs de table. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) tableau moteur. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) tableau moteur. - Ajout du support pour le chargement [CatBoost](https://catboost.yandex/) modèles et les appliquer aux données stockées dans ClickHouse. - Ajout du support pour les fuseaux horaires avec des décalages non entiers de UTC. - Ajout du support pour les opérations arithmétiques avec des intervalles de temps. 
diff --git a/docs/ja/introduction/adopters.md b/docs/ja/introduction/adopters.md index 084b5034a62..a1a89f6795f 100644 --- a/docs/ja/introduction/adopters.md +++ b/docs/ja/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: "\u30A2\u30C0\u30D7\u30BF\u30FC" | Exness | 取引 | 指標、ロギング | — | — | [ロシア語で話す,May2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | 魔神 | 広告ネットワーク | 主な製品 | — | — | [ブログ投稿日本語,July2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | ビデオストリーミング | 分析 | — | — | [中国語でのスライド,October2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| イデアリスタ | 不動産 | 分析 | — | — | [ブログ投稿英語,April2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| イデアリスタ | 不動産 | 分析 | — | — | [ブログ投稿英語,April2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | インフォビスタ | ネット | 分析 | — | — | [2019年のスライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | ゲーム | 指標、ロギング | — | — | [2019年ロシア](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | インテグロス | Platformビデオサービス | 分析 | — | — | [ロシア語でのスライド,月2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/ja/introduction/distinctive-features.md b/docs/ja/introduction/distinctive-features.md index 5cf44ee0002..88dc91e0a3b 100644 --- a/docs/ja/introduction/distinctive-features.md +++ b/docs/ja/introduction/distinctive-features.md @@ -69,4 +69,4 @@ ClickHouseには、精度を犠牲にしてパフォーマンスを得るため 2. 既に挿入されたデータの変更または削除を、高頻度かつ低遅延に行う機能はありません。 [GDPR](https://gdpr-info.eu)に準拠するなど、データをクリーンアップまたは変更するために、バッチ削除およびバッチ更新が利用可能です。 3. インデックスが疎であるため、ClickHouseは、キーで単一行を取得するようなクエリにはあまり適していません。 -[Original article](https://clickhouse.yandex/docs/en/introduction/distinctive_features/) +[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) diff --git a/docs/ja/introduction/history.md b/docs/ja/introduction/history.md index af5dc40145d..162ed3ba415 100644 --- a/docs/ja/introduction/history.md +++ b/docs/ja/introduction/history.md @@ -48,4 +48,4 @@ Yandex.Metricaには、Metrageと呼ばれるデータを集計するための OLAPServerの制限を取り除き、レポートのための非集計データを扱う問題を解決するために、私達は ClickHouse DBMSを開発しました。 -[Original article](https://clickhouse.yandex/docs/en/introduction/history/) +[Original article](https://clickhouse.tech/docs/en/introduction/history/) diff --git a/docs/ja/introduction/performance.md b/docs/ja/introduction/performance.md index d6404853ccd..7750a10c0ec 100644 --- a/docs/ja/introduction/performance.md +++ b/docs/ja/introduction/performance.md @@ -5,9 +5,9 @@ toc_title: "\u30D1\u30D5\u30A9\u30FC\u30DE\u30F3\u30B9" # パフォーマンス {#pahuomansu} -Yandexの内部テスト結果によると、ClickHouseは、テスト可能なクラスのシステム間で同等の動作シナリオで最高のパフォーマンス(長時間のクエリで最も高いスループットと、短時間のクエリで最小のレイテンシの両方)を示します。 [別のページで](https://clickhouse.yandex/benchmark/dbms/)テスト結果を表示できます 。 +Yandexの内部テスト結果によると、ClickHouseは、テスト可能なクラスのシステム間で同等の動作シナリオで最高のパフォーマンス(長時間のクエリで最も高いスループットと、短時間のクエリで最小のレイテンシの両方)を示します。 [別のページで](https://clickhouse.tech/benchmark/dbms/)テスト結果を表示できます 。 -これは、多数の独立したベンチマークでも確認されています。インターネット検索で見つけることは難しくありませんし、 [私達がまとめた関連リンク集](https://clickhouse.yandex/#independent-benchmarks) から見つけることもできます。 +これは、多数の独立したベンチマークでも確認されています。インターネット検索で見つけることは難しくありませんし、 [私達がまとめた関連リンク集](https://clickhouse.tech/#independent-benchmarks) から見つけることもできます。 ## 単一の巨大なクエリのスループット {#dan-yi-noju-da-nakuerinosurupututo} @@ -27,4 +27,4 @@ Yandexの内部テスト結果によると、ClickHouseは、テスト可能な 
少なくとも1000行のパケットにデータを挿入することをお勧めします。または、1秒あたり1回のリクエストを超えないでください。タブ区切りのダンプデータをMergeTreeテーブルに挿入する場合、挿入速度は50〜200MB/sになります。挿入された行のサイズが約1Kbの場合、速度は毎秒50,000〜200,000行になります。行が小さい場合、パフォーマンスは1秒あたりの行数で高くなります(Banner System データ- `>` 500,000行/秒、Graphite データ- `>` 1,000,000行/秒)。パフォーマンスを向上させるために、複数のINSERTクエリを並行して作成することで、パフォーマンスを線形に向上できます。 -[Original article](https://clickhouse.yandex/docs/ja/introduction/performance/) +[Original article](https://clickhouse.tech/docs/ja/introduction/performance/) diff --git a/docs/ja/sql-reference/data-types/domains/ipv4.md b/docs/ja/sql-reference/data-types/domains/ipv4.md index e355ae4f70f..c329028ad40 100644 --- a/docs/ja/sql-reference/data-types/domains/ipv4.md +++ b/docs/ja/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` ドメインはIPv4文字列としてカスタム入力形式をサポート: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/ja/sql-reference/data-types/domains/ipv6.md b/docs/ja/sql-reference/data-types/domains/ipv6.md index 73227e7a2b7..26583429ec8 100644 --- a/docs/ja/sql-reference/data-types/domains/ipv6.md +++ b/docs/ja/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` ドメイ: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/ja/sql-reference/functions/array-functions.md b/docs/ja/sql-reference/functions/array-functions.md index 5a70770a54b..bd30262cc1e 100644 --- a/docs/ja/sql-reference/functions/array-functions.md +++ b/docs/ja/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **パラメータ** -- `array` – [配列](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [配列](https://clickhouse.tech/docs/en/data_types/array/). **戻り値** 隣接する要素間の差分の配列を返します。 -タイプ: [UInt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [フロート\*](https://clickhouse.yandex/docs/en/data_types/float/). +タイプ: [UInt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [フロート\*](https://clickhouse.tech/docs/en/data_types/float/). **例** @@ -754,7 +754,7 @@ arrayDistinct(array) **パラメータ** -- `array` – [配列](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [配列](https://clickhouse.tech/docs/en/data_types/array/). 
**戻り値** diff --git a/docs/ja/whats-new/changelog/2017.md b/docs/ja/whats-new/changelog/2017.md index ada7b74e431..9561062f31d 100644 --- a/docs/ja/whats-new/changelog/2017.md +++ b/docs/ja/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### 新しい機能: {#new-features} - カスタムパーティショニングキーのMergeTree家族のテーブルエンジンです。 -- [カフカ](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) テーブルエンジン。 +- [カフカ](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) テーブルエンジン。 - ロードのサポートを追加 [CatBoost](https://catboost.yandex/) モデルとClickHouseに格納されたデータにそれらを適用します。 - サポートが追加された時間帯と非整数オフセットからのUTCです。 - 時間間隔での算術演算のサポートが追加されました。 diff --git a/docs/ru/sql-reference/data-types/domains/ipv4.md b/docs/ru/sql-reference/data-types/domains/ipv4.md index 2903404774b..68b67bcca60 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv4.md +++ b/docs/ru/sql-reference/data-types/domains/ipv4.md @@ -26,7 +26,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` поддерживает вставку в виде строк с текстовым представлением IPv4 адреса: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/ru/sql-reference/data-types/domains/ipv6.md b/docs/ru/sql-reference/data-types/domains/ipv6.md index 045a2ad1960..c88ee74adea 100644 --- a/docs/ru/sql-reference/data-types/domains/ipv6.md +++ b/docs/ru/sql-reference/data-types/domains/ipv6.md @@ -26,7 +26,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` поддерживает вставку в виде строк с текстовым представлением IPv6 адреса: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 71b6bda47d0..7abebc6a059 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -692,7 +692,7 @@ arrayDifference(array) **Параметры** -- `array` – [Массив](https://clickhouse.yandex/docs/ru/data_types/array/). +- `array` – [Массив](https://clickhouse.tech/docs/ru/data_types/array/). **Возвращаемое значение** @@ -742,7 +742,7 @@ arrayDistinct(array) **Параметры** -- `array` – [Массив](https://clickhouse.yandex/docs/ru/data_types/array/). +- `array` – [Массив](https://clickhouse.tech/docs/ru/data_types/array/). 
**Возвращаемое значение** diff --git a/docs/tools/blog.py b/docs/tools/blog.py new file mode 100644 index 00000000000..f5415bec608 --- /dev/null +++ b/docs/tools/blog.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python3 +import datetime +import logging +import os +import time + +import nav # monkey patches mkdocs + +import mkdocs.commands +from mkdocs import config +from mkdocs import exceptions + +import mdx_clickhouse +import redirects + +import util + + +def build_for_lang(lang, args): + logging.info(f'Building {lang} blog') + + try: + theme_cfg = { + 'name': None, + 'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir), + 'language': lang, + 'direction': 'ltr', + 'static_templates': ['404.html'], + 'extra': { + 'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching + } + } + + # the following list of languages is sorted according to + # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers + languages = { + 'en': 'English', + 'ru': 'Русский' + } + + site_names = { + 'en': 'ClickHouse Blog', + 'ru': 'Блог ClickHouse ' + } + + assert len(site_names) == len(languages) + + site_dir = os.path.join(args.blog_output_dir, lang) + + plugins = ['macros'] + if args.htmlproofer: + plugins.append('htmlproofer') + + website_url = 'https://clickhouse.tech' + site_name = site_names.get(lang, site_names['en']) + blog_nav, post_meta = nav.build_blog_nav(lang, args) + raw_config = dict( + site_name=site_name, + site_url=f'{website_url}/blog/{lang}/', + docs_dir=os.path.join(args.blog_dir, lang), + site_dir=site_dir, + strict=True, + theme=theme_cfg, + nav=blog_nav, + copyright='©2016–2020 Yandex LLC', + use_directory_urls=True, + repo_name='ClickHouse/ClickHouse', + repo_url='https://github.com/ClickHouse/ClickHouse/', + edit_uri=f'edit/master/website/blog/{lang}', + markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, + plugins=plugins, + extra=dict( + now=datetime.datetime.now().isoformat(), + rev=args.rev, + rev_short=args.rev_short, + rev_url=args.rev_url, + website_url=website_url, + events=args.events, + languages=languages, + includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), + is_amp=False, + is_blog=True, + post_meta=post_meta + ) + ) + + cfg = config.load_config(**raw_config) + mkdocs.commands.build.build(cfg) + + redirects.build_blog_redirects(args) + + # TODO: AMP for blog + # if not args.skip_amp: + # amp.build_amp(lang, args, cfg) + + logging.info(f'Finished building {lang} blog') + + except exceptions.ConfigurationError as e: + raise SystemExit('\n' + str(e)) + + +def build_blog(args): + tasks = [] + for lang in args.blog_lang.split(','): + if lang: + tasks.append((lang, args,)) + util.run_function_in_parallel(build_for_lang, tasks, threads=False) diff --git a/docs/tools/build.py b/docs/tools/build.py index b7ddbc29629..1c8165fb36f 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -20,8 +20,8 @@ from mkdocs import exceptions import mkdocs.commands.build import amp +import blog import mdx_clickhouse - import redirects import single_page import test @@ -95,25 +95,6 @@ def build_for_lang(lang, args): else: site_dir = os.path.join(args.docs_output_dir, lang) - markdown_extensions = [ - 'mdx_clickhouse', - 'admonition', - 'attr_list', - 'codehilite', - 'nl2br', - 'sane_lists', - 'pymdownx.details', - 'pymdownx.magiclink', - 'pymdownx.superfences', - 'extra', - { - 'toc': { - 'permalink': True, - 'slugify': mdx_clickhouse.slugify - } - } - ] - plugins = ['macros'] if 
args.htmlproofer: plugins.append('htmlproofer') @@ -133,7 +114,7 @@ def build_for_lang(lang, args): repo_name='ClickHouse/ClickHouse', repo_url='https://github.com/ClickHouse/ClickHouse/', edit_uri=f'edit/master/docs/{lang}', - markdown_extensions=markdown_extensions, + markdown_extensions=mdx_clickhouse.MARKDOWN_EXTENSIONS, plugins=plugins, extra=dict( now=datetime.datetime.now().isoformat(), @@ -147,14 +128,15 @@ def build_for_lang(lang, args): events=args.events, languages=languages, includes_dir=os.path.join(os.path.dirname(__file__), '..', '_includes'), - is_amp=False + is_amp=False, + is_blog=False ) ) if os.path.exists(config_path): raw_config['config_file'] = config_path else: - raw_config['nav'] = nav.build_nav(lang, args) + raw_config['nav'] = nav.build_docs_nav(lang, args) cfg = config.load_config(**raw_config) @@ -187,7 +169,7 @@ def build_docs(args): if lang: tasks.append((lang, args,)) util.run_function_in_parallel(build_for_lang, tasks, threads=False) - redirects.build_redirects(args) + redirects.build_docs_redirects(args) def build(args): @@ -204,6 +186,9 @@ def build(args): from github import build_releases build_releases(args, build_docs) + if not args.skip_blog: + blog.build_blog(args) + if not args.skip_website: website.process_benchmark_results(args) website.minify_website(args) @@ -215,9 +200,11 @@ if __name__ == '__main__': website_dir = os.path.join('..', 'website') arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa') + arg_parser.add_argument('--blog-lang', default='en,ru') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument('--website-dir', default=website_dir) + arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog')) arg_parser.add_argument('--output-dir', default='build') arg_parser.add_argument('--enable-stable-releases', action='store_true') arg_parser.add_argument('--stable-releases-limit', type=int, default='3') @@ -230,6 +217,7 @@ if __name__ == '__main__': arg_parser.add_argument('--skip-amp', action='store_true') arg_parser.add_argument('--skip-pdf', action='store_true') arg_parser.add_argument('--skip-website', action='store_true') + arg_parser.add_argument('--skip-blog', action='store_true') arg_parser.add_argument('--skip-git-log', action='store_true') arg_parser.add_argument('--test-only', action='store_true') arg_parser.add_argument('--minify', action='store_true') @@ -249,6 +237,7 @@ if __name__ == '__main__': logging.getLogger('MARKDOWN').setLevel(logging.INFO) args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs') + args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog') from github import choose_latest_releases, get_events args.stable_releases = choose_latest_releases(args) if args.enable_stable_releases else [] @@ -259,6 +248,7 @@ if __name__ == '__main__': if args.test_only: args.skip_multi_page = True + args.skip_blog = True args.skip_website = True args.skip_pdf = True args.skip_amp = True diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index 393658be2d7..5ea93002cd2 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -18,6 +18,30 @@ import amp import website +def slugify(value, separator): + return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) + + +MARKDOWN_EXTENSIONS = [ + 'mdx_clickhouse', + 'admonition', + 'attr_list', + 'codehilite', + 
'nl2br', + 'sane_lists', + 'pymdownx.details', + 'pymdownx.magiclink', + 'pymdownx.superfences', + 'extra', + { + 'toc': { + 'permalink': True, + 'slugify': slugify + } + } +] + + class ClickHouseLinkMixin(object): def handleMatch(self, m, data): @@ -72,10 +96,6 @@ def makeExtension(**kwargs): return ClickHouseMarkdown(**kwargs) -def slugify(value, separator): - return slugify_impl.slugify(value, separator=separator, word_boundary=True, save_order=True) - - def get_translations(dirname, lang): import babel.support return babel.support.Translations.load( diff --git a/docs/tools/nav.py b/docs/tools/nav.py index 3c4fd304bd3..71bd2d8052f 100644 --- a/docs/tools/nav.py +++ b/docs/tools/nav.py @@ -1,4 +1,5 @@ import collections +import datetime import logging import os @@ -19,7 +20,8 @@ def build_nav_entry(root, args): return None, None, None result_items = [] index_meta, index_content = util.read_md_file(os.path.join(root, 'index.md')) - current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title', find_first_header(index_content))) + current_title = index_meta.get('toc_folder_title', index_meta.get('toc_title')) + current_title = current_title or index_meta.get('title', find_first_header(index_content)) for filename in os.listdir(root): path = os.path.join(root, filename) if os.path.isdir(path): @@ -47,7 +49,7 @@ def build_nav_entry(root, args): return index_meta.get('toc_priority', 10000), current_title, result -def build_nav(lang, args): +def build_docs_nav(lang, args): docs_dir = os.path.join(args.docs_dir, lang) _, _, nav = build_nav_entry(docs_dir, args) result = [] @@ -64,10 +66,50 @@ def build_nav(lang, args): key = list(result[0].keys())[0] result[0][key][index_key] = 'index.md' result[0][key].move_to_end(index_key, last=False) - print('result', result) return result +def build_blog_nav(lang, args): + blog_dir = os.path.join(args.blog_dir, lang) + years = sorted(os.listdir(blog_dir), reverse=True) + result_nav = [{'hidden': 'index.md'}] + post_meta = collections.OrderedDict() + for year in years: + year_dir = os.path.join(blog_dir, year) + if not os.path.isdir(year_dir): + continue + result_nav.append({year: collections.OrderedDict()}) + posts = [] + post_meta_items = [] + for post in os.listdir(year_dir): + meta, _ = util.read_md_file(os.path.join(year_dir, post)) + post_date = meta['date'] + post_title = meta['title'] + if datetime.date.fromisoformat(post_date) > datetime.date.today(): + continue + posts.append( + (post_date, post_title, os.path.join(year, post),) + ) + if post_title in post_meta: + raise RuntimeError(f'Duplicate post title: {post_title}') + if not post_date.startswith(f'{year}-'): + raise RuntimeError(f'Post date {post_date} doesn\'t match the folder year {year}: {post_title}') + post_url_part = post.replace('.md', '') + post_meta_items.append((post_date, { + 'date': post_date, + 'title': post_title, + 'image': meta.get('image'), + 'url': f'/blog/{lang}/{year}/{post_url_part}/' + },)) + for _, title, path in sorted(posts, reverse=True): + result_nav[-1][year][title] = path + for _, post_meta_item in sorted(post_meta_items, + reverse=True, + key=lambda item: item[0]): + post_meta[post_meta_item['title']] = post_meta_item + return result_nav, post_meta + + def _custom_get_navigation(files, config): nav_config = config['nav'] or mkdocs.structure.nav.nest_paths(f.src_path for f in files.documentation_pages()) items = mkdocs.structure.nav._data_to_navigation(nav_config, files, config) diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 
fc4d60aaf5a..2f5ebc8a620 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -25,24 +25,34 @@ def write_redirect_html(out_path, to_url): ''') -def build_redirect_html(args, from_path, to_path): - for lang in args.lang.split(','): - out_path = os.path.join( - args.docs_output_dir, lang, - from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') - ) - version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' - target_path = to_path.replace('/index.md', '/').replace('.md', '/') - to_url = f'/docs{version_prefix}{lang}/{target_path}' - to_url = to_url.strip() - write_redirect_html(out_path, to_url) +def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path): + out_path = os.path.join( + output_dir, lang, + from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') + ) + version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' + target_path = to_path.replace('/index.md', '/').replace('.md', '/') + to_url = f'/{base_prefix}{version_prefix}{lang}/{target_path}' + to_url = to_url.strip() + write_redirect_html(out_path, to_url) -def build_redirects(args): +def build_docs_redirects(args): with open(os.path.join(args.docs_dir, 'redirects.txt'), 'r') as f: for line in f: - from_path, to_path = line.split(' ', 1) - build_redirect_html(args, from_path, to_path) + for lang in args.lang.split(','): + from_path, to_path = line.split(' ', 1) + build_redirect_html(args, 'docs', lang, args.docs_output_dir, from_path, to_path) + + +def build_blog_redirects(args): + for lang in args.blog_lang.split(','): + redirects_path = os.path.join(args.blog_dir, lang, 'redirects.txt') + if os.path.exists(redirects_path): + with open(redirects_path, 'r') as f: + for line in f: + from_path, to_path = line.split(' ', 1) + build_redirect_html(args, 'blog', lang, args.blog_output_dir, from_path, to_path) def build_static_redirects(args): diff --git a/docs/tools/website.py b/docs/tools/website.py index ed950bd06e3..6d4803158a4 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -17,20 +17,56 @@ import jsmin import mdx_clickhouse +def handle_iframe(iframe, soup): + if not iframe.attrs['src'].startswith('https://www.youtube.com/'): + raise RuntimeError('iframes are allowed only for YouTube') + wrapper = soup.new_tag('div') + wrapper.attrs['class'] = ['embed-responsive', 'embed-responsive-16by9'] + iframe.insert_before(wrapper) + iframe.extract() + wrapper.insert(0, iframe) + if 'width' in iframe.attrs: + del iframe.attrs['width'] + if 'height' in iframe.attrs: + del iframe.attrs['height'] + iframe.attrs['allow'] = 'accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture' + iframe.attrs['class'] = 'embed-responsive-item' + iframe.attrs['frameborder'] = '0' + iframe.attrs['allowfullscreen'] = '1' + + def adjust_markdown_html(content): soup = bs4.BeautifulSoup( content, features='html.parser' ) + for a in soup.find_all('a'): a_class = a.attrs.get('class') if a_class and 'headerlink' in a_class: a.string = '\xa0' + + for iframe in soup.find_all('iframe'): + handle_iframe(iframe, soup) + + for img in soup.find_all('img'): + if img.attrs.get('alt') == 'iframe': + img.name = 'iframe' + img.string = '' + handle_iframe(img, soup) + continue + img_class = img.attrs.get('class') + if img_class: + img.attrs['class'] = img_class + ['img-fluid'] + else: + img.attrs['class'] = 'img-fluid' + for details in soup.find_all('details'): for summary in details.find_all('summary'): if summary.parent != details: 
summary.extract() details.insert(0, summary) + for div in soup.find_all('div'): div_class = div.attrs.get('class') is_admonition = div_class and 'admonition' in div.attrs.get('class') @@ -41,10 +77,12 @@ def adjust_markdown_html(content): a.attrs['class'] = a_class + ['alert-link'] else: a.attrs['class'] = 'alert-link' + for p in div.find_all('p'): p_class = p.attrs.get('class') if is_admonition and p_class and ('admonition-title' in p_class): p.attrs['class'] = p_class + ['alert-heading', 'display-6', 'mb-2'] + if is_admonition: div.attrs['role'] = 'alert' if ('info' in div_class) or ('note' in div_class): @@ -136,6 +174,7 @@ def get_css_in(args): f"'{args.website_dir}/css/bootstrap.css'", f"'{args.website_dir}/css/docsearch.css'", f"'{args.website_dir}/css/base.css'", + f"'{args.website_dir}/css/blog.css'", f"'{args.website_dir}/css/docs.css'", f"'{args.website_dir}/css/highlight.css'" ] diff --git a/docs/tr/introduction/adopters.md b/docs/tr/introduction/adopters.md index 444902e0b96..1da65ebb903 100644 --- a/docs/tr/introduction/adopters.md +++ b/docs/tr/introduction/adopters.md @@ -37,7 +37,7 @@ toc_title: Benimseyenler | Exness | Ticaret | Metrikler, Günlük Kaydı | — | — | [Rusça konuşun, Mayıs 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | Geniee | Reklam Ağı | Ana ürün | — | — | [Japonca Blog yazısı, Temmuz 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Akışı | Analiz | — | — | [Çince slaytlar, Ekim 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| Idealista | Emlak | Analiz | — | — | [İngilizce Blog yazısı, Nisan 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Idealista | Emlak | Analiz | — | — | [İngilizce Blog yazısı, Nisan 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Ağlar | Analiz | — | — | [İngilizce slaytlar, Ekim 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | Innogames | Oyun | Metrikler, Günlük Kaydı | — | — | [Rusça slaytlar, Eylül 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | Integros | Video hizmetleri platformu | Analiz | — | — | [Rusça slaytlar, Mayıs 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/tr/sql-reference/data-types/domains/ipv4.md b/docs/tr/sql-reference/data-types/domains/ipv4.md index 22ca6e7240c..4caf031c0c3 100644 --- a/docs/tr/sql-reference/data-types/domains/ipv4.md +++ b/docs/tr/sql-reference/data-types/domains/ipv4.md @@ -33,7 +33,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; `IPv4` etki alanı IPv4 dizeleri olarak özel giriş biçimini destekler: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/tr/sql-reference/data-types/domains/ipv6.md b/docs/tr/sql-reference/data-types/domains/ipv6.md index 642fe397e52..7f721cc07f6 100644 --- a/docs/tr/sql-reference/data-types/domains/ipv6.md +++ b/docs/tr/sql-reference/data-types/domains/ipv6.md @@ -33,7 +33,7 @@ CREATE 
TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; `IPv6` etki alanı IPv6 dizeleri olarak özel girişi destekler: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/tr/sql-reference/functions/array-functions.md b/docs/tr/sql-reference/functions/array-functions.md index 9ecb255ebbe..9638481db52 100644 --- a/docs/tr/sql-reference/functions/array-functions.md +++ b/docs/tr/sql-reference/functions/array-functions.md @@ -702,13 +702,13 @@ arrayDifference(array) **Parametre** -- `array` – [Dizi](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Dizi](https://clickhouse.tech/docs/en/data_types/array/). **Döndürülen değerler** Bitişik öğeler arasındaki farklar dizisini döndürür. -Tür: [Uİnt\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#uint-ranges), [Tamsayı\*](https://clickhouse.yandex/docs/en/data_types/int_uint/#int-ranges), [Yüzdürmek\*](https://clickhouse.yandex/docs/en/data_types/float/). +Tür: [Uİnt\*](https://clickhouse.tech/docs/en/data_types/int_uint/#uint-ranges), [Tamsayı\*](https://clickhouse.tech/docs/en/data_types/int_uint/#int-ranges), [Yüzdürmek\*](https://clickhouse.tech/docs/en/data_types/float/). **Örnek** @@ -754,7 +754,7 @@ arrayDistinct(array) **Parametre** -- `array` – [Dizi](https://clickhouse.yandex/docs/en/data_types/array/). +- `array` – [Dizi](https://clickhouse.tech/docs/en/data_types/array/). **Döndürülen değerler** diff --git a/docs/tr/whats-new/changelog/2017.md b/docs/tr/whats-new/changelog/2017.md index 98643fe449a..1011ebadb84 100644 --- a/docs/tr/whats-new/changelog/2017.md +++ b/docs/tr/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ Bu sürüm önceki sürüm 1.1.54310 için hata düzeltmeleri içerir: #### Yenilik: {#new-features} - Tablo motorları MergeTree ailesi için özel bölümleme anahtarı. -- [Kafka](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) masa motoru. +- [Kafka](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) masa motoru. - Yükleme için destek eklendi [CatBoost](https://catboost.yandex/) modelleri ve ClickHouse saklanan verilere uygulayarak. - UTC olmayan tamsayı uzaklıklar ile saat dilimleri için destek eklendi. - Zaman aralıklarıyla aritmetik işlemler için destek eklendi. 
diff --git a/docs/zh/introduction/adopters.md b/docs/zh/introduction/adopters.md index 895ec961751..38b9ca690e3 100644 --- a/docs/zh/introduction/adopters.md +++ b/docs/zh/introduction/adopters.md @@ -35,7 +35,7 @@ toc_title: "\u91C7\u7528\u8005" | [Exness](https://www.exness.com) | 交易 | 指标,日志记录 | — | — | [俄语交谈,2019年5月](https://youtu.be/_rpU-TvSfZ8?t=3215) | | [精灵](https://geniee.co.jp) | 广告网络 | 主要产品 | — | — | [日文博客,2017年7月](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | [虎牙](https://www.huya.com/) | 视频流 | 分析 | — | — | [中文幻灯片,2018年10月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| [Idealista](https://www.idealista.com) | 房地产 | 分析 | — | — | [英文博客文章,四月2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| [Idealista](https://www.idealista.com) | 房地产 | 分析 | — | — | [英文博客文章,四月2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | [Infovista](https://www.infovista.com/) | 网络 | 分析 | — | — | [英文幻灯片,十月2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | [InnoGames](https://www.innogames.com) | 游戏 | 指标,日志记录 | — | — | [俄文幻灯片,2019年9月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | [Integros](https://integros.com) | 视频服务平台 | 分析 | — | — | [俄文幻灯片,2019年5月](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | diff --git a/docs/zh/sql-reference/data-types/domains/ipv4.md b/docs/zh/sql-reference/data-types/domains/ipv4.md index 65c066fb487..9ce12025405 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv4.md +++ b/docs/zh/sql-reference/data-types/domains/ipv4.md @@ -24,7 +24,7 @@ CREATE TABLE hits (url String, from IPv4) ENGINE = MergeTree() ORDER BY from; 在写入与查询时,`IPv4`类型能够识别可读性更加友好的输入输出格式: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.yandex/docs/en/', '116.106.34.242'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '116.253.40.133')('https://clickhouse.tech', '183.247.232.58')('https://clickhouse.tech/docs/en/', '116.106.34.242'); SELECT * FROM hits; ``` diff --git a/docs/zh/sql-reference/data-types/domains/ipv6.md b/docs/zh/sql-reference/data-types/domains/ipv6.md index bc0f95932aa..5b1afc2cd39 100644 --- a/docs/zh/sql-reference/data-types/domains/ipv6.md +++ b/docs/zh/sql-reference/data-types/domains/ipv6.md @@ -24,7 +24,7 @@ CREATE TABLE hits (url String, from IPv6) ENGINE = MergeTree() ORDER BY from; 在写入与查询时,`IPv6`类型能够识别可读性更加友好的输入输出格式: ``` sql -INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.yandex/docs/en/', '2a02:e980:1e::1'); +INSERT INTO hits (url, from) VALUES ('https://wikipedia.org', '2a02:aa08:e000:3100::2')('https://clickhouse.tech', '2001:44c8:129:2632:33:0:252:2')('https://clickhouse.tech/docs/en/', '2a02:e980:1e::1'); SELECT * FROM hits; ``` diff --git a/docs/zh/whats-new/changelog/2017.md b/docs/zh/whats-new/changelog/2017.md index de62730b093..35d839c50c9 100644 --- a/docs/zh/whats-new/changelog/2017.md +++ b/docs/zh/whats-new/changelog/2017.md @@ -26,7 +26,7 @@ toc_title: '2017' #### 新功能: {#new-features} - MergeTree表引擎系列的自定义分区键。 -- [卡夫卡](https://clickhouse.yandex/docs/en/operations/table_engines/kafka/) 表引擎。 +- 
[卡夫卡](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) 表引擎。
 - 增加了对加载的支持 [CatBoost](https://catboost.yandex/) 模型并将其应用到ClickHouse中存储的数据。
 - 增加了对UTC非整数偏移的时区的支持。
 - 增加了对具有时间间隔的算术运算的支持。
diff --git a/website/blog/README.md b/website/blog/README.md
new file mode 100644
index 00000000000..89d7ddfad57
--- /dev/null
+++ b/website/blog/README.md
@@ -0,0 +1,47 @@
+## Introduction
+
+First of all, **relevant guest posts are welcome**! Especially posts with success stories or demonstrations of ClickHouse ecosystem projects.
+
+The ClickHouse blog is published alongside the documentation and the rest of the official website, so the posts reside in this same repository in [Markdown](https://github.com/ClickHouse/ClickHouse/tree/master/docs#markdown-cheatsheet) format.
+
+## How To Add a New Post?
+
+Basically, you need to create a new Markdown file at the following location inside the repository, `/website/blog/<lang>/<year>/<post-slug>.md`, and then [open a pull-request](https://github.com/ClickHouse/ClickHouse/compare) with it.
+
+Each post needs to have a `yaml` meta-header with the following fields:
+
+- Required:
+    - `title`, the main name of the article. In Title Case for English.
+    - `date`, the publication date in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) format, like `YYYY-MM-DD` (can be in the future to postpone publication).
+- Optional:
+    - `image`, URL of the main post image.
+    - `tags`, a list of post tags.
+
+After the header comes the post content in normal Markdown (with some optional extensions).
+
+The recommended place to store images is the dedicated GitHub repo behind `https://blog-images.clickhouse.tech`. Its folder structure matches this folder with blog posts:
+
+- `<lang>/<year>/<post-slug>/main.jpg` for the main post image (linked in the `image` header field).
+- `<lang>/<year>/<post-slug>/whatever.jpg` for other images (`png` or `gif` are acceptable as well, if necessary).
+
+### Example
+```markdown
+---
+title: 'ClickHouse Meetup in Beijing on June 8, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/main.jpg'
+date: '2019-06-13'
+tags: ['meetup','Beijing','China','events']
+---
+
+24th ClickHouse Meetup globally and 3rd one in China took place in Beijing on Dragon Boat Festival weekend, which appeared to...
+
+![ClickHouse branded Beijing duck](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg)
+```
+
+## How To Add a New Blog Language?
+
+If you want to write a guest post, you are welcome to use your native language or to make multiple posts in multiple languages.
+
+Unlike documentation, blog languages are independent, i.e. they have partially overlapping sets of posts, and that's fine. Most posts are written in only one language because they are not relevant to the audiences of other languages.
+
+At the moment it's not so straightforward to set up a new language for the blog and it won't be documented for now, but you can just create a language directory with the first post as described above and we'll configure the website infrastructure to include it during/after merging the pull-request.
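+
+To make the header contract above concrete, here is a minimal sketch of a validator for such a meta-header. The `check_post_header` helper is hypothetical (it is not part of the build tooling), and it assumes the PyYAML package is available:
+
+```python
+import datetime
+
+import yaml  # PyYAML, assumed available; the docs build already parses YAML headers
+
+
+def check_post_header(md_text):
+    """Validate the YAML meta-header of a blog post (hypothetical helper)."""
+    parts = md_text.split('---', 2)
+    if len(parts) < 3 or parts[0].strip():
+        raise ValueError('post must start with a ---...--- YAML header')
+    meta = yaml.safe_load(parts[1])
+
+    for field in ('title', 'date'):
+        if field not in meta:
+            raise ValueError(f'required field {field} is missing')
+
+    # 'date' must be ISO 8601 (YYYY-MM-DD); a future date postpones publication.
+    post_date = datetime.date.fromisoformat(str(meta['date']))
+    published = post_date <= datetime.date.today()
+
+    if 'tags' in meta and not isinstance(meta['tags'], list):
+        raise ValueError('tags must be a list')
+    return meta, published
+```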
diff --git a/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md b/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md
new file mode 100644
index 00000000000..44739a24191
--- /dev/null
+++ b/website/blog/en/2016/evolution-of-data-structures-in-yandex-metrica.md
@@ -0,0 +1,108 @@
+---
+title: 'Evolution of Data Structures in Yandex.Metrica'
+image: 'https://blog-images.clickhouse.tech/en/2016/evolution-of-data-structures-in-yandex-metrica/main.jpg'
+date: '2016-12-13'
+tags: ['Yandex.Metrica', 'data structures', 'LSM tree', 'columnar storage']
+---
+
+[Yandex.Metrica](https://metrica.yandex.com/) takes in a stream of data representing events that took place on sites or on apps. Our task is to keep this data and present it in an analyzable form. The real challenge lies in trying to determine what form the processed results should be saved in so that they are easy to work with. During the development process, we had to completely change our approach to data storage organization several times. We started with MyISAM tables, then used LSM-trees, and eventually came up with a column-oriented database, ClickHouse.
+
+At its founding, Metrica was designed as an offshoot of Yandex.Direct, the search ads service. MySQL tables with the MyISAM engine were used in Direct to store statistics, and it was natural to use the same approach in Metrica. Initially Yandex.Metrica for websites had more than 40 “fixed” report types (for example, the visitor geography report), several in-page analytics tools (like click maps), Webvisor (a tool to study individual user actions in great detail), as well as a separate report constructor. But over time, to keep up with business goals, the system had to become more flexible and provide more customization opportunities for customers. Nowadays, instead of using fixed reports, Metrica allows you to freely add new dimensions (for example, in a keyword report you can break data down further by landing page), segment and compare (between, let's say, traffic sources for all visitors vs. visitors from Moscow), change your set of metrics, etc. These features demanded a completely different approach to data storage than what we used with MyISAM; below we discuss this transition from a technical perspective.
+
+## MyISAM
+
+Most SELECT queries that fetch data for reports are made with the conditions WHERE CounterID = <some id> AND Date BETWEEN min_date AND max_date. Sometimes there is also a filter by region, so it made sense to use a composite primary key so that such a query turns into a primary key range read. So the table schema for Metrica looks like this: CounterID, Date, RegionID -> Visits, SumVisitTime, etc. Now let's take a look at what happens when a query comes in.
+
+A MyISAM table is comprised of a data file and an index file. If nothing was deleted from the table and the rows did not change in length during updating, the data file will consist of serialized rows arranged in succession in the order that they were added. The index (including the primary key) is a B-tree, where the leaves contain offsets into the data file. When we read index range data, a lot of offsets into the data file are taken from the index. Then reads are issued for this set of offsets in the data file.
+
+Let's look at the real-life situation when the index is in RAM (key cache in MySQL or system page cache), but the table data is not cached. Let's assume that we are using HDDs. The time it takes to read data depends on the volume of data that needs to be read and on how many seek operations need to be performed. The number of seeks is determined by the locality of data on the disk.
+
+Data locality illustrated:
+![Data locality](https://blog-images.clickhouse.tech/en/2016/evolution-of-data-structures-in-yandex-metrica/1.jpg)
+
+Metrica events are received in almost the same order in which they actually took place. In this incoming stream, data from different counters is scattered completely at random. In other words, incoming data is local by time, but not local by CounterID. When writing to a MyISAM table, data from different counters is also placed quite randomly. This means that to read the data for a report, you will need to perform about as many random reads as there are rows that we need from the table.
+
+A typical 7200 rpm hard disk can perform 100 to 200 random reads per second. A RAID, if used properly, can handle the same amount multiplied by the number of disks in it. One five-year-old SSD can perform 30,000 random reads per second, but we cannot afford to keep our data on SSD. So in this case, if we needed to read 10,000 rows for a report, it would take more than 10 seconds, which would be totally unacceptable.
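+
+As a back-of-the-envelope check of these numbers (the disk figures are the ones quoted above; the RAID size is an assumed example, not from the original post):
+
+```python
+# Rough latency estimate for a report read on MyISAM over spinning disks:
+# every needed row costs roughly one random read (one seek).
+rows_needed = 10_000        # rows to read for one report
+hdd_reads_per_sec = 150     # a 7200 rpm disk does 100-200 random reads/sec
+raid_disks = 6              # assumed RAID size; random reads scale linearly
+
+single_disk_seconds = rows_needed / hdd_reads_per_sec
+raid_seconds = single_disk_seconds / raid_disks
+
+print(f'single HDD: ~{single_disk_seconds:.0f} s')        # ~67 s
+print(f'{raid_disks}-disk RAID: ~{raid_seconds:.0f} s')   # ~11 s, still unacceptable
+```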
+
+InnoDB is much better suited to reading primary key ranges since it uses [a clustered primary key](https://en.wikipedia.org/wiki/Database_index#Clustered) (i.e., the data is stored in an orderly manner on the primary key). But InnoDB was impossible to use due to its slow write speed. If this reminds you of [TokuDB](https://www.percona.com/software/mysql-database/percona-tokudb), then read on.
+
+It took a lot of tricks like periodic table sorting, complicated manual partitioning schemes, and keeping data in generations to keep Yandex.Metrica working on MyISAM. This approach also had a lot of operational drawbacks, for example slow replication, consistency issues, unreliable recovery, etc. Nevertheless, as of 2011, we stored more than 580 billion rows in MyISAM tables.
+
+## Metrage and OLAPServer
+
+Metrage is an implementation of an [LSM Tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree), a fairly common data structure that works well for workloads with an intensive stream of writes and mostly primary key reads, like Yandex.Metrica has. LevelDB did not exist in 2010, and TokuDB was proprietary at the time.
+
+![LSM Tree](https://blog-images.clickhouse.tech/en/2016/evolution-of-data-structures-in-yandex-metrica/2.jpg)
+
+In Metrage, arbitrary data structures (fixed at compile time) can be used as “rows”. Every row is a key-value pair. A key is a structure with comparison operations for equality and inequality. The value is an arbitrary structure with operations to update (to add something) and to merge (to aggregate or combine with another value). In short, it's a CRDT. Data is located pretty locally on the hard disk, so primary key range reads are quick. Blocks of data are effectively compressed even with fast algorithms because of the ordering (in 2010 we used QuickLZ, since 2011 LZ4). Storing data in a systematic manner enables us to use a sparse index.
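+
+As an illustration of such a row (a toy model in the spirit of Metrage's update/merge contract, not its actual C++ structures):
+
+```python
+from dataclasses import dataclass
+
+
+@dataclass
+class VisitsValue:
+    """Toy mergeable aggregate: it can absorb one new event (update) and
+    combine with another partial aggregate for the same key (merge)."""
+    visits: int = 0
+    sum_visit_time: int = 0
+
+    def update(self, visit_time):
+        # Add a single observed visit to this aggregate.
+        self.visits += 1
+        self.sum_visit_time += visit_time
+
+    def merge(self, other):
+        # Combine two partial aggregates; merge order does not matter.
+        self.visits += other.visits
+        self.sum_visit_time += other.sum_visit_time
+
+
+# Keys are comparable tuples like (CounterID, Date, RegionID); the LSM tree
+# keeps sorted runs of (key, value) pairs and merges values on compaction.
+a, b = VisitsValue(), VisitsValue()
+a.update(120)
+b.update(30)
+a.merge(b)
+assert (a.visits, a.sum_visit_time) == (2, 150)
+```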
+
+Since reading is not performed very often (even though a lot of rows are read when it does happen), the increase in latency due to having many chunks and decompressing the data blocks does not matter. Reading extra rows because of the index sparsity also does not make a difference.
+
+After transferring reports from MyISAM to Metrage, we immediately saw an increase in Metrica interface speed. Whereas earlier 90% of page-title reports loaded within 26 seconds, with Metrage they loaded within 0.8 seconds (total time, including the time to process all database queries and follow-up data transformations). The time it takes Metrage itself to process queries (for all reports) is as follows by percentile: average = 6 ms, 90th percentile = 31 ms, 99th percentile = 334 ms.
+
+We've been using Metrage for five years and it has proved to be a reliable solution. As of 2015 we stored 3.37 trillion rows in Metrage and used 39 * 2 servers for this.
+
+Its advantages were simplicity and effectiveness, which made it a far better choice for storing data than MyISAM. Though the system still had one huge drawback: it really only works effectively with fixed reports. Metrage aggregates data and saves aggregated data. But in order to do this, you have to list all the ways in which you want to aggregate data ahead of time. So if we do this in 40 different ways, it means that Metrica will contain 40 types of reports and no more.
+
+To mitigate this, for a while we had to keep a separate storage for the custom report wizard, called OLAPServer. It is a simple and very limited implementation of a column-oriented database. It supports only one table, set at compile time: a session table. Unlike Metrage, data is not updated in real time, but rather a few times per day. The only data type supported is fixed-length numbers of 1-8 bytes, so it wasn't suitable for reports with other kinds of data, for example URLs.
+
+## ClickHouse
+
+Using OLAPServer, we developed an understanding of how well column-oriented DBMS's handle ad-hoc analytics tasks with non-aggregated data. If you can retrieve any report from non-aggregated data, then it raises the question of whether data even needs to be aggregated in advance, as we did with Metrage.
+
+![](https://blog-images.clickhouse.tech/en/2016/evolution-of-data-structures-in-yandex-metrica/3.gif)
+
+On the one hand, pre-aggregating data can reduce the volume of data that is used at the moment when the report page is loading. On the other hand, though, aggregated data doesn't solve everything. Here are the reasons why:
+
+- you need to have a list of reports that your users need ahead of time; in other words, the user can't put together a custom report
+- when aggregating a lot of keys, the amount of data is not reduced and aggregation is useless; when there are a lot of reports, there are too many aggregation options (combinatorial explosion)
+- when aggregating high-cardinality keys (for example, URLs), the amount of data does not decrease by much (by less than half); due to this, the amount of data may not be reduced at all, but may actually grow during aggregation
+- users won't view all the reports that we calculate for them (in other words, a lot of the calculations prove useless)
+- it's difficult to maintain logical consistency when storing a large number of different aggregations
+
+As you can see, if nothing is aggregated and we work with non-aggregated data, then it's possible that the volume of computations will even be reduced. But working only with non-aggregated data imposes very high demands on the effectiveness of the system that executes the queries.
+
+So if we aggregate the data in advance, then we should do it constantly (in real time), but asynchronously with respect to user queries. We should really just aggregate the data in real time; a large portion of the report being received should consist of prepared data.
+
+If data is not aggregated in advance, all the work has to be done at the moment the user requests it (i.e. while they wait for the report page to load). This means that many billions of rows need to be processed in response to the user's query; the quicker this can be done, the better.
+
+For this you need a good column-oriented DBMS. The market didn't have any column-oriented DBMS's that would handle internet-analytics tasks on the scale of Runet (the Russian internet) well enough and would not be prohibitively expensive to license.
+
+Recently, as an alternative to commercial column-oriented DBMS's, solutions for efficient ad-hoc analytics of data in distributed computing systems began appearing: Cloudera Impala, Spark SQL, Presto, and Apache Drill. Although such systems can work effectively with queries for internal analytical tasks, it is difficult to imagine them as the backend for the web interface of an analytical system accessible to external users.
+
+At Yandex, we developed and later open-sourced our own column-oriented DBMS: ClickHouse. Let's review the basic requirements that we had in mind before we proceeded to development.
+
+**Ability to work with large datasets.** In the current Yandex.Metrica for websites, ClickHouse is used to store all data for reports. As of November 2016, the database is comprised of 18.3 trillion rows. It's made up of non-aggregated data that is used to retrieve reports in real time. Every row in the largest table contains over 200 columns.
+
+**The system should scale linearly.** ClickHouse allows you to increase the size of a cluster by adding new servers as needed. For example, Yandex.Metrica's main cluster has grown from 60 to 426 servers in three years. For fault tolerance, our servers are spread across different data centers. ClickHouse can use all hardware resources to process a single query. This way more than 2 terabytes can be processed per second.
+
+**High efficiency.** We especially pride ourselves on our database's high performance. Based on the results of internal tests, ClickHouse processes queries faster than any other system we could acquire. For example, ClickHouse works an average of 2.8-3.4 times faster than Vertica. With ClickHouse there is no one silver bullet that makes the system work so quickly.
+
+**Functionality should be sufficient for Web analytics tools.** The database supports an SQL dialect, subqueries and JOINs (local and distributed). There are numerous SQL extensions: functions for web analytics, arrays and nested data structures, higher-order functions, aggregate functions for approximate calculations using sketching, etc. By working with ClickHouse, you get the convenience of a relational DBMS.
+
+ClickHouse was initially developed by the Yandex.Metrica team. Furthermore, we were able to make the system flexible and extensible enough that it can be successfully used for different tasks. Although the database can run on large clusters, it can be installed on one server or even on a virtual machine. There are now more than a dozen different ClickHouse applications within our company.
+
+ClickHouse is well equipped for creating all kinds of analytical tools. Just consider: if the system can handle the challenges of Yandex.Metrica, you can be sure that ClickHouse will cope with other tasks with a lot of performance headroom to spare.
+
+ClickHouse works well as a time series database; at Yandex it is commonly used as the backend for Graphite instead of Ceres/Whisper. This lets us work with more than a trillion metrics on a single server.
+
+ClickHouse is used by analysts for internal tasks. Based on our experience at Yandex, ClickHouse performs about three orders of magnitude faster than traditional methods of data processing (scripts on MapReduce). But this is not a simple quantitative difference. The fact of the matter is that with such a high calculation speed, you can afford to employ radically different methods of problem solving.
+
+If an analyst has to make a report and they are competent at their job, they won't just go ahead and construct one report. Rather, they will start by retrieving dozens of other reports to better understand the nature of the data and test various hypotheses. It is often useful to look at data from different angles in order to posit and check new hypotheses, even if you don't have a clear goal.
+
+This is only possible if the data analysis speed allows you to conduct online research. The faster queries are executed, the more hypotheses you can test. Working with ClickHouse, one even gets the sense that they are able to think faster.
+
+In traditional systems, data is like a dead weight, figuratively speaking. You can manipulate it, but it takes a lot of time and is inconvenient. If your data is in ClickHouse though, it is much more malleable: you can study it in different cross-sections and drill down to the individual rows of data.
+
+## Conclusions
+
+Yandex.Metrica has become the second largest web-analytics system in the world. The volume of data that Metrica takes in grew from 200 million events a day in 2009 to more than 25 billion in 2016. In order to provide users with a wide variety of options while still keeping up with the increasing workload, we've had to constantly modify our approach to data storage.
+
+Effective hardware utilization is very important to us. In our experience, when you have a large volume of data, it's better not to worry as much about how well the system scales and instead focus on how effectively each unit of resource is used: each processor core, disk and SSD, RAM, and network. After all, if your system is already using hundreds of servers, and you have to work ten times more efficiently, it is unlikely that you can just proceed to install thousands of servers, no matter how scalable your system is.
+
+To maximize efficiency, it's important to customize your solution to meet the needs of a specific type of workload. There is no data structure that copes well with completely different scenarios. For example, it's clear that key-value databases don't work for analytical queries. The greater the load on the system, the narrower the specialization required. One should not be afraid to use completely different data structures for different tasks.
+
+We were able to set things up so that Yandex.Metrica's hardware was relatively inexpensive. This has allowed us to offer the service free of charge even to very large sites and mobile apps, even larger than Yandex's own, while competitors typically start asking for a paid subscription plan.
+
+
diff --git a/website/blog/en/2016/how-to-update-data-in-clickhouse.md b/website/blog/en/2016/how-to-update-data-in-clickhouse.md
new file mode 100644
index 00000000000..22c2fa3ccc1
--- /dev/null
+++ b/website/blog/en/2016/how-to-update-data-in-clickhouse.md
@@ -0,0 +1,169 @@
+---
+title: 'How to Update Data in ClickHouse'
+date: '2016-11-20'
+image: 'https://blog-images.clickhouse.tech/en/2016/how-to-update-data-in-clickhouse/main.jpg'
+tags: ['features', 'update', 'delete', 'CollapsingMergeTree', 'partitions']
+---
+
+There are no UPDATE or DELETE commands in ClickHouse at the moment. And that's not because we have some religious beliefs. ClickHouse is a performance-oriented system, and data modifications are hard to store and process optimally in terms of performance.
+
+But sometimes we have to modify data. And sometimes data should be updated in real time. Don't worry, we have these cases covered.
+
+## Work with Partitions
+
+Data in the MergeTree engine family is partitioned by the partition_key engine parameter. MergeTree splits all the data by this partition key. The partition size is one month.
+
+That's very useful in many ways. Especially when we're talking about data modification.
+
+## Yandex.Metrica "hits" Table
+
+Let's look at an example on the Yandex.Metrica server mtlog02-01-1, which stores some Yandex.Metrica data for the year 2013. The table we are looking at contains user events we call “hits”. This is the engine description for the hits table:
+
+``` text
+ENGINE = ReplicatedMergeTree(
+    '/clickhouse/tables/{layer}-{shard}/hits',           -- zookeeper path
+    '{replica}',                                         -- settings in config describing replicas
+    EventDate,                                           -- partition key column
+    intHash32(UserID),                                   -- sampling key
+    (CounterID, EventDate, intHash32(UserID), WatchID),  -- index
+    8192                                                 -- index granularity
+)
+```
+
+You can see that the partition key column is EventDate. That means that all the data will be split by month using this column.
+
+With this SQL we can get the list of partitions and some stats about the current partitions:
+
+```sql
+SELECT
+    partition,
+    count() as number_of_parts,
+    formatReadableSize(sum(bytes)) as sum_size
+FROM system.parts
+WHERE
+    active
+    AND database = 'merge'
+    AND table = 'hits'
+GROUP BY partition
+ORDER BY partition;
+```
+```text
+┌─partition─┬─number_of_parts─┬─sum_size───┐
+│ 201306    │               1 │ 191.34 GiB │
+│ 201307    │               4 │ 537.86 GiB │
+│ 201308    │               6 │ 608.77 GiB │
+│ 201309    │               5 │ 658.68 GiB │
+│ 201310    │               5 │ 768.74 GiB │
+│ 201311    │               5 │ 654.61 GiB │
+└───────────┴─────────────────┴────────────┘
+```
+There are 6 partitions with a few parts in each of them. Each partition is around 600 GB of data. A partition is strictly one piece of data for a partition key value; here we can see that it is a month. A part is one piece of data inside a partition. Basically it's one node of an LSM-tree-like structure, so there are not so many of them, especially for old data. If there are too many of them, they merge and form bigger ones.
+
+## Partition Operations
+
+There is a nice set of operations to work with partitions:
+
+- `DETACH PARTITION` - Move a partition to the 'detached' directory and forget it.
+- `DROP PARTITION` - Delete a partition.
+- `ATTACH PART|PARTITION` - Add a new part or partition from the 'detached' directory to the table.
+- `FREEZE PARTITION` - Create a backup of a partition.
+- `FETCH PARTITION` - Download a partition from another server.
+
+We can do any data management operations at the partition level: move, copy and delete. Also, the special DETACH and ATTACH operations were created to simplify data manipulation. DETACH detaches a partition from a table, moving all data to the 'detached' directory. The data is still there and you can copy it anywhere, but detached data is not visible at the request level. ATTACH is the opposite: it attaches data from the 'detached' directory so it becomes visible.
+
+These attach-detach commands work almost instantly, so you can make your updates almost transparently to database clients.
+
+Here is the plan for how to update data using partitions (see the sketch right after this list):
+
+- Create a modified partition with updated data in another table
+- Copy the data for this partition to the 'detached' directory
+- `DROP PARTITION` in the main table
+- `ATTACH PARTITION` in the main table
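+
+A rough sketch of this flow, assuming it is driven from Python with the third-party `clickhouse-driver` package (the `hits_new` staging table and the partition name are illustrative, not from the original post):
+
+```python
+from clickhouse_driver import Client  # assumed third-party package
+
+client = Client('localhost')
+partition = '201306'  # illustrative partition from the table above
+
+# 1. Build a corrected copy of the partition in a staging table;
+#    hits_new is hypothetical and must share the structure of hits.
+client.execute(f'ALTER TABLE hits_new DROP PARTITION {partition}')  # start clean
+client.execute(
+    f'INSERT INTO hits_new SELECT * FROM hits '
+    f'WHERE toYYYYMM(EventDate) = {partition}'
+)  # plus whatever per-row fixes the update actually requires
+
+# 2. Copy the staged parts into the main table's 'detached' directory
+#    on disk (a filesystem-level step, not shown here).
+
+# 3-4. Swap: drop the stale partition, attach the corrected one.
+client.execute(f'ALTER TABLE hits DROP PARTITION {partition}')
+client.execute(f'ALTER TABLE hits ATTACH PARTITION {partition}')
+```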
+
+Partition swaps are especially useful for huge data updates with low frequency. But they're not so handy when you need to update a lot of data in real time.
+
+## Update Data on the Fly
+
+In Yandex.Metrica we have a user sessions table. Each row is one session on a website: some pages checked, some time spent, some banners clicked. This data is updated every second: a user on a website views more pages, clicks more buttons, and does other things. The site owner can see these actions in the Yandex.Metrica interface in real time.
+
+So how do we do that?
+
+We update data not by updating that data, but by adding more data about what has changed. This is usually called the CRDT approach, and there is an article on Wikipedia about that.
+
+It was created to solve the conflict problem in transactions, but this concept also allows updating data. We use our own data model with this approach. We call it Incremental Log.
+
+## Incremental Log
+
+Let's look at an example.
+
+Here we have one session's information with the user identifier UserID, the number of pages viewed PageViews, and the time spent on the site in seconds Duration. There is also the Sign field, which we describe later.
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
+│ 4324182021466249494 │         5 │      146 │    1 │
+└─────────────────────┴───────────┴──────────┴──────┘
+```
+And let's say we calculate some metrics over this data.
+
+- `count()` - number of sessions
+- `sum(PageViews)` - total number of pages all users checked
+- `avg(Duration)` - average session duration, how long a user usually spent on the website
+
+Let's say now we have an update on that: the user checked one more page, so we should change PageViews from 5 to 6 and Duration from 146 to 185.
+
+We insert two more rows:
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
+│ 4324182021466249494 │         5 │      146 │   -1 │
+│ 4324182021466249494 │         6 │      185 │    1 │
+└─────────────────────┴───────────┴──────────┴──────┘
+```
+
+The first one is a delete row. It's exactly the same row as the one we already have there, but with Sign set to -1. The second one is an updated row with all data set to the new values.
+
+After that we have three rows of data:
+``` text
+┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
+│ 4324182021466249494 │         5 │      146 │    1 │
+│ 4324182021466249494 │         5 │      146 │   -1 │
+│ 4324182021466249494 │         6 │      185 │    1 │
+└─────────────────────┴───────────┴──────────┴──────┘
+```
+
+The most important part is the modified metrics calculation. We should update our queries like this:
+
+``` text
+ -- number of sessions
+count() -> sum(Sign)
+ -- total number of pages all users checked
+sum(PageViews) -> sum(Sign * PageViews)
+ -- average session duration, how long a user usually spent on the website
+avg(Duration) -> sum(Sign * Duration) / sum(Sign)
+```
+
+You can see that it works as expected over this data. The deleted row 'hides' the old row: the same values come with + and - signs inside the aggregation and annihilate each other.
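+
+In plain Python terms, the rewritten aggregates behave like this (a toy model of the Sign arithmetic, not ClickHouse code):
+
+```python
+# Each row of the Incremental Log is (PageViews, Duration, Sign).
+rows = [
+    (5, 146, +1),  # original session state
+    (5, 146, -1),  # 'delete' row cancelling it
+    (6, 185, +1),  # updated session state
+]
+
+sessions = sum(sign for _, _, sign in rows)                     # count() -> sum(Sign)
+page_views = sum(sign * pv for pv, _, sign in rows)             # sum(Sign * PageViews)
+avg_duration = sum(sign * d for _, d, sign in rows) / sessions  # sum(Sign * Duration) / sum(Sign)
+
+assert (sessions, page_views, avg_duration) == (1, 6, 185.0)
+```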
+
+Moreover, it works totally fine with changing keys for grouping. If we want to group data by PageViews, all the data for PageViews = 5 will be 'hidden' by these rows.
+
+There are some limitations with this approach:
+
+- It works only for metrics which can be expressed through these Sign operations. It covers most cases, but it's not possible to calculate min or max values. There is an impact on uniq calculations also. But it's fine at least for Yandex.Metrica cases, and there are a lot of different analytical calculations;
+- You need to somehow remember the old value in the external system doing the updates, so you can insert these 'delete' rows;
+- Some other effects; there is a [great answer](https://groups.google.com/forum/#!msg/clickhouse/VixyOUD-K68/Km8EpkCyAQAJ) on Google Groups.
+
+## CollapsingMergeTree
+
+ClickHouse supports the Incremental Log model in the Collapsing engine family.
+
+If you use the Collapsing family, a 'delete' row and the old 'deleted' row will collapse during the merge process. Merge is a background process that combines data into larger chunks. Here is a great article about merges and LSM-tree structures.
+
+In most cases, 'delete' and 'deleted' rows will be removed within days. What's important here is that you will not have any significant overhead on data size. Using the Sign field in selects is still required.
+
+Also, there is a FINAL modifier available for the Collapsing family. Using FINAL guarantees that the user will see already collapsed data, so using the Sign field isn't required. FINAL usually causes tremendous performance degradation because ClickHouse has to group data by key and delete rows during SELECT execution. But it's useful when you want to check your queries or if you want to see the raw, unaggregated data in its final form.
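+
+As a concrete sketch of such a table and of the two ways to read from it (this uses the modern CollapsingMergeTree syntax, which postdates this 2016 post, the assumed third-party `clickhouse-driver` package, and an illustrative `sessions` table):
+
+```python
+from clickhouse_driver import Client  # assumed third-party package
+
+client = Client('localhost')
+
+# Hypothetical sessions table; Sign is the +1/-1 column described above.
+client.execute('''
+    CREATE TABLE IF NOT EXISTS sessions
+    (
+        UserID    UInt64,
+        PageViews UInt8,
+        Duration  UInt32,
+        Sign      Int8
+    )
+    ENGINE = CollapsingMergeTree(Sign)
+    ORDER BY UserID
+''')
+
+# Updating = inserting a cancel row plus the new state row.
+client.execute(
+    'INSERT INTO sessions VALUES',
+    [(4324182021466249494, 5, 146, -1), (4324182021466249494, 6, 185, 1)],
+)
+
+# Sign-aware aggregation works on not-yet-collapsed data...
+print(client.execute('SELECT sum(Sign), sum(Sign * PageViews) FROM sessions'))
+
+# ...while FINAL collapses at SELECT time (slower, but no Sign arithmetic needed).
+print(client.execute('SELECT * FROM sessions FINAL'))
+```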
+
+## Future Plans
+
+We know that the current feature set is not enough. There are some cases which do not fit these limitations. But we have huge plans, and here are some insights into what we're preparing:
+
+- Partitions by custom key: the current partitioning scheme is bound to months only. We will remove this limitation and it will be possible to create partitions by any key. All partition operations like FETCH PARTITION will be available.
+- UPDATE and DELETE: there are a lot of issues with update and delete support. Performance degradation, consistency guarantees, distributed queries and more. But we believe that if you need to update a few rows of data in your dataset, it should not be painful. It will be done.
+
diff --git a/website/blog/en/2016/yandex-opensources-clickhouse.md b/website/blog/en/2016/yandex-opensources-clickhouse.md
new file mode 100644
index 00000000000..feffec65d79
--- /dev/null
+++ b/website/blog/en/2016/yandex-opensources-clickhouse.md
@@ -0,0 +1,12 @@
+---
+title: 'Yandex Opensources ClickHouse'
+image: 'https://blog-images.clickhouse.tech/en/2016/yandex-opensources-clickhouse/main.jpg'
+date: '2016-06-15'
+tags: ['announcement', 'GitHub', 'license']
+---
+
+Today the [analytical DBMS ClickHouse](https://clickhouse.tech/), initially developed internally at Yandex, became available to everyone. The source code is published on [GitHub](https://github.com/ClickHouse/ClickHouse) under the Apache 2.0 license.
+
+ClickHouse allows interactive analytical query execution on data updated in real time. The system is able to scale to tens of trillions of rows and petabytes of stored data. Using ClickHouse opens up opportunities that were hard to imagine before: you can store the full stream of data and slice and dice it to produce reports without offline aggregation. ClickHouse was initially developed as a backend for [Yandex.Metrica](https://metrika.yandex.com/), the second largest web analytics system in the world.
+
+[Discussion on Hacker News](https://news.ycombinator.com/item?id=11908254).
diff --git a/website/blog/en/2017/clickhouse-at-data-scale-2017.md b/website/blog/en/2017/clickhouse-at-data-scale-2017.md
new file mode 100644
index 00000000000..f05cc9e1e89
--- /dev/null
+++ b/website/blog/en/2017/clickhouse-at-data-scale-2017.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse at Data@Scale 2017'
+image: 'https://blog-images.clickhouse.tech/en/2017/clickhouse-at-data-scale-2017/main.jpg'
+date: '2017-06-15'
+tags: ['conference', 'Seattle', 'USA', 'America', 'events']
+---
+
+![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
+
+[Slides](https://presentations.clickhouse.tech/data_at_scale/)
diff --git a/website/blog/en/2017/clickhouse-at-percona-live-2017.md b/website/blog/en/2017/clickhouse-at-percona-live-2017.md
new file mode 100644
index 00000000000..989dfde932d
--- /dev/null
+++ b/website/blog/en/2017/clickhouse-at-percona-live-2017.md
@@ -0,0 +1,22 @@
+---
+title: 'ClickHouse at Percona Live 2017'
+image: 'https://blog-images.clickhouse.tech/en/2017/clickhouse-at-percona-live-2017/main.jpg'
+date: '2017-04-28'
+tags: ['meetup', 'Santa Clara', 'Bay Area', 'California', 'USA', 'America', 'events', 'Graphouse']
+---
+
+For those who haven't heard, [Percona Live](https://percona.com/live/17) is probably one of the largest international conferences about open-source database management systems, with 12 talk tracks running in parallel. It's been around for many years; initially it was focused mainly on MySQL (and had that in its name), but nowadays it is more generic and other products of this category get lots of attention too. Needless to say, for a relatively new player on the market like [ClickHouse](https://clickhouse.tech/), it's been a great opportunity to spread the word about the technology and how exactly it allows us to perform analytics on petabytes of data in real time.
+
+![Percona Live](https://blog-images.clickhouse.tech/en/2017/clickhouse-at-percona-live-2017/1.jpg)
+
+Yandex team members had three chances to talk about ClickHouse from the stage:
+
+1. A large portion of the [Opening Keynote](https://www.percona.com/blog/2017/04/25/percona-live-2017-day-one-keynotes/) was dedicated to different time-series databases. ClickHouse is not really a specialized time-series database, but it still outperforms many alternatives if used as such. So Dmitry Andreev, Head of Yandex.Market Infrastructure Development Group, gave a short talk about how ClickHouse can be used as a storage backend for Graphite using [Graphouse](https://github.com/clickhouse/graphouse), an open-source adapter that implements this. This setup is used in Yandex.Market and a number of other Yandex services and has proven to be very reliable and effective. The chain of short talks was followed by a live panel about time series in general with the same speakers, including Dmitry. Unfortunately, as we figured out later, many keynote attendees perceived ClickHouse as just yet another time-series database and missed the explicitly stated point that it opens up way more opportunities to analyze data.
+2. Victor Tarnavsky, Head of Yandex.Metrica, and Alexey Milovidov, Head of ClickHouse Development Group, gave a full-length talk with a ClickHouse overview, its capabilities, features and use cases. Their talk has not been recorded, but you can check out [the slides](https://presentations.clickhouse.tech/percona2017/ClickHouse%20Percona%20Santa%20Clara%202.0.pdf).
+3. Later on, Dmitry Andreev went deeper into the same topic he covered in the opening keynote. He spoke in more detail about how Graphouse works, showed the benchmark results and the future plans of the project. Again, [only slides](https://www.percona.com/live/17/sites/default/files/slides/clickhouse-as-timeseries-database.pdf) are available.
+
+![Keynote](https://blog-images.clickhouse.tech/en/2017/clickhouse-at-percona-live-2017/2.gif)
+
+Besides, ClickHouse was represented in the exhibition accompanying the conference. Altinity, a private company independent from Yandex that provides consulting and support services for ClickHouse, organized the booth and invited Yandex team members to join them to talk about ClickHouse with conference attendees, which appeared to be quite productive.
+
+![ClickHouse Booth](https://blog-images.clickhouse.tech/en/2017/clickhouse-at-percona-live-2017/3.jpg)
diff --git a/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md b/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md
new file mode 100644
index 00000000000..bd15350ba34
--- /dev/null
+++ b/website/blog/en/2017/clickhouse-meetup-in-berlin-october-5-2017.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse Meetup in Berlin, October 5, 2017'
+image: 'https://blog-images.clickhouse.tech/en/2017/clickhouse-meetup-in-berlin-october-5-2017/main.jpg'
+date: '2017-10-19'
+tags: ['meetup', 'Berlin', 'Germany', 'events']
+---
+
+![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3hO_3kCUFZLdcIQuI3gghZ8)
+
+All presentations are available for download at [the event page](https://events.yandex.com/events/meetings/05-10-2017/).
diff --git a/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md b/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md
new file mode 100644
index 00000000000..5974a292853
--- /dev/null
+++ b/website/blog/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017.md
@@ -0,0 +1,8 @@
+---
+title: 'ClickHouse Meetup in Santa Clara on May 4, 2017'
+image: 'https://blog-images.clickhouse.tech/en/2017/clickhouse-meetup-in-santa-clara-may-4-2017/main.jpg'
+date: '2017-05-11'
+tags: ['meetup', 'Santa Clara', 'Bay Area', 'California', 'USA', 'America', 'events']
+---
+
+After [Percona Live 2017](clickhouse-at-percona-live-2017.md), the Yandex ClickHouse team stayed in the San Francisco Bay Area for one more week to meet with local companies in person and talk about ClickHouse and how it can be applied to their tasks. On the last evening we even managed to organize our own meetup with active ClickHouse users in the area; it was not as large as the ones we regularly host in Russia, but we still had some very interesting discussions.
diff --git a/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md b/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md
new file mode 100644
index 00000000000..5521127edba
--- /dev/null
+++ b/website/blog/en/2017/join-the-clickhouse-meetup-in-berlin.md
@@ -0,0 +1,13 @@
+---
+title: 'Join the ClickHouse Meetup in Berlin'
+image: 'https://blog-images.clickhouse.tech/en/2017/join-the-clickhouse-meetup-in-berlin/main.jpg'
+date: '2017-10-19'
+tags: ['announcement', 'meetup', 'Berlin', 'Germany', 'events']
+---
+
+Come learn about ClickHouse, our open-source, high-performance column-oriented database management system, at a meetup on October 5, 2017 at the Park Inn at Alexanderplatz 7 in Berlin.
+
+ClickHouse can generate custom data reports in real time and process billions of rows and dozens of gigabytes of data per single server per second. It works up to a thousand times faster than traditional approaches. ClickHouse is linearly scalable, hardware-efficient, fault-tolerant, and can be deployed across multiple data centers. Among other features, ClickHouse offers a user-friendly SQL query dialect with a number of built-in analytics capabilities.
+
+Join us at the meetup to learn why hundreds of companies across Europe, the US, and China are adopting ClickHouse. Through interactive talks, attendees will learn about product features, how ClickHouse can benefit them, and how to use this system in practice.
+Attending the ClickHouse meetup is free. [Please register to join us](https://events.yandex.com/events/meetings/05-10-2017/).
diff --git a/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md b/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md
new file mode 100644
index 00000000000..c3534efee55
--- /dev/null
+++ b/website/blog/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md
@@ -0,0 +1,8 @@
+---
+title: 'Announcing ClickHouse Meetup in Amsterdam on November 15'
+image: 'https://blog-images.clickhouse.tech/en/2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15/main.jpg'
+date: '2018-10-17'
+tags: ['meetup', 'Amsterdam', 'Netherlands', 'events', 'announcement']
+---
+
+Yet another meetup of the ClickHouse community is planned in Europe; see the detailed agenda and register on [the event page](https://events.yandex.com/events/meetings/15-11-2018/).
diff --git a/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md b/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md
new file mode 100644
index 00000000000..d3700b40e42
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-at-analysys-a10-2018.md
@@ -0,0 +1,27 @@
+---
+title: 'ClickHouse at Analysys A10 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-at-analysys-a10-2018/main.jpg'
+date: '2018-11-04'
+tags: ['conference', 'Beijing', 'China', 'events', 'Analysys', 'Asia']
+---
+
+[Analysys A10](https://event.analysys.cn/pc/2018/index.html) is a large conference on Big Data that took place on October 26-27 in Beijing. Since China's population is huge, it generates a lot of data, and the Big Data industry is in very high demand. The Yandex ClickHouse team was honored to participate in this event alongside top management, analysts, and IT professionals from various Chinese companies.
+
+Each year Analysys also organizes an OLAP contest. For the second year in a row, the same team of Sundy Li (李本旺) and Winter Zhang (张健) won it by using ClickHouse as the core of their solution. The task was to calculate a complex marketing funnel as fast as possible.
+
+Sundy Li (李本旺) receives the award for winning the Analysys OLAP contest 2018 from William Kwok (郭炜):
+![Sundy Li and William Kwok](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-analysys-a10-2018/1.jpg)
+
+We spent the first day of the conference mostly talking with people at the ClickHouse booth, while on the second day there were two technical talks about ClickHouse.
+
+Alexey Milovidov demonstrates ClickHouse and how it works internally:
+![Alexey Milovidov](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-analysys-a10-2018/2.jpg)
+
+Sundy Li (李本旺) explains to the audience how they won the OLAP contest using ClickHouse:
+![Sundy Li](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-analysys-a10-2018/3.jpg)
+
+The day after A10 there was a dedicated ClickHouse Community Meetup in Beijing, but it deserves a separate recap post.
+
+Analysys A10 afterparty:
+![Analysys A10 afterparty](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-analysys-a10-2018/4.jpg)
+
diff --git a/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md b/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md
new file mode 100644
index 00000000000..1e0b7e4c99d
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-at-percona-live-europe-2018.md
@@ -0,0 +1,25 @@
+---
+title: 'ClickHouse at Percona Live Europe 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/main.jpg'
+date: '2018-11-21'
+tags: ['conference', 'Frankfurt', 'Germany', 'events', 'Percona Live', 'Europe']
+---
+
+The open-source database management systems conference [Percona Live](https://www.percona.com/live/e18/) this time took place on November 5-7 in Germany, Frankfurt am Main. Over the last couple of years ClickHouse has become a solid member of this community, as demand for analytics with subsecond latencies appears to be pretty high.
+
+There were three talks about ClickHouse on the agenda, while only one of them was from Yandex. Also, we had a lot of interesting conversations with conference attendees at the ClickHouse booth sponsored by Altinity.
+
+Alexander Zaitsev, CTO and co-founder of Altinity, gives an overview of ClickHouse and then demonstrates case studies and best practices ([slides](https://presentations.clickhouse.tech/percona_europe_2018/Altinity.pdf)):
+![](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/1.jpg)
+
+Fast! Flexible! Free! Fun!
+![Fast! Flexible! Free! Fun!](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/2.jpg)
+
+Aleksey Milovidov, lead ClickHouse developer from Yandex, talks about unusual and unique ClickHouse features ([slides](https://presentations.clickhouse.tech/percona_europe_2018)):
+![Aleksey Milovidov](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/3.jpg)
+
+Aleksandar Aleksandrov and Felix Mattrat, data engineers from MessageBird, show how they use ClickHouse to analyze the delivery process of SMS and other kinds of messages ([slides](http://presentations.clickhouse.tech/percona_europe_2018/MessageBird.pdf)):
+![Aleksandar Aleksandrov and Felix Mattrat](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/4.jpg)
+
+Live demo at the ClickHouse booth by Alexey Milovidov:
+![Demo at ClickHouse booth by Alexey Milovidov](https://blog-images.clickhouse.tech/en/2018/clickhouse-at-percona-live-europe-2018/5.jpg)
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md
new file mode 100644
index 00000000000..cd7cb155c37
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md
@@ -0,0 +1,68 @@
+---
+title: 'ClickHouse Community Meetup in Beijing on January 27, 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/main.jpg'
+date: '2018-02-08'
+tags: ['meetup', 'Beijing', 'China', 'events', 'Asia']
+---
+
+Last year there was an OLAP algorithm contest in China organized by Analysys. The team that showed the top results and won the competition used ClickHouse as the core of their solution. Other teams were mostly using different technologies and didn't really know much about ClickHouse at the time. When the final results were published, many people in China who participated in or were aware of this competition became really eager to learn more about ClickHouse. This spike of interest in ClickHouse in China eventually led to the first Chinese ClickHouse Community Meetup, which took place in Beijing.
+
+Welcome word by William Kwok, CTO of Analysys, who personally played a huge role in making this event possible:
+![William Kwok, CTO of Analysys](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/1.jpg)
+
+It was probably the most intense ClickHouse meetup compared to all previous ones worldwide. The main part of the event took over 6 hours non-stop, and there were also a pre-meetup and an after-party on the same day. Well over 150 people showed up on Saturday to participate.
+
+Audience listening to the ClickHouse introduction by Alexey Milovidov:
+![ClickHouse introduction by Alexey Milovidov](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/2.jpg)
+
+Alexey Milovidov started the main meetup session with an introductory talk about ClickHouse, its usage inside Yandex and the history that led to it becoming an open-source analytical DBMS ([slides](https://presentations.clickhouse.tech/meetup12/introduction/)).
+
+Alexander Zaitsev's practical talk about migrating to ClickHouse:
+![Alexander Zaitsev](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/3.jpg)
LifeStreet, the advertising company where he works, was one of the first companies outside of Yandex to switch to ClickHouse in production from another analytical DBMS. Later on, Alexander also co-founded Altinity, a company that specializes in helping others migrate to ClickHouse and then use it effectively to achieve their business goals. The talk covered many specific topics that are important for those who are in the middle of such a migration or just considering it ([slides](https://presentations.clickhouse.tech/meetup12/migration.pptx)).
+
+Alexey Zatelepin explaining how the ClickHouse sparse index works, and other implementation details:
+![Alexey Zatelepin](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/4.jpg)
+
+Alexey Zatelepin's technical talk focused on giving engineers some insight into why ClickHouse is so fast in OLAP workloads, and how to leverage its design and core features, such as the primary index, replication, and distributed tables, to achieve great performance and reliability ([slides](https://presentations.clickhouse.tech/meetup12/internals.pdf)).
+
+Jack Gao gives an extensive overview of ClickHouse and its use cases in Chinese:
+![Jack Gao](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/5.jpg)
+
+As we learned during the meetup and the rest of our business trip, there are many companies in China that are already using or seriously evaluating ClickHouse, either as part of their products or for internal analytics. Three of them have been doing so long and extensively enough to give a full talk about their progress and experience.
+
+In China in general, and especially in Beijing, knowledge of English is not that common. People working in the IT industry have to know English well enough to read documentation, but that does not imply they can speak or understand spoken English well. So the talks by representatives of local companies were in Chinese.
+
+Jack Gao, an ex-DBA and now an analyst at Sina (a major social network), dedicated a significant part of his talk to fundamental topics essential to most ClickHouse users. It partially overlapped with previous talks, but this time in Chinese. He covered not only the ClickHouse use case at Sina but also other publicly known cases from other companies. Judging by the reaction of the audience, it was the most useful talk of the whole meetup, thanks to its broadly applicable content, the lack of a language barrier, and the excellent presentation. We even had to sacrifice the initially scheduled short break to give Jack some additional time ([slides](https://presentations.clickhouse.tech/meetup12/power_your_data.pdf)).
+
+Yang Xujun from Dataliance / UltraPower, which provides an outsourced data analysis platform to telecom companies in China, demonstrated why they decided to move to ClickHouse from reports prepared offline in Apache Hadoop / Spark and exported to MySQL. In short: Hadoop was too slow and cumbersome ([slides](https://presentations.clickhouse.tech/meetup12/telecom.pdf)).
+
+It might sound obvious, but the huge Chinese population generates insane amounts of data to store and process, so IT companies operating mostly on the local Chinese market often handle amounts of information comparable to even the largest global companies. 
+
+Kent Wang from the Splunk Shanghai R&D center demonstrated the current state of ClickHouse integration into the Splunk ecosystem. Basically, they have plugged ClickHouse into their system via the JDBC driver so that data from ClickHouse can easily be accessed in Splunk UI and dashboards. Last spring the Yandex ClickHouse team paid a friendly visit to the Splunk office in San Francisco to discuss potential points of interaction and exchange experience, so it was great to hear that there's real progress in that direction ([slides](https://presentations.clickhouse.tech/meetup12/splunk.pdf)).
+
+The last talk was for the most tenacious ClickHouse users. Alexey Milovidov announced some recently released features and improvements and shared what's coming next in both the short and the long term ([slides](https://presentations.clickhouse.tech/meetup12/news_and_plans/)).
+
+Here is the video recording of the main meetup session, over 5 hours long:
+
+![iframe](https://www.youtube.com/embed/UXw8izZGPGk)
+
+If you are from China or at least can read Chinese, you might consider joining the **[Chinese ClickHouse User Group](http://www.clickhouse.com.cn/)**.
+
+{## Likely outdated in favor of YouTube
+
+There is an over 5 hours long video recording of main meetup session, but it'll take a bit of effort to get access to it (especially if you are not from China): http://m.zm518.cn/zhangmen/livenumber/share/entry/?liveId=1460023&sharerId=6fd3bac16125e71d69-899&circleId=b0b78915b2edbfe6c-78f7&followerId=&timestamp=1517022274560
+You'll need to install WeChat (probably one of the most popular messengers in the world, everyone in China has it) on your smartphone: Android or iOS. https://play.google.com/store/apps/details?id=com.tencent.mm https://itunes.apple.com/ru/app/wechat/id414478124?mt=8
+On the first launch, WeChat will ask to confirm your phone number via SMS, read some digits via a microphone and accept the user agreement. Go through this.
+On your computer, click the red button in the middle of the video behind the link above. It'll show a QR code. Now in WeChat in the top-right corner, there's the “+” button which opens a menu that has a “Scan QR code” item. Use it to scan QR code from your computer screen, then press the “Sign in” button on the smartphone. Now the video on the computer automatically becomes playable.
+If you are from China or at least can read Chinese, you might consider joining the Chinese ClickHouse User Group.
+
+ClickHouse Community Meetup afterparty. 
+##}
+
+Pre-meetup meeting of speakers and the most active ClickHouse users in China:
+![Pre-meetup meeting](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/6.jpg)
+
+ClickHouse Community Meetup afterparty:
+![ClickHouse Community Meetup afterparty](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-january-27-2018/7.jpg)
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md
new file mode 100644
index 00000000000..a794a5f7a7e
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md
@@ -0,0 +1,54 @@
+---
+title: 'ClickHouse Community Meetup in Beijing on October 28, 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/main.jpg'
+date: '2018-11-12'
+tags: ['meetup', 'Beijing', 'China', 'Asia', 'events']
+---
+
+Interest in ClickHouse among Chinese experts is growing rapidly. This was the second ClickHouse Meetup in Beijing this year, and the venue was more than full: it could fit only about 170 of the 500 people who signed up, while around 2,000 more joined the live broadcast online. Many Chinese companies have already adopted ClickHouse in production and are willing to share their experience.
+
+See the **[video recording of all talks](http://play.yunxi.tv/livestream/flash?id=05527cf6e260448b9d880b99d2cf4d40)** and **[all slides](https://github.com/yandex/clickhouse-presentations/tree/master/meetup19)**.
+
+Welcome word by William Kwok (郭炜), CTO of Analysys, who played a key role in organizing this event:
+![William Kwok](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/1.jpg)
+
+Nikolay Kochetov from Yandex demonstrating recent advancements in string processing optimization using the LowCardinality feature:
+![Nikolay Kochetov from Yandex](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/2.jpg)
+
+Shang Shujie (尚书杰) from Kuaishou gives an overview of ClickHouse and its usage scenarios:
+![Shang Shujie](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/3.jpg)
+
+Winter Zhang (张健) from QingCloud explains their services based on ClickHouse:
+![Winter Zhang](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/4.jpg)
+
+Audience listening to Zhang's talk:
+![Audience listening to Zhang's talk](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/5.jpg)
+
+Li Junfei (李俊飞) from Tencent explains how ClickHouse fits into their data processing infrastructure:
+![Li Junfei](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/6.jpg)
+
+Questions & Answers session:
+![Q&A](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/7.jpg)
+
+Jack Gao (高鹏) from Sina explains their ClickHouse use case and gives some advice based on their extensive experience with ClickHouse:
+![Jack Gao](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/8.jpg)
+
+Chinese developers are also among the most active contributors to the ClickHouse source code worldwide. 
The Chinese ClickHouse Contributors Awards 2018 ceremony was also part of the meetup agenda, with the following results:
+
+1. 张建 (Winter Zhang, zhang2014) received First Place among independent ClickHouse developers in China for 2018, having developed 22 new features, improvements, and fixes in 57 pull requests.
+2. Amos Bird received Second Place among independent ClickHouse developers in China for 2018, having developed 16 new features, improvements, and fixes in 42 pull requests.
+3. 李本旺 (sundy-li) received Third Place among independent ClickHouse developers in China for 2018, having developed 6 new features, improvements, and fixes in 11 pull requests.
+
+A special award went to William Kwok (郭炜) for his active role in developing the Chinese ClickHouse community.
+
+Sundy Li (李本旺) receives a ClickHouse Contributor Award from Alexey Milovidov:
+![Sundy Li](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/9.jpg)
+
+William Kwok (郭炜) receives a special award for organizing the Chinese ClickHouse community and meetups:
+![William Kwok](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/10.jpg)
+
+Pre-meetup at the Analysys office:
+![Pre-meetup](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/11.jpg)
+
+
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md
new file mode 100644
index 00000000000..1db4b33c8dc
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md
@@ -0,0 +1,39 @@
+---
+title: 'ClickHouse Community Meetup in Berlin on July 3, 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/main.jpg'
+date: '2018-07-05'
+tags: ['meetup', 'Berlin', 'Germany', 'events']
+---
+
+Just a few months ago, Brenno Oliveira from Delivery Hero dropped us an email saying that they wanted to host a ClickHouse community meetup at their HQ, and together we made it happen. Renting a suitable room is actually one of the main limiting factors for how often ClickHouse meetups can happen worldwide, and it was very kind of Delivery Hero to provide one for free. Bringing in interesting speakers was the easy part, as there are more and more companies adopting ClickHouse and willing to share their stories. Being an open-source product has its advantages after all. About 50 people showed up out of 75 sign-ups, which is way above the typical rate.
+
+To get started, Alexander Zaitsev from Altinity gave an overview of ClickHouse for those not yet that familiar with the technology, using cases from his personal experience and Altinity's clients as examples. Here are [the slides](https://presentations.clickhouse.tech/meetup16/introduction.pdf); unfortunately, there's no video this time.
+
+Gleb Kanterov talking about the usage of ClickHouse for experimentation metrics at Spotify:
+![Gleb Kanterov Spotify](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/1.jpg)
+
+![Gleb Kanterov Spotify](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-berlin-on-july-3-2018/2.jpg)
+
+Spotify relies heavily on what Google Cloud Platform provides, but nevertheless found a spot in their infrastructure where only ClickHouse appeared to satisfy the requirements. 
Gleb Kanterov demonstrated their approach to conducting experiments and measuring whether the results are worth promoting to production. Using ClickHouse has allowed them to build a framework that scales to thousands of metrics, which in the end lets them move even faster and break fewer things. Checking out the [full slides](https://presentations.clickhouse.tech/meetup16/spotify.pdf) is highly recommended; here are a few quotes:
+
+- **Requirements**
+    - Serve 100s of QPS with sub-second latency
+    - We know in advance what the queries and data are
+    - Maintain 10x more metrics at the same cost
+    - Thousands of metrics
+    - Billions of rows per day in each of 100s of tables
+    - Ready to be used out of the box
+    - Leverage existing infrastructure as much as feasible
+    - Hide unnecessary complexity from internal users
+- **Why ClickHouse?**
+    - Built a proof of concept using various OLAP storages (ClickHouse, Druid, Pinot, ...)
+    - ClickHouse has the simplest architecture
+    - Powerful SQL dialect close to Standard SQL
+    - A comprehensive set of built-in functions and aggregators
+    - Was ready to be used out of the box
+    - Superset integration is great
+    - Easy to query using clickhouse-jdbc and jooq
+
+The last talk, by Alexey Milovidov, was pretty technical and mostly aimed at a deeper understanding of what's going on inside ClickHouse; see [the slides](https://presentations.clickhouse.tech/meetup16/internals.pdf). There were many experienced users in the audience who didn't mind staying late to hear it and ask very relevant questions. In fact, we had to leave the building way before people ran out of topics to discuss.
+
+If your company regularly hosts technical meetups and you are looking for interesting topics to talk about, ClickHouse might be in pretty high demand. Feel free to write to the Yandex ClickHouse team via [this form](http://clickhouse.tech/#meet) if you are interested in hosting a similar event in your city, and we'll find a way to cooperate and bring in other ClickHouse community members.
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md
new file mode 100644
index 00000000000..7e0082cd570
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-community-meetup-in-berlin-on-july-3.md
@@ -0,0 +1,8 @@
+---
+title: 'Announcing ClickHouse Community Meetup in Berlin on July 3'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-berlin-on-july-3/main.jpg'
+date: '2018-06-25'
+tags: ['meetup', 'Berlin', 'Germany', 'events', 'announcement']
+---
+
+There's yet another upcoming ClickHouse community meetup in Europe; see the detailed agenda and sign up on [the event page](https://bitly.com/2Jv9Bug).
diff --git a/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md b/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md
new file mode 100644
index 00000000000..4f9874af655
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md
@@ -0,0 +1,20 @@
+---
+title: 'ClickHouse Community Meetup in Paris on October 2, 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/main.jpg'
+date: '2018-10-09'
+tags: ['meetup', 'Paris', 'France', 'events']
+---
+
+The agenda of the Paris ClickHouse Meetup was full of use cases, mostly from France-based companies that are actively using ClickHouse. 
Slides for all talks are [available on GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup18).
+
+Christophe Kalenzaga and Vianney Foucault, engineers from ContentSquare, the company that provided the meetup venue:
+![Christophe Kalenzaga and Vianney Foucault](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/1.jpg)
+
+Matthieu Jacquet from Storetail (Criteo):
+![Matthieu Jacquet](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/2.jpg)
+
+The audience:
+![Audience](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/3.jpg)
+
+Networking after the meetup:
+![Networking](https://blog-images.clickhouse.tech/en/2018/clickhouse-community-meetup-in-paris-on-october-2-2018/4.jpg)
diff --git a/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md b/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md
new file mode 100644
index 00000000000..080caf610e5
--- /dev/null
+++ b/website/blog/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md
@@ -0,0 +1,27 @@
+---
+title: 'ClickHouse Meetup in Amsterdam on November 15, 2018'
+image: 'https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/main.jpg'
+date: '2018-11-22'
+tags: ['meetup', 'Amsterdam', 'Netherlands', 'events']
+---
+
+The 20th ClickHouse Meetup took place in Amsterdam, which turned out to be a convenient location for people from all over Europe to join the event, including Austria, the Czech Republic, and Germany. We were also glad to see people from many local companies, including Booking.com, Crobox, Marktplaats (eBay), MessageBird, and others.
+
+Aleksandar Aleksandrov and Felix Mattrat, data engineers from MessageBird, show how they use ClickHouse to analyze the delivery process of SMS and other kinds of messages:
+![Aleksandar Aleksandrov and Felix Mattrat](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/1.jpg)
+
+Nikolay Kochetov from the Yandex ClickHouse team demonstrates recent features related to string processing optimization:
+![Nikolay Kochetov from Yandex ClickHouse team](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/2.jpg)
+
+Konstantin Ignatov from Qrator Labs shares his experience in using ClickHouse as a time-series database:
+![Konstantin Ignatov from Qrator Labs](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/3.jpg)
+
+Aurimas Jacikevicius from Altinity demonstrates a benchmark of ClickHouse against TimescaleDB and InfluxDB under a time-series workload:
+![Aurimas Jacikevicius from Altinity](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/4.jpg)
+
+Roy Brondgeest from Crobox showcases the [ClickHouse Scala reactive client](https://github.com/crobox/clickhouse-scala-client) and its bundled [DSL for query building](https://github.com/crobox/clickhouse-scala-client/wiki):
+![Roy Brondgeest from Crobox](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/5.jpg)
+
+Alexey Milovidov from Yandex closes the meetup with a talk about performance analysis of ClickHouse queries:
+![Alexey Milovidov from Yandex](https://blog-images.clickhouse.tech/en/2018/clickhouse-meetup-in-amsterdam-on-november-15-2018/6.jpg)
+
diff --git 
a/website/blog/en/2018/concept-cloud-mergetree-tables.md b/website/blog/en/2018/concept-cloud-mergetree-tables.md
new file mode 100644
index 00000000000..9d4818ba8d7
--- /dev/null
+++ b/website/blog/en/2018/concept-cloud-mergetree-tables.md
@@ -0,0 +1,120 @@
+---
+title: 'Concept: "Cloud" MergeTree Tables'
+image: 'https://blog-images.clickhouse.tech/en/2018/concept-cloud-mergetree-tables/main.jpg'
+date: '2018-11-23'
+tags: ['concept', 'MergeTree', 'future', 'sharding']
+---
+
+The main property of cloud MergeTree tables is the absence of manual control over the sharding scheme of data on a cluster. Data in cloud tables is distributed around the cluster on its own, while at the same time preserving locality for a given key.
+
+## Requirements
+
+1. Creating a cloud table makes it visible on all nodes of the cluster. There is no need to manually create a separate Distributed table and local tables on each node.
+2. When ingesting data into a cloud table, while the table is very small, the data is distributed across a few cluster servers, but as the data grows, more servers get involved (for example, starting from gigabytes per server). The user can create a small table and it should not be too cumbersome; but when creating a table, we do not know in advance how much data will be loaded into it.
+3. The user specifies a sharding key (an arbitrary tuple). Data for a range of the sharding key (in lexicographic order) is located on some subset of servers. Very small ranges are located on a few servers, and reading one requires touching only a single server, while sufficiently large ranges are spread across all servers. For example, in web analytics the sharding key might start with CounterID, the website identifier. Data on a large site like https://yandex.ru should be spread across all servers in the cluster, while data on a small site should be located on only a few servers. The physical explanation: the cluster should scale to simultaneously provide throughput for heavy queries and handle a high QPS of light queries, and for the light queries latency should not suffer. In general, this is called data locality.
+4. The ability for heavy queries to use all the servers in the cluster, rather than 1/N of them, where N is the replication factor. Thus, one server can contain multiple replicas of different shards.
+5. When replacing a server with an empty one (node recovery), the data restore must be parallelized in some way. At least the reads should be spread over different servers to avoid overloading any individual server.
+6. On each local server, reading a primary key range should touch neither too many file ranges nor file ranges that are too small (minimizing disk seeks).
+7. (Optional) The ability to use individual disks instead of RAID, while preserving throughput when reading medium-sized primary key ranges and preserving QPS when reading small ranges.
+8. The ability to create multiple tables with a common sharding scheme (co-sharding).
+9. Rebalancing of data when new servers are added; creation of additional replicas when old servers are unavailable for a long time.
+10. SELECT queries should not require synchronous requests to the coordinator. No duplicates or missing data should be visible to SELECT queries during data rebalancing operations.
+11. SELECT queries must choose a large enough subset of servers, taking into account the conditions on the sharding key and the knowledge of the current sharding scheme.
+12. 
The ability to efficiently distribute data across servers with uneven amounts of available disk space.
+13. Atomicity of INSERT on a cluster.
+
+Out of scope and will not be considered:
+
+1. Erasure coding of data for replication and recovery.
+2. Data storage on systems with different kinds of disks - HDD and SSD. An example is storing fresh data on SSD.
+
+## General Considerations
+
+A similar problem is usually solved (in MapReduce or blob-storage systems) by organizing data in chunks. Chunks are located on the nodes of the cluster. The mappings table or file -> chunks and chunk -> nodes are stored in the master, which itself can be replicated. The master observes the liveness of nodes and maintains a reasonable replication level for all chunks.
+
+Difficulties arise when there are too many chunks: in that case, the master cannot cope with storing the metadata and with the load, and complicated metadata sharding becomes necessary.
+
+In our case, it may seem tempting to solve the problem in a similar way, using an instance of a MergeTree table containing a data range in place of a chunk (in other systems such chunks are called "tablets" or "regions"). But there are many problems with this. The number of chunks on one server cannot be large, because that violates the property of minimizing the number of seeks when reading data ranges. The problem also arises because each MergeTree table is itself rather cumbersome and consists of a large number of files. On the other hand, tables around one terabyte in size are more or less normal if the data locality property is maintained, that is, if several such tables on one server are only used for not-too-small data ranges.
+
+A variety of options can be used for sharding data.
+
+One option is sharding according to some formula with a small number of parameters. Examples are simple hashing and consistent hashing (hash ring, rendezvous hashing, jump consistent hashing, sumbur). Practice in other systems shows that in its pure form this approach does not work well, because the sharding scheme is poorly controlled. It fits fine for caches, for example, and it can also be used as part of another algorithm (see the sketch below).
+
+The opposite option is to divide the data into shards using an explicitly specified table. The table may contain key ranges (or, alternatively, ranges of key hashes) and their corresponding servers. This gives a much greater degree of freedom in choosing when and how to transfer data. But at the same time, to scale the cluster, the table has to be expanded dynamically, splitting the existing ranges.
+
+One combined option is a mapping made up of two parts: first, the set of keys is divided into a pre-fixed number of "virtual shards" (also called "logical shards" or "mini-shards") - not too few and not too many, several times larger than the hypothetical cluster size in servers. Second, another mapping explicitly specifies the location of each mini-shard on the servers, and this second mapping can be controlled arbitrarily.
+
+The difficulty with this approach is that partitioning by hash ranges gives uniformity but no data locality for range queries, whereas when splitting by key ranges it is difficult to choose a uniform distribution in advance, since we do not know how the data will be distributed over the keys. That is, a pre-fixed split into mini-shards does not work if data locality is required.
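+
+As a reference point for the formula-based option, here is a minimal, self-contained sketch of jump consistent hashing, one of the schemes named above (the function body is the published algorithm; the demo in main() is purely illustrative and not part of any proposed implementation):
+
+```cpp
+#include <cstdint>
+#include <iostream>
+
+/// Jump consistent hash: maps a 64-bit key to one of num_buckets shards in
+/// O(log n) time with no lookup tables. When a bucket is added, only about
+/// 1/n of the keys move to the new bucket, which is what makes it "consistent".
+int32_t jumpConsistentHash(uint64_t key, int32_t num_buckets)
+{
+    int64_t b = -1;
+    int64_t j = 0;
+    while (j < num_buckets)
+    {
+        b = j;
+        key = key * 2862933555777941757ULL + 1;
+        j = static_cast<int64_t>((b + 1) * (double(1LL << 31) / double((key >> 33) + 1)));
+    }
+    return static_cast<int32_t>(b);
+}
+
+int main()
+{
+    /// Most keys keep their shard when the cluster grows from 7 to 8 servers.
+    for (int32_t buckets : {7, 8})
+        std::cout << "key 123456 with " << buckets << " buckets -> shard "
+                  << jumpConsistentHash(123456, buckets) << '\n';
+}
+```
+
+Its limitation for our purposes is exactly the one described above: the shard is a pure function of the key, so placement cannot be controlled and key ranges get no locality.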
+
+It turns out that the only acceptable approach in our case is partitioning by key ranges that can change dynamically (be repartitioned). At the same time, for convenience, manageability, and uniformity of data distribution, the number of partition elements can be somewhat larger than the number of servers, and the mapping from partition elements to servers can be changed separately.
+
+## Possible Implementation
+
+Each ClickHouse server can participate in some cloud. A cloud is identified by a text string. Membership of a node in a cloud can be established by creating a database of a certain type on the node (IDatabase). Thus, one node can be registered in several clouds. The registry of nodes registered in a cloud is maintained in the coordinator.
+
+Cloud nodes are selected to host the replicas of the shards of cloud tables. A node also sends some additional information to the coordinator to aid selection when placing data: a path that determines its locality in the network (for example, data center and rack), the amount of disk space, and so on.
+
+A cloud table is created in the corresponding database registered in the cloud. The table is created on any server and is visible in all databases registered in the cloud.
+
+A sharding key, an arbitrary tuple, is set for a cloud table at creation time. Sometimes it is practical for the sharding key to match the primary key (for example, (CounterID, Date, UserID)); sometimes it makes sense for it to be different (for example, a DateTime primary key with a UserID sharding key).
+
+Sharding is a composition of several mappings:
+
+1. The set of all possible tuples (the values of the sharding key) is mapped onto a set of half-intervals partitioning [0, 1). Initially the partition consists of a single element: all values are mapped into one half-interval, the whole of [0, 1). Then, as the amount of data in the table increases, the half-intervals (partition elements) can be divided approximately in half, by the median of the distribution of values in lexicographic order.
+2. For each half-interval of the partition, several cloud servers are selected and remembered in some way; replicas of the corresponding data will be located on them. The choice is based on the location of servers in the network (for example, at least two replicas in different data centers and all replicas in different racks), the number of replicas already created on a server (prefer servers with the minimum), and the amount of free space (among otherwise comparable servers, select the one with the maximum free space).
+
+As a result, this composition forms a mapping from a sharding key to a set of replica servers.
+
+It is assumed that both parts of this mapping may change in the course of operation.
+
+The result of mapping 1 can be called a "virtual shard" or "logical shard". During operation, virtual shards can be divided in half. Going in the opposite direction is impossible - the number of virtual shards can only grow. It is assumed that even for tables occupying the entire cluster, the number of virtual shards will be several times larger than the number of servers (for example, greater by 10 times the replication factor). Data ranges covering at least a tenth of all data should be spread across all servers to ensure throughput for heavy queries. The mapping as a whole is specified by the set of boundary values for the sharding key. This set is small (roughly kilobytes in size) and is stored in the coordinator.
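+
+To make the first mapping concrete, here is a hypothetical sketch (types and sample values are invented for illustration): the boundary set is an ordered list of sharding key tuples, and finding the virtual shard of a key is a binary search over it.
+
+```cpp
+#include <algorithm>
+#include <cstdint>
+#include <iostream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+/// A sharding key in the spirit of the (CounterID, Date, UserID) example.
+using ShardingKey = std::tuple<uint32_t /*CounterID*/, std::string /*Date*/, uint64_t /*UserID*/>;
+
+/// boundaries is sorted; virtual shard i holds keys in [boundaries[i - 1], boundaries[i]),
+/// with shard 0 taking everything below the first boundary. std::tuple compares
+/// lexicographically, matching the ordering described in the text.
+size_t virtualShardOf(const std::vector<ShardingKey> & boundaries, const ShardingKey & key)
+{
+    return std::upper_bound(boundaries.begin(), boundaries.end(), key) - boundaries.begin();
+}
+
+int main()
+{
+    /// Two boundary keys -> three virtual shards.
+    std::vector<ShardingKey> boundaries =
+    {
+        {100, "2018-01-01", 0},
+        {5000, "2018-01-01", 0},
+    };
+    std::cout << virtualShardOf(boundaries, {42, "2018-11-23", 777}) << '\n';   /// 0
+    std::cout << virtualShardOf(boundaries, {100, "2018-11-23", 777}) << '\n';  /// 1
+    std::cout << virtualShardOf(boundaries, {9999, "2018-11-23", 777}) << '\n'; /// 2
+}
+```
+
+Splitting a virtual shard by the median of its data amounts to inserting one more boundary into this list, which is why the whole first mapping stays small enough to live in the coordinator.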
+
+The mapping of virtual shards onto real servers can change arbitrarily: the number of replicas can increase when servers are unavailable for a long time, or increase and then decrease in order to move replicas between servers.
+
+## How to Satisfy All Requirements
+
+The list items below correspond to the requirement numbers above:
+
+1. IDatabase synchronously goes to the coordinator to get or change the list of tables. The list of cloud tables is stored in the coordinator under the node corresponding to the cloud. Thus, all tables of the cloud are visible on every server belonging to the cloud.
+2. Ensured by the fact that the partition initially consists of a single element and breaks up further as the data volume grows. Each replica responsible for local storage of the data can initiate a split once the data volume criterion has been reached. Multiple replicas may decide to do this concurrently, and the decision is made using an atomic CAS. To reduce contention, the moment of deciding on repartitioning can be somewhat randomized. The criterion for when virtual shards need further splitting turns out to be non-trivial. For example, you can split up to the number of servers times the replication factor fairly soon, once a shard grows to several gigabytes; but it is already worth splitting shards when they reach 1/N of the server size (for example, around a terabyte). The coordinator should store the latest and the previous split, and splitting should not be done too often.
+3. Ensured by the fact that the number of virtual shards is several times (user-defined) larger than the number of servers. Note: for additional data spreading, some spreading transformation could be imposed on the sharding key (not fully thought out). For example, instead of the key (CounterID, Date, UserID), shard by (hash(UserID) % 10, CounterID, Date, UserID). But in this case even small CounterIDs will fall into 10 ranges.
+4. Likewise.
+5. If several virtual shards are located on a single server, their replicas will be spread over a larger number of servers, and during recovery there will be more fanout.
+6. Small queries will use one shard, while large queries will use several shards on the same server. But since each shard is somewhat smaller, the data in a MergeTree table will probably be represented by a smaller set of parts. For example, we currently have a maximum part size of 150 GiB, and for large tables many such large parts are formed in one partition; if there are several tables, each will contain fewer large parts. On the other hand, when inserting data, a larger number of small parts will be generated on each server, and these small parts will increase the number of seeks - but not by much, as the fresh data will be in the page cache. This is why too many virtual shards per server might not work well.
+7. Pretty hard. Groups of neighboring shards could be kept on different disks of the same server, but then reads of medium-sized ranges would not be parallelized (since the whole range would be on one disk). In RAID the problem is solved by the chunk size being relatively small (typically 1 megabyte). It would be possible to come up with a separate scheme for distributing pieces of data across different disks, but it is too difficult to design and implement carefully. 
It's probably better not to do this at all and, as a minimum, simply select one disk of a JBOD server for the location of each shard.
+8. The sharding scheme can be identified by a string that may be shared between tables. The criterion for splitting shards is then based on the total amount of data across all tables with the same sharding scheme.
+9. Solved entirely by changing the mapping of virtual shards onto servers. This mapping can be controlled independently of everything else.
+10. Servers can cache the sharding map (both parts of it) for a while and usually update it asynchronously. When rebalancing data due to the splitting of virtual shards, the old data should be kept for a longer time; likewise when transferring replicas between servers. On each query, the initiator server also asks whether the remote server has the necessary data: the data for the required shard according to the sharding scheme cached by the initiator. For the query, one live replica of each shard that actually has the data is selected. If suddenly there is none, the sharding map should be updated synchronously, since for some reason all the replicas have been moved elsewhere.
+11. Trivial.
+12. Solved by the fact that each server holds more than one shard and that the distribution of shard replicas among servers is more or less arbitrary and can take the amount of free disk space into account.
+
+## Issues
+
+To ingest data into a table, you can send an INSERT query to any server. The data will be divided into ranges and written to the appropriate servers. At the same time, it is synchronously ensured that a fresh sharding map is used: it is requested before the data is inserted, and its freshness is checked simultaneously with the commit in ZooKeeper.
+
+For SELECT queries, if an old sharding map is used, the latest data will not be visible. Therefore, the asynchronous update interval of the sharding map for SELECTs should be made configurable, and an option should be added to use the latest sharding map synchronously.
+
+For fairly large tables, an INSERT query breaks the data into many small pieces and writes to all servers (example: with 500 servers, you need to commit to 5000 shard replicas). This should work, since the probability that all replicas of one shard are unavailable or slow is still low, but it will work slowly and possibly unreliably. With a lot of INSERTs there will be a terrible load on the coordinator, although it can normally withstand about one INSERT per second. To achieve high INSERT throughput it is sufficient to simply run INSERTs in parallel while keeping the overall INSERT frequency low. However, this is still a big problem.
+
+There are the following possible solutions:
+
+1. Something can be added to the beginning of the sharding key, for example Date % 10 or toMinute. Then INSERTs will touch fewer shards (in the typical case, when recent data is inserted), but during certain time intervals some shards will be hotter than others. This is acceptable if it reduces the number of shards active per INSERT, for example from 5000 to 500, but it is also very inconvenient for users.
+2. Some kind of convoluted sharding scheme could be devised, where fresh data first falls into a dedicated "fresh" shard, from which it is then lazily rewritten to its proper place. A fresh shard is essentially a distributed queue. 
At the same time, the fresh shard is always queried by SELECTs, which is not so good. And it still contradicts the atomicity of these data transfers as visible to SELECTs. Alternatively, the requirements could be relaxed by allowing SELECTs not to see some of the fresh data.
+
+It looks like this generally does not work well at a cluster size of over 500 servers.
+
+Another problem: to properly spread the ranges of the primary key, the number of virtual shards must be no less than the number of servers squared, and that is too many.
+
+## How to Get Around These Issues
+
+For sharding, some more intermediate mappings can be added. There are the following options:
+
+1. Split each shard into a set of shards in an arbitrary way, for example into 10 pieces. This is equivalent to adding a meaningless random number in 0..N-1 to the beginning of the sharding key. Then an INSERT can go into just one randomly selected shard, or the smallest shard, or round-robin; as a result, INSERT becomes easier, but the fanout of all point SELECTs increases. For convenience, such splitting can be done dynamically: only sufficiently large shards are divided this way (which helps avoid excessive splitting of old shards when the sharding key starts with Date and data is inserted in Date order), or such splitting starts only once the number of shards is large enough (a restriction on the fanout of INSERT requests).
+An additional advantage: with JBOD servers, such second-level shards can preferentially be placed on the disks of one server, which half-emulates RAID-0.
+But there is a serious drawback: local IN / JOIN becomes impossible. For example, that possibility is assumed when the sharding key is hash(UserID) and we JOIN by UserID. This drawback could be avoided by always placing all the "symmetric" shards on one server.
+2. A mapping that spreads the data while keeping the number of virtual shards. The essence of this mapping is as follows:
+ - A spreading factor is set, for example `N = 10`. As the very first mapping, 10 times more ranges are generated. For example, if we want to end up with 7 shards, we divide the data into 70 ranges.
+ - Then these ranges are renumbered in a circle with numbers from 0..6, and ranges with the same number fall into one shard; as a result, there are 7 shards again.
+ - The continuous analogue of this mapping is `x in [0, 1) -> fractional_part(x * N)`, multiplication by N on a circle (a numeric sketch follows at the end of this post).
+
+If you plot it in Cartesian coordinates, you get a "saw" with 10 teeth.
+
+After this, it becomes obvious that this mapping simultaneously spreads the data and preserves its locality.
+
+See also: [Arnold's cat map](https://en.wikipedia.org/wiki/Arnold%27s_cat_map).
+
+But what is described here does not quite work. First, until a sufficient amount of data has accumulated, it is impossible to create a uniform division into parts (there is nothing to compute the quantiles from). Second, under such a simple scheme, the intervals cannot be divided further.
+
+There is a variant in which, instead of dividing a range in half, a range is split into 4 parts that are then mapped into two shards. It is also not clear how well this would work.
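+
+Since the "saw" mapping is the least intuitive part, here is the tiny numeric sketch promised above, assuming a spreading factor N = 10 and 7 final shards as in the example (illustrative only, not a proposed implementation):
+
+```cpp
+#include <cmath>
+#include <iostream>
+
+/// The continuous "saw": x in [0, 1) -> fractional_part(x * n),
+/// i.e. multiplication by n on a circle.
+double saw(double x, int n)
+{
+    double y = x * n;
+    return y - std::floor(y);
+}
+
+int main()
+{
+    const int n = 10;     /// spreading factor
+    const int shards = 7; /// final number of shards
+    /// 70 contiguous key ranges; range i ends up in shard i % 7, so every
+    /// shard receives 10 thin slices spread evenly over the whole key space
+    /// while each slice stays locally contiguous.
+    for (int i = 0; i < n * shards; ++i)
+    {
+        double x = (i + 0.5) / (n * shards); /// midpoint of range i
+        int shard = static_cast<int>(saw(x, n) * shards);
+        std::cout << "range " << i << " -> shard " << shard << '\n';
+    }
+}
+```
+
+Printing all 70 ranges makes the pattern visible: shard numbers 0..6 repeat in a circle, which is exactly the renumbering described in the list above.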
diff --git a/website/blog/en/2019/clickhouse-at-percona-live-2019.md b/website/blog/en/2019/clickhouse-at-percona-live-2019.md
new file mode 100644
index 00000000000..15a9973c0d0
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-at-percona-live-2019.md
@@ -0,0 +1,38 @@
+---
+title: 'ClickHouse at Percona Live 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/main.jpg'
+date: '2019-06-04'
+tags: ['Percona Live','USA','Texas','Austin', 'events', 'conference']
+---
+
+This year the American episode of [Percona Live](https://www.percona.com/live/19/) took place at a nice waterfront location in Austin, TX, which welcomed open-source database experts with pretty hot weather. The ClickHouse community is undeniably growing, and ClickHouse has become a common database product to give a talk about, or at least to compare or refer to, while just [two short years ago](../2017/clickhouse-at-percona-live-2017.md) it was more like “wth is ClickHouse?”.
+
+Alexey Rubin from VirtualHealth compared two column-oriented databases: ClickHouse and MariaDB ColumnStore. The bottom line was no surprise: ClickHouse is noticeably faster, while MariaDB is more familiar to MySQL users; the details were useful, though.
+![Alexey Rubin from VirtualHealth](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/1.jpg)
+
+Alexey Milovidov from Yandex demonstrated how exactly ClickHouse became even faster in recent releases.
+![Alexey Milovidov from Yandex](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/2.jpg)
+
+Alexander Zaitsev and Robert Hodges from Altinity gave an entry-level ClickHouse tutorial, which included loading a demo dataset and going through realistic queries against it, with some extra variations demonstrating possible query optimization techniques. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Making%20HTAP%20Real%20with%20TiFlash%20--%20A%20TiDB%20Native%20Columnar%20Extension%20-%20FileId%20-%20174070.pdf). Altinity also sponsored the ClickHouse booth in the Expo Hall, which became an easy spot for people interested in ClickHouse to chat outside of talks.
+![Alexander Zaitsev and Robert Hodges from Altinity](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/3.jpg)
+
+Ruoxi Sun from PingCAP introduced TiFlash, a column-oriented add-on to TiDB for analytics based on ClickHouse source code. Basically, it provides a [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree/)-like table engine that is hooked up to TiDB replication and has an in-memory row-friendly cache for recent updates. Unfortunately, PingCAP has no plans to open-source TiFlash at the moment. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Making%20HTAP%20Real%20with%20TiFlash%20--%20A%20TiDB%20Native%20Columnar%20Extension%20-%20FileId%20-%20174070.pdf).
+![Ruoxi Sun from PingCAP](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/4.jpg)
+
+ClickHouse was also covered in a talk by Jervin Real and Francisco Bordenave from Percona with an overview of moving and replicating data around MySQL-compatible storage solutions. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Replicating%20MySQL%20Data%20to%20TiDB%20For%20Real-Time%20Analytics%20-%20FileId%20-%20187672.pdf). 
+![Jervin Real](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/5.jpg)
+
+ClickHouse represented columnar storage systems in a venture beyond relational databases by Marcos Albe from Percona.
+![Marcos Albe from Percona](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/6.jpg)
+
+Jervin Real from Percona demonstrated a real case study of applying ClickHouse in practice. It heavily involved manual partition manipulation; hopefully, the audience understood that this is an option, but not exactly a best practice for most use cases. [Slides](https://www.percona.com/live/19/sites/default/files/slides/Low%20Cost%20Transactional%20and%20Analytics%20With%20MySQL%20and%20Clickhouse,%20Have%20Your%20Cake%20and%20Eat%20It%20Too!%20-%20FileId%20-%20187674.pdf).
+![Jervin Real from Percona](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/7.jpg)
+
+Evgeny Potapov from ITSumma went through modern options for time-series storage and once more confirmed that ClickHouse is leading the way in this field as well.
+![Evgeny Potapov from ITSumma](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/8.jpg)
+
+The event location in the center of the US gave people from both the East and West Coasts equal opportunities to show up, and the presence of people from other countries was also quite noticeable. The content they all brought in was top-notch, as usual.
+![The venue](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/9.jpg)
+
+Austin after the event.
+![Austin](https://blog-images.clickhouse.tech/en/2019/clickhouse-at-percona-live-2019/10.jpg)
diff --git a/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md b/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md
new file mode 100644
index 00000000000..ab55f746bdd
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md
@@ -0,0 +1,17 @@
+---
+title: 'ClickHouse Lecture at Institute of Computing Technology, Chinese Academy of Science on June 11, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019/main.jpg'
+tags: ['lecture', 'events', 'China', 'Beijing', 'university', 'academy', 'institute']
+date: '2019-06-14'
+---
+
+Alexey Milovidov, head of the ClickHouse development group at Yandex, gave an open two-part lecture at the [Institute of Computing Technology, Chinese Academy of Science](http://english.ict.cas.cn/):
+
+- ClickHouse history and the evolution of the Yandex.Metrica storage system
+- Internal implementation of ClickHouse and the reasoning behind design decisions
+
+The event was organized by [Amos Bird](https://github.com/amosbird), one of the most active ClickHouse community members and contributors, who is at the same time a final-year PhD student at this institution. 
+
+Alexey with the event announcement:
+![Alexey with the event announcement](https://blog-images.clickhouse.tech/en/2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019/1.jpg)
+
diff --git a/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md b/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md
new file mode 100644
index 00000000000..ace5967736c
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019.md
@@ -0,0 +1,35 @@
+---
+title: 'ClickHouse Meetup in Beijing on June 8, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/main.jpg'
+date: '2019-06-13'
+tags: ['meetup','Beijing','China','events']
+---
+
+The 24th ClickHouse Meetup globally and the 3rd one in China took place in Beijing on the Dragon Boat Festival weekend, a holiday which turned out to have a rich history and to be a popular occasion for Chinese people to travel around the country. Nevertheless, the meetup venue, this time kindly provided by Gaea Mobile, was more than full as usual, with hundreds more people watching the live broadcast online. The Yandex ClickHouse team used this trip extensively as an opportunity to strengthen the bond with the ClickHouse community in China, also by giving an [open lecture at the Institute of Computing Technology, Chinese Academy of Science](clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md) and by having private conversations with the most active local corporate users, including ByteDance and JD.com.
+
+Welcome word by William Kwok, CTO of Analysys, who played the key role both in making this particular event possible and in establishing the ClickHouse community in China:
+![William Kwok, CTO of Analysys](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/1.jpg)
+
+He also administers the ClickHouse WeChat groups; feel free to ask him for an invite (@guodaxia2999 on WeChat):
+![@guodaxia2999 at WeChat](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/2.jpg)
+
+Alexey Milovidov from the ClickHouse core development team at Yandex got the content part of the main event started with an overview of new features and the roadmap:
+![Alexey Milovidov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/3.jpg)
+
+Amos Bird, one of the most active ClickHouse contributors both in China and worldwide, shares his experience of using ClickHouse for graph processing ([slides](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup24/2.%20SQLGraph%20--%20When%20ClickHouse%20marries%20graph%20processing%20Amoisbird.pdf)):
+![Amos Bird](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/4.jpg)
+
+Yan Gangqiang from Golden Data shares details of their approach to data storage for a survey system based on ClickHouse ([slides](https://presentations.clickhouse.tech/meetup24/3.%20金数据数据架构调整方案Public.pdf)):
+![Yan Gangqiang](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/5.jpg)
+
+ClickHouse for beginners talk by Percent ([slides](https://presentations.clickhouse.tech/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf)):
+![Percent](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/6.jpg)
+
+ClickHouse core developer Nikolay Kochetov demonstrates upcoming query execution pipeline changes 
([slides](https://presentations.clickhouse.tech/meetup24/5.%20Clickhouse%20query%20execution%20pipeline%20changes/)):
+![Nikolay Kochetov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/7.jpg)
+
+Pre-meetup meeting with active ClickHouse community members in China:
+![Pre-meetup meeting](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/8.jpg)
+
+ClickHouse-branded Beijing duck :)
+![ClickHouse branded Beijing duck](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-beijing-on-june-8-2019/9.jpg)
diff --git a/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md b/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md
new file mode 100644
index 00000000000..71176ab3d47
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019.md
@@ -0,0 +1,41 @@
+---
+title: 'ClickHouse Meetup in Limassol on May 7, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/main.jpg'
+date: '2019-05-14'
+tags: ['meetup', 'Cyprus', 'Limassol', 'events']
+---
+
+The first open-air ClickHouse Meetup took place in the heart of Limassol, the second-largest city of Cyprus, on a roof kindly provided by Exness Group. The views were stunning, but the speakers did a great job competing with them for the audience's attention. Over one hundred people joined in, which once again confirms the high interest in ClickHouse around the globe. The meetup content is also available as a [video recording](https://www.youtube.com/watch?v=_rpU-TvSfZ8).
+
+![Intro](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/1.jpg)
+
+[Kirill Shvakov](https://github.com/kshvakov) played the key role in making this event possible by reaching out to the ClickHouse community in Cyprus, finding the great venue, and lining up the other speakers. Most of the worldwide ClickHouse meetups happen thanks to active community members like Kirill; if you want to help us organize a ClickHouse meetup in your area, please reach out to the Yandex ClickHouse team via [this form](https://clickhouse.tech/#meet) or any other convenient way.
+
+![Kirill Shvakov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/2.jpg)
+
+Kirill is well known for his top-notch [ClickHouse Go Driver](https://github.com/clickhouse/clickhouse-go) running over the native protocol, but his opening talk was about his experience optimizing ClickHouse queries and solving real-world tasks at Integros and Wisebits. [Slides](https://presentations.clickhouse.tech/meetup22/strategies.pdf). [Full query listings](https://github.com/kshvakov/ClickHouse-Meetup-Exness).
+
+The event began in the early evening...
+![Evening in Limassol](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/3.jpg)
+
+...but it took just around an hour for nature to turn night mode on, which made the projected slides easier to read. 
+![Night in Limassol](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/4.jpg)
+
+Sergey Tomilov and his colleagues from the Exness Platform Team shared details on the evolution of their systems for analyzing logs and metrics, and how they ended up relying on ClickHouse for long-term storage ([slides](https://presentations.clickhouse.tech/meetup22/exness.pdf)):
+![Sergey Tomilov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/5.jpg)
+
+Alexey Milovidov from the Yandex ClickHouse team demonstrated features from recent ClickHouse releases and gave an update on what's coming soon ([slides](https://presentations.clickhouse.tech/meetup22/new_features/)):
+![Alexey Milovidov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/6.jpg)
+
+Alexander Zaitsev, CTO of Altinity, gave an overview of how to integrate ClickHouse into environments running on Kubernetes ([slides](https://presentations.clickhouse.tech/meetup22/kubernetes.pdf)):
+![Alexander Zaitsev, CTO of Altinity](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/7.jpg)
+
+Vladimir Goncharov, a backend engineer from Aloha Browser, closed the ClickHouse Limassol Meetup by demonstrating a few projects that integrate other open-source log processing tools with ClickHouse ([slides](https://presentations.clickhouse.tech/meetup22/aloha.pdf)):
+![Vladimir Goncharov](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/8.jpg)
+
+Unfortunately, midnight was closing in, and only the most weather-proof ClickHouse fans managed to stay for the whole event as it started getting pretty chilly.
+
+![Final](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-limassol-on-may-7-2019/9.jpg)
+
+More photos from the event are available in the [short event afterword by Exness](https://www.facebook.com/events/386638262181785/permalink/402167077295570/).
+
diff --git a/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md b/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md
new file mode 100644
index 00000000000..6141b4c2fd5
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019.md
@@ -0,0 +1,28 @@
+---
+title: 'ClickHouse Meetup in Madrid on April 2, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/main.jpg'
+date: '2019-04-11'
+tags: ['meetup', 'Spain', 'Madrid', 'events']
+---
+
+The Madrid ClickHouse Meetup has probably been the largest one in the EU so far, with well over one hundred attendees. As usual, we saw not only people working and living in Madrid itself, but also many who had traveled a long way to join the event, talk about ClickHouse use cases, and learn about new and upcoming features. 
+
+Opening word by [Javi Santana](https://twitter.com/javisantana), who personally made this event possible by gathering all the people and setting up the venue provided by Google Campus for Startups:
+![Javi Santana](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/1.jpg)
+
+Alexander Zaitsev, CTO of Altinity, introduced ClickHouse to those who are just starting to use it or only considering it for the future ([slides](https://www.slideshare.net/Altinity/clickhouse-introduction-by-alexander-zaitsev-altinity-cto)):
+![Alexander Zaitsev](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/2.jpg)
+
+Robert Hodges, CEO of Altinity, probably traveled the longest distance to join the event, since he's based in California. He has also [published his thoughts on this event in the Altinity blog](https://www.altinity.com/blog/2019/4/9/madrid-clickhouse-meetup-summary).
+
+Alexey Milovidov from Yandex showed the recent advancements in ClickHouse features and briefly walked the audience through the current roadmap ([slides](https://presentations.clickhouse.tech/meetup21/new_features/)):
+![Alexey Milovidov from Yandex](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/3.jpg)
+
+Iago Enriquez from Idealista talked about their migration from a “legacy” commercial DBMS to ClickHouse. It was the first time we heard someone talk about using two flagship open-source products by Yandex together in production: they run [CatBoost](https://catboost.ai/) model inference right from ClickHouse SQL queries to compensate for the incompleteness of their source data. Unfortunately, the slides of Iago's talk were not allowed to be published.
+![Iago Enriquez from Idealista](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/4.jpg)
+
+David Pardo Villaverde from Corunet gave a talk about how they used ClickHouse to prepare data for dense model generation for one of their clients, which took a pretty short time on a single server. A fun quote from the conclusions: “If I wasn't already married, I'd marry it! \[ClickHouse\]” ([slides](https://presentations.clickhouse.tech/meetup21/predictive_models.pdf)):
+![David Pardo Villaverde from Corunet](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/5.jpg)
+
+The closing talk of the meetup was by Murat Kabilov from Adjust GmbH, demonstrating his open-source project [pg2ch](https://github.com/mkabilov/pg2ch) that syncs data from PostgreSQL to ClickHouse in real time ([slides](https://presentations.clickhouse.tech/meetup21/postgres_to_clickhouse.pdf)).
+![Murat Kabilov from Adjust GmbH](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/6.jpg)
diff --git a/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md b/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md
new file mode 100644
index 00000000000..94ad125b71b
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse Meetup in San Francisco on June 4, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019/main.jpg'
+date: '2019-06-12'
+tags: ['meetup','USA','San Francisco','events', 'California', 'Bay Area']
+---
+
+The 23rd ClickHouse Meetup in San Francisco was held in the CloudFlare office and co-organized by Altinity. 
+
+David Pardo Villaverde from Corunet gave a talk about how they used ClickHouse to prepare data for dense model generation for one of their clients, which took remarkably little time on a single server. A fun quote from the conclusions: “If I wasn't already married, I'd marry it! \[ClickHouse\]” ([slides](https://presentations.clickhouse.tech/meetup21/predictive_models.pdf)):
+![David Pardo Villaverde from Corunet](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/5.jpg)
+
+The closing talk of the meetup was by Murat Kabilov from Adjust GmbH, who demonstrated his open-source project [pg2ch](https://github.com/mkabilov/pg2ch) that syncs data from PostgreSQL to ClickHouse in real time ([slides](https://presentations.clickhouse.tech/meetup21/postgres_to_clickhouse.pdf)).
+![Murat Kabilov from Adjust GmbH](https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-madrid-on-april-2-2019/6.jpg)
diff --git a/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md b/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md
new file mode 100644
index 00000000000..94ad125b71b
--- /dev/null
+++ b/website/blog/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse Meetup in San Francisco on June 4, 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/clickhouse-meetup-in-san-francisco-on-june-4-2019/main.jpg'
+date: '2019-06-12'
+tags: ['meetup','USA','San Francisco','events', 'California', 'Bay Area']
+---
+
+The 23rd ClickHouse Meetup in San Francisco was held at the CloudFlare office and co-organized by Altinity. There were about 35 attendees, most of them experienced ClickHouse users from SF and the Bay Area. The meetup started with an introduction by Robert Hodges, Altinity CEO, and continued with a lightning talk by Alan Braithwaite from Segment.com about their experience with ClickHouse. The next talk, by Alexander Zaitsev about the ClickHouse operator for Kubernetes, gained much attention from the audience, since Kubernetes is in fact very popular even for databases. At the end there was a presentation by ClickHouse developer Alexey Milovidov about new and upcoming features, along with a roadmap, followed by a discussion of the implementation and design details of the most appreciated features. We were happy to meet with ClickHouse contributors at the meetup. Slides from the event are [available on GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup23).
+
+As we see increasing demand for ClickHouse events in SF and the Bay Area, we have already started planning the next event.
diff --git a/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md b/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md
new file mode 100644
index 00000000000..b5799702d04
--- /dev/null
+++ b/website/blog/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse.md
@@ -0,0 +1,12 @@
+---
+title: 'How to speed up LZ4 decompression in ClickHouse?'
+image: 'https://blog-images.clickhouse.tech/en/2019/how-to-speed-up-lz4-decompression-in-clickhouse/main.jpg'
+date: '2019-06-25'
+tags: ['performance', 'lz4', 'article', 'decompression']
+---
+
+When you run queries in [ClickHouse](https://clickhouse.tech/), you might notice that the profiler often shows the `LZ_decompress_fast` function near the top. What is going on? This question had us wondering how to choose the best compression algorithm.
+
+ClickHouse stores data in compressed form. When running queries, ClickHouse tries to do as little as possible, in order to conserve CPU resources. In many cases, all the potentially time-consuming computations are already well optimized, plus the user wrote a well thought-out query. Then all that's left to do is to perform decompression.
+
+[Read further](https://habr.com/en/company/yandex/blog/457612/)
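+
+To make that concrete, here is a minimal, hypothetical sketch (table and column names are invented for this example) showing where LZ4 sits: it is the default column codec, and the gap between compressed bytes on disk and uncompressed bytes is roughly the volume that decompression has to produce at query time:
+
+```sql
+-- Hypothetical example table; LZ4 is also the codec ClickHouse applies by default.
+CREATE TABLE test.hits_lz4
+(
+    EventDate Date,
+    UserID UInt64,
+    URL String CODEC(LZ4)
+)
+ENGINE = MergeTree()
+ORDER BY (EventDate, UserID);
+
+-- Compare on-disk (compressed) size with what decompression must produce.
+SELECT
+    name,
+    data_compressed_bytes,
+    data_uncompressed_bytes,
+    round(data_uncompressed_bytes / data_compressed_bytes, 2) AS ratio
+FROM system.columns
+WHERE database = 'test' AND table = 'hits_lz4';
+```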
diff --git a/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md b/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md
new file mode 100644
index 00000000000..726d714b765
--- /dev/null
+++ b/website/blog/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019.md
@@ -0,0 +1,14 @@
+---
+title: 'Schedule of ClickHouse Meetups in China for 2019'
+image: 'https://blog-images.clickhouse.tech/en/2019/schedule-of-clickhouse-meetups-in-china-for-2019/main.jpg'
+date: '2019-04-18'
+tags: ['China', 'Beijing', 'Shanghai', 'Shenzhen', 'announcement', 'meetup']
+---
+
+Last year there were two ClickHouse Meetups in Beijing, in [January](../2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md) and [October](../2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md), and they appeared to be in extremely high demand, with a fully packed venue and thousands of people watching online. So this year we decided to try expanding the meetups to other large cities in China where we see the most interest in ClickHouse, based on website visits. Here's the current schedule and sign-up pages:
+
+- [ClickHouse Community Meetup in Beijing](https://www.huodongxing.com/event/2483759276200) on June 8.
+- [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20.
+- [ClickHouse Community Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27.
+
+到时候那里见!
diff --git a/website/blog/en/2020/five-methods-for-database-obfuscation.md b/website/blog/en/2020/five-methods-for-database-obfuscation.md
new file mode 100644
index 00000000000..9a6615b0079
--- /dev/null
+++ b/website/blog/en/2020/five-methods-for-database-obfuscation.md
@@ -0,0 +1,10 @@
+---
+title: 'Five Methods For Database Obfuscation'
+image: 'https://blog-images.clickhouse.tech/en/2020/five-methods-for-database-obfuscation/main.jpg'
+date: '2020-01-27'
+tags: ['article', 'obfuscation']
+---
+
+ClickHouse users already know that its biggest advantage is its high-speed processing of analytical queries. But claims like this need to be confirmed with reliable performance testing.
+
+[Read further](https://habr.com/en/company/yandex/blog/485096/)
diff --git a/website/blog/en/index.md b/website/blog/en/index.md
new file mode 100644
index 00000000000..227a69408dc
--- /dev/null
+++ b/website/blog/en/index.md
@@ -0,0 +1,3 @@
+---
+is_index: true
+---
diff --git a/website/blog/en/redirects.txt b/website/blog/en/redirects.txt
new file mode 100644
index 00000000000..80a57d38ebc
--- /dev/null
+++ b/website/blog/en/redirects.txt
@@ -0,0 +1,32 @@
+clickhouse-meetup-in-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md
+clickhouse-meetup-at-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md
+clickhouse-meetup-in-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md
+clickhouse-meetup-at-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md
+clickhouse-community-meetup-in-berlin-on-july-3.md 2018/announcing-clickhouse-community-meetup-in-berlin-on-july-3.md
+evolution-of-data-structures-in-yandex-metrica.md 2016/evolution-of-data-structures-in-yandex-metrica.md
+how-to-update-data-in-clickhouse.md 2016/how-to-update-data-in-clickhouse.md
+yandex-opensources-clickhouse.md 2016/yandex-opensources-clickhouse.md
+clickhouse-at-data-scale-2017.md 2017/clickhouse-at-data-scale-2017.md
+clickhouse-meetup-in-berlin-october-5-2017.md 2017/clickhouse-meetup-in-berlin-october-5-2017.md
+join-the-clickhouse-meetup-in-berlin.md 2017/join-the-clickhouse-meetup-in-berlin.md
+clickhouse-at-percona-live-2017.md 2017/clickhouse-at-percona-live-2017.md
+clickhouse-meetup-in-santa-clara-may-4-2017.md 2017/clickhouse-meetup-in-santa-clara-may-4-2017.md
+announcing-clickhouse-meetup-in-amsterdam-on-november-15.md 2018/announcing-clickhouse-meetup-in-amsterdam-on-november-15.md
+clickhouse-community-meetup-in-berlin-on-july-3-2018.md 2018/clickhouse-community-meetup-in-berlin-on-july-3-2018.md
+clickhouse-at-analysys-a10-2018.md 2018/clickhouse-at-analysys-a10-2018.md
+clickhouse-community-meetup-in-berlin-on-july-3.md 2018/clickhouse-community-meetup-in-berlin-on-july-3.md
+clickhouse-community-meetup-in-paris-on-october-2-2018.md 2018/clickhouse-community-meetup-in-paris-on-october-2-2018.md
+clickhouse-community-meetup-in-beijing-on-october-28-2018.md 2018/clickhouse-community-meetup-in-beijing-on-october-28-2018.md
+clickhouse-meetup-in-amsterdam-on-november-15-2018.md 2018/clickhouse-meetup-in-amsterdam-on-november-15-2018.md
+clickhouse-community-meetup-in-beijing-on-january-27-2018.md 2018/clickhouse-community-meetup-in-beijing-on-january-27-2018.md
+clickhouse-at-percona-live-europe-2018.md
2018/clickhouse-at-percona-live-europe-2018.md +concept-cloud-mergetree-tables.md 2018/concept-cloud-mergetree-tables.md +clickhouse-meetup-in-limassol-on-may-7-2019.md 2019/clickhouse-meetup-in-limassol-on-may-7-2019.md +schedule-of-clickhouse-meetups-in-china-for-2019.md 2019/schedule-of-clickhouse-meetups-in-china-for-2019.md +clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md 2019/clickhouse-lecture-at-institute-of-computing-technology-chinese-academy-of-science-on-june-11-2019.md +clickhouse-meetup-in-san-francisco-on-june-4-2019.md 2019/clickhouse-meetup-in-san-francisco-on-june-4-2019.md +how-to-speed-up-lz4-decompression-in-clickhouse.md 2019/how-to-speed-up-lz4-decompression-in-clickhouse.md +clickhouse-at-percona-live-2019.md 2019/clickhouse-at-percona-live-2019.md +clickhouse-meetup-in-madrid-on-april-2-2019.md 2019/clickhouse-meetup-in-madrid-on-april-2-2019.md +clickhouse-meetup-in-beijing-on-june-8-2019.md 2019/clickhouse-meetup-in-beijing-on-june-8-2019.md +five-methods-for-database-obfuscation.md 2020/five-methods-for-database-obfuscation.md diff --git a/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md b/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md new file mode 100644 index 00000000000..71fb8da8215 --- /dev/null +++ b/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md @@ -0,0 +1,8 @@ +--- +title: 'ClickHouse Meetup в Москве, 21 ноября 2016' +image: 'https://blog-images.clickhouse.tech/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016/main.jpg' +date: '2016-11-22' +tags: ['мероприятия', 'meetup', 'Москва'] +--- + +[Посмотреть видео](https://events.yandex.ru/lib/talks/4351/) diff --git a/website/blog/ru/2016/clickhouse-na-highload-2016.md b/website/blog/ru/2016/clickhouse-na-highload-2016.md new file mode 100644 index 00000000000..fb950db35b0 --- /dev/null +++ b/website/blog/ru/2016/clickhouse-na-highload-2016.md @@ -0,0 +1,14 @@ +--- +title: 'ClickHouse на HighLoad++ 2016' +image: 'https://blog-images.clickhouse.tech/ru/2016/clickhouse-na-highload-2016/main.jpg' +date: '2016-12-10' +tags: ['мероприятия', 'конференции', 'Москва', 'HighLoad++'] +--- + +![iframe](https://www.youtube.com/embed/TAiCXHgZn50) + +[Расшифровка доклада](https://habrahabr.ru/post/322724/) + +![iframe](https://www.youtube.com/embed/tf38TPvwjJ4) + +[Расшифровка доклада](https://habrahabr.ru/post/322620/) diff --git a/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md b/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md new file mode 100644 index 00000000000..6404ee0465a --- /dev/null +++ b/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md @@ -0,0 +1,10 @@ +--- +title: 'ClickHouse на встрече про инфраструктуру хранения и обработки данных в Яндексе' +image: 'https://blog-images.clickhouse.tech/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse/main.jpg' +date: '2016-10-16' +tags: ['мероприятия', 'инфраструктура'] +--- + +![iframe](https://www.youtube.com/embed/Ho4_dQk7dAg) + +[Страница мероприятия «Яндекс изнутри: инфраструктура хранения и обработки данных»](https://events.yandex.ru/events/meetings/15-oct-2016/), прошедшего 15 октября 2016 года. 
diff --git a/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md b/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md
new file mode 100644
index 00000000000..36daa047ce7
--- /dev/null
+++ b/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md
@@ -0,0 +1,10 @@
+---
+title: 'Яндекс открывает ClickHouse'
+image: 'https://blog-images.clickhouse.tech/ru/2016/yandeks-otkryvaet-clickhouse/main.jpg'
+date: '2016-06-15'
+tags: ['анонс', 'GitHub', 'лицензия']
+---
+
+Сегодня внутренняя разработка компании Яндекс — [аналитическая СУБД ClickHouse](https://clickhouse.tech/) — стала доступна каждому. Исходники опубликованы на [GitHub](https://github.com/yandex/ClickHouse) под лицензией Apache 2.0.
+
+ClickHouse позволяет выполнять аналитические запросы в интерактивном режиме по данным, обновляемым в реальном времени. Система способна масштабироваться до десятков триллионов записей и петабайт хранимых данных. Использование ClickHouse открывает возможности, которые раньше было даже трудно представить: вы можете сохранять весь поток данных без предварительной агрегации и быстро получать отчёты в любых разрезах. ClickHouse разработан в Яндексе для задач [Яндекс.Метрики](https://metrika.yandex.ru/) — второй по величине системы веб-аналитики в мире.
diff --git a/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md b/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md
new file mode 100644
index 00000000000..fb84a16c02a
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md
@@ -0,0 +1,14 @@
+---
+title: 'ClickHouse MeetUp едет в Минск!'
+image: 'https://blog-images.clickhouse.tech/ru/2017/clickhouse-meetup-edet-v-minsk/main.jpg'
+date: '2017-06-13'
+tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь', 'анонс']
+---
+
+29 июня в Минске впервые выступят с докладами создатели СУБД ClickHouse и те, кто ежедневно использует её для решения аналитических задач. Докладчики расскажут о последних изменениях и предстоящих обновлениях СУБД, а также о нюансах работы с ней.
+
+Встреча будет интересна администраторам ClickHouse и тем, кто пока только присматривается к системе. Мы приглашаем белорусских пользователей также поделиться своим опытом использования ClickHouse и выступить на встрече с блиц-докладами: при регистрации мы предложим вам такую возможность!
+
+Участие в мероприятии бесплатное, но необходимо заранее зарегистрироваться: количество мест в зале ограничено.
+
+Посмотреть программу и подать заявку на участие можно на [странице встречи](https://events.yandex.ru/events/meetings/29-june-2017).
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md
new file mode 100644
index 00000000000..80d399203b0
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md
@@ -0,0 +1,8 @@
+---
+title: 'ClickHouse Meetup в Екатеринбурге, 16 мая 2017'
+image: 'https://blog-images.clickhouse.tech/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017/main.jpg'
+date: '2017-05-17'
+tags: ['мероприятия', 'meetup', 'Екатеринбург']
+---
+
+[Посмотреть презентацию](https://presentations.clickhouse.tech/meetup6/)
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md b/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md
new file mode 100644
index 00000000000..de38df47af3
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md
@@ -0,0 +1,16 @@
+---
+title: 'ClickHouse MeetUp в Минске: итоги'
+image: 'https://blog-images.clickhouse.tech/ru/2017/clickhouse-meetup-v-minske-itogi/main.jpg'
+date: '2017-06-19'
+tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь']
+---
+
+Недавно в Минске мы встретились с пользователями ClickHouse и техническими специалистами, которые только знакомятся с СУБД.
+
+Мы делимся с вами презентациями докладчиков и будем рады ответить на вопросы в [чате ClickHouse в Телеграме](https://t.me/clickhouse_ru).
+
+[История создания ClickHouse, новости и планы по развитию](https://presentations.clickhouse.tech/meetup7/), Алексей Миловидов
+
+[Использование ClickHouse для мониторинга связности сети](https://presentations.clickhouse.tech/meetup7/netmon.pdf), Дмитрий Липин
+
+[Разбираемся во внутреннем устройстве ClickHouse](https://presentations.clickhouse.tech/meetup7/internals.pdf), Виталий Людвиченко
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md
new file mode 100644
index 00000000000..e4a614befad
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse Meetup в Новосибирске, 3 апреля 2017'
+image: 'https://blog-images.clickhouse.tech/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017/main.jpg'
+date: '2017-04-04'
+tags: ['мероприятия', 'meetup', 'Новосибирск']
+---
+
+[Презентация Алексея Миловидова](https://presentations.clickhouse.tech/meetup4/)
+
+[Презентация Марии Мансуровой](https://presentations.clickhouse.tech/meetup4/clickhouse_for_analysts.pdf)
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md
new file mode 100644
index 00000000000..3bdfd2763b8
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md
@@ -0,0 +1,8 @@
+---
+title: 'ClickHouse Meetup в Санкт-Петербурге, 28 февраля 2017'
+image: 'https://blog-images.clickhouse.tech/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017/main.jpg'
+date: '2017-03-01'
+tags: ['мероприятия', 'meetup', 'Санкт-Петербург']
+---
+
+![iframe](https://www.youtube.com/embed/CVrwp4Zoex4)
diff --git a/website/blog/ru/2017/clickhouse-na-uwdc-2017.md b/website/blog/ru/2017/clickhouse-na-uwdc-2017.md
new file mode 100644
index 00000000000..7b801181803
--- /dev/null
+++ b/website/blog/ru/2017/clickhouse-na-uwdc-2017.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse на UWDC 2017'
+image:
'https://blog-images.clickhouse.tech/ru/2017/clickhouse-na-uwdc-2017/main.jpg'
+date: '2017-05-20'
+tags: ['мероприятия', 'конференции', 'Челябинск']
+---
+
+![iframe](https://www.youtube.com/embed/isYA4e5zg1M?t=2h8m15s)
+
+[Посмотреть презентацию](https://presentations.clickhouse.tech/uwdc/)
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md
new file mode 100644
index 00000000000..38e697d6b4c
--- /dev/null
+++ b/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md
@@ -0,0 +1,38 @@
+---
+title: 'ClickHouse Meetup в Лимассоле, 7 мая 2019'
+image: 'https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/main.jpg'
+date: '2019-05-14'
+tags: ['мероприятия', 'meetup', 'Лимассол', 'Кипр', 'Европа']
+---
+
+Первый ClickHouse Meetup под открытым небом прошел в сердце Лимассола, второго по размеру города Кипра, на крыше, любезно предоставленной Exness Group. С крыши открывались сногсшибательные виды, но докладчики отлично справлялись с конкуренцией с ними за внимание аудитории. Более ста человек присоединилось к мероприятию, что в очередной раз подтверждает высокий интерес к ClickHouse по всему земному шару. Контент мероприятия также доступен в формате [видеозаписи](https://www.youtube.com/watch?v=_rpU-TvSfZ8).
+
+[Кирилл Шваков](https://github.com/kshvakov) сыграл ключевую роль в том, чтобы данное мероприятие стало возможным: наладил коммуникацию с сообществом ClickHouse на Кипре, нашел отличную площадку и докладчиков. Большинство митапов ClickHouse по всему миру происходит благодаря таким активным участникам сообщества, как Кирилл. Если вы хотите помочь нам организовать митап ClickHouse в своём регионе, пожалуйста, свяжитесь с командой ClickHouse в Яндексе через [эту форму](https://clickhouse.tech/#meet) или любым другим удобным способом.
+
+![Кирилл Шваков](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/1.jpg)
+
+Кирилл широко известен благодаря своему замечательному [ClickHouse Go Driver](https://github.com/clickhouse/clickhouse-go), работающему по нативному протоколу, а его открывающий доклад был посвящён опыту оптимизации запросов ClickHouse и решению реальных прикладных задач в Integros и Wisebits. [Слайды](https://presentations.clickhouse.tech/meetup22/strategies.pdf). [Полные тексты запросов](https://github.com/kshvakov/ClickHouse-Meetup-Exness).
+
+Мероприятие началось ранним вечером…
+![Вечер в Лимассоле](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/2.jpg)
+
+…но природе потребовалось всего около часа, чтобы включить «ночной режим». Зато проецируемые слайды стало заметно легче читать.
+![Ночь в Лимассоле](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/3.jpg)
+
+Сергей Томилов и его коллеги из Exness Platform Team поделились деталями об эволюции своих систем для анализа логов и метрик, а также рассказали, как они в итоге стали использовать ClickHouse для долгосрочного хранения и анализа данных ([слайды](https://presentations.clickhouse.tech/meetup22/exness.pdf)):
+![Сергей Томилов](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/4.jpg)
+
+Алексей Миловидов из команды ClickHouse в Яндексе продемонстрировал функциональность из недавних релизов ClickHouse, а также рассказал о том, чего стоит ждать в ближайшем будущем ([слайды](https://presentations.clickhouse.tech/meetup22/new_features/)):
+![Алексей Миловидов](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/5.jpg)
+
+Александр Зайцев, технический директор Altinity, сделал обзор того, как можно интегрировать ClickHouse в окружения, работающие на Kubernetes ([слайды](https://presentations.clickhouse.tech/meetup22/kubernetes.pdf)):
+![Александр Зайцев](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/6.jpg)
+
+Владимир Гончаров, бэкенд-разработчик из Aloha Browser, закрывал ClickHouse Limassol Meetup демонстрацией нескольких проектов для интеграции других opensource-продуктов для анализа логов с ClickHouse ([слайды](https://presentations.clickhouse.tech/meetup22/aloha.pdf)):
+![Владимир Гончаров](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/7.jpg)
+
+К сожалению, приближалась полночь, и только самые «морозостойкие» любители ClickHouse продержались до конца мероприятия, так как стало заметно холодать.
+
+![Лимассол](https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/8.jpg)
+
+Больше фотографий с мероприятия доступно в [коротком послесловии от Exness](https://www.facebook.com/events/386638262181785/permalink/402167077295570/).
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md
new file mode 100644
index 00000000000..d3a5471b1a8
--- /dev/null
+++ b/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md
@@ -0,0 +1,10 @@
+---
+title: 'ClickHouse Meetup в Москве, 5 сентября 2019'
+image: 'https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019/main.jpg'
+date: '2019-09-06'
+tags: ['мероприятия', 'meetup', 'Москва']
+---
+
+![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3gYSwohnKFUozYy9QdUpcT_)
+
+[Слайды опубликованы на GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup28).
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md
new file mode 100644
index 00000000000..d1dafe580f1
--- /dev/null
+++ b/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md
@@ -0,0 +1,12 @@
+---
+title: 'ClickHouse Meetup в Новосибирске, 26 июня 2019'
+image: 'https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019/main.jpg'
+date: '2019-06-05'
+tags: ['мероприятия', 'meetup', 'Новосибирск']
+---
+
+Изюминкой второго митапа ClickHouse в Новосибирске стали два низкоуровневых доклада с погружением во внутренности ClickHouse, а остальная часть контента была очень прикладной, с конкретными сценариями.
Любезно предоставленный S7 зал на сто человек был полон до самого завершения последнего доклада где-то ближе к полуночи. + +![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3ionSVt-NYC9Vu_83xxhb4J) + +Как обычно, [все слайды опубликованы на GitHub](https://presentations.clickhouse.tech/meetup25). diff --git a/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md new file mode 100644 index 00000000000..8f8f9b4aae2 --- /dev/null +++ b/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md @@ -0,0 +1,10 @@ +--- +title: 'ClickHouse Meetup в Санкт-Петербурге, 27 июля 2019' +image: 'https://blog-images.clickhouse.tech/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019/main.jpg' +date: '2019-08-01' +tags: ['мероприятия', 'meetup', 'Санкт-Петербург'] +--- + +![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3j3X7TWrKmnEPcfEG901W-T) + +[Слайды опубликованы на GitHub](https://github.com/yandex/clickhouse-presentations/tree/master/meetup27). diff --git a/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md b/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md new file mode 100644 index 00000000000..cbd9e6c01fb --- /dev/null +++ b/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md @@ -0,0 +1,12 @@ +--- +title: 'ClickHouse Meetup в Минске, 11 июля 2019' +image: 'https://blog-images.clickhouse.tech/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/main.jpg' +date: '2019-07-12' +tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь'] +--- + +![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3hLz6dmyu6gM_X871FG9eCc) + +[Все слайды опубликованы на GitHub](https://github.com/yandex/clickhouse-presentations/tree/master/meetup26). 
+ +![Минск](https://blog-images.clickhouse.tech/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/1.jpg) diff --git a/website/blog/ru/index.md b/website/blog/ru/index.md new file mode 100644 index 00000000000..227a69408dc --- /dev/null +++ b/website/blog/ru/index.md @@ -0,0 +1,3 @@ +--- +is_index: true +--- diff --git a/website/blog/ru/redirects.txt b/website/blog/ru/redirects.txt new file mode 100644 index 00000000000..4e34d53af3d --- /dev/null +++ b/website/blog/ru/redirects.txt @@ -0,0 +1,15 @@ +yandeks-otkryvaet-clickhouse.md 2016/yandeks-otkryvaet-clickhouse.md +clickhouse-meetup-v-moskve-21-noyabrya-2016.md 2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md +clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md 2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md +clickhouse-na-highload-2016.md 2016/clickhouse-na-highload-2016.md +clickhouse-meetup-v-novosibirske-3-aprelya-2017.md 2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md +clickhouse-meetup-v-minske-itogi.md 2017/clickhouse-meetup-v-minske-itogi.md +clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md 2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md +clickhouse-meetup-v-ekaterinburge-16-maya-2017.md 2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md +clickhouse-na-uwdc-2017.md 2017/clickhouse-na-uwdc-2017.md +clickhouse-meetup-edet-v-minsk.md 2017/clickhouse-meetup-edet-v-minsk.md +clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md 2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md +clickhouse-meetup-v-moskve-5-sentyabrya-2019.md 2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md +clickhouse-meetup-v-novosibirske-26-iyunya-2019.md 2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md +clickrouse-meetup-v-minske-11-iyulya-2019.md 2019/clickrouse-meetup-v-minske-11-iyulya-2019.md +clickhouse-meetup-v-limassole-7-maya-2019.md 2019/clickhouse-meetup-v-limassole-7-maya-2019.md diff --git a/website/css/blog.css b/website/css/blog.css new file mode 100644 index 00000000000..80ba393dec1 --- /dev/null +++ b/website/css/blog.css @@ -0,0 +1,8 @@ +body.blog .dropdown-item { + color: #111 !important; +} + +body.blog .dropdown-item:hover, +body.blog .dropdown-item:focus { + background-color: #efefef; +} diff --git a/website/locale/en/LC_MESSAGES/messages.po b/website/locale/en/LC_MESSAGES/messages.po index c2c37ebf2b1..9b264bb67d4 100644 --- a/website/locale/en/LC_MESSAGES/messages.po +++ b/website/locale/en/LC_MESSAGES/messages.po @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language: en\n" @@ -37,27 +37,27 @@ msgstr "ClickHouse - fast open-source OLAP DBMS" msgid "ClickHouse DBMS" msgstr "ClickHouse DBMS" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "open-source" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "relational" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "analytics" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "analytical" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "Big Data" -#: templates/common_meta.html:24 +#: 
templates/common_meta.html:28 msgid "web-analytics" msgstr "web-analytics" @@ -77,6 +77,14 @@ msgstr "" msgid "Yandex LLC" msgstr "Yandex LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "Published date" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "Documentation" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "Rating" diff --git a/website/locale/es/LC_MESSAGES/messages.mo b/website/locale/es/LC_MESSAGES/messages.mo index 9b61bb401bb8d56b03085f60b85d14bdc3c28009..a072272644ef6c4831ba813f62d3c5723d0d5b28 100644 GIT binary patch delta 1299 zcmYMzUr3Wt7{~EPo%yGn`Df;zsDY6oD{y6KrKFfeNr|8#NVw)eF{hyt6>Shf7ZHSz zWaWhgc~ciUib{%+cM$|BK@@pmMd(EeL4PRf`@@$S@6P9(_kGWKo;~MmdHc+c==*Hf zRpWDj?|Qy%i5mU?+;o}cFdRiUPGA*zz3wgQ=J)u;f6umoG2=Mm(x3pAJc z0YkWsv6q9OGgg5bH)m3Rz1YJI?e$sQh(o9ZCb0}3VKXkGwxWhcrEfw74&!DVcJ4pH z4UFHQ*8Ps!(oD)4$5Ql>*kTs-SLyPpe+ssv_No(=z$xUES_HMnw@{@Ucg7Q_(mud# z_!Tw3m=mjn{CFB$QHji;)>}aR0C&{0a`;M70gqq{wxCLU4^{HV$YpUF1$d20=p$<0 z0;;sj*oSVS>5xXTQComoZ=SmAEG?lDi2k6XL*wDk+k}U)9>=f&KO@z%a!yDifyUYSocBFD&-0w;eK*_ovo*I+ z<=i(uU3{Z_drCC={~2-2su^an0`KAme2Qyv5`&n>8hnc!_Jva;E@35>m;N(~dcGO! z&2pCHVha-|J+oNL_#tk>Dcp!N_y~*m3oorPOW}op8HY`A;+DNaB`A3FAFzz^JgTr| zOyE~+Aive|8s$vvM-`IBQ#gR@@f}96h*kI-RbYsy3fP26Y{7MS)|($jYP8EpF1z8) zkD@wy4~NKa&$&>=QTkZxBM_3q4iile_M!^t!)6@9ZhVFvxPk|)PfsK9yD zQohGG_#K-mY=Sncb91a?0PCyizY_0cLD-F!K&|OfRLlCj@o7{``>_RIp#BY?P=zex zFoxKvD&Q6>-V;>e1#G|tR6c*092m@Tq1K*2welP)@F1#?>!=5AqgpwJ=kXJ(?8+KtQ|J5GM-QvEyVL65l#eg{4q? zCXGtei(1p;sDk=Yg=bL(T|>ng!@W4}-Je79`mGpo$DMtCcO_8px%sjVC)mC#5!)Vb W-`$#U|AYog-HX+SO5Bg(tn(K;qjmTI diff --git a/website/locale/es/LC_MESSAGES/messages.po b/website/locale/es/LC_MESSAGES/messages.po index 794662f0dbd..54765875a81 100644 --- a/website/locale/es/LC_MESSAGES/messages.po +++ b/website/locale/es/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: 2020-03-26 10:19+0300\n" "Last-Translator: FULL NAME \n" "Language: es\n" @@ -36,27 +36,27 @@ msgstr "ClickHouse - DBMS OLAP de código abierto rápido" msgid "ClickHouse DBMS" msgstr "Sistema abierto." -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "de código abierto" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "relacional" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "analítica" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "analítico" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "Grandes Datos" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "Sistema abierto." @@ -76,6 +76,14 @@ msgstr "" msgid "Yandex LLC" msgstr "Sistema abierto." 
+#: templates/blog/content.html:16 +msgid "Published date" +msgstr "Fecha de publicación" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "Documentación" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "Clasificación" diff --git a/website/locale/fa/LC_MESSAGES/messages.mo b/website/locale/fa/LC_MESSAGES/messages.mo index 36d0063da7a33529fbb93d0dc770594a8f758d80..21ba91517111062c683f8c18545bc2846144588e 100644 GIT binary patch delta 1296 zcmYM!TS!zv9LMp$x_Q6kddp1OG_t_3mJ;vK&58_yKuM(XmPw^XD^Z|pH+|_rgf3=2 zmLXEu21UMTy+ja3_;5rkBIrS(0*j>R^8Lv|huQO)nRCv}fBrMRE%8fS0?enN`<<(7l4Wo8MO8?*%VeFbKl`Ry>3 zm2_MTI*1vxpI|CZVKUBRKU%oi54eTfurI=lVpH7Wa29#h=1>!TMT&(IZ4^eM0!&6P z^IJNVwRBWqES^RM)QR=jkIV5lrei##xN$uy&|=hpRak~K7=r`B{vlN6Mv$0o3>DBb zJ!gJ}6lyKn>EKm{;?dH4eL;Ua3FSoTBhEZl_oxEh<#gFUzo z@1eGG67~KZx(VnH-oZpS`LE%@W1cF-$(%IpRVMO|<)Q*C4fa=}R@{hszYWXr9B#pJ zR3P6`0i+P^D3)L*e#R_}WwmPO`Kf5d`%x=Ci$3f`rFa1~k)oacGYVHdYOYE4ZC zeRqscC*LsNJ$dT=|D5-kmC{XOG2X%z_!yVsFa|J#tMN5**#~a5_z6p}aLGTzsQ05- zZI-e&8fzFh=9*98W1oq=f`dJQ) zGW~(Li zZlh#2gj%SFgQ9t^q5|m0-RR(5T*MQ2n=+lo)B+9d`AKF^;#pKc^Qek=`BTst38N}g ziCUl$73fwZY1@T514mGYtrv;W5~u{O;{m*fs=yplIj_x!ork_1US~Fc%;RJVT6}@# bhFDW%Q*-O)n6p?E@jKD7={%>Z{HXsg=ly(S diff --git a/website/locale/fa/LC_MESSAGES/messages.po b/website/locale/fa/LC_MESSAGES/messages.po index 65eeb6f605a..cc50b4726ff 100644 --- a/website/locale/fa/LC_MESSAGES/messages.po +++ b/website/locale/fa/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: 2020-03-26 10:19+0300\n" "Last-Translator: FULL NAME \n" "Language: fa\n" @@ -36,27 +36,27 @@ msgstr "ClickHouse - سریع باز-منبع OLAP DBMS" msgid "ClickHouse DBMS" msgstr "خانه عروسکی" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "متن باز" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "رابطه" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "تجزیه و تحلیل" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "تحلیلی" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "داده های بزرگ" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "تجزیه و تحلیل وب سایت" @@ -76,6 +76,14 @@ msgstr "" msgid "Yandex LLC" msgstr "Yandex, LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "تاریخ انتشار" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "مستندات" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "درجهبندی" diff --git a/website/locale/fr/LC_MESSAGES/messages.mo b/website/locale/fr/LC_MESSAGES/messages.mo index 6d5d70236c139ece8255fb2ceaee84ce6c7d96b3..6ca1195547545385bed08d88ae6b5c5682b839fd 100644 GIT binary patch delta 1295 zcmZA0OGs2v9LMp$dVSVtKJwXPnjRL6i$)F26eGnT3qvpj6(?sx(X>!27da6@8;dZC zwA|VvH`5>(M%ZH!Eh@q`6%_<7YLT0Yf~fBgF9Z#D?&q8{_x#WQ{?C~=fqSKq_gT(0 zizrNa?G;nPGLGmF%uUs1z%wrzQuf8LN5Emtr(p|vkdgR?m^9O#LZ?A z>!4A{zlFWX zT=tNg5_ye-xQt3T#7S0Sr%-XPMriOiuo=`=yu@}~M3p{3y|fiINDQk-jklsI6h3V0_r;_OTC)D3hE z9}o8r1&4YD`eXCSkK@vthL86340eY@-cWET96Os@>7?x{*;(!l1a=21V+)>5iT_Re Zm{k~`UB|=TP}qAy)4NvU{mcnEe*r*3iE;n{ delta 1200 zcmYMyUr3Wt7{~Eve{`vv)^yf%MaxK}EpjG^lq6=05L!xsgphSpL^q{De{6K&MgJ}g zioA=Upf0-b^`c?jSQz3(HxgKqT2Zi|pe}+%*7t`mG|t}7IcIyH^PK0LT+`cB_I=oY 
z+xYC|8{^yM)#(4{lHaV7;SiSN2(H0CVq$3FCQGVxKXE zUvV?*+Xl7~WTFF=NCuDNDO`=OF^Y>A#sVtA5K$Gd0o!mJmf>l4{ydVST}IZjVR!x} zs-m}WkoD~$9c3J&j3F^c-V9ch;4af1yU!u5>fb=;AQK975f%6f((I9I4ppIP+=TC3mr;S^oRco^PSm&;wZH(r z#Ot`5#4-fg!+3;q)JEQfslPH>VnVk!k1Dy315(M#P-h&$9aw`~@Bq^691ksc4bS2O zRH9LCtP\n" "Language: fr\n" @@ -36,27 +36,27 @@ msgstr "ClickHouse-SGBD OLAP open-source rapide" msgid "ClickHouse DBMS" msgstr "SGBD ClickHouse" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "open-source" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "relationnel" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "Analytics" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "analytique" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "Big Data" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "web-analytics" @@ -76,6 +76,14 @@ msgstr "" msgid "Yandex LLC" msgstr "Yandex LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "Date de publication" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "Documentation" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "Évaluation" diff --git a/website/locale/ja/LC_MESSAGES/messages.mo b/website/locale/ja/LC_MESSAGES/messages.mo index 4577e84e199bbcf81da749052a34b8ce358e4e1f..4f72203e20bd51ad0351a33205ed17e01d4d4072 100644 GIT binary patch delta 1279 zcmYMzdq`7J9Ki9PdVTB6`N*l%%xEGCwjO9LnNeA^KQg2TNM$M|nNm&yAs4Z-D5xy0 zW&YDc352jw2oi|}QGXPOMW9p=4J@LFKnS9~KReJkyPxy8_nhDPoqJ!exV19y&TjPz zzs>yf`PW5j>;LbDC6YmN0Btye={SxFIEhI(jX4-Wiu~c?LMv7z4NHt3)aQLzC=!r8 zRC4J!X&k^j+M}3_Q<#ZgFoY5(G6&bO3VY&3C^E$*6=#rJFUPKM#8kXRFtj70PjxKhw5%-}U%^wu3Aop3J}VGC+ty|@WOs2Tczy7Qm72yL9pGAzXMD0Ihm?w;Q{YKokgg~g`dYx=8Ay9T|C??MlTP~V9+ zs9(bmJb}r)!5a8^)JzScZs28ribnbsb>SRVtHs5rnen3r(rvtidS5?k#%`MSJ=FUj z;%$766lvuXDr2WX)cYSS{C5Nss4snBlFAk;b9ex&2-}T!u>xmM&p4a#Yp?({fOgct zI#G`*X!_5X_60M3**Jh2(2!|ALINt3XH;~;DJ;Wj(@xtL@44qb2RBVsCO-y! 
zw~gO+{=)q2&QSFKciv}~PuPQbcoUc4eO!pcn1iFZ3|}G5K5?nQFBrhw%z4A8_oG;5 zmb5xL%Nb~Q_Fysb9bAedSb!6F7pL(Do?T#;z%yB9G#lZv0jDbmQGfj&=cA%dWWb zK2$~fv5WQX5glb5rjErPCPLP*UCgEz526xi#VB@R6F$HioW({gV;4G#t*pFo;5ap`3PEvQPR`CxSR zbI!l0@c@IGuQWjYSJT?TuoR0ppA}e(Yq1UW-c_X8 z0GC_%80$$aL75K_Um^=_cq&N!Rhl#dg*fAG{6>v?nN1J+u^tO>3$~!Xfqv9i{{%1M zI4a>Ce6gxfJ1X((s04>l^G#wien`?`EsL?b7HmL0um@G5W)~kpJ$DQr;yI*QgnyA3 zJF7-L-%&JQ@=h!#zKlC@2s`jQ#<7*GH)HY!9i8cG+>h^239MurN+^mtnrhT|t&0=x zexq|QDxt$JK7~rC8}(i<#<0)DFOXC8+RISto^P8sHJdf&NsVUL_;RY&#bc3;RkfAz T)E|F1GZo1{osk+Syyp7{`FwRL diff --git a/website/locale/ja/LC_MESSAGES/messages.po b/website/locale/ja/LC_MESSAGES/messages.po index c88f2cabea2..f057f359724 100644 --- a/website/locale/ja/LC_MESSAGES/messages.po +++ b/website/locale/ja/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: 2020-03-26 10:19+0300\n" "Last-Translator: FULL NAME \n" "Language: ja\n" @@ -33,27 +33,27 @@ msgstr "ツ環板篠ョツ嘉ッツ偲青エツδツ-ツエツスツ-ツシツ" msgid "ClickHouse DBMS" msgstr "クリックハウスDBMS" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "オープンソース" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "関係" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "analytics" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "分析" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "ビッグデータ" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "ウェブ分析" @@ -71,6 +71,14 @@ msgstr "ソフトウェアは、明示または黙示を問わず、いかなる msgid "Yandex LLC" msgstr "Yandex LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "公開日" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "文書" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "評価" diff --git a/website/locale/messages.pot b/website/locale/messages.pot index 75aae739cff..12cb2a98bb8 100644 --- a/website/locale/messages.pot +++ b/website/locale/messages.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -32,27 +32,27 @@ msgstr "" msgid "ClickHouse DBMS" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "" @@ -70,6 +70,14 @@ msgstr "" msgid "Yandex LLC" msgstr "" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "" diff --git a/website/locale/ru/LC_MESSAGES/messages.mo 
b/website/locale/ru/LC_MESSAGES/messages.mo index 38fc03212af92bef1bc9b77364958447cf048c5d..53d8b37d07246761c4600def95d5998e8cce20ee 100644 GIT binary patch delta 1296 zcmYMzT}YEr7{KvIH=SD3{LH20%5X+Tv@rZ?k|qUt5eZ3TAEts@DbfagkOK=N=%UO> zEe*QJo1)n+GIxVoRV0<_ASU7nCgV89;S46=JZ9n#&Z*M`rslYUZ9JF-r(F zpm*BO`tp&2rg#c{_#HL!6XZ23O5{AUw%lS?9e5WtkUlKHS9k~)un1l3q6*KV9?dh{ zk0WM%8CgPpq2EHmO5QYealG{SGf*RSAajb-v;Z}=MaWq3pe}F(U3lE=??fKC$!nGk z^x$6Vty~1n*bwI7SQ7cyhHo@z2G+0ylgYPET#dC@i`xIvbOJY0pT%-qM&0>t(zY3! zQRnSOy+cni2jAdB0-D2kcG{6j{?E{HEj@C^^QgCY3AbVlqf60+dI=klwd6ADncl`$ z>_ZLILR32bByzjOi=VU~_21P_X9eEDojBz;8-Amv+{3}@9jHQGD1@5oNz|Rqp)R~? z8p}=9%b1QkFdvuj7;47KNkbkxtwCaxt4yNv-NQok4^cQpA&gJZy(N;78T?nziKKc4 zd+A?9<`%y*vhV=voq2)mK>}v|18QKCs2f?pd|W}jd=Apf@3%=V1&yo>_3u`Ly2IDV zCgpwbinY>WKiJ;jyV%s`t@pOHw*^DCiKxV*zH=8^E;TnbIveY~O~LE&#nyy`1 z-R=^1ad6zeVvFj%?{Q`Z1L6aLA`mwyLQF8aDT<))j~!^7ozMHc=bZO_-sik07hUR!&sVx{ z8^8Vh>-ZlkRP_Hh>NZ@|ba0iW8UY1!{s>r$2*U;yF}eU$Fzf z;YQ}S0Lv(&BZ^AoFrLFfT#4_{k2$Qw->3wAjH&@!FoIjK6fZjcBS?;R1)0mPIsG?K z6&=SI^V>5T%D9d?))p`jGKU>xG(C6}mB0yX!x;AB6YRuA+=or9La*XH?!ik=oIs|q zm*~Pb=)*L|^}0XNP^NRpSoYa*8CB9h$YBLMr3uQh8U3j1-N<3RoIbLEJ`54J^Ws&h zGZ@4%Cw_{mz^e-C-$f%shX$^o-p8;8b;FS3bzDn)5BFjMwelZWkFAtX6CFV9nE`CT zQM^M!_tDErw=?J|`Xg2OSNouf`md*BijH0Q9<|wgjKbKq0k!KxcmaD*2|h>d_9C)k z7h3>7=sM~v@^i5VcVH`yIPoN^(hC^FCH!EnpBg;V5deO``6fLM8SV^;Kn23ye~B)=+3ik`wNzE4k>IEl5s#JKZJW c_KvNM+rnL;j^tn8cTcjoI^#@~ diff --git a/website/locale/ru/LC_MESSAGES/messages.po b/website/locale/ru/LC_MESSAGES/messages.po index aed233c1275..981b83f3d7e 100644 --- a/website/locale/ru/LC_MESSAGES/messages.po +++ b/website/locale/ru/LC_MESSAGES/messages.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" "Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: 2020-05-20 15:32+0300\n" +"POT-Creation-Date: 2020-06-15 22:29+0300\n" "PO-Revision-Date: 2020-03-26 10:19+0300\n" "Last-Translator: FULL NAME \n" "Language: ru\n" @@ -38,27 +38,27 @@ msgstr "ClickHouse-быстрая СУБД OLAP с открытым исходн msgid "ClickHouse DBMS" msgstr "СУБД ClickHouse" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "открытый исходный код" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "реляционный" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "аналитика" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "аналитический" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "большие данные" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "веб-аналитика" @@ -78,6 +78,14 @@ msgstr "" msgid "Yandex LLC" msgstr "ООО «Яндекс»" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "Дата публикации" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "Документация" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "Рейтинг" diff --git a/website/locale/tr/LC_MESSAGES/messages.mo b/website/locale/tr/LC_MESSAGES/messages.mo index 1cfc686a5ed1392629766d00ad23bfdb739bd789..65218c3b7298969fc73112ec30893466695a3a4b 100644 GIT binary patch delta 1296 zcmYMzTS!zv9LMp$dc5X6v$hRdGO@xQ+)^}6P%Nd4iog&g+;maNOL=+d=0b!)PbCCd zN?>F!32qBv2qNZ75f{3)vYe zTj{vwI)Yx>lbDSQn1$bQ2rbTR4entD4#t~NY=K7#E+T)mWz<3|NU><5O~fQrfLZ8g zee+P+Nk=Uv;{{Ye{n&&JoV9__I-wdXPEI=bDd7Z80*|%25j(#XPKYfA2?%UFR{t1_n{{ 
zc(@3gFc&vtX&U+0M5pM`f{nNXTio_d%%^=DDK_mokIK*z`fvp`E`xK@(e20WcpP0|$bSVLO&q9F`v|qean}fPOYJ#oryo$M`-(dApQzNXVG$N`j>KZ^ zn2T4i2S<@&+1z9Wwg(l^kq{L{+=xp3Wju-fs1&|Mo&6H(vVB1X7{h2~ED;r0I%+~M zkMmgIwnwpyolc@I;VS9SMji66*FBU>rH0CGtjBIF!Pi)YalF*)XN-qw*P#L$K?N{| z3Va+D*aT{UXQ;b1jXL{T)Q09!N51&4KgYgN(E_XJM_p}Av=5ca5>MokQyG&|)z#G3 z5$x;@^tN?%MyBFlM5UhSJKNsY(;95{H3xcwk*kR%PEyg{fq~9cZ91FHibb(ZN~}yef11>25yXPX;-ZP&5Lqmkgs8>^7m{5} zVqtDfj6@=4Z|^oYyCCdBLT|{z5`vUO;-(29zQ1@A&FT9&=QQtmp7Wg3=dog2?p?^a zZhTUF!+iIYY4rbd!7;04n8hj_#07X8=ixY3-~=|{Q>58@9xeD0tFdyVvsl9ObIp=?Dqu#lJP&@dNmPI-Z@z%#jAu}ZeZm-i z#TCT2rEH^;iB41^`>_X4U=6;+Mx4bE{zN4hWK}KLiV0kc^Rd^PKZoRK7m-+Y*_$6g zRrDJ66W{L9QO05F*yLj&B!=x~HNAKMmB0~$mC_9pgXl<`5N*(J|GRE0*c8C}$S1=Nw1a2bX;FWs3KD)25G z!;HoxR>LAm#;Z6-RrXkj`fCS$OmG|RENX|tsFIDM&iE0kqknZJM^Nh&kP7\n" "Language: tr\n" @@ -36,27 +36,27 @@ msgstr "ClickHouse - hızlı açık kaynak OLAP DBMS" msgid "ClickHouse DBMS" msgstr "ClickHouse DBMS" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "açık kaynak" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "ilişkisel" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytics" msgstr "analiz" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "analitik" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "Büyük Veri" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "web-analyt -ics" @@ -76,6 +76,14 @@ msgstr "" msgid "Yandex LLC" msgstr "Yandex LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "Yayım datelandığı tarih" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "Belge" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "Verim" diff --git a/website/locale/zh/LC_MESSAGES/messages.mo b/website/locale/zh/LC_MESSAGES/messages.mo index 7260ce95e89b510777ffebc6c96cb915de5e9939..0a8eabb78ba784581b73d2e56aa0591383c04889 100644 GIT binary patch delta 1281 zcmYMzT}V@57{KvIJ^3-ea#L%5#L#d;Q)rcGmJk+2Ds&Om$TCww``Cya;YAKC5-%i^ zvP_3p8HfZnDq%OHE~IXPu&bhwazq6NQi726f9i$C+4;TC`}v&rJ!i6Vr84y1Zrv7s z2l*HAZ%)+c|8Lw9aS(~ z>x+}ZW-1yn123W`)QfF6itF(gIx&k`>{yPPXf5i5$FL5YF&)QD{R7mUn?`b$C~88_ zw4L?kB?aB$R~W>PsG0lN*TMvmbI97#࿒sM$r=)p&L06$G9{IX6i)d6qnJ1y0z8FSgAu@paI=@)U@{^mt5m^od+03 z?eFA8P|Hx?iEY{Jzh+WTqzg8oKG_iy`%xzjn))yn6F)V+F)pJXVGie1f>oG@KGgmJ z+=PSJgVXp8%W~NNofI4#sAkrHn#gfvPH90d37Gmm)P4gd9>FcdVbpiw1?sp()WmNy`T_h& zJY?GY$wJ59Lfz@m7zKTT$9M=AuogXhdzwfm>KRX%cnWob1>~(akshl)DRW<-EqI~B-`(2X74S!*DbEwK4hPSi z@4D34(e7$*?e2(NNvpAJRTW!zxV+w7-kL}(^MsWgAL;)TeG)0KKTOD2ym51J_G3+^hp{R+w?vG$(G^D^!75mtVk2;v%ZCub9AZ z*g$=&<2C#w(x^sy@Dg6b8l1-nE@KFPp&AS@t0vr!DU9P5yzcV-NRM_Ksb#}1e-E{y z53rB=Ho=8DjOXcm~x#FE(QzcHuZCv4ovi&s%6KuHsR=<>D!%guO;D zzQq6*FsJQa;zFGkk-2Qe`2)42zmdZ{j8cIhHev*IzXLg}i_=|R;4JF-B0m_piuw(# zp&BVCP`#RJ)?aTHc8M0$#7R{C0`A5E=cw}qHjrPyz4#NiV~qXM^9iiO9_+^(_#IdA z5H7OZYAhIH{TXXfHdcGv#7Ui{P!Dvv_%t>VXHk1Uj2ib8)$nu7;u5NnRyJ7oPoU<> zqViWz^ABP>-pO&HC3%V3n>qKuJLkM}5jD|A?8cJIC;4lv5g$X{Z>Ne1q){uFK{b9E zGdO}r@B^xWT$ulu_Vg4gkwr}~;(UZ^?6Hf-UHlAHa1u9Oz}=s77F_x5#jGbkQ\n" "Language: zh\n" @@ -33,27 +33,27 @@ msgstr "ツ暗ェツ氾环催ツ団ツ法ツ人" msgid "ClickHouse DBMS" msgstr "ツ环板msョツ嘉ッツ偲" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "open-source" msgstr "开源" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "relational" msgstr "关系" -#: templates/common_meta.html:24 +#: 
templates/common_meta.html:28 msgid "analytics" msgstr "分析" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "analytical" msgstr "分析" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "Big Data" msgstr "大数据" -#: templates/common_meta.html:24 +#: templates/common_meta.html:28 msgid "web-analytics" msgstr "网络分析" @@ -71,6 +71,14 @@ msgstr "软件按\"原样\"分发,不附带任何明示或暗示的担保或 msgid "Yandex LLC" msgstr "Yandex LLC" +#: templates/blog/content.html:16 +msgid "Published date" +msgstr "发布日期" + +#: templates/blog/nav.html:18 +msgid "Documentation" +msgstr "文件" + #: templates/docs/footer.html:3 msgid "Rating" msgstr "评分" diff --git a/website/main.html b/website/main.html index 73693204e5d..95debc2a233 100644 --- a/website/main.html +++ b/website/main.html @@ -3,6 +3,7 @@ {% set language = config.theme.language %} {% set direction = config.theme.direction %} {% set is_amp = config.extra.is_amp %} +{% set is_blog = config.extra.is_blog %} {% set single_page = config.extra.single_page %} {% set no_footer = True %} {% set og_type = 'article' %} @@ -36,22 +37,38 @@ {% if is_amp %} {% include "templates/docs/amp.html" %} {% else %} - {% set extra_html_attrs = 'data-version="' + data_version + '" data-single-page="' + data_single_page + '"' %} - {% set extra_body_attrs = 'data-spy="scroll" data-target="#toc" data-offset="80"' %} - + {% if not is_blog %} + {% set extra_html_attrs = 'data-version="' + data_version + '" data-single-page="' + data_single_page + '"' %} + {% set extra_body_attrs = 'data-spy="scroll" data-target="#toc" data-offset="80"' %} + {% else %} + {% set extra_body_attrs = 'class="blog"' %} + {% endif %} {% extends "templates/base.html" %} {% block content %} - {% include "templates/docs/nav.html" %} -
    -
    - {% include "templates/docs/sidebar.html" %} - {% include "templates/docs/content.html" %} - {% if not config.extra.single_page %} - {% include "templates/docs/toc.html" %} - {% endif %} + {% if not is_blog %} + {% include "templates/docs/nav.html" %} +
    +
    + {% include "templates/docs/sidebar.html" %} + {% include "templates/docs/content.html" %} + {% if not config.extra.single_page %} + {% include "templates/docs/toc.html" %} + {% endif %} +
    -
    + {% else %} + {% include "templates/blog/nav.html" %} +
    +
    + {% include "templates/blog/content.html" %} +
    +
    + {% if page and page.meta.is_index %} + {% include "templates/index/community.html" %} + {% include "templates/blog/footer.html" %} + {% endif %} + {% endif %} {% endblock %} {% endif %} diff --git a/website/sitemap-index.xml b/website/sitemap-index.xml index e53d6c29c54..75fdc75973c 100644 --- a/website/sitemap-index.xml +++ b/website/sitemap-index.xml @@ -21,6 +21,12 @@ https://clickhouse.tech/docs/fa/sitemap.xml + + https://clickhouse.tech/blog/en/sitemap.xml + + + https://clickhouse.tech/blog/ru/sitemap.xml + https://clickhouse.tech/sitemap-static.xml diff --git a/website/templates/blog/content.html b/website/templates/blog/content.html new file mode 100644 index 00000000000..38ad7933b00 --- /dev/null +++ b/website/templates/blog/content.html @@ -0,0 +1,43 @@ +
    + {% if not page.meta.is_index %} +
    +
    + {% if page.meta.image %} + {{ title }} + {% endif %} +

    {{ title }}

    +
    + +
    + {{ page.content|adjust_markdown_html }} +
    + +
    + {{ page.meta.date }} + {% if page.meta.tags %} + {% for tag in page.meta.tags %} +
    + {{ tag }} +
    + {% endfor %} + {% endif %} +
    + {% include "templates/blog/footer.html" %} +
    + {% else %} + {% for post in config.extra.post_meta.values() %} + + {% set post_image = post.get('image') or '/images/index/intro.svg' %} +
    +
    + {{ post['title'] }} +
    +
    +

    {{ post['title'] }}

    + {{ post['date'] }} +
    +
    +
    + {% endfor %} + {% endif %} +
    diff --git a/website/templates/blog/footer.html b/website/templates/blog/footer.html new file mode 100644 index 00000000000..3e94ecce51f --- /dev/null +++ b/website/templates/blog/footer.html @@ -0,0 +1,9 @@ +
    +
    +
    + +
    +
    +
    diff --git a/website/templates/blog/nav.html b/website/templates/blog/nav.html new file mode 100644 index 00000000000..a7e135296f2 --- /dev/null +++ b/website/templates/blog/nav.html @@ -0,0 +1,45 @@ + diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 84bd93d5175..86a852284ee 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -10,7 +10,11 @@ +{% if page and page.meta.image %} + +{% else %} +{% endif %} {% if page and not single_page %} @@ -20,13 +24,18 @@ {% include "templates/docs/ld_json.html" %} +{% if page and page.meta.tags %} + +{% else %} +{% endif %} {% if config and (config.extra.single_page or config.extra.version_prefix) %} {% endif %} -{% if config and page %} +{% if config and page and not is_blog %} {% for code, name in config.extra.languages.items() %} {% endfor %} diff --git a/website/templates/docs/ld_json.html b/website/templates/docs/ld_json.html index 3db89657221..7170a88dad0 100644 --- a/website/templates/docs/ld_json.html +++ b/website/templates/docs/ld_json.html @@ -1,12 +1,17 @@ {% if page and page.meta %} +}{% endif %}] {% endif %} diff --git a/website/templates/index/community.html b/website/templates/index/community.html index e230cac8da9..0adb3150ea0 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -113,8 +113,8 @@ class="bg-secondary-alt rounded-circle p-2 mr-4 float-left" />
    {{ _('ClickHouse Blog') }}
    -

    {{ _('in') }} {{ _('English') }} - or in {{ _('Russian') }}

    +

    {{ _('in') }} {{ _('English') }} + or in {{ _('Russian') }}

    diff --git a/website/templates/index/nav.html b/website/templates/index/nav.html index e3c680f1885..9bae81eb73c 100644 --- a/website/templates/index/nav.html +++ b/website/templates/index/nav.html @@ -18,8 +18,7 @@ Documentation