From 8e12eedbb33750b47f5a7023c223854b5ab067f0 Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Sun, 27 Jan 2019 19:51:05 +0300 Subject: [PATCH 01/38] Initial commit --- .gitmodules | 3 +++ CMakeLists.txt | 1 + cmake/find_cassandra.cmake | 12 ++++++++++++ contrib/CMakeLists.txt | 9 ++++++++- contrib/cassandra | 1 + dbms/src/Common/config.h.in | 1 + dbms/src/Dictionaries/CMakeLists.txt | 6 +++++- .../Dictionaries/CassandraDBDictionarySource.cpp | 1 + .../Dictionaries/CassandraDBDictionarySource.h | 16 ++++++++++++++++ 9 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 cmake/find_cassandra.cmake create mode 160000 contrib/cassandra create mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.cpp create mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.h diff --git a/.gitmodules b/.gitmodules index 24211b6707e..86100baa3dc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -64,3 +64,6 @@ [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git +[submodule "contrib/cassandra"] + path = contrib/cassandra + url = https://github.com/datastax/cpp-driver.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 98c3643f055..b8f8d803585 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,6 +236,7 @@ include (cmake/find_protobuf.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) +include (cmake/find_cassandra.cmake) if (ENABLE_TESTS) include (cmake/find_gtest.cmake) endif () diff --git a/cmake/find_cassandra.cmake b/cmake/find_cassandra.cmake new file mode 100644 index 00000000000..3e4bb3ca373 --- /dev/null +++ b/cmake/find_cassandra.cmake @@ -0,0 +1,12 @@ +if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + message (WARNING "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init --recursive") + else() + set (CASSANDRA_INCLUDE_DIR + "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") + set (CASSANDRA_LIBRARY cassandra) + set (USE_CASSANDRA 1) + + message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") + endif() +endif() \ No newline at end of file diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0c4b6c15287..4a4add1f095 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -219,5 +219,12 @@ if (USE_INTERNAL_HDFS3_LIBRARY) endif () if (USE_BASE64) - add_subdirectory (base64-cmake) + add_subdirectory(base64-cmake) endif() + +if (USE_CASSANDRA) + # TODO osfavstov: cassandra/CMakeLists.txt change + # 5: set(CASS_ROOT_DIR "${CMAKE_SOURCE_DIR}/contrib/cassandra"); + # 10: include(${ClickHouse_SOURCE_DIR}/contrib/cassandra/cmake/modules/CppDriver.cmake) + add_subdirectory(cassandra) +endif() \ No newline at end of file diff --git a/contrib/cassandra b/contrib/cassandra new file mode 160000 index 00000000000..fd9b73d4acf --- /dev/null +++ b/contrib/cassandra @@ -0,0 +1 @@ +Subproject commit fd9b73d4acfd85293ab304be64e2e1e2109e521d diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 0c756841f2e..42ff3e43ec2 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -18,6 +18,7 @@ #cmakedefine01 USE_XXHASH #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_PROTOBUF +#cmakedefine01 USE_CASSANDRA #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index d7f85a5c7eb..534f49bc5bb 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -11,7 +11,7 @@ generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) -add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources}) +add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources} CassandraDBDictionarySource.cpp CassandraDBDictionarySource.h) target_link_libraries(clickhouse_dictionaries PRIVATE clickhouse_common_io pocoext ${MYSQLXX_LIBRARY} ${BTRIE_LIBRARIES}) if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) @@ -36,4 +36,8 @@ if(USE_POCO_MONGODB) target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_MongoDB_LIBRARY}) endif() +if(USE_CASSANDRA) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${CASSANDRA_INCLUDE_DIR}) +endif() + add_subdirectory(Embedded) diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp new file mode 100644 index 00000000000..084ef283107 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp @@ -0,0 +1 @@ +#include "CassandraDBDictionarySource.h" \ No newline at end of file diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.h b/dbms/src/Dictionaries/CassandraDBDictionarySource.h new file mode 100644 index 00000000000..a9a43b026b0 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDBDictionarySource.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +#if USE_CASSANDRA + +# include "DictionaryStructure.h" +# include "IDictionarySource.h" +# include + +namespace DB +{ + +} + +#endif From 
ac46a3a976b511b16501b3c3df75f56667efba61 Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Sat, 16 Feb 2019 13:11:49 +0300 Subject: [PATCH 02/38] Add basic realisation --- dbms/src/Dictionaries/CMakeLists.txt | 2 +- .../CassandraBlockInputStream.cpp | 158 ++++++++++++++++++ .../Dictionaries/CassandraBlockInputStream.h | 38 +++++ .../CassandraDBDictionarySource.cpp | 1 - .../CassandraDBDictionarySource.h | 16 -- .../CassandraDictionarySource.cpp | 114 +++++++++++++ .../Dictionaries/CassandraDictionarySource.h | 58 +++++++ 7 files changed, 369 insertions(+), 18 deletions(-) create mode 100644 dbms/src/Dictionaries/CassandraBlockInputStream.cpp create mode 100644 dbms/src/Dictionaries/CassandraBlockInputStream.h delete mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.cpp delete mode 100644 dbms/src/Dictionaries/CassandraDBDictionarySource.h create mode 100644 dbms/src/Dictionaries/CassandraDictionarySource.cpp create mode 100644 dbms/src/Dictionaries/CassandraDictionarySource.h diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index 534f49bc5bb..21b82a49154 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -11,7 +11,7 @@ generate_code(CacheDictionary_generate2 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 generate_code(CacheDictionary_generate3 UInt8 UInt16 UInt32 UInt64 UInt128 Int8 Int16 Int32 Int64 Float32 Float64 Decimal32 Decimal64 Decimal128) add_headers_and_sources(clickhouse_dictionaries ${CMAKE_CURRENT_BINARY_DIR}/generated/) -add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources} CassandraDBDictionarySource.cpp CassandraDBDictionarySource.h) +add_library(clickhouse_dictionaries ${LINK_MODE} ${clickhouse_dictionaries_sources}) target_link_libraries(clickhouse_dictionaries PRIVATE clickhouse_common_io pocoext ${MYSQLXX_LIBRARY} ${BTRIE_LIBRARIES}) if(Poco_SQL_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp new file mode 100644 index 00000000000..8493cf8a6e5 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp @@ -0,0 +1,158 @@ +#include +#include + +#if USE_CASSANDRA + +# include "CassandraBlockInputStream.h" +#include "CassandraBlockInputStream.h" + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int CASSANDRA_INTERNAL_ERROR; +} + +CassandraBlockInputStream::CassandraBlockInputStream( + CassSession *session, + const std::string &query_str, + const DB::Block &sample_block, + const size_t max_block_size) + : session{session} + , query_str{query_str} + , max_block_size{max_block_size} +{ + CassStatement * statement = cass_statement_new(query_str.c_str(), 0); + CassFuture* future = cass_session_execute(session, statement); + + const CassResult * result = cass_future_get_result(future); + cass_statement_free(statement); + + if (result == nullptr) { +// CassError error_code = cass_future_error_code(future); + const char* error_message; + size_t error_message_length; + cass_future_error_message(future, &error_message, &error_message_length); + + throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; + } + + cass_future_free(future); + + this->result = result; + + description.init(sample_block); +} + +CassandraBlockInputStream::~CassandraBlockInputStream() { + if (iterator != nullptr) + cass_iterator_free(iterator); + cass_result_free(result); +} + +namespace +{ + 
using ValueType = ExternalResultDescription::ValueType; + + void insertValue(IColumn & column, const ValueType type, const CassValue * value) + { + switch (type) + { + case ValueType::UInt8: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt16: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt32: + { + cass_uint32_t _value; + cass_value_get_uint32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::UInt64: + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int8: + { + cass_int8_t _value; + cass_value_get_int8(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int16: + { + cass_int16_t _value; + cass_value_get_int16(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int32: + { + cass_int32_t _value; + cass_value_get_int32(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Int64: + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Float32: + { + cass_float_t _value; + cass_value_get_float(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::Float64: + { + cass_double_t _value; + cass_value_get_double(value, &_value); + static_cast(column).insertValue(_value); + break; + } + case ValueType::String: + { + const char* _value; + size_t _value_length; + cass_value_get_string + static_cast(column).insertData(value.data(), value.size()); + break; + } + case ValueType::Date: + static_cast(column).insertValue(UInt16(value.getDate().getDayNum())); + break; + case ValueType::DateTime: + static_cast(column).insertValue(UInt32(value.getDateTime())); + break; + case ValueType::UUID: + static_cast(column).insert(parse(value.data(), value.size())); + break; + } + } + + void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } +} + + +} +#endif diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h new file mode 100644 index 00000000000..fdf9954d18f --- /dev/null +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include +#include +#include +#include "ExternalResultDescription.h" + + +namespace DB +{ +/// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining + class CassandraBlockInputStream final : public IBlockInputStream + { + public: + CassandraBlockInputStream( + CassSession * session, + const std::string & query_str, + const Block & sample_block, + const size_t max_block_size); + ~CassandraBlockInputStream() override; + + String getName() const override { return "Cassandra"; } + + Block getHeader() const override { return description.sample_block.cloneEmpty(); } + + private: + Block readImpl() override; + + CassSession * session, + const std::string & query_str; + const size_t max_block_size; + ExternalResultDescription description; + const CassResult * result; + CassIterator * iterator = nullptr; + }; + +} diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp deleted file 
mode 100644 index 084ef283107..00000000000 --- a/dbms/src/Dictionaries/CassandraDBDictionarySource.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "CassandraDBDictionarySource.h" \ No newline at end of file diff --git a/dbms/src/Dictionaries/CassandraDBDictionarySource.h b/dbms/src/Dictionaries/CassandraDBDictionarySource.h deleted file mode 100644 index a9a43b026b0..00000000000 --- a/dbms/src/Dictionaries/CassandraDBDictionarySource.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include - -#if USE_CASSANDRA - -# include "DictionaryStructure.h" -# include "IDictionarySource.h" -# include - -namespace DB -{ - -} - -#endif diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp new file mode 100644 index 00000000000..81cc3e9d85c --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -0,0 +1,114 @@ +#include "CassandraDictionarySource.h" +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + +namespace DB +{ + namespace ErrorCodes + { + extern const int SUPPORT_IS_DISABLED; + } + + void registerDictionarySourceCassandra(DictionarySourceFactory & factory) + { + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */) -> DictionarySourcePtr { +#if USE_CASSANDRA + return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); +#else + (void)dict_struct; + (void)config; + (void)config_prefix; + (void)sample_block; + throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("cassandra", createTableSource); + } + +} + +#if USE_CASSANDRA + +# include + +namespace DB +{ +namespace ErrorCodes { + extern const int UNSUPPORTED_METHOD; + extern const int WRONG_PASSWORD; +} + +static const size_t max_block_size = 8192; + +CassandraDictionarySource::CassandraDictionarySource( + const DB::DictionaryStructure &dict_struct, + const std::string &host, + UInt16 port, + const std::string &user, + const std::string &password, + const std::string &method, + const std::string &db, + const DB::Block &sample_block) + : dict_struct{dict_struct} + , host{host} + , port{port} + , user{user} + , password{password} + , method{method} + , db{db} + , sample_block{sample_block} + , cluster{cass_cluster_new()} + , session{cass_session_new()} +{ + cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); +} + +CassandraDictionarySource::CassandraDictionarySource( + const DB::DictionaryStructure &dict_struct, + const Poco::Util::AbstractConfiguration &config, + const std::string &config_prefix, + DB::Block &sample_block) + : CassandraDictionarySource( + dict_struct, + config.getString(config_prefix + ".host"), + config.getUInt(config_prefix + ".port"), + config.getString(config_prefix + ".user", ""), + config.getString(config_prefix + ".password", ""), + config.getString(config_prefix + ".method", ""), + config.getString(config_prefix + ".db", ""), + sample_block) +{ +} + +CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySource & other) + : CassandraDictionarySource{other.dict_struct, + other.host, + other.port, + other.user, + other.password, + other.method, + other.db, + other.sample_block} +{ +} + +CassandraDictionarySource::~CassandraDictionarySource() { + 
cass_session_free(session); + cass_cluster_free(cluster); +} + +std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { + return host + (port != 0 ? ":" + std::to_string(port) : ""); +} + +BlockInputStreamPtr CassandraDict + + +} + +#endif diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h new file mode 100644 index 00000000000..2d5e81a2733 --- /dev/null +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -0,0 +1,58 @@ +#pragma once + +#include + +#if USE_CASSANDRA + +# include "DictionaryStructure.h" +# include "IDictionarySource.h" +# include + +namespace DB +{ +class CassandraDictionarySource final : public IDictionarySource { + CassandraDictionarySource( + const DictionaryStructure & dict_struct, + const std::string & host, + UInt16 port, + const std::string & user, + const std::string & password, + const std::string & method, + const std::string & db, + const Block & sample_block); + +public: + CassandraDictionarySource( + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block); + + CassandraDictionarySource(const CassandraDictionarySource & other); + + ~CassandraDictionarySource() override; + + BlockInputStreamPtr loadAll() override; + + BlockInputStreamPtr loadIds(const std::vector & ids) override; + + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + +private: + static std::string toConnectionString(const std::string& host, const UInt16 port); + + const DictionaryStructure dict_struct; + const std::string host; + const UInt16 port; + const std::string user; + const std::string password; + const std::string method; + const std::string db; + Block sample_block; + + CassCluster * cluster; + CassSession * session; +}; +} + +#endif From 0e047e9abcd873f18eb0cf42471dc41e19c6ef0d Mon Sep 17 00:00:00 2001 From: Oleg Favstov Date: Thu, 11 Apr 2019 11:05:01 +0200 Subject: [PATCH 03/38] Base code for testing added --- ci/jobs/quick-build/run.sh | 2 +- .../CassandraBlockInputStream.cpp | 87 ++++++++++++++----- .../Dictionaries/CassandraBlockInputStream.h | 1 + .../src/Dictionaries/registerDictionaries.cpp | 2 + dbms/tests/integration/helpers/cluster.py | 20 ++++- .../helpers/docker_compose_cassandra.yml | 7 ++ dbms/tests/integration/image/Dockerfile | 2 +- .../external_sources.py | 20 +++++ .../test_external_dictionaries/test.py | 5 +- .../dicts/external_dicts_dict_sources.md | 16 +++- 10 files changed, 132 insertions(+), 30 deletions(-) create mode 100644 dbms/tests/integration/helpers/docker_compose_cassandra.yml diff --git a/ci/jobs/quick-build/run.sh b/ci/jobs/quick-build/run.sh index 9e8fe9353d6..c7d8470d358 100755 --- a/ci/jobs/quick-build/run.sh +++ b/ci/jobs/quick-build/run.sh @@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse BUILD_TYPE=Debug ENABLE_EMBEDDED_COMPILER=0 -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0" +CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D 
ENABLE_MYSQL=0 -D ENABLE_SSL=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_CASSANDRA=0" [[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp index 8493cf8a6e5..ac90ac8b528 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp @@ -1,10 +1,13 @@ #include #include +#include +#include +#include #if USE_CASSANDRA # include "CassandraBlockInputStream.h" -#include "CassandraBlockInputStream.h" +# include "CassandraBlockInputStream.h" namespace DB @@ -25,23 +28,8 @@ CassandraBlockInputStream::CassandraBlockInputStream( , max_block_size{max_block_size} { CassStatement * statement = cass_statement_new(query_str.c_str(), 0); - CassFuture* future = cass_session_execute(session, statement); - - const CassResult * result = cass_future_get_result(future); - cass_statement_free(statement); - - if (result == nullptr) { -// CassError error_code = cass_future_error_code(future); - const char* error_message; - size_t error_message_length; - cass_future_error_message(future, &error_message, &error_message_length); - - throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; - } - - cass_future_free(future); - - this->result = result; + cass_statement_set_paging_size(statement, max_block_size) + this->has_more_pages = cass_true; description.init(sample_block); } @@ -134,23 +122,78 @@ namespace { const char* _value; size_t _value_length; - cass_value_get_string - static_cast(column).insertData(value.data(), value.size()); + cass_value_get_string(value, &_value, &_value_length); + static_cast(column).insertData(_value, _value_length); break; } case ValueType::Date: - static_cast(column).insertValue(UInt16(value.getDate().getDayNum())); + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); break; + } case ValueType::DateTime: - static_cast(column).insertValue(UInt32(value.getDateTime())); + { + cass_int64_t _value; + cass_value_get_int64(value, &_value); + static_cast(column).insertValue(_value); break; + } case ValueType::UUID: + { + CassUuid _value; + cass_value_get_uuid(value, &_value); static_cast(column).insert(parse(value.data(), value.size())); break; + } } } void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } + + Block CassandraBlockInputStream::readImpl() + { + if (has_more_pages) + return {}; + + CassFuture* query_future = cass_session_execute(session, statement); + + const CassResult* result = cass_future_get_result(query_future); + + if (result == nullptr) { + const char* error_message; + size_t error_message_length; + cass_future_error_message(future, &error_message, &error_message_length); + + throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; + } + + const CassRow* row = cass_result_first_row(result); + const CassValue* map = cass_row_get_column(row, 0); + CassIterator* iterator = cass_iterator_from_map(map); + while (cass_iterator_next(iterator)) { + const CassValue* _key = cass_iterator_get_map_key(iterator); + const CassValue* _value = cass_iterator_get_map_value(iterator); + for (const auto &[value, idx]: {{_key, 0}, {_value, 1}}) { + if (description.types[idx].second) { + ColumnNullable & column_nullable = static_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), 
description.types[idx].first, value); + column_nullable.getNullMapData().emplace_back(0); + } else { + insertValue(*columns[idx], description.types[idx].first, value); + } + } + } + + has_more_pages = cass_result_has_more_pages(result); + + if (has_more_pages) { + cass_statement_set_paging_state(statement, result); + } + + cass_result_free(result); + } } diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h index fdf9954d18f..005c6f69b75 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.h +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -32,6 +32,7 @@ namespace DB const size_t max_block_size; ExternalResultDescription description; const CassResult * result; + cass_bool_t has_more_pages; CassIterator * iterator = nullptr; }; diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp index 1a8c5a7be7b..93df888d519 100644 --- a/dbms/src/Dictionaries/registerDictionaries.cpp +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -7,6 +7,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & source_factory); void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory); +void registerDictionarySourceCassandra(DictionarySourceFactory & source_factory); void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory); @@ -30,6 +31,7 @@ void registerDictionaries() registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); registerDictionarySourceMongoDB(source_factory); + registerDictionarySourceCassandra(source_factory); registerDictionarySourceXDBC(source_factory); registerDictionarySourceJDBC(source_factory); registerDictionarySourceExecutable(source_factory); diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 7ee2c1f9f54..38f09a72524 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -92,6 +92,7 @@ class ClickHouseCluster: self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] + self.base_cassandra_cmd = [] self.pre_zookeeper_commands = [] self.instances = {} self.with_zookeeper = False @@ -101,6 +102,7 @@ class ClickHouseCluster: self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False + self.with_cassandra = False self.docker_client = None self.is_up = False @@ -112,7 +114,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_cassandra=False, hostname=None, 
env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -130,7 +132,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, self.base_configs_dir, self.server_bin_path, + self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_cassandra, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) @@ -185,6 +187,13 @@ class ClickHouseCluster: self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mongo.yml')] + if with_cassandra and not self.with_cassandra: + self.with_cassandra = True + self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_cassandra.yml')]) + self.base_cassandra_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_cassandra.yml')] + + return instance @@ -316,6 +325,10 @@ class ClickHouseCluster: subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) self.wait_mongo_to_start(30) + if self.with_cassandra and self.base_cassandra_cmd: + subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate']) + time.sleep(10) + subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) start_deadline = time.time() + 20.0 # seconds @@ -414,7 +427,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, base_configs_dir, server_bin_path, odbc_bridge_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_cassandra, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -439,6 +452,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka self.with_mongo = with_mongo + self.with_cassandra = with_cassandra self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, 'docker_compose.yml') diff --git a/dbms/tests/integration/helpers/docker_compose_cassandra.yml b/dbms/tests/integration/helpers/docker_compose_cassandra.yml new file mode 100644 index 00000000000..bb6a0221c54 --- /dev/null +++ b/dbms/tests/integration/helpers/docker_compose_cassandra.yml @@ -0,0 +1,7 @@ +version: '2.2' +services: + cassandra1: + image: cassandra + restart: always + ports: + - 6340:6349 diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 1dd5c1713b2..0b3cdd5a65f 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ 
/etc/localtime && echo $TZ > /etc/timezone -RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal +RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal cassandra-driver ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 71dc05ca78c..e0adb8c187b 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -2,6 +2,7 @@ import warnings import pymysql.cursors import pymongo +import cassandra from tzlocal import get_localzone import datetime import os @@ -372,3 +373,22 @@ class SourceHTTP(SourceHTTPBase): class SourceHTTPS(SourceHTTPBase): def _get_schema(self): return "https" + +class SourceCassandra(ExternalSource): + def get_source_str(self, table_name): + return ''' + + {host} + {port} + + '''.format( + host=self.docker_hostname, + port=self.docker_port, + ) + + def prepare(self, structure, table_name, cluster): + self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port) + self.prepared = True + + def load_data(self, data, table_name): + for ro diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index 314ec26a106..0bb76ce171e 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -5,7 +5,7 @@ import time from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed, SourceMongo -from external_sources import SourceHTTP, SourceHTTPS +from external_sources import SourceHTTP, SourceHTTPS, SourceCassandra SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -79,6 +79,7 @@ LAYOUTS = [ ] SOURCES = [ + SourceCassandra("Cassandra", "localhost", "6340", "cassandra1", "6349", "", ""), SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), @@ -120,7 +121,7 @@ def setup_module(module): for fname in os.listdir(dict_configs_path): main_configs.append(os.path.join(dict_configs_path, fname)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True) + node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_cassandra=True) cluster.add_instance('clickhouse1') @pytest.fixture(scope="module") diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index 8fb2145ecaf..cda41f8294b 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -30,6 +30,7 @@ Types of sources (`source_type`): - [MySQL](#dicts-external_dicts_dict_sources-mysql) - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) - 
[MongoDB](#dicts-external_dicts_dict_sources-mongodb) + - [Cassanda](#dicts-external_dicts_dict_sources-cassandra) - [ODBC](#dicts-external_dicts_dict_sources-odbc) @@ -143,7 +144,7 @@ PASSWORD = test If you then make a query such as ``` -SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); +SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ``` ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. @@ -421,4 +422,17 @@ Setting fields: - `db` – Name of the database. - `collection` – Name of the collection. +### Cassandra {#dicts-external_dicts_dict_sources-cassandra} + +Example of settings: + +```xml + + + localhost + 6349 + + +``` + [Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_sources/) From 3175caa1c0a4c517080b7a1c2d1ccac697318bf1 Mon Sep 17 00:00:00 2001 From: Gleb-Tretyakov Date: Thu, 23 May 2019 00:09:29 +0300 Subject: [PATCH 04/38] Fix compilation errors --- .../CassandraBlockInputStream.cpp | 37 ++++++++++++------- .../Dictionaries/CassandraBlockInputStream.h | 8 ++-- .../CassandraDictionarySource.cpp | 10 ++++- .../Dictionaries/CassandraDictionarySource.h | 17 +++++++++ 4 files changed, 53 insertions(+), 19 deletions(-) diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp index ac90ac8b528..f76c9dd93f6 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.cpp @@ -1,11 +1,14 @@ -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include #if USE_CASSANDRA +# include # include "CassandraBlockInputStream.h" # include "CassandraBlockInputStream.h" @@ -24,11 +27,11 @@ CassandraBlockInputStream::CassandraBlockInputStream( const DB::Block &sample_block, const size_t max_block_size) : session{session} + , statement{cass_statement_new(query_str.c_str(), 0)} , query_str{query_str} , max_block_size{max_block_size} { - CassStatement * statement = cass_statement_new(query_str.c_str(), 0); - cass_statement_set_paging_size(statement, max_block_size) + cass_statement_set_paging_size(statement, max_block_size); this->has_more_pages = cass_true; description.init(sample_block); @@ -130,33 +133,37 @@ namespace { cass_int64_t _value; cass_value_get_int64(value, &_value); - static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); + static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME break; } case ValueType::DateTime: { cass_int64_t _value; cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + static_cast(column).insertValue(_value); break; } case ValueType::UUID: { CassUuid _value; cass_value_get_uuid(value, &_value); - static_cast(column).insert(parse(value.data(), value.size())); + std::array uuid_str; + cass_uuid_string(_value, uuid_str.data()); + static_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } } } +} - void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } + // void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } Block CassandraBlockInputStream::readImpl() { if (has_more_pages) return {}; + MutableColumns columns(description.sample_block.columns()); CassFuture* query_future = cass_session_execute(session, statement); const CassResult* result = 
cass_future_get_result(query_future); @@ -164,7 +171,7 @@ namespace if (result == nullptr) { const char* error_message; size_t error_message_length; - cass_future_error_message(future, &error_message, &error_message_length); + cass_future_error_message(query_future, &error_message, &error_message_length); throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } @@ -175,7 +182,8 @@ namespace while (cass_iterator_next(iterator)) { const CassValue* _key = cass_iterator_get_map_key(iterator); const CassValue* _value = cass_iterator_get_map_value(iterator); - for (const auto &[value, idx]: {{_key, 0}, {_value, 1}}) { + auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; + for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { ColumnNullable & column_nullable = static_cast(*columns[idx]); insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); @@ -193,8 +201,9 @@ namespace } cass_result_free(result); + + return description.sample_block.cloneWithColumns(std::move(columns)); } -} } diff --git a/dbms/src/Dictionaries/CassandraBlockInputStream.h b/dbms/src/Dictionaries/CassandraBlockInputStream.h index 005c6f69b75..2b7c3b68744 100644 --- a/dbms/src/Dictionaries/CassandraBlockInputStream.h +++ b/dbms/src/Dictionaries/CassandraBlockInputStream.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include #include -#include "ExternalResultDescription.h" +#include namespace DB @@ -27,8 +26,9 @@ namespace DB private: Block readImpl() override; - CassSession * session, - const std::string & query_str; + CassSession * session; + CassStatement * statement; + String query_str; const size_t max_block_size; ExternalResultDescription description; const CassResult * result; diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp index 81cc3e9d85c..ad89d5e2e43 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.cpp +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -35,6 +35,8 @@ namespace DB #if USE_CASSANDRA # include +# include +# include "CassandraBlockInputStream.h" namespace DB { @@ -106,7 +108,13 @@ std::string CassandraDictionarySource::toConnectionString(const std::string &hos return host + (port != 0 ? ":" + std::to_string(port) : ""); } -BlockInputStreamPtr CassandraDict +BlockInputStreamPtr CassandraDictionarySource::loadAll() { + return std::make_shared(nullptr, "", sample_block, max_block_size); +} + +std::string CassandraDictionarySource::toString() const { + return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? 
" " : " " + user + '@') + */ host + ':' + DB::toString(port); +} } diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h index 2d5e81a2733..60c503fc4da 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.h +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if USE_CASSANDRA @@ -34,9 +35,25 @@ public: BlockInputStreamPtr loadAll() override; + bool supportsSelectiveLoad() const override { return true; } + + bool isModified() const override { return true; } + + ///Not yet supported + bool hasUpdateField() const override { return false; } + + DictionarySourcePtr clone() const override { return std::make_unique(*this); } + BlockInputStreamPtr loadIds(const std::vector & ids) override; BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + + BlockInputStreamPtr loadUpdatedAll() override + { + throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; + } + + std::string toString() const override; private: static std::string toConnectionString(const std::string& host, const UInt16 port); From a82f4925ec3189eb7eef1ebca48dfa65b35042c4 Mon Sep 17 00:00:00 2001 From: favstovol Date: Thu, 30 May 2019 02:01:25 +0300 Subject: [PATCH 05/38] Add stubs --- contrib/cassandra-cmake/CMakeLists.txt | 0 dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Dictionaries/CassandraDictionarySource.cpp | 5 +++-- dbms/src/Dictionaries/CassandraDictionarySource.h | 12 +++++++++--- 4 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 contrib/cassandra-cmake/CMakeLists.txt diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 093f98d22a1..04a7e72e0c8 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -427,6 +427,7 @@ namespace ErrorCodes extern const int BAD_TTL_EXPRESSION = 450; extern const int BAD_TTL_FILE = 451; extern const int SETTING_CONSTRAINT_VIOLATION = 452; + extern const int CASSANDRA_INTERNAL_ERROR = 453; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.cpp b/dbms/src/Dictionaries/CassandraDictionarySource.cpp index ad89d5e2e43..cfd21510e69 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.cpp +++ b/dbms/src/Dictionaries/CassandraDictionarySource.cpp @@ -17,7 +17,7 @@ namespace DB Block & sample_block, const Context & /* context */) -> DictionarySourcePtr { #if USE_CASSANDRA - return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); + return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else (void)dict_struct; (void)config; @@ -40,7 +40,8 @@ namespace DB namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int UNSUPPORTED_METHOD; extern const int WRONG_PASSWORD; } diff --git a/dbms/src/Dictionaries/CassandraDictionarySource.h b/dbms/src/Dictionaries/CassandraDictionarySource.h index 60c503fc4da..2bdd476951a 100644 --- a/dbms/src/Dictionaries/CassandraDictionarySource.h +++ b/dbms/src/Dictionaries/CassandraDictionarySource.h @@ -44,9 +44,15 @@ public: DictionarySourcePtr clone() const override { return std::make_unique(*this); } - BlockInputStreamPtr loadIds(const 
std::vector & ids) override; + BlockInputStreamPtr loadIds(const std::vector & /* ids */) override + { + throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + } - BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override + { + throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + } BlockInputStreamPtr loadUpdatedAll() override { @@ -56,7 +62,7 @@ public: std::string toString() const override; private: - static std::string toConnectionString(const std::string& host, const UInt16 port); + static std::string toConnectionString(const std::string & host, const UInt16 port); const DictionaryStructure dict_struct; const std::string host; From 4b30b3168cd8675f50c2741bb89c90bac6b08428 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 19 May 2020 20:48:28 +0300 Subject: [PATCH 06/38] try fix build --- .gitmodules | 11 +++- cmake/find_cassandra.cmake | 8 ++- contrib/CMakeLists.txt | 1 + contrib/cassandra | 2 +- contrib/libuv | 1 + src/CMakeLists.txt | 5 ++ .../CassandraBlockInputStream.cpp | 60 +++++++++---------- .../CassandraDictionarySource.cpp | 51 ++++++++-------- src/Dictionaries/ya.make | 4 +- src/Functions/ya.make | 2 +- .../external_sources.py | 2 +- 11 files changed, 83 insertions(+), 64 deletions(-) create mode 160000 contrib/libuv diff --git a/.gitmodules b/.gitmodules index f5d4b8340fa..b3a624aaae9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -95,9 +95,6 @@ [submodule "contrib/rapidjson"] path = contrib/rapidjson url = https://github.com/Tencent/rapidjson -[submodule "contrib/cassandra"] - path = contrib/cassandra - url = https://github.com/datastax/cpp-driver.git [submodule "contrib/fastops"] path = contrib/fastops url = https://github.com/ClickHouse-Extras/fastops @@ -160,3 +157,11 @@ [submodule "contrib/openldap"] path = contrib/openldap url = https://github.com/openldap/openldap.git +[submodule "contrib/cassandra"] + path = contrib/cassandra + url = https://github.com/tavplubix/cpp-driver.git + branch = ch-tmp +[submodule "contrib/libuv"] + path = contrib/libuv + url = https://github.com/libuv/libuv.git + branch = v1.x diff --git a/cmake/find_cassandra.cmake b/cmake/find_cassandra.cmake index 7f7346ce545..951cfc88b11 100644 --- a/cmake/find_cassandra.cmake +++ b/cmake/find_cassandra.cmake @@ -1,10 +1,14 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) - if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") + message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message (WARNING "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init --recursive") else() + set(LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") - set (CASSANDRA_LIBRARY cassandra) + set (LIBUV_LIBRARY uv_a) + set (CASSANDRA_LIBRARY cassandra_static) set (USE_CASSANDRA 1) set(CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 76da288991b..ce187038e2a 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -319,5 +319,6 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) + add_subdirectory(libuv) add_subdirectory(cassandra) endif() diff --git a/contrib/cassandra b/contrib/cassandra index fd9b73d4acf..5c0f2a62bdc 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit fd9b73d4acfd85293ab304be64e2e1e2109e521d +Subproject commit 5c0f2a62bdc63dcc390d771c9afaa9dc34eb8e5b diff --git a/contrib/libuv b/contrib/libuv new file mode 160000 index 00000000000..cc51217a317 --- /dev/null +++ b/contrib/libuv @@ -0,0 +1 @@ +Subproject commit cc51217a317e96510fbb284721d5e6bc2af31e33 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 222a3e486f9..d713cec8b8a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -356,6 +356,11 @@ if (USE_OPENCL) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${OpenCL_INCLUDE_DIRS}) endif () +if (USE_CASSANDRA) + dbms_target_link_libraries(PRIVATE ${CASSANDRA_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${CASS_INCLUDE_DIR}) +endif() + dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) target_include_directories (clickhouse_common_io PUBLIC ${DBMS_INCLUDE_DIR}) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index f76c9dd93f6..73028e8d2b2 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -22,14 +22,14 @@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession *session, - const std::string &query_str, + CassSession *session_, + const std::string &query_str_, const DB::Block &sample_block, - const size_t max_block_size) - : session{session} - , statement{cass_statement_new(query_str.c_str(), 0)} - , query_str{query_str} - , max_block_size{max_block_size} + const size_t max_block_size_) + : session(session_) + , statement(cass_statement_new(query_str_.c_str(), 0)) + , query_str(query_str_) + , max_block_size(max_block_size_) { cass_statement_set_paging_size(statement, max_block_size); this->has_more_pages = cass_true; @@ -51,77 +51,77 @@ namespace { switch (type) { - case ValueType::UInt8: + case ValueType::vtUInt8: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt16: + case ValueType::vtUInt16: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt32: + case ValueType::vtUInt32: { cass_uint32_t _value; cass_value_get_uint32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UInt64: + case ValueType::vtUInt64: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int8: + case ValueType::vtInt8: { cass_int8_t _value; cass_value_get_int8(value, &_value); static_cast(column).insertValue(_value); break; } - 
case ValueType::Int16: + case ValueType::vtInt16: { cass_int16_t _value; cass_value_get_int16(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int32: + case ValueType::vtInt32: { cass_int32_t _value; cass_value_get_int32(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Int64: + case ValueType::vtInt64: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Float32: + case ValueType::vtFloat32: { cass_float_t _value; cass_value_get_float(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::Float64: + case ValueType::vtFloat64: { cass_double_t _value; cass_value_get_double(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::String: + case ValueType::vtString: { const char* _value; size_t _value_length; @@ -129,21 +129,21 @@ namespace static_cast(column).insertData(_value, _value_length); break; } - case ValueType::Date: + case ValueType::vtDate: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME break; } - case ValueType::DateTime: + case ValueType::vtDateTime: { cass_int64_t _value; cass_value_get_int64(value, &_value); static_cast(column).insertValue(_value); break; } - case ValueType::UUID: + case ValueType::vtUUID: { CassUuid _value; cass_value_get_uuid(value, &_value); @@ -166,7 +166,7 @@ namespace MutableColumns columns(description.sample_block.columns()); CassFuture* query_future = cass_session_execute(session, statement); - const CassResult* result = cass_future_get_result(query_future); + const CassResult* result_tmp = cass_future_get_result(query_future); if (result == nullptr) { const char* error_message; @@ -176,12 +176,12 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result); + const CassRow* row = cass_result_first_row(result_tmp); const CassValue* map = cass_row_get_column(row, 0); - CassIterator* iterator = cass_iterator_from_map(map); - while (cass_iterator_next(iterator)) { - const CassValue* _key = cass_iterator_get_map_key(iterator); - const CassValue* _value = cass_iterator_get_map_value(iterator); + CassIterator* iterator_tmp = cass_iterator_from_map(map); + while (cass_iterator_next(iterator_tmp)) { + const CassValue* _key = cass_iterator_get_map_key(iterator_tmp); + const CassValue* _value = cass_iterator_get_map_value(iterator_tmp); auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { @@ -194,13 +194,13 @@ namespace } } - has_more_pages = cass_result_has_more_pages(result); + has_more_pages = cass_result_has_more_pages(result_tmp); if (has_more_pages) { - cass_statement_set_paging_state(statement, result); + cass_statement_set_paging_state(statement, result_tmp); } - cass_result_free(result); + cass_result_free(result_tmp); return description.sample_block.cloneWithColumns(std::move(columns)); } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index cfd21510e69..c51ae2877d8 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -15,7 +15,8 @@ namespace DB const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, - 
const Context & /* context */) -> DictionarySourcePtr { + const Context & /* context */, + bool /*check_config*/) -> DictionarySourcePtr { #if USE_CASSANDRA return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else @@ -49,42 +50,42 @@ namespace ErrorCodes static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure &dict_struct, - const std::string &host, - UInt16 port, - const std::string &user, - const std::string &password, - const std::string &method, - const std::string &db, - const DB::Block &sample_block) - : dict_struct{dict_struct} - , host{host} - , port{port} - , user{user} - , password{password} - , method{method} - , db{db} - , sample_block{sample_block} - , cluster{cass_cluster_new()} - , session{cass_session_new()} + const DB::DictionaryStructure & dict_struct_, + const std::string & host_, + UInt16 port_, + const std::string & user_, + const std::string & password_, + const std::string & method_, + const std::string & db_, + const DB::Block & sample_block_) + : dict_struct(dict_struct_) + , host(host_) + , port(port_) + , user(user_) + , password(password_) + , method(method_) + , db(db_) + , sample_block(sample_block_) + , cluster(cass_cluster_new()) + , session(cass_session_new()) { cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); } CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure &dict_struct, - const Poco::Util::AbstractConfiguration &config, - const std::string &config_prefix, - DB::Block &sample_block) + const DB::DictionaryStructure & dict_struct_, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DB::Block & sample_block_) : CassandraDictionarySource( - dict_struct, + dict_struct_, config.getString(config_prefix + ".host"), config.getUInt(config_prefix + ".port"), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), config.getString(config_prefix + ".method", ""), config.getString(config_prefix + ".db", ""), - sample_block) + sample_block_) { } diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index e47b55d5254..22703a3924d 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -16,6 +16,8 @@ SRCS( CacheDictionary_generate1.cpp CacheDictionary_generate2.cpp CacheDictionary_generate3.cpp + CassandraBlockInputStream.cpp + CassandraDictionarySource.cpp ClickHouseDictionarySource.cpp ComplexKeyCacheDictionary.cpp ComplexKeyCacheDictionary_createAttributeWithType.cpp @@ -24,8 +26,8 @@ SRCS( ComplexKeyCacheDictionary_generate3.cpp ComplexKeyCacheDictionary_setAttributeValue.cpp ComplexKeyCacheDictionary_setDefaultAttributeValue.cpp - ComplexKeyHashedDictionary.cpp ComplexKeyDirectDictionary.cpp + ComplexKeyHashedDictionary.cpp DictionaryBlockInputStreamBase.cpp DictionaryFactory.cpp DictionarySourceFactory.cpp diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 8e53ffe493d..da9435148b6 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -281,10 +281,10 @@ SRCS( rand64.cpp randConstant.cpp rand.cpp + randomFixedString.cpp randomPrintableASCII.cpp randomString.cpp randomStringUTF8.cpp - randomFixedString.cpp regexpQuoteMeta.cpp registerFunctionsArithmetic.cpp registerFunctionsComparison.cpp diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py 
b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 117eb7b7e6f..c90725f2f6a 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -2,7 +2,7 @@ import warnings import pymysql.cursors import pymongo -import cassandra +import cassandra.cluster import redis import aerospike from tzlocal import get_localzone From f40fadc3d64381d5b85cefc047b996315ffe262c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 21 May 2020 18:29:18 +0300 Subject: [PATCH 07/38] fix segfault --- .../CassandraBlockInputStream.cpp | 21 ++++++++++--------- src/Dictionaries/CassandraBlockInputStream.h | 2 +- .../helpers/docker_compose_cassandra.yml | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 73028e8d2b2..471f6df3e28 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -40,7 +40,8 @@ CassandraBlockInputStream::CassandraBlockInputStream( CassandraBlockInputStream::~CassandraBlockInputStream() { if (iterator != nullptr) cass_iterator_free(iterator); - cass_result_free(result); + if (result) + cass_result_free(result); } namespace @@ -166,7 +167,7 @@ namespace MutableColumns columns(description.sample_block.columns()); CassFuture* query_future = cass_session_execute(session, statement); - const CassResult* result_tmp = cass_future_get_result(query_future); + result = cass_future_get_result(query_future); if (result == nullptr) { const char* error_message; @@ -176,12 +177,12 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result_tmp); + const CassRow* row = cass_result_first_row(result); const CassValue* map = cass_row_get_column(row, 0); - CassIterator* iterator_tmp = cass_iterator_from_map(map); - while (cass_iterator_next(iterator_tmp)) { - const CassValue* _key = cass_iterator_get_map_key(iterator_tmp); - const CassValue* _value = cass_iterator_get_map_value(iterator_tmp); + iterator = cass_iterator_from_map(map); + while (cass_iterator_next(iterator)) { + const CassValue* _key = cass_iterator_get_map_key(iterator); + const CassValue* _value = cass_iterator_get_map_value(iterator); auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { @@ -194,13 +195,13 @@ namespace } } - has_more_pages = cass_result_has_more_pages(result_tmp); + has_more_pages = cass_result_has_more_pages(result); if (has_more_pages) { - cass_statement_set_paging_state(statement, result_tmp); + cass_statement_set_paging_state(statement, result); } - cass_result_free(result_tmp); + cass_result_free(result); return description.sample_block.cloneWithColumns(std::move(columns)); } diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 2b7c3b68744..8af63745f17 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -31,7 +31,7 @@ namespace DB String query_str; const size_t max_block_size; ExternalResultDescription description; - const CassResult * result; + const CassResult * result = nullptr; cass_bool_t has_more_pages; CassIterator * iterator = nullptr; }; diff --git 
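
The crash fixed here came from the destructor calling cass_result_free() on a member that was never assigned when the query failed or was never executed; defaulting the member to nullptr and checking before freeing closes that hole. The same ownership could also be expressed with smart pointers around the driver's free functions; a brief sketch of that alternative, shown only as an illustration and not what the patch does:

    #include <cassandra.h>
    #include <memory>

    struct CassResultFree { void operator()(const CassResult * res) const { cass_result_free(res); } };
    struct CassIteratorFree { void operator()(CassIterator * it) const { cass_iterator_free(it); } };

    using CassResultPtr = std::unique_ptr<const CassResult, CassResultFree>;
    using CassIteratorPtr = std::unique_ptr<CassIterator, CassIteratorFree>;

    /// Members declared this way start out empty, so a stream destroyed before the first
    /// successful page has nothing to free, and re-assignment releases the previous page automatically.
    CassResultPtr result;
    CassIteratorPtr iterator;
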
a/tests/integration/helpers/docker_compose_cassandra.yml b/tests/integration/helpers/docker_compose_cassandra.yml index bb6a0221c54..6bbedcc1130 100644 --- a/tests/integration/helpers/docker_compose_cassandra.yml +++ b/tests/integration/helpers/docker_compose_cassandra.yml @@ -1,4 +1,4 @@ -version: '2.2' +version: '2.3' services: cassandra1: image: cassandra From e6ca09e1343ad154abfd70c3e30ceb46a868f257 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 22 May 2020 17:19:33 +0300 Subject: [PATCH 08/38] fix build --- CMakeLists.txt | 2 +- .../cassandra.cmake} | 13 +- contrib/CMakeLists.txt | 2 +- contrib/cassandra | 2 +- contrib/libuv-cmake/CMakeLists.txt | 591 ++++++++++++++++++ src/CMakeLists.txt | 4 +- .../CassandraBlockInputStream.cpp | 96 +-- .../CassandraDictionarySource.cpp | 4 +- 8 files changed, 655 insertions(+), 59 deletions(-) rename cmake/{find_cassandra.cmake => find/cassandra.cmake} (63%) create mode 100644 contrib/libuv-cmake/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d774f9c9f5..54a88404579 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,7 +364,7 @@ include (cmake/find/fastops.cmake) include (cmake/find/orc.cmake) include (cmake/find/avro.cmake) include (cmake/find/msgpack.cmake) -include (cmake/find_cassandra.cmake) +include (cmake/find/cassandra.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) diff --git a/cmake/find_cassandra.cmake b/cmake/find/cassandra.cmake similarity index 63% rename from cmake/find_cassandra.cmake rename to cmake/find/cassandra.cmake index 951cfc88b11..b1d76702cfa 100644 --- a/cmake/find_cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -4,13 +4,18 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message (WARNING "submodule contrib/cassandra is missing. 
to fix try run: \n git submodule update --init --recursive") else() - set(LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") + set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/") - set (LIBUV_LIBRARY uv_a) - set (CASSANDRA_LIBRARY cassandra_static) + if (USE_STATIC_LIBRARIES) + set (LIBUV_LIBRARY uv_a) + set (CASSANDRA_LIBRARY cassandra_static) + else() + set (LIBUV_LIBRARY uv) + set (CASSANDRA_LIBRARY cassandra) + endif() set (USE_CASSANDRA 1) - set(CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") + set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ce187038e2a..9c42993dda7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -319,6 +319,6 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) - add_subdirectory(libuv) + add_subdirectory(libuv-cmake) add_subdirectory(cassandra) endif() diff --git a/contrib/cassandra b/contrib/cassandra index 5c0f2a62bdc..bc593f2644a 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 5c0f2a62bdc63dcc390d771c9afaa9dc34eb8e5b +Subproject commit bc593f2644a6c50c4057459e242e214a6af70969 diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt new file mode 100644 index 00000000000..b84ce217f3b --- /dev/null +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -0,0 +1,591 @@ +cmake_minimum_required(VERSION 3.4) +project(libuv LANGUAGES C) + +cmake_policy(SET CMP0057 NEW) # Enable IN_LIST operator +cmake_policy(SET CMP0064 NEW) # Support if (TEST) operator + +#list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") + +include(CMakePackageConfigHelpers) +include(CMakeDependentOption) +include(CheckCCompilerFlag) +include(GNUInstallDirs) +include(CTest) + +set(CMAKE_C_VISIBILITY_PRESET hidden) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS ON) +set(CMAKE_C_STANDARD 90) + +#cmake_dependent_option(LIBUV_BUILD_TESTS +# "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON +# "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) +#cmake_dependent_option(LIBUV_BUILD_BENCH +# "Build the benchmarks when building unit tests and we are the root project" ON +# "LIBUV_BUILD_TESTS" OFF) + +# Qemu Build +option(QEMU "build for qemu" OFF) +if(QEMU) + add_definitions(-D__QEMU__=1) +endif() + +# Compiler check +string(CONCAT is-msvc $, + $ +>) + +check_c_compiler_flag(/W4 UV_LINT_W4) +check_c_compiler_flag(/wd4100 UV_LINT_NO_UNUSED_PARAMETER_MSVC) +check_c_compiler_flag(/wd4127 UV_LINT_NO_CONDITIONAL_CONSTANT_MSVC) +check_c_compiler_flag(/wd4201 UV_LINT_NO_NONSTANDARD_MSVC) +check_c_compiler_flag(/wd4206 UV_LINT_NO_NONSTANDARD_EMPTY_TU_MSVC) +check_c_compiler_flag(/wd4210 UV_LINT_NO_NONSTANDARD_FILE_SCOPE_MSVC) +check_c_compiler_flag(/wd4232 UV_LINT_NO_NONSTANDARD_NONSTATIC_DLIMPORT_MSVC) +check_c_compiler_flag(/wd4456 UV_LINT_NO_HIDES_LOCAL) +check_c_compiler_flag(/wd4457 UV_LINT_NO_HIDES_PARAM) +check_c_compiler_flag(/wd4459 UV_LINT_NO_HIDES_GLOBAL) +check_c_compiler_flag(/wd4706 UV_LINT_NO_CONDITIONAL_ASSIGNMENT_MSVC) +check_c_compiler_flag(/wd4996 UV_LINT_NO_UNSAFE_MSVC) + +check_c_compiler_flag(-Wall UV_LINT_WALL) # DO NOT use this under MSVC + +# TODO: Place these into its own function +check_c_compiler_flag(-Wno-unused-parameter UV_LINT_NO_UNUSED_PARAMETER) +check_c_compiler_flag(-Wstrict-prototypes 
UV_LINT_STRICT_PROTOTYPES) +check_c_compiler_flag(-Wextra UV_LINT_EXTRA) + +set(lint-no-unused-parameter $<$:-Wno-unused-parameter>) +set(lint-strict-prototypes $<$:-Wstrict-prototypes>) +set(lint-extra $<$:-Wextra>) +set(lint-w4 $<$:/W4>) +set(lint-no-unused-parameter-msvc $<$:/wd4100>) +set(lint-no-conditional-constant-msvc $<$:/wd4127>) +set(lint-no-nonstandard-msvc $<$:/wd4201>) +set(lint-no-nonstandard-empty-tu-msvc $<$:/wd4206>) +set(lint-no-nonstandard-file-scope-msvc $<$:/wd4210>) +set(lint-no-nonstandard-nonstatic-dlimport-msvc $<$:/wd4232>) +set(lint-no-hides-local-msvc $<$:/wd4456>) +set(lint-no-hides-param-msvc $<$:/wd4457>) +set(lint-no-hides-global-msvc $<$:/wd4459>) +set(lint-no-conditional-assignment-msvc $<$:/wd4706>) +set(lint-no-unsafe-msvc $<$:/wd4996>) +# Unfortunately, this one is complicated because MSVC and clang-cl support -Wall +# but using it is like calling -Weverything +string(CONCAT lint-default $< + $,$>:-Wall +>) + +list(APPEND uv_cflags ${lint-strict-prototypes} ${lint-extra} ${lint-default} ${lint-w4}) +list(APPEND uv_cflags ${lint-no-unused-parameter}) +list(APPEND uv_cflags ${lint-no-unused-parameter-msvc}) +list(APPEND uv_cflags ${lint-no-conditional-constant-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-empty-tu-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-file-scope-msvc}) +list(APPEND uv_cflags ${lint-no-nonstandard-nonstatic-dlimport-msvc}) +list(APPEND uv_cflags ${lint-no-hides-local-msvc}) +list(APPEND uv_cflags ${lint-no-hides-param-msvc}) +list(APPEND uv_cflags ${lint-no-hides-global-msvc}) +list(APPEND uv_cflags ${lint-no-conditional-assignment-msvc}) +list(APPEND uv_cflags ${lint-no-unsafe-msvc}) + +set(uv_sources + src/fs-poll.c + src/idna.c + src/inet.c + src/random.c + src/strscpy.c + src/threadpool.c + src/timer.c + src/uv-common.c + src/uv-data-getter-setters.c + src/version.c) + +#if(WIN32) +# list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) +# list(APPEND uv_libraries +# psapi +# iphlpapi +# userenv +# ws2_32) +# list(APPEND uv_sources +# src/win/async.c +# src/win/core.c +# src/win/detect-wakeup.c +# src/win/dl.c +# src/win/error.c +# src/win/fs.c +# src/win/fs-event.c +# src/win/getaddrinfo.c +# src/win/getnameinfo.c +# src/win/handle.c +# src/win/loop-watcher.c +# src/win/pipe.c +# src/win/thread.c +# src/win/poll.c +# src/win/process.c +# src/win/process-stdio.c +# src/win/signal.c +# src/win/snprintf.c +# src/win/stream.c +# src/win/tcp.c +# src/win/tty.c +# src/win/udp.c +# src/win/util.c +# src/win/winapi.c +# src/win/winsock.c) +# list(APPEND uv_test_libraries ws2_32) +# list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) +#else() +list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) +if(NOT CMAKE_SYSTEM_NAME MATCHES "Android|OS390") + # TODO: This should be replaced with find_package(Threads) if possible + # Android has pthread as part of its c library, not as a separate + # libpthread.so. 
+ list(APPEND uv_libraries pthread) +endif() +list(APPEND uv_sources + src/unix/async.c + src/unix/core.c + src/unix/dl.c + src/unix/fs.c + src/unix/getaddrinfo.c + src/unix/getnameinfo.c + src/unix/loop-watcher.c + src/unix/loop.c + src/unix/pipe.c + src/unix/poll.c + src/unix/process.c + src/unix/random-devurandom.c + src/unix/signal.c + src/unix/stream.c + src/unix/tcp.c + src/unix/thread.c + src/unix/tty.c + src/unix/udp.c) +list(APPEND uv_test_sources test/runner-unix.c) +#endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "AIX") + list(APPEND uv_defines + _ALL_SOURCE + _LINUX_SOURCE_COMPAT + _THREAD_SAFE + _XOPEN_SOURCE=500 + HAVE_SYS_AHAFS_EVPRODS_H) + list(APPEND uv_libraries perfstat) + list(APPEND uv_sources + src/unix/aix.c + src/unix/aix-common.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Android") + list(APPEND uv_libraries dl) + list(APPEND uv_sources + src/unix/android-ifaddrs.c + src/unix/linux-core.c + src/unix/linux-inotify.c + src/unix/linux-syscalls.c + src/unix/procfs-exepath.c + src/unix/pthread-fixes.c + src/unix/random-getentropy.c + src/unix/random-getrandom.c + src/unix/random-sysctl-linux.c + src/unix/sysinfo-loadavg.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS390") + list(APPEND uv_sources src/unix/proctitle.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD") + list(APPEND uv_sources src/unix/freebsd.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") + list(APPEND uv_sources src/unix/posix-hrtime.c src/unix/bsd-proctitle.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") + list(APPEND uv_sources src/unix/bsd-ifaddrs.c src/unix/kqueue.c) +endif() + +if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") + list(APPEND uv_sources src/unix/random-getrandom.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") + list(APPEND uv_sources src/unix/random-getentropy.c) +endif() + +if(APPLE) + list(APPEND uv_defines _DARWIN_UNLIMITED_SELECT=1 _DARWIN_USE_64_BIT_INODE=1) + list(APPEND uv_sources + src/unix/darwin-proctitle.c + src/unix/darwin.c + src/unix/fsevents.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) + list(APPEND uv_libraries dl rt) + list(APPEND uv_sources + src/unix/linux-core.c + src/unix/linux-inotify.c + src/unix/linux-syscalls.c + src/unix/procfs-exepath.c + src/unix/random-getrandom.c + src/unix/random-sysctl-linux.c + src/unix/sysinfo-loadavg.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") + list(APPEND uv_sources src/unix/netbsd.c) + list(APPEND uv_libraries kvm) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") + list(APPEND uv_sources src/unix/openbsd.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OS390") + list(APPEND uv_defines PATH_MAX=255) + list(APPEND uv_defines _AE_BIMODAL) + list(APPEND uv_defines _ALL_SOURCE) + list(APPEND uv_defines _ISOC99_SOURCE) + list(APPEND uv_defines _LARGE_TIME_API) + list(APPEND uv_defines _OPEN_MSGQ_EXT) + list(APPEND uv_defines _OPEN_SYS_FILE_EXT) + list(APPEND uv_defines _OPEN_SYS_IF_EXT) + list(APPEND uv_defines _OPEN_SYS_SOCK_EXT3) + list(APPEND uv_defines _OPEN_SYS_SOCK_IPV6) + list(APPEND uv_defines _UNIX03_SOURCE) + list(APPEND uv_defines _UNIX03_THREADS) + list(APPEND uv_defines _UNIX03_WITHDRAWN) + list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) + list(APPEND uv_sources + src/unix/pthread-fixes.c + src/unix/os390.c + src/unix/os390-syscalls.c) + list(APPEND uv_cflags -Wc,DLL -Wc,exportall -Wc,xplink) + list(APPEND uv_libraries -Wl,xplink) + 
list(APPEND uv_test_libraries -Wl,xplink) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "OS400") + list(APPEND uv_defines + _ALL_SOURCE + _LINUX_SOURCE_COMPAT + _THREAD_SAFE + _XOPEN_SOURCE=500) + list(APPEND uv_sources + src/unix/aix-common.c + src/unix/ibmi.c + src/unix/no-fsevents.c + src/unix/no-proctitle.c + src/unix/posix-poll.c) +endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") + list(APPEND uv_defines __EXTENSIONS__ _XOPEN_SOURCE=500) + list(APPEND uv_libraries kstat nsl sendfile socket) + list(APPEND uv_sources src/unix/no-proctitle.c src/unix/sunos.c) +endif() + +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|Linux|NetBSD|OpenBSD") + list(APPEND uv_test_libraries util) +endif() + +set(uv_sources_tmp "") +foreach(file ${uv_sources}) + list(APPEND uv_sources_tmp "${LIBUV_ROOT_DIR}/${file}") +endforeach(file) +set(uv_sources "${uv_sources_tmp}") + +add_library(uv SHARED ${uv_sources}) +target_compile_definitions(uv + INTERFACE + USING_UV_SHARED=1 + PRIVATE + BUILDING_UV_SHARED=1 + ${uv_defines}) +target_compile_options(uv PRIVATE ${uv_cflags}) +target_include_directories(uv + PUBLIC + $ + $ + PRIVATE + $) +target_link_libraries(uv ${uv_libraries}) + +add_library(uv_a STATIC ${uv_sources}) +target_compile_definitions(uv_a PRIVATE ${uv_defines}) +target_compile_options(uv_a PRIVATE ${uv_cflags}) +target_include_directories(uv_a + PUBLIC + $ + $ + PRIVATE + $) +target_link_libraries(uv_a ${uv_libraries}) + +#if(LIBUV_BUILD_TESTS) +# # Small hack: use ${uv_test_sources} now to get the runner skeleton, +# # before the actual tests are added. +# add_executable( +# uv_run_benchmarks_a +# ${uv_test_sources} +# test/benchmark-async-pummel.c +# test/benchmark-async.c +# test/benchmark-fs-stat.c +# test/benchmark-getaddrinfo.c +# test/benchmark-loop-count.c +# test/benchmark-million-async.c +# test/benchmark-million-timers.c +# test/benchmark-multi-accept.c +# test/benchmark-ping-pongs.c +# test/benchmark-ping-udp.c +# test/benchmark-pound.c +# test/benchmark-pump.c +# test/benchmark-sizes.c +# test/benchmark-spawn.c +# test/benchmark-tcp-write-batch.c +# test/benchmark-thread.c +# test/benchmark-udp-pummel.c +# test/blackhole-server.c +# test/dns-server.c +# test/echo-server.c +# test/run-benchmarks.c +# test/runner.c) +# target_compile_definitions(uv_run_benchmarks_a PRIVATE ${uv_defines}) +# target_compile_options(uv_run_benchmarks_a PRIVATE ${uv_cflags}) +# target_link_libraries(uv_run_benchmarks_a uv_a ${uv_test_libraries}) +# +# list(APPEND uv_test_sources +# test/blackhole-server.c +# test/echo-server.c +# test/run-tests.c +# test/runner.c +# test/test-active.c +# test/test-async-null-cb.c +# test/test-async.c +# test/test-barrier.c +# test/test-callback-order.c +# test/test-callback-stack.c +# test/test-close-fd.c +# test/test-close-order.c +# test/test-condvar.c +# test/test-connect-unspecified.c +# test/test-connection-fail.c +# test/test-cwd-and-chdir.c +# test/test-default-loop-close.c +# test/test-delayed-accept.c +# test/test-dlerror.c +# test/test-eintr-handling.c +# test/test-embed.c +# test/test-emfile.c +# test/test-env-vars.c +# test/test-error.c +# test/test-fail-always.c +# test/test-fork.c +# test/test-fs-copyfile.c +# test/test-fs-event.c +# test/test-fs-poll.c +# test/test-fs.c +# test/test-fs-readdir.c +# test/test-fs-fd-hash.c +# test/test-fs-open-flags.c +# test/test-get-currentexe.c +# test/test-get-loadavg.c +# test/test-get-memory.c +# test/test-get-passwd.c +# test/test-getaddrinfo.c +# test/test-gethostname.c +# test/test-getnameinfo.c +# 
test/test-getsockname.c +# test/test-getters-setters.c +# test/test-gettimeofday.c +# test/test-handle-fileno.c +# test/test-homedir.c +# test/test-hrtime.c +# test/test-idle.c +# test/test-idna.c +# test/test-ip4-addr.c +# test/test-ip6-addr.c +# test/test-ipc-heavy-traffic-deadlock-bug.c +# test/test-ipc-send-recv.c +# test/test-ipc.c +# test/test-loop-alive.c +# test/test-loop-close.c +# test/test-loop-configure.c +# test/test-loop-handles.c +# test/test-loop-stop.c +# test/test-loop-time.c +# test/test-multiple-listen.c +# test/test-mutexes.c +# test/test-osx-select.c +# test/test-pass-always.c +# test/test-ping-pong.c +# test/test-pipe-bind-error.c +# test/test-pipe-close-stdout-read-stdin.c +# test/test-pipe-connect-error.c +# test/test-pipe-connect-multiple.c +# test/test-pipe-connect-prepare.c +# test/test-pipe-getsockname.c +# test/test-pipe-pending-instances.c +# test/test-pipe-sendmsg.c +# test/test-pipe-server-close.c +# test/test-pipe-set-fchmod.c +# test/test-pipe-set-non-blocking.c +# test/test-platform-output.c +# test/test-poll-close-doesnt-corrupt-stack.c +# test/test-poll-close.c +# test/test-poll-closesocket.c +# test/test-poll-oob.c +# test/test-poll.c +# test/test-process-priority.c +# test/test-process-title-threadsafe.c +# test/test-process-title.c +# test/test-queue-foreach-delete.c +# test/test-random.c +# test/test-ref.c +# test/test-run-nowait.c +# test/test-run-once.c +# test/test-semaphore.c +# test/test-shutdown-close.c +# test/test-shutdown-eof.c +# test/test-shutdown-twice.c +# test/test-signal-multiple-loops.c +# test/test-signal-pending-on-close.c +# test/test-signal.c +# test/test-socket-buffer-size.c +# test/test-spawn.c +# test/test-stdio-over-pipes.c +# test/test-strscpy.c +# test/test-tcp-alloc-cb-fail.c +# test/test-tcp-bind-error.c +# test/test-tcp-bind6-error.c +# test/test-tcp-close-accept.c +# test/test-tcp-close-while-connecting.c +# test/test-tcp-close.c +# test/test-tcp-close-reset.c +# test/test-tcp-connect-error-after-write.c +# test/test-tcp-connect-error.c +# test/test-tcp-connect-timeout.c +# test/test-tcp-connect6-error.c +# test/test-tcp-create-socket-early.c +# test/test-tcp-flags.c +# test/test-tcp-oob.c +# test/test-tcp-open.c +# test/test-tcp-read-stop.c +# test/test-tcp-shutdown-after-write.c +# test/test-tcp-try-write.c +# test/test-tcp-try-write-error.c +# test/test-tcp-unexpected-read.c +# test/test-tcp-write-after-connect.c +# test/test-tcp-write-fail.c +# test/test-tcp-write-queue-order.c +# test/test-tcp-write-to-half-open-connection.c +# test/test-tcp-writealot.c +# test/test-thread-equal.c +# test/test-thread.c +# test/test-threadpool-cancel.c +# test/test-threadpool.c +# test/test-timer-again.c +# test/test-timer-from-check.c +# test/test-timer.c +# test/test-tmpdir.c +# test/test-tty-duplicate-key.c +# test/test-tty-escape-sequence-processing.c +# test/test-tty.c +# test/test-udp-alloc-cb-fail.c +# test/test-udp-bind.c +# test/test-udp-connect.c +# test/test-udp-create-socket-early.c +# test/test-udp-dgram-too-big.c +# test/test-udp-ipv6.c +# test/test-udp-multicast-interface.c +# test/test-udp-multicast-interface6.c +# test/test-udp-multicast-join.c +# test/test-udp-multicast-join6.c +# test/test-udp-multicast-ttl.c +# test/test-udp-open.c +# test/test-udp-options.c +# test/test-udp-send-and-recv.c +# test/test-udp-send-hang-loop.c +# test/test-udp-send-immediate.c +# test/test-udp-send-unreachable.c +# test/test-udp-try-send.c +# test/test-uname.c +# test/test-walk-handles.c +# test/test-watcher-cross-stop.c) +# +# 
add_executable(uv_run_tests ${uv_test_sources} uv_win_longpath.manifest) +# target_compile_definitions(uv_run_tests +# PRIVATE ${uv_defines} USING_UV_SHARED=1) +# target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) +# target_link_libraries(uv_run_tests uv ${uv_test_libraries}) +# add_test(NAME uv_test +# COMMAND uv_run_tests +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +# if(CMAKE_SYSTEM_NAME STREQUAL "OS390") +# set_tests_properties(uv_test PROPERTIES ENVIRONMENT +# "LIBPATH=${CMAKE_BINARY_DIR}:$ENV{LIBPATH}") +# endif() +# add_executable(uv_run_tests_a ${uv_test_sources} uv_win_longpath.manifest) +# target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) +# target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) +# if(QEMU) +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries} -static) +# else() +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) +# endif() +# add_test(NAME uv_test_a +# COMMAND uv_run_tests_a +# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) +#endif() + +if(UNIX) + # Now for some gibbering horrors from beyond the stars... + foreach(lib IN LISTS uv_libraries) + list(APPEND LIBS "-l${lib}") + endforeach() + string(REPLACE ";" " " LIBS "${LIBS}") + # Consider setting project version via project() call? + file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT) + string(REGEX MATCH "([0-9]+)[.][0-9]+[.][0-9]+" PACKAGE_VERSION "${configure_ac}") + set(UV_VERSION_MAJOR "${CMAKE_MATCH_1}") + # The version in the filename is mirroring the behaviour of autotools. + set_target_properties(uv PROPERTIES + VERSION ${UV_VERSION_MAJOR}.0.0 + SOVERSION ${UV_VERSION_MAJOR}) + set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) + set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) + set(prefix ${CMAKE_INSTALL_PREFIX}) + configure_file(${LIBUV_ROOT_DIR}/libuv.pc.in ${LIBUV_ROOT_DIR}/libuv.pc @ONLY) + + install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR}) + install(FILES ${PROJECT_BINARY_DIR}/libuv.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + +#if(WIN32) +# install(DIRECTORY include/ DESTINATION include) +# install(FILES LICENSE DESTINATION .) 
+# install(TARGETS uv uv_a +# RUNTIME DESTINATION lib/$ +# ARCHIVE DESTINATION lib/$) +#endif() +# +#message(STATUS "summary of build options: +# Install prefix: ${CMAKE_INSTALL_PREFIX} +# Target system: ${CMAKE_SYSTEM_NAME} +# Compiler: +# C compiler: ${CMAKE_C_COMPILER} +# CFLAGS: ${CMAKE_C_FLAGS_${_build_type}} ${CMAKE_C_FLAGS} +#") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d713cec8b8a..7c6b4006021 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -357,8 +357,8 @@ if (USE_OPENCL) endif () if (USE_CASSANDRA) - dbms_target_link_libraries(PRIVATE ${CASSANDRA_LIBRARY}) - dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${CASS_INCLUDE_DIR}) + dbms_target_link_libraries(PUBLIC ${CASSANDRA_LIBRARY}) + dbms_target_include_directories (SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) endif() dbms_target_include_directories (PUBLIC ${DBMS_INCLUDE_DIR}) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 471f6df3e28..bf43adcdc59 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession *session_, + CassSession * session_, const std::string &query_str_, const DB::Block &sample_block, const size_t max_block_size_) @@ -48,108 +48,108 @@ namespace { using ValueType = ExternalResultDescription::ValueType; - void insertValue(IColumn & column, const ValueType type, const CassValue * value) + void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { switch (type) { case ValueType::vtUInt8: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt16: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt32: { - cass_uint32_t _value; - cass_value_get_uint32(value, &_value); - static_cast(column).insertValue(_value); + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUInt64: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt8: { - cass_int8_t _value; - cass_value_get_int8(value, &_value); - static_cast(column).insertValue(_value); + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt16: { - cass_int16_t _value; - cass_value_get_int16(value, &_value); - static_cast(column).insertValue(_value); + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt32: { - cass_int32_t _value; - cass_value_get_int32(value, &_value); - static_cast(column).insertValue(_value); + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtInt64: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + 
cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtFloat32: { - cass_float_t _value; - cass_value_get_float(value, &_value); - static_cast(column).insertValue(_value); + cass_float_t value; + cass_value_get_float(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtFloat64: { - cass_double_t _value; - cass_value_get_double(value, &_value); - static_cast(column).insertValue(_value); + cass_double_t value; + cass_value_get_double(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtString: { - const char* _value; - size_t _value_length; - cass_value_get_string(value, &_value, &_value_length); - static_cast(column).insertData(_value, _value_length); + const char * value; + size_t value_length; + cass_value_get_string(cass_value, &value, &value_length); + static_cast(column).insertData(value, value_length); break; } case ValueType::vtDate: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(UInt32{cass_date_from_epoch(_value)}); // FIXME + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(UInt32{cass_date_from_epoch(value)}); // FIXME break; } case ValueType::vtDateTime: { - cass_int64_t _value; - cass_value_get_int64(value, &_value); - static_cast(column).insertValue(_value); + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + static_cast(column).insertValue(value); break; } case ValueType::vtUUID: { - CassUuid _value; - cass_value_get_uuid(value, &_value); + CassUuid value; + cass_value_get_uuid(cass_value, &value); std::array uuid_str; - cass_uuid_string(_value, uuid_str.data()); + cass_uuid_string(value, uuid_str.data()); static_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } @@ -181,9 +181,9 @@ namespace const CassValue* map = cass_row_get_column(row, 0); iterator = cass_iterator_from_map(map); while (cass_iterator_next(iterator)) { - const CassValue* _key = cass_iterator_get_map_key(iterator); - const CassValue* _value = cass_iterator_get_map_value(iterator); - auto pair_values = {std::make_pair(_key, 0ul), std::make_pair(_value, 1ul)}; + const CassValue* cass_key = cass_iterator_get_map_key(iterator); + const CassValue* cass_value = cass_iterator_get_map_value(iterator); + auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; for (const auto &[value, idx]: pair_values) { if (description.types[idx].second) { ColumnNullable & column_nullable = static_cast(*columns[idx]); diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index c51ae2877d8..3a4c7e2f2b7 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -11,7 +11,7 @@ namespace DB void registerDictionarySourceCassandra(DictionarySourceFactory & factory) { - auto createTableSource = [=](const DictionaryStructure & dict_struct, + auto create_table_source = [=](const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Block & sample_block, @@ -28,7 +28,7 @@ namespace DB ErrorCodes::SUPPORT_IS_DISABLED}; #endif }; - factory.registerSource("cassandra", createTableSource); + factory.registerSource("cassandra", create_table_source); } } From 310d5225280029b9018c6caf0a2570bb70999e99 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 26 
May 2020 22:21:18 +0300 Subject: [PATCH 09/38] minimal implementation --- contrib/cassandra | 2 +- .../CassandraBlockInputStream.cpp | 124 ++++++++++++------ src/Dictionaries/CassandraBlockInputStream.h | 5 + .../CassandraDictionarySource.cpp | 74 ++++++++--- src/Dictionaries/CassandraDictionarySource.h | 52 ++++---- src/Dictionaries/ExternalQueryBuilder.cpp | 43 +++--- src/Dictionaries/ExternalQueryBuilder.h | 4 +- tests/integration/helpers/cluster.py | 15 ++- .../helpers/docker_compose_cassandra.yml | 2 +- .../external_sources.py | 50 ++++++- .../test.py | 9 +- 11 files changed, 276 insertions(+), 104 deletions(-) diff --git a/contrib/cassandra b/contrib/cassandra index bc593f2644a..9606ff1f70b 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit bc593f2644a6c50c4057459e242e214a6af70969 +Subproject commit 9606ff1f70bd3fc5d395df32e626923c012ffb5f diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index bf43adcdc59..473a42549a8 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -50,76 +50,77 @@ namespace void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { + /// Cassandra does not support unsigned integers switch (type) { case ValueType::vtUInt8: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt16: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt32: { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - static_cast(column).insertValue(value); + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(value); break; } case ValueType::vtUInt64: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt8: { cass_int8_t value; cass_value_get_int8(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt16: { cass_int16_t value; cass_value_get_int16(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt32: { cass_int32_t value; cass_value_get_int32(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtInt64: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtFloat32: { cass_float_t value; cass_value_get_float(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtFloat64: { cass_double_t value; cass_value_get_double(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(value); break; } case ValueType::vtString: @@ -127,21 +128,21 @@ namespace const char * value; size_t value_length; cass_value_get_string(cass_value, &value, &value_length); - 
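
Since Cassandra has no unsigned column types, the unsigned ClickHouse types are now read through the signed getter of the same width; because both sides use two's complement, the round trip through the signed representation is lossless. A small worked illustration, not code from the patch:

    #include <cstdint>

    /// A UInt8 value of 200 is stored in a Cassandra tinyint as -56 (200 - 256).
    cass_int8_t stored = -56;                          /// what cass_value_get_int8() would return
    uint8_t restored = static_cast<uint8_t>(stored);   /// 200 again: same width, same bit pattern

The same reasoning applies up to UInt64/bigint, where values of 2^63 and above simply reappear unchanged after the signed detour.
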
static_cast(column).insertData(value, value_length); + assert_cast(column).insertData(value, value_length); break; } case ValueType::vtDate: { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(UInt32{cass_date_from_epoch(value)}); // FIXME + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); break; } case ValueType::vtDateTime: { cass_int64_t value; cass_value_get_int64(cass_value, &value); - static_cast(column).insertValue(value); + assert_cast(column).insertValue(static_cast(value / 1000)); break; } case ValueType::vtUUID: @@ -150,7 +151,7 @@ namespace cass_value_get_uuid(cass_value, &value); std::array uuid_str; cass_uuid_string(value, uuid_str.data()); - static_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); + assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } } @@ -161,10 +162,10 @@ namespace Block CassandraBlockInputStream::readImpl() { - if (has_more_pages) + if (!has_more_pages) return {}; - MutableColumns columns(description.sample_block.columns()); + MutableColumns columns = description.sample_block.cloneEmptyColumns(); CassFuture* query_future = cass_session_execute(session, statement); result = cass_future_get_result(query_future); @@ -177,23 +178,49 @@ namespace throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; } - const CassRow* row = cass_result_first_row(result); - const CassValue* map = cass_row_get_column(row, 0); - iterator = cass_iterator_from_map(map); - while (cass_iterator_next(iterator)) { - const CassValue* cass_key = cass_iterator_get_map_key(iterator); - const CassValue* cass_value = cass_iterator_get_map_value(iterator); - auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; - for (const auto &[value, idx]: pair_values) { - if (description.types[idx].second) { - ColumnNullable & column_nullable = static_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + [[maybe_unused]] size_t row_count = 0; + assert(cass_result_column_count(result) == columns.size()); + CassIterator * rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + while (cass_iterator_next(rows_iter)) + { + const CassRow * row = cass_iterator_get_row(rows_iter); + for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) + { + const CassValue * val = cass_row_get_column(row, col_idx); + if (cass_value_is_null(val)) + columns[col_idx]->insertDefault(); + else if (description.types[col_idx].second) + { + ColumnNullable & column_nullable = static_cast(*columns[col_idx]); + insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); column_nullable.getNullMapData().emplace_back(0); - } else { - insertValue(*columns[idx], description.types[idx].first, value); } + else + insertValue(*columns[col_idx], description.types[col_idx].first, val); } + ++row_count; } + assert(cass_result_row_count(result) == row_count); + cass_iterator_free(rows_iter); + + //const CassRow* row = cass_result_first_row(result); + //const CassValue* map = cass_row_get_column(row, 0); + //const CassValue* map = cass_row_get_column(row, 0); + //iterator = cass_iterator_from_map(map); + //while (cass_iterator_next(iterator)) { + // const CassValue* cass_key = cass_iterator_get_map_key(iterator); + // const CassValue* cass_value = cass_iterator_get_map_value(iterator); + // auto pair_values = {std::make_pair(cass_key, 0ul), 
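
The timestamp handling also changed: Cassandra's timestamp type is milliseconds since the Unix epoch, while a ClickHouse DateTime column stores whole seconds, hence the division by 1000 (sub-second precision is dropped). A worked example, purely illustrative:

    cass_int64_t ms = 1590537600000;                /// 2020-05-27 00:00:00 UTC as Cassandra returns it
    auto seconds = static_cast<UInt32>(ms / 1000);  /// 1590537600, the value stored in the DateTime column
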
std::make_pair(cass_value, 1ul)}; + // for (const auto &[value, idx]: pair_values) { + // if (description.types[idx].second) { + // ColumnNullable & column_nullable = static_cast(*columns[idx]); + // insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + // column_nullable.getNullMapData().emplace_back(0); + // } else { + // insertValue(*columns[idx], description.types[idx].first, value); + // } + // } + //} has_more_pages = cass_result_has_more_pages(result); @@ -207,5 +234,28 @@ namespace } +void cassandraCheck(CassError code) +{ + if (code != CASS_OK) + throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code), + ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + +void cassandraWaitAndCheck(CassFuture * future) +{ + auto code = cass_future_error_code(future); /// Waits if not ready + if (code == CASS_OK) + { + cass_future_free(future); + return; + } + const char * message; + size_t message_len; + cass_future_error_message(future, &message, & message_len); + String full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message; + cass_future_free(future); /// Frees message + throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 8af63745f17..c8476bd2c15 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -8,6 +8,11 @@ namespace DB { + +void cassandraCheck(CassError error); +void cassandraWaitAndCheck(CassFuture * future); + + /// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining class CassandraBlockInputStream final : public IBlockInputStream { diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 3a4c7e2f2b7..c165ecea212 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,6 +1,8 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" +#include "ExternalQueryBuilder.h" +#include namespace DB { @@ -51,25 +53,32 @@ static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( const DB::DictionaryStructure & dict_struct_, - const std::string & host_, + const String & host_, UInt16 port_, - const std::string & user_, - const std::string & password_, - const std::string & method_, - const std::string & db_, + const String & user_, + const String & password_, + //const std::string & method_, + const String & db_, + const String & table_, const DB::Block & sample_block_) - : dict_struct(dict_struct_) + : log(&Logger::get("CassandraDictionarySource")) + , dict_struct(dict_struct_) , host(host_) , port(port_) , user(user_) , password(password_) - , method(method_) + //, method(method_) , db(db_) + , table(table_) , sample_block(sample_block_) - , cluster(cass_cluster_new()) + , cluster(cass_cluster_new()) //FIXME will not be freed in case of exception , session(cass_session_new()) { - cass_cluster_set_contact_points(cluster, toConnectionString(host, port).c_str()); + cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); + if (port) + cassandraCheck(cass_cluster_set_port(cluster, port)); + cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); + 
cassandraWaitAndCheck(cass_session_connect_keyspace(session, cluster, db.c_str())); } CassandraDictionarySource::CassandraDictionarySource( @@ -80,11 +89,12 @@ CassandraDictionarySource::CassandraDictionarySource( : CassandraDictionarySource( dict_struct_, config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port"), + config.getUInt(config_prefix + ".port", 0), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), - config.getString(config_prefix + ".method", ""), - config.getString(config_prefix + ".db", ""), + //config.getString(config_prefix + ".method", ""), + config.getString(config_prefix + ".keyspace", ""), + config.getString(config_prefix + ".column_family"), sample_block_) { } @@ -95,8 +105,9 @@ CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySo other.port, other.user, other.password, - other.method, + //other.method, other.db, + other.table, other.sample_block} { } @@ -106,18 +117,45 @@ CassandraDictionarySource::~CassandraDictionarySource() { cass_cluster_free(cluster); } -std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { - return host + (port != 0 ? ":" + std::to_string(port) : ""); -} +//std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { +// return host + (port != 0 ? ":" + std::to_string(port) : ""); +//} -BlockInputStreamPtr CassandraDictionarySource::loadAll() { - return std::make_shared(nullptr, "", sample_block, max_block_size); +BlockInputStreamPtr CassandraDictionarySource::loadAll() +{ + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadAllQuery(); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading all using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? 
" " : " " + user + '@') + */ host + ':' + DB::toString(port); } +BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) +{ + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadIdsQuery(ids); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading ids using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); +} + +BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) +{ + //FIXME split conditions on partition key and clustering key + ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; + String query = builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES); + query.pop_back(); + query += " ALLOW FILTERING;"; + LOG_INFO(log, "Loading keys using query: " << query); + return std::make_shared(session, query, sample_block, max_block_size); +} + } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 2bdd476951a..400481d0a95 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -1,25 +1,29 @@ #pragma once +#if !defined(ARCADIA_BUILD) #include -#include +#endif #if USE_CASSANDRA -# include "DictionaryStructure.h" -# include "IDictionarySource.h" -# include +#include "DictionaryStructure.h" +#include "IDictionarySource.h" +#include +#include +#include namespace DB { class CassandraDictionarySource final : public IDictionarySource { CassandraDictionarySource( const DictionaryStructure & dict_struct, - const std::string & host, + const String & host, UInt16 port, - const std::string & user, - const std::string & password, - const std::string & method, - const std::string & db, + const String & user, + const String & password, + //const std::string & method, + const String & db, + const String & table, const Block & sample_block); public: @@ -44,15 +48,15 @@ public: DictionarySourcePtr clone() const override { return std::make_unique(*this); } - BlockInputStreamPtr loadIds(const std::vector & /* ids */) override - { - throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - } + BlockInputStreamPtr loadIds(const std::vector & ids) override; + //{ + // throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + //} - BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override - { - throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - } + BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; + //{ + // throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; + //} BlockInputStreamPtr loadUpdatedAll() override { @@ -62,15 +66,17 @@ public: std::string toString() const override; private: - static std::string toConnectionString(const std::string & host, const UInt16 port); + //static std::string toConnectionString(const std::string & host, const UInt16 port); + Poco::Logger * log; const DictionaryStructure dict_struct; - const std::string host; + const String host; const UInt16 port; - const std::string user; - const std::string password; - const std::string method; - const std::string db; + const String user; + const String password; + //const 
std::string method; + const String db; + const String table; Block sample_block; CassCluster * cluster; diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 529fb3d60fa..d55d77ea9a5 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -63,6 +63,13 @@ void ExternalQueryBuilder::writeQuoted(const std::string & s, WriteBuffer & out) std::string ExternalQueryBuilder::composeLoadAllQuery() const { WriteBufferFromOwnString out; + composeLoadAllQuery(out); + writeChar(';', out); + return out.str(); +} + +void ExternalQueryBuilder::composeLoadAllQuery(WriteBuffer & out) const +{ writeString("SELECT ", out); if (dict_struct.id) @@ -149,24 +156,26 @@ std::string ExternalQueryBuilder::composeLoadAllQuery() const writeString(" WHERE ", out); writeString(where, out); } - - writeChar(';', out); - - return out.str(); } std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_field, const std::string & time_point) const { - std::string out = composeLoadAllQuery(); - std::string update_query; + WriteBufferFromOwnString out; + composeLoadAllQuery(out); if (!where.empty()) - update_query = " AND " + update_field + " >= '" + time_point + "'"; + writeString(" AND ", out); else - update_query = " WHERE " + update_field + " >= '" + time_point + "'"; + writeString(" WHERE ", out); - return out.insert(out.size() - 1, update_query); /// This is done to insert "update_query" before "out"'s semicolon + writeQuoted(update_field, out); + writeString(" >= '", out); + writeString(time_point, out); + writeChar('\'', out); + + writeChar(';', out); + return out.str(); } @@ -303,7 +312,7 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st } else /* if (method == IN_WITH_TUPLES) */ { - writeString(composeKeyTupleDefinition(), out); + composeKeyTupleDefinition(out); writeString(" IN (", out); first = true; @@ -346,7 +355,7 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons const auto & key_description = (*dict_struct.key)[i]; /// key_i=value_i - writeString(key_description.name, out); + writeQuoted(key_description.name, out); writeString("=", out); key_description.type->serializeAsTextQuoted(*key_columns[i], row, out, format_settings); } @@ -355,26 +364,24 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons } -std::string ExternalQueryBuilder::composeKeyTupleDefinition() const +void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; - std::string result{"("}; + writeChar('(', out); auto first = true; for (const auto & key : *dict_struct.key) { if (!first) - result += ", "; + writeString(", ", out); first = false; - result += key.name; + writeQuoted(key.name, out); } - result += ")"; - - return result; + writeChar(')', out); } diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 93e10f2d6b0..2ffc6a475ee 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -58,11 +58,13 @@ struct ExternalQueryBuilder private: const FormatSettings format_settings; + void composeLoadAllQuery(WriteBuffer & out) const; + /// Expression in form (x = c1 AND y = c2 ...) void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const; /// Expression in form (x, y, ...) 
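
Put together, the builder with IdentifierQuotingStyle::DoubleQuotes plus the trailing-semicolon replacement in the Cassandra source yields plain CQL with ALLOW FILTERING appended. As a rough, reconstructed example for a dictionary with key column id and attribute value stored in keyspace test, table dict (table name illustrative; exact spacing and quoting may differ):

    loadAll():        SELECT "id", "value" FROM "test"."dict" ALLOW FILTERING;
    loadIds({1, 3}):  SELECT "id", "value" FROM "test"."dict" WHERE "id" IN (1, 3) ALLOW FILTERING;

For composite keys, composeKeyTupleDefinition() produces the quoted tuple ("k1", "k2") used in the IN (...) form, which is why the loadKeys() path above still carries the FIXME about splitting partition-key and clustering-key conditions.
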
- std::string composeKeyTupleDefinition() const; + void composeKeyTupleDefinition(WriteBuffer & out) const; /// Expression in form (c1, c2, ...) void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const; diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8b97d77f4c9..9e66712fd7e 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -19,6 +19,7 @@ import pprint import psycopg2 import pymongo import pymysql +import cassandra.cluster from dicttoxml import dicttoxml from kazoo.client import KazooClient from kazoo.exceptions import KazooException @@ -448,6 +449,18 @@ class ClickHouseCluster: logging.warning("Can't connect to SchemaRegistry: %s", str(ex)) time.sleep(1) + def wait_cassandra_to_start(self, timeout=15): + cass_client = cassandra.cluster.Cluster(["localhost"], port="9043") + start = time.time() + while time.time() - start < timeout: + try: + cass_client.connect().execute("drop keyspace if exists test;") + logging.info("Connected to Cassandra %s") + return + except Exception as ex: + logging.warning("Can't connect to Minio: %s", str(ex)) + time.sleep(1) + def start(self, destroy_dirs=True): if self.is_up: return @@ -526,7 +539,7 @@ class ClickHouseCluster: if self.with_cassandra and self.base_cassandra_cmd: subprocess_check_call(self.base_cassandra_cmd + ['up', '-d', '--force-recreate']) - time.sleep(10) + self.wait_cassandra_to_start() clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate'] logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd))) diff --git a/tests/integration/helpers/docker_compose_cassandra.yml b/tests/integration/helpers/docker_compose_cassandra.yml index 6bbedcc1130..6567a352027 100644 --- a/tests/integration/helpers/docker_compose_cassandra.yml +++ b/tests/integration/helpers/docker_compose_cassandra.yml @@ -4,4 +4,4 @@ services: image: cassandra restart: always ports: - - 6340:6349 + - 9043:9042 diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 493cccc8482..2dad70bc913 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -8,6 +8,7 @@ import aerospike from tzlocal import get_localzone import datetime import os +import uuid class ExternalSource(object): @@ -407,23 +408,70 @@ class SourceHTTPS(SourceHTTPBase): return "https" class SourceCassandra(ExternalSource): + TYPE_MAPPING = { + 'UInt8': 'tinyint', + 'UInt16': 'smallint', + 'UInt32': 'int', + 'UInt64': 'bigint', + 'Int8': 'tinyint', + 'Int16': 'smallint', + 'Int32': 'int', + 'Int64': 'bigint', + 'UUID': 'uuid', + 'Date': 'date', + 'DateTime': 'timestamp', + 'String': 'text', + 'Float32': 'float', + 'Float64': 'double' + } + + def __init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password): + ExternalSource.__init__(self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password) + self.structure = dict() + def get_source_str(self, table_name): return ''' {host} {port} + test + {table} '''.format( host=self.docker_hostname, port=self.docker_port, + table=table_name, ) def prepare(self, structure, table_name, cluster): self.client = cassandra.cluster.Cluster([self.internal_hostname], 
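
The source definition that get_source_str produces for the generated dictionary config, reconstructed from the configuration keys the C++ source reads (host, port and, since this patch, keyspace and column_family), looks roughly like:

    <cassandra>
        <host>{host}</host>
        <port>{port}</port>
        <keyspace>test</keyspace>
        <column_family>{table}</column_family>
    </cassandra>

where {host}, {port} and {table} are the Python format placeholders substituted in get_source_str.
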
port=self.internal_port) + self.session = self.client.connect() + self.session.execute("create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};") + self.structure[table_name] = structure + columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] + keys = ['"' + col.name + '"' for col in structure.keys] + # FIXME use partition key + query = 'create table test."{name}" ({columns}, primary key ("{some_col}", {pk}));'.format( + name=table_name, columns=', '.join(columns), some_col=structure.ordinary_fields[0].name, pk=', '.join(keys)) + self.session.execute(query) self.prepared = True + def get_value_to_insert(self, value, type): + if type == 'UUID': + return uuid.UUID(value) + elif type == 'DateTime': + local_datetime = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S') + return get_localzone().localize(local_datetime) + return value + def load_data(self, data, table_name): - pass + names_and_types = [(field.name, field.field_type) for field in self.structure[table_name].get_all_fields()] + columns = ['"' + col[0] + '"' for col in names_and_types] + insert = 'insert into test."{table}" ({columns}) values ({args})'.format( + table=table_name, columns=','.join(columns), args=','.join(['%s']*len(columns))) + for row in data: + values = [self.get_value_to_insert(row.get_value_by_name(col[0]), col[1]) for col in names_and_types] + self.session.execute(insert, values) class SourceRedis(ExternalSource): def __init__( diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 2e1d3ed4c70..98ba191c948 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -8,7 +8,7 @@ from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceEx from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra import math - +import time SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') @@ -119,7 +119,7 @@ LAYOUTS = [ ] SOURCES = [ - SourceCassandra("Cassandra", "localhost", "6340", "cassandra1", "6349", "", ""), + SourceCassandra("Cassandra", "localhost", "9043", "cassandra1", "9042", "", ""), SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMongoURI("MongoDB_URI", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), @@ -134,7 +134,7 @@ SOURCES = [ DICTIONARIES = [] -# Key-value dictionaries with onle one possible field for key +# Key-value dictionaries with only one possible field for key SOURCES_KV = [ SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), @@ -212,6 +212,7 @@ def get_dictionaries(fold, total_folds, all_dicts): return all_dicts[fold * chunk_len : (fold + 1) * chunk_len] +#@pytest.mark.timeout(3000) @pytest.mark.parametrize("fold", list(range(10))) def test_simple_dictionaries(started_cluster, fold): fields = FIELDS["simple"] @@ -227,6 +228,8 @@ def test_simple_dictionaries(started_cluster, 
fold): node.query("system reload dictionaries") + #time.sleep(3000) + queries_with_answers = [] for dct in simple_dicts: for row in data: From 415c8469a355f076e327d88d48e2d820996f2962 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 16:51:23 +0300 Subject: [PATCH 10/38] comaptibility with glibc 2.4 --- .gitmodules | 4 +- contrib/cassandra | 2 +- contrib/libuv | 2 +- contrib/libuv-cmake/CMakeLists.txt | 538 ++++++++++------------------- 4 files changed, 196 insertions(+), 350 deletions(-) diff --git a/.gitmodules b/.gitmodules index bbc9f15ffd8..a4c84301fc9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -163,8 +163,8 @@ branch = ch-tmp [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/libuv/libuv.git - branch = v1.x + url = https://github.com/tavplubix/libuv.git + branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git diff --git a/contrib/cassandra b/contrib/cassandra index 9606ff1f70b..58a71947d9d 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 9606ff1f70bd3fc5d395df32e626923c012ffb5f +Subproject commit 58a71947d9dd8412f5aeb38275fa81417ea27ee0 diff --git a/contrib/libuv b/contrib/libuv index cc51217a317..379988fef9b 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit cc51217a317e96510fbb284721d5e6bc2af31e33 +Subproject commit 379988fef9b0c6ac706a624dbac6be8924a3a0da diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index b84ce217f3b..a2869e037ff 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,96 +1,23 @@ cmake_minimum_required(VERSION 3.4) project(libuv LANGUAGES C) -cmake_policy(SET CMP0057 NEW) # Enable IN_LIST operator -cmake_policy(SET CMP0064 NEW) # Support if (TEST) operator - -#list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") - include(CMakePackageConfigHelpers) include(CMakeDependentOption) -include(CheckCCompilerFlag) include(GNUInstallDirs) include(CTest) -set(CMAKE_C_VISIBILITY_PRESET hidden) -set(CMAKE_C_STANDARD_REQUIRED ON) -set(CMAKE_C_EXTENSIONS ON) -set(CMAKE_C_STANDARD 90) - #cmake_dependent_option(LIBUV_BUILD_TESTS # "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON # "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) -#cmake_dependent_option(LIBUV_BUILD_BENCH -# "Build the benchmarks when building unit tests and we are the root project" ON -# "LIBUV_BUILD_TESTS" OFF) -# Qemu Build -option(QEMU "build for qemu" OFF) -if(QEMU) - add_definitions(-D__QEMU__=1) +if(MSVC) + list(APPEND uv_cflags /W4) +elseif(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") + list(APPEND uv_cflags -fvisibility=hidden --std=gnu89) + list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes) + list(APPEND uv_cflags -Wno-unused-parameter) endif() -# Compiler check -string(CONCAT is-msvc $, - $ ->) - -check_c_compiler_flag(/W4 UV_LINT_W4) -check_c_compiler_flag(/wd4100 UV_LINT_NO_UNUSED_PARAMETER_MSVC) -check_c_compiler_flag(/wd4127 UV_LINT_NO_CONDITIONAL_CONSTANT_MSVC) -check_c_compiler_flag(/wd4201 UV_LINT_NO_NONSTANDARD_MSVC) -check_c_compiler_flag(/wd4206 UV_LINT_NO_NONSTANDARD_EMPTY_TU_MSVC) -check_c_compiler_flag(/wd4210 UV_LINT_NO_NONSTANDARD_FILE_SCOPE_MSVC) -check_c_compiler_flag(/wd4232 UV_LINT_NO_NONSTANDARD_NONSTATIC_DLIMPORT_MSVC) -check_c_compiler_flag(/wd4456 UV_LINT_NO_HIDES_LOCAL) -check_c_compiler_flag(/wd4457 UV_LINT_NO_HIDES_PARAM) -check_c_compiler_flag(/wd4459 
UV_LINT_NO_HIDES_GLOBAL) -check_c_compiler_flag(/wd4706 UV_LINT_NO_CONDITIONAL_ASSIGNMENT_MSVC) -check_c_compiler_flag(/wd4996 UV_LINT_NO_UNSAFE_MSVC) - -check_c_compiler_flag(-Wall UV_LINT_WALL) # DO NOT use this under MSVC - -# TODO: Place these into its own function -check_c_compiler_flag(-Wno-unused-parameter UV_LINT_NO_UNUSED_PARAMETER) -check_c_compiler_flag(-Wstrict-prototypes UV_LINT_STRICT_PROTOTYPES) -check_c_compiler_flag(-Wextra UV_LINT_EXTRA) - -set(lint-no-unused-parameter $<$:-Wno-unused-parameter>) -set(lint-strict-prototypes $<$:-Wstrict-prototypes>) -set(lint-extra $<$:-Wextra>) -set(lint-w4 $<$:/W4>) -set(lint-no-unused-parameter-msvc $<$:/wd4100>) -set(lint-no-conditional-constant-msvc $<$:/wd4127>) -set(lint-no-nonstandard-msvc $<$:/wd4201>) -set(lint-no-nonstandard-empty-tu-msvc $<$:/wd4206>) -set(lint-no-nonstandard-file-scope-msvc $<$:/wd4210>) -set(lint-no-nonstandard-nonstatic-dlimport-msvc $<$:/wd4232>) -set(lint-no-hides-local-msvc $<$:/wd4456>) -set(lint-no-hides-param-msvc $<$:/wd4457>) -set(lint-no-hides-global-msvc $<$:/wd4459>) -set(lint-no-conditional-assignment-msvc $<$:/wd4706>) -set(lint-no-unsafe-msvc $<$:/wd4996>) -# Unfortunately, this one is complicated because MSVC and clang-cl support -Wall -# but using it is like calling -Weverything -string(CONCAT lint-default $< - $,$>:-Wall ->) - -list(APPEND uv_cflags ${lint-strict-prototypes} ${lint-extra} ${lint-default} ${lint-w4}) -list(APPEND uv_cflags ${lint-no-unused-parameter}) -list(APPEND uv_cflags ${lint-no-unused-parameter-msvc}) -list(APPEND uv_cflags ${lint-no-conditional-constant-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-empty-tu-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-file-scope-msvc}) -list(APPEND uv_cflags ${lint-no-nonstandard-nonstatic-dlimport-msvc}) -list(APPEND uv_cflags ${lint-no-hides-local-msvc}) -list(APPEND uv_cflags ${lint-no-hides-param-msvc}) -list(APPEND uv_cflags ${lint-no-hides-global-msvc}) -list(APPEND uv_cflags ${lint-no-conditional-assignment-msvc}) -list(APPEND uv_cflags ${lint-no-unsafe-msvc}) - set(uv_sources src/fs-poll.c src/idna.c @@ -103,11 +30,172 @@ set(uv_sources src/uv-data-getter-setters.c src/version.c) +set(uv_test_sources + test/blackhole-server.c + test/echo-server.c + test/run-tests.c + test/runner.c + test/test-active.c + test/test-async-null-cb.c + test/test-async.c + test/test-barrier.c + test/test-callback-order.c + test/test-callback-stack.c + test/test-close-fd.c + test/test-close-order.c + test/test-condvar.c + test/test-connect-unspecified.c + test/test-connection-fail.c + test/test-cwd-and-chdir.c + test/test-default-loop-close.c + test/test-delayed-accept.c + test/test-dlerror.c + test/test-eintr-handling.c + test/test-embed.c + test/test-emfile.c + test/test-env-vars.c + test/test-error.c + test/test-fail-always.c + test/test-fork.c + test/test-fs-copyfile.c + test/test-fs-event.c + test/test-fs-poll.c + test/test-fs.c + test/test-fs-readdir.c + test/test-fs-fd-hash.c + test/test-fs-open-flags.c + test/test-get-currentexe.c + test/test-get-loadavg.c + test/test-get-memory.c + test/test-get-passwd.c + test/test-getaddrinfo.c + test/test-gethostname.c + test/test-getnameinfo.c + test/test-getsockname.c + test/test-getters-setters.c + test/test-gettimeofday.c + test/test-handle-fileno.c + test/test-homedir.c + test/test-hrtime.c + test/test-idle.c + test/test-idna.c + test/test-ip4-addr.c + test/test-ip6-addr.c + test/test-ipc-heavy-traffic-deadlock-bug.c + 
test/test-ipc-send-recv.c + test/test-ipc.c + test/test-loop-alive.c + test/test-loop-close.c + test/test-loop-configure.c + test/test-loop-handles.c + test/test-loop-stop.c + test/test-loop-time.c + test/test-multiple-listen.c + test/test-mutexes.c + test/test-osx-select.c + test/test-pass-always.c + test/test-ping-pong.c + test/test-pipe-bind-error.c + test/test-pipe-close-stdout-read-stdin.c + test/test-pipe-connect-error.c + test/test-pipe-connect-multiple.c + test/test-pipe-connect-prepare.c + test/test-pipe-getsockname.c + test/test-pipe-pending-instances.c + test/test-pipe-sendmsg.c + test/test-pipe-server-close.c + test/test-pipe-set-fchmod.c + test/test-pipe-set-non-blocking.c + test/test-platform-output.c + test/test-poll-close-doesnt-corrupt-stack.c + test/test-poll-close.c + test/test-poll-closesocket.c + test/test-poll-oob.c + test/test-poll.c + test/test-process-priority.c + test/test-process-title-threadsafe.c + test/test-process-title.c + test/test-queue-foreach-delete.c + test/test-random.c + test/test-ref.c + test/test-run-nowait.c + test/test-run-once.c + test/test-semaphore.c + test/test-shutdown-close.c + test/test-shutdown-eof.c + test/test-shutdown-twice.c + test/test-signal-multiple-loops.c + test/test-signal-pending-on-close.c + test/test-signal.c + test/test-socket-buffer-size.c + test/test-spawn.c + test/test-stdio-over-pipes.c + test/test-strscpy.c + test/test-tcp-alloc-cb-fail.c + test/test-tcp-bind-error.c + test/test-tcp-bind6-error.c + test/test-tcp-close-accept.c + test/test-tcp-close-while-connecting.c + test/test-tcp-close.c + test/test-tcp-close-reset.c + test/test-tcp-connect-error-after-write.c + test/test-tcp-connect-error.c + test/test-tcp-connect-timeout.c + test/test-tcp-connect6-error.c + test/test-tcp-create-socket-early.c + test/test-tcp-flags.c + test/test-tcp-oob.c + test/test-tcp-open.c + test/test-tcp-read-stop.c + test/test-tcp-shutdown-after-write.c + test/test-tcp-try-write.c + test/test-tcp-try-write-error.c + test/test-tcp-unexpected-read.c + test/test-tcp-write-after-connect.c + test/test-tcp-write-fail.c + test/test-tcp-write-queue-order.c + test/test-tcp-write-to-half-open-connection.c + test/test-tcp-writealot.c + test/test-thread-equal.c + test/test-thread.c + test/test-threadpool-cancel.c + test/test-threadpool.c + test/test-timer-again.c + test/test-timer-from-check.c + test/test-timer.c + test/test-tmpdir.c + test/test-tty-duplicate-key.c + test/test-tty.c + test/test-udp-alloc-cb-fail.c + test/test-udp-bind.c + test/test-udp-connect.c + test/test-udp-create-socket-early.c + test/test-udp-dgram-too-big.c + test/test-udp-ipv6.c + test/test-udp-multicast-interface.c + test/test-udp-multicast-interface6.c + test/test-udp-multicast-join.c + test/test-udp-multicast-join6.c + test/test-udp-multicast-ttl.c + test/test-udp-open.c + test/test-udp-options.c + test/test-udp-send-and-recv.c + test/test-udp-send-hang-loop.c + test/test-udp-send-immediate.c + test/test-udp-send-unreachable.c + test/test-udp-try-send.c + test/test-uname.c + test/test-walk-handles.c + test/test-watcher-cross-stop.c) + #if(WIN32) # list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) # list(APPEND uv_libraries -# psapi +# advapi32 # iphlpapi +# psapi +# shell32 +# user32 # userenv # ws2_32) # list(APPEND uv_sources @@ -140,8 +228,7 @@ set(uv_sources # list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) #else() list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) -if(NOT CMAKE_SYSTEM_NAME MATCHES "Android|OS390") - # TODO: This 
should be replaced with find_package(Threads) if possible +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") # Android has pthread as part of its c library, not as a separate # libpthread.so. list(APPEND uv_libraries pthread) @@ -173,16 +260,13 @@ if(CMAKE_SYSTEM_NAME STREQUAL "AIX") _ALL_SOURCE _LINUX_SOURCE_COMPAT _THREAD_SAFE - _XOPEN_SOURCE=500 - HAVE_SYS_AHAFS_EVPRODS_H) + _XOPEN_SOURCE=500) list(APPEND uv_libraries perfstat) - list(APPEND uv_sources - src/unix/aix.c - src/unix/aix-common.c) + list(APPEND uv_sources src/unix/aix.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "Android") - list(APPEND uv_libraries dl) + list(APPEND uv_libs dl) list(APPEND uv_sources src/unix/android-ifaddrs.c src/unix/linux-core.c @@ -190,13 +274,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android") src/unix/linux-syscalls.c src/unix/procfs-exepath.c src/unix/pthread-fixes.c - src/unix/random-getentropy.c src/unix/random-getrandom.c src/unix/random-sysctl-linux.c src/unix/sysinfo-loadavg.c) endif() -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS390") +if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS/390") list(APPEND uv_sources src/unix/proctitle.c) endif() @@ -250,11 +333,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") list(APPEND uv_sources src/unix/openbsd.c) endif() -if(CMAKE_SYSTEM_NAME STREQUAL "OS390") +if(CMAKE_SYSTEM_NAME STREQUAL "OS/390") list(APPEND uv_defines PATH_MAX=255) list(APPEND uv_defines _AE_BIMODAL) list(APPEND uv_defines _ALL_SOURCE) - list(APPEND uv_defines _ISOC99_SOURCE) list(APPEND uv_defines _LARGE_TIME_API) list(APPEND uv_defines _OPEN_MSGQ_EXT) list(APPEND uv_defines _OPEN_SYS_FILE_EXT) @@ -267,25 +349,9 @@ if(CMAKE_SYSTEM_NAME STREQUAL "OS390") list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) list(APPEND uv_sources src/unix/pthread-fixes.c + src/unix/pthread-barrier.c src/unix/os390.c src/unix/os390-syscalls.c) - list(APPEND uv_cflags -Wc,DLL -Wc,exportall -Wc,xplink) - list(APPEND uv_libraries -Wl,xplink) - list(APPEND uv_test_libraries -Wl,xplink) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OS400") - list(APPEND uv_defines - _ALL_SOURCE - _LINUX_SOURCE_COMPAT - _THREAD_SAFE - _XOPEN_SOURCE=500) - list(APPEND uv_sources - src/unix/aix-common.c - src/unix/ibmi.c - src/unix/no-fsevents.c - src/unix/no-proctitle.c - src/unix/posix-poll.c) endif() if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") @@ -304,225 +370,24 @@ foreach(file ${uv_sources}) endforeach(file) set(uv_sources "${uv_sources_tmp}") +list(APPEND uv_defines CLICKHOUSE_GLIBC_COMPATIBILITY) + add_library(uv SHARED ${uv_sources}) target_compile_definitions(uv - INTERFACE - USING_UV_SHARED=1 - PRIVATE - BUILDING_UV_SHARED=1 - ${uv_defines}) + INTERFACE USING_UV_SHARED=1 + PRIVATE ${uv_defines} BUILDING_UV_SHARED=1) target_compile_options(uv PRIVATE ${uv_cflags}) -target_include_directories(uv - PUBLIC - $ - $ - PRIVATE - $) +target_include_directories(uv PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) target_link_libraries(uv ${uv_libraries}) add_library(uv_a STATIC ${uv_sources}) target_compile_definitions(uv_a PRIVATE ${uv_defines}) target_compile_options(uv_a PRIVATE ${uv_cflags}) -target_include_directories(uv_a - PUBLIC - $ - $ - PRIVATE - $) +target_include_directories(uv_a PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) target_link_libraries(uv_a ${uv_libraries}) #if(LIBUV_BUILD_TESTS) -# # Small hack: use ${uv_test_sources} now to get the runner skeleton, -# # before the actual tests are added. 
-# add_executable( -# uv_run_benchmarks_a -# ${uv_test_sources} -# test/benchmark-async-pummel.c -# test/benchmark-async.c -# test/benchmark-fs-stat.c -# test/benchmark-getaddrinfo.c -# test/benchmark-loop-count.c -# test/benchmark-million-async.c -# test/benchmark-million-timers.c -# test/benchmark-multi-accept.c -# test/benchmark-ping-pongs.c -# test/benchmark-ping-udp.c -# test/benchmark-pound.c -# test/benchmark-pump.c -# test/benchmark-sizes.c -# test/benchmark-spawn.c -# test/benchmark-tcp-write-batch.c -# test/benchmark-thread.c -# test/benchmark-udp-pummel.c -# test/blackhole-server.c -# test/dns-server.c -# test/echo-server.c -# test/run-benchmarks.c -# test/runner.c) -# target_compile_definitions(uv_run_benchmarks_a PRIVATE ${uv_defines}) -# target_compile_options(uv_run_benchmarks_a PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_benchmarks_a uv_a ${uv_test_libraries}) -# -# list(APPEND uv_test_sources -# test/blackhole-server.c -# test/echo-server.c -# test/run-tests.c -# test/runner.c -# test/test-active.c -# test/test-async-null-cb.c -# test/test-async.c -# test/test-barrier.c -# test/test-callback-order.c -# test/test-callback-stack.c -# test/test-close-fd.c -# test/test-close-order.c -# test/test-condvar.c -# test/test-connect-unspecified.c -# test/test-connection-fail.c -# test/test-cwd-and-chdir.c -# test/test-default-loop-close.c -# test/test-delayed-accept.c -# test/test-dlerror.c -# test/test-eintr-handling.c -# test/test-embed.c -# test/test-emfile.c -# test/test-env-vars.c -# test/test-error.c -# test/test-fail-always.c -# test/test-fork.c -# test/test-fs-copyfile.c -# test/test-fs-event.c -# test/test-fs-poll.c -# test/test-fs.c -# test/test-fs-readdir.c -# test/test-fs-fd-hash.c -# test/test-fs-open-flags.c -# test/test-get-currentexe.c -# test/test-get-loadavg.c -# test/test-get-memory.c -# test/test-get-passwd.c -# test/test-getaddrinfo.c -# test/test-gethostname.c -# test/test-getnameinfo.c -# test/test-getsockname.c -# test/test-getters-setters.c -# test/test-gettimeofday.c -# test/test-handle-fileno.c -# test/test-homedir.c -# test/test-hrtime.c -# test/test-idle.c -# test/test-idna.c -# test/test-ip4-addr.c -# test/test-ip6-addr.c -# test/test-ipc-heavy-traffic-deadlock-bug.c -# test/test-ipc-send-recv.c -# test/test-ipc.c -# test/test-loop-alive.c -# test/test-loop-close.c -# test/test-loop-configure.c -# test/test-loop-handles.c -# test/test-loop-stop.c -# test/test-loop-time.c -# test/test-multiple-listen.c -# test/test-mutexes.c -# test/test-osx-select.c -# test/test-pass-always.c -# test/test-ping-pong.c -# test/test-pipe-bind-error.c -# test/test-pipe-close-stdout-read-stdin.c -# test/test-pipe-connect-error.c -# test/test-pipe-connect-multiple.c -# test/test-pipe-connect-prepare.c -# test/test-pipe-getsockname.c -# test/test-pipe-pending-instances.c -# test/test-pipe-sendmsg.c -# test/test-pipe-server-close.c -# test/test-pipe-set-fchmod.c -# test/test-pipe-set-non-blocking.c -# test/test-platform-output.c -# test/test-poll-close-doesnt-corrupt-stack.c -# test/test-poll-close.c -# test/test-poll-closesocket.c -# test/test-poll-oob.c -# test/test-poll.c -# test/test-process-priority.c -# test/test-process-title-threadsafe.c -# test/test-process-title.c -# test/test-queue-foreach-delete.c -# test/test-random.c -# test/test-ref.c -# test/test-run-nowait.c -# test/test-run-once.c -# test/test-semaphore.c -# test/test-shutdown-close.c -# test/test-shutdown-eof.c -# test/test-shutdown-twice.c -# test/test-signal-multiple-loops.c -# 
test/test-signal-pending-on-close.c -# test/test-signal.c -# test/test-socket-buffer-size.c -# test/test-spawn.c -# test/test-stdio-over-pipes.c -# test/test-strscpy.c -# test/test-tcp-alloc-cb-fail.c -# test/test-tcp-bind-error.c -# test/test-tcp-bind6-error.c -# test/test-tcp-close-accept.c -# test/test-tcp-close-while-connecting.c -# test/test-tcp-close.c -# test/test-tcp-close-reset.c -# test/test-tcp-connect-error-after-write.c -# test/test-tcp-connect-error.c -# test/test-tcp-connect-timeout.c -# test/test-tcp-connect6-error.c -# test/test-tcp-create-socket-early.c -# test/test-tcp-flags.c -# test/test-tcp-oob.c -# test/test-tcp-open.c -# test/test-tcp-read-stop.c -# test/test-tcp-shutdown-after-write.c -# test/test-tcp-try-write.c -# test/test-tcp-try-write-error.c -# test/test-tcp-unexpected-read.c -# test/test-tcp-write-after-connect.c -# test/test-tcp-write-fail.c -# test/test-tcp-write-queue-order.c -# test/test-tcp-write-to-half-open-connection.c -# test/test-tcp-writealot.c -# test/test-thread-equal.c -# test/test-thread.c -# test/test-threadpool-cancel.c -# test/test-threadpool.c -# test/test-timer-again.c -# test/test-timer-from-check.c -# test/test-timer.c -# test/test-tmpdir.c -# test/test-tty-duplicate-key.c -# test/test-tty-escape-sequence-processing.c -# test/test-tty.c -# test/test-udp-alloc-cb-fail.c -# test/test-udp-bind.c -# test/test-udp-connect.c -# test/test-udp-create-socket-early.c -# test/test-udp-dgram-too-big.c -# test/test-udp-ipv6.c -# test/test-udp-multicast-interface.c -# test/test-udp-multicast-interface6.c -# test/test-udp-multicast-join.c -# test/test-udp-multicast-join6.c -# test/test-udp-multicast-ttl.c -# test/test-udp-open.c -# test/test-udp-options.c -# test/test-udp-send-and-recv.c -# test/test-udp-send-hang-loop.c -# test/test-udp-send-immediate.c -# test/test-udp-send-unreachable.c -# test/test-udp-try-send.c -# test/test-uname.c -# test/test-walk-handles.c -# test/test-watcher-cross-stop.c) -# -# add_executable(uv_run_tests ${uv_test_sources} uv_win_longpath.manifest) +# add_executable(uv_run_tests ${uv_test_sources}) # target_compile_definitions(uv_run_tests # PRIVATE ${uv_defines} USING_UV_SHARED=1) # target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) @@ -530,18 +395,10 @@ target_link_libraries(uv_a ${uv_libraries}) # add_test(NAME uv_test # COMMAND uv_run_tests # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -# if(CMAKE_SYSTEM_NAME STREQUAL "OS390") -# set_tests_properties(uv_test PROPERTIES ENVIRONMENT -# "LIBPATH=${CMAKE_BINARY_DIR}:$ENV{LIBPATH}") -# endif() -# add_executable(uv_run_tests_a ${uv_test_sources} uv_win_longpath.manifest) +# add_executable(uv_run_tests_a ${uv_test_sources}) # target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) # target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) -# if(QEMU) -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries} -static) -# else() -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) -# endif() +# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) # add_test(NAME uv_test_a # COMMAND uv_run_tests_a # WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) @@ -549,18 +406,15 @@ target_link_libraries(uv_a ${uv_libraries}) if(UNIX) # Now for some gibbering horrors from beyond the stars... - foreach(lib IN LISTS uv_libraries) - list(APPEND LIBS "-l${lib}") - endforeach() - string(REPLACE ";" " " LIBS "${LIBS}") - # Consider setting project version via project() call? 
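  # For illustration only (hypothetical version string): the replacement lines below rebuild the
  # pkg-config LIBS value and read the library version from the submodule's configure.ac. If that
  # file declared AC_INIT([libuv], [1.38.0], ...), PACKAGE_VERSION would match "1.38.0",
  # UV_VERSION_MAJOR would be "1", and the shared library would get VERSION 1.0.0 and SOVERSION 1
  # (i.e. libuv.so.1), mirroring the autotools naming; the actual value depends on the pinned
  # libuv submodule revision.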
+ foreach(x ${uv_libraries}) + set(LIBS "${LIBS} -l${x}") + endforeach(x) file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT) - string(REGEX MATCH "([0-9]+)[.][0-9]+[.][0-9]+" PACKAGE_VERSION "${configure_ac}") - set(UV_VERSION_MAJOR "${CMAKE_MATCH_1}") + string(REGEX MATCH [0-9]+[.][0-9]+[.][0-9]+ PACKAGE_VERSION "${configure_ac}") + string(REGEX MATCH ^[0-9]+ UV_VERSION_MAJOR "${PACKAGE_VERSION}") # The version in the filename is mirroring the behaviour of autotools. - set_target_properties(uv PROPERTIES - VERSION ${UV_VERSION_MAJOR}.0.0 - SOVERSION ${UV_VERSION_MAJOR}) + set_target_properties(uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 + SOVERSION ${UV_VERSION_MAJOR}) set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) set(prefix ${CMAKE_INSTALL_PREFIX}) @@ -568,7 +422,7 @@ if(UNIX) install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR}) - install(FILES ${PROJECT_BINARY_DIR}/libuv.pc + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libuv.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) @@ -581,11 +435,3 @@ endif() # RUNTIME DESTINATION lib/$ # ARCHIVE DESTINATION lib/$) #endif() -# -#message(STATUS "summary of build options: -# Install prefix: ${CMAKE_INSTALL_PREFIX} -# Target system: ${CMAKE_SYSTEM_NAME} -# Compiler: -# C compiler: ${CMAKE_C_COMPILER} -# CFLAGS: ${CMAKE_C_FLAGS_${_build_type}} ${CMAKE_C_FLAGS} -#") From f099ce19a7dde2ab9c1d9e37b1f932cb32cf4480 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 16:54:39 +0300 Subject: [PATCH 11/38] fix --- src/Dictionaries/CassandraBlockInputStream.h | 2 +- tests/integration/helpers/cluster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index c8476bd2c15..22e4429343d 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -9,7 +9,7 @@ namespace DB { -void cassandraCheck(CassError error); +void cassandraCheck(CassError code); void cassandraWaitAndCheck(CassFuture * future); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 44e08d5bf6a..49a18d14796 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -460,7 +460,7 @@ class ClickHouseCluster: logging.warning("Can't connect to SchemaRegistry: %s", str(ex)) time.sleep(1) - def wait_cassandra_to_start(self, timeout=15): + def wait_cassandra_to_start(self, timeout=30): cass_client = cassandra.cluster.Cluster(["localhost"], port="9043") start = time.time() while time.time() - start < timeout: From 905bce4aebb6925f37f33806cfd9642a51cec69c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 May 2020 23:13:25 +0300 Subject: [PATCH 12/38] fix --- contrib/libuv-cmake/CMakeLists.txt | 6 ++++- .../CassandraBlockInputStream.cpp | 21 +++++++++-------- .../CassandraDictionarySource.cpp | 23 ++++++++----------- tests/integration/helpers/cluster.py | 6 ++--- .../external_sources.py | 1 + 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index a2869e037ff..65aeabdd9cb 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ 
b/contrib/libuv-cmake/CMakeLists.txt @@ -227,7 +227,11 @@ set(uv_test_sources # list(APPEND uv_test_libraries ws2_32) # list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) #else() -list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) + +if(CMAKE_SIZEOF_VOID_P EQUAL 4) + list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) +endif() + if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") # Android has pthread as part of its c library, not as a separate # libpthread.so. diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 473a42549a8..6d8a45508ce 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -1,16 +1,17 @@ -# include -# include -# include -# include -# include -# include -# include +#if !defined(ARCADIA_BUILD) +#include +#endif #if USE_CASSANDRA -# include -# include "CassandraBlockInputStream.h" -# include "CassandraBlockInputStream.h" +#include +#include +#include +#include +#include +#include +#include +#include "CassandraBlockInputStream.h" namespace DB diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 4150fe56f14..2aee5d44b6b 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -13,19 +13,16 @@ namespace DB void registerDictionarySourceCassandra(DictionarySourceFactory & factory) { - auto create_table_source = [=](const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block, - const Context & /* context */, - bool /*check_config*/) -> DictionarySourcePtr { + auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct, + [[maybe_unused]] const Poco::Util::AbstractConfiguration & config, + [[maybe_unused]] const std::string & config_prefix, + [[maybe_unused]] Block & sample_block, + const Context & /* context */, + bool /*check_config*/) -> DictionarySourcePtr + { #if USE_CASSANDRA return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else - (void)dict_struct; - (void)config; - (void)config_prefix; - (void)sample_block; throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif @@ -37,9 +34,9 @@ namespace DB #if USE_CASSANDRA -# include -# include -# include "CassandraBlockInputStream.h" +#include +#include +#include "CassandraBlockInputStream.h" namespace DB { diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 49a18d14796..a539db1d47f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -465,11 +465,11 @@ class ClickHouseCluster: start = time.time() while time.time() - start < timeout: try: - cass_client.connect().execute("drop keyspace if exists test;") - logging.info("Connected to Cassandra %s") + cass_client.connect() + logging.info("Connected to Cassandra") return except Exception as ex: - logging.warning("Can't connect to Minio: %s", str(ex)) + logging.warning("Can't connect to Cassandra: %s", str(ex)) time.sleep(1) def start(self, destroy_dirs=True): diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 2dad70bc913..336f3ddc28b 100644 
--- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -447,6 +447,7 @@ class SourceCassandra(ExternalSource): self.client = cassandra.cluster.Cluster([self.internal_hostname], port=self.internal_port) self.session = self.client.connect() self.session.execute("create keyspace if not exists test with replication = {'class': 'SimpleStrategy', 'replication_factor' : 1};") + self.session.execute('drop table if exists test."{}"'.format(table_name)) self.structure[table_name] = structure columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] keys = ['"' + col.name + '"' for col in structure.keys] From aade00130c2a1e76b83ae595be8c1ae9c0c0e39b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 1 Jun 2020 20:35:30 +0300 Subject: [PATCH 13/38] add raii wrappers --- .../CassandraBlockInputStream.cpp | 88 ++++--------------- src/Dictionaries/CassandraBlockInputStream.h | 16 ++-- .../CassandraDictionarySource.cpp | 27 ++---- src/Dictionaries/CassandraDictionarySource.h | 24 +---- src/Dictionaries/CassandraHelpers.cpp | 38 ++++++++ src/Dictionaries/CassandraHelpers.h | 72 +++++++++++++++ src/Dictionaries/ya.make | 1 + 7 files changed, 140 insertions(+), 126 deletions(-) create mode 100644 src/Dictionaries/CassandraHelpers.cpp create mode 100644 src/Dictionaries/CassandraHelpers.h diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 6d8a45508ce..e00fd5ec3e9 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -23,26 +23,17 @@ namespace ErrorCodes } CassandraBlockInputStream::CassandraBlockInputStream( - CassSession * session_, - const std::string &query_str_, - const DB::Block &sample_block, + const CassClusterPtr & cluster, + const String & query_str, + const Block & sample_block, const size_t max_block_size_) - : session(session_) - , statement(cass_statement_new(query_str_.c_str(), 0)) - , query_str(query_str_) + : statement(query_str.c_str(), /*parameters count*/ 0) , max_block_size(max_block_size_) + , has_more_pages(cass_true) { - cass_statement_set_paging_size(statement, max_block_size); - this->has_more_pages = cass_true; - description.init(sample_block); -} - -CassandraBlockInputStream::~CassandraBlockInputStream() { - if (iterator != nullptr) - cass_iterator_free(iterator); - if (result) - cass_result_free(result); + cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); + cassandraWaitAndCheck(cass_session_connect(session, cluster)); } namespace @@ -51,7 +42,7 @@ namespace void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) { - /// Cassandra does not support unsigned integers + /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) switch (type) { case ValueType::vtUInt8: @@ -159,19 +150,17 @@ namespace } } - // void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } - Block CassandraBlockInputStream::readImpl() { if (!has_more_pages) return {}; MutableColumns columns = description.sample_block.cloneEmptyColumns(); - CassFuture* query_future = cass_session_execute(session, statement); + CassFuturePtr query_future = cass_session_execute(session, statement); - result = cass_future_get_result(query_future); + CassResultPtr result = 
cass_future_get_result(query_future); - if (result == nullptr) { + if (!result) { const char* error_message; size_t error_message_length; cass_future_error_message(query_future, &error_message, &error_message_length); @@ -181,7 +170,7 @@ namespace [[maybe_unused]] size_t row_count = 0; assert(cass_result_column_count(result) == columns.size()); - CassIterator * rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] while (cass_iterator_next(rows_iter)) { const CassRow * row = cass_iterator_get_row(rows_iter); @@ -192,7 +181,7 @@ namespace columns[col_idx]->insertDefault(); else if (description.types[col_idx].second) { - ColumnNullable & column_nullable = static_cast(*columns[col_idx]); + ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); column_nullable.getNullMapData().emplace_back(0); } @@ -202,61 +191,14 @@ namespace ++row_count; } assert(cass_result_row_count(result) == row_count); - cass_iterator_free(rows_iter); - - //const CassRow* row = cass_result_first_row(result); - //const CassValue* map = cass_row_get_column(row, 0); - //const CassValue* map = cass_row_get_column(row, 0); - //iterator = cass_iterator_from_map(map); - //while (cass_iterator_next(iterator)) { - // const CassValue* cass_key = cass_iterator_get_map_key(iterator); - // const CassValue* cass_value = cass_iterator_get_map_value(iterator); - // auto pair_values = {std::make_pair(cass_key, 0ul), std::make_pair(cass_value, 1ul)}; - // for (const auto &[value, idx]: pair_values) { - // if (description.types[idx].second) { - // ColumnNullable & column_nullable = static_cast(*columns[idx]); - // insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); - // column_nullable.getNullMapData().emplace_back(0); - // } else { - // insertValue(*columns[idx], description.types[idx].first, value); - // } - // } - //} has_more_pages = cass_result_has_more_pages(result); - if (has_more_pages) { - cass_statement_set_paging_state(statement, result); - } - - cass_result_free(result); + if (has_more_pages) + cassandraCheck(cass_statement_set_paging_state(statement, result)); return description.sample_block.cloneWithColumns(std::move(columns)); } - -void cassandraCheck(CassError code) -{ - if (code != CASS_OK) - throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code), - ErrorCodes::CASSANDRA_INTERNAL_ERROR); -} - -void cassandraWaitAndCheck(CassFuture * future) -{ - auto code = cass_future_error_code(future); /// Waits if not ready - if (code == CASS_OK) - { - cass_future_free(future); - return; - } - const char * message; - size_t message_len; - cass_future_error_message(future, &message, & message_len); - String full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message; - cass_future_free(future); /// Frees message - throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); -} - } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 22e4429343d..700211ebb3e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -9,8 +9,6 @@ namespace DB { -void cassandraCheck(CassError code); -void 
cassandraWaitAndCheck(CassFuture * future); /// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining @@ -18,11 +16,10 @@ void cassandraWaitAndCheck(CassFuture * future); { public: CassandraBlockInputStream( - CassSession * session, - const std::string & query_str, + const CassClusterPtr & cluster, + const String & query_str, const Block & sample_block, const size_t max_block_size); - ~CassandraBlockInputStream() override; String getName() const override { return "Cassandra"; } @@ -31,14 +28,11 @@ void cassandraWaitAndCheck(CassFuture * future); private: Block readImpl() override; - CassSession * session; - CassStatement * statement; - String query_str; + CassSessionPtr session; + CassStatementPtr statement; const size_t max_block_size; ExternalResultDescription description; - const CassResult * result = nullptr; cass_bool_t has_more_pages; - CassIterator * iterator = nullptr; }; } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index 2aee5d44b6b..a58dd383b46 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -34,7 +34,6 @@ namespace DB #if USE_CASSANDRA -#include #include #include "CassandraBlockInputStream.h" @@ -54,28 +53,23 @@ CassandraDictionarySource::CassandraDictionarySource( UInt16 port_, const String & user_, const String & password_, - //const std::string & method_, const String & db_, const String & table_, const DB::Block & sample_block_) - : log(&Logger::get("CassandraDictionarySource")) + : log(&Poco::Logger::get("CassandraDictionarySource")) , dict_struct(dict_struct_) , host(host_) , port(port_) , user(user_) , password(password_) - //, method(method_) , db(db_) , table(table_) , sample_block(sample_block_) - , cluster(cass_cluster_new()) //FIXME will not be freed in case of exception - , session(cass_session_new()) { cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); if (port) cassandraCheck(cass_cluster_set_port(cluster, port)); cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); - cassandraWaitAndCheck(cass_session_connect_keyspace(session, cluster, db.c_str())); } CassandraDictionarySource::CassandraDictionarySource( @@ -89,7 +83,6 @@ CassandraDictionarySource::CassandraDictionarySource( config.getUInt(config_prefix + ".port", 0), config.getString(config_prefix + ".user", ""), config.getString(config_prefix + ".password", ""), - //config.getString(config_prefix + ".method", ""), config.getString(config_prefix + ".keyspace", ""), config.getString(config_prefix + ".column_family"), sample_block_) @@ -102,22 +95,12 @@ CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySo other.port, other.user, other.password, - //other.method, other.db, other.table, other.sample_block} { } -CassandraDictionarySource::~CassandraDictionarySource() { - cass_session_free(session); - cass_cluster_free(cluster); -} - -//std::string CassandraDictionarySource::toConnectionString(const std::string &host, const UInt16 port) { -// return host + (port != 0 ? 
":" + std::to_string(port) : ""); -//} - BlockInputStreamPtr CassandraDictionarySource::loadAll() { ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; @@ -125,11 +108,11 @@ BlockInputStreamPtr CassandraDictionarySource::loadAll() query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading all using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { - return "Cassandra: " + /*db + '.' + collection + ',' + (user.empty() ? " " : " " + user + '@') + */ host + ':' + DB::toString(port); + return "Cassandra: " + db + '.' + table; } BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) @@ -139,7 +122,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading ids using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -150,7 +133,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu query.pop_back(); query += " ALLOW FILTERING;"; LOG_INFO(log, "Loading keys using query: ", query); - return std::make_shared(session, query, sample_block, max_block_size); + return std::make_shared(cluster, query, sample_block, max_block_size); } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 400481d0a95..564fa75c3a2 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -1,8 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) -#include -#endif +#include #if USE_CASSANDRA @@ -10,10 +8,10 @@ #include "IDictionarySource.h" #include #include -#include namespace DB { + class CassandraDictionarySource final : public IDictionarySource { CassandraDictionarySource( const DictionaryStructure & dict_struct, @@ -21,7 +19,6 @@ class CassandraDictionarySource final : public IDictionarySource { UInt16 port, const String & user, const String & password, - //const std::string & method, const String & db, const String & table, const Block & sample_block); @@ -35,29 +32,20 @@ public: CassandraDictionarySource(const CassandraDictionarySource & other); - ~CassandraDictionarySource() override; - BlockInputStreamPtr loadAll() override; bool supportsSelectiveLoad() const override { return true; } bool isModified() const override { return true; } - ///Not yet supported bool hasUpdateField() const override { return false; } DictionarySourcePtr clone() const override { return std::make_unique(*this); } BlockInputStreamPtr loadIds(const std::vector & ids) override; - //{ - // throw Exception{"Method loadIds is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - //} BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; - //{ - // throw Exception{"Method loadKeys is not implemented yet", ErrorCodes::NOT_IMPLEMENTED}; - //} - + BlockInputStreamPtr loadUpdatedAll() override { throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; @@ -66,21 +54,17 @@ public: std::string toString() const override; private: - //static 
std::string toConnectionString(const std::string & host, const UInt16 port); - Poco::Logger * log; const DictionaryStructure dict_struct; const String host; const UInt16 port; const String user; const String password; - //const std::string method; const String db; const String table; Block sample_block; - CassCluster * cluster; - CassSession * session; + CassClusterPtr cluster; }; } diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp new file mode 100644 index 00000000000..9c181abcf43 --- /dev/null +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -0,0 +1,38 @@ +#include + +#if USE_CASSANDRA +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int CASSANDRA_INTERNAL_ERROR; +} + +void cassandraCheck(CassError code) +{ + if (code != CASS_OK) + throw Exception("Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code), + ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + + +void cassandraWaitAndCheck(CassFuturePtr && future) +{ + auto code = cass_future_error_code(future); /// Waits if not ready + if (code == CASS_OK) + return; + + /// `future` owns `message` and will free it on destruction + const char * message; + size_t message_len; + cass_future_error_message(future, &message, & message_len); + std::string full_message = "Cassandra driver error " + std::to_string(code) + ": " + cass_error_desc(code) + ": " + message; + throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); +} + +} + +#endif diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h new file mode 100644 index 00000000000..48573c1080f --- /dev/null +++ b/src/Dictionaries/CassandraHelpers.h @@ -0,0 +1,72 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_CASSANDRA +#include +#include + +namespace DB +{ + +namespace Cassandra +{ + +template +CassT * defaultCtor() { return nullptr; } + +/// RAII wrapper for raw pointers to objects from cassandra driver library +template> +class ObjectHolder +{ + CassT * ptr = nullptr; +public: + template + ObjectHolder(Args &&... 
args) : ptr(Ctor(std::forward(args)...)) {} + ObjectHolder(CassT * ptr_) : ptr(ptr_) {} + + ObjectHolder(const ObjectHolder &) = delete; + ObjectHolder & operator = (const ObjectHolder &) = delete; + + ObjectHolder(ObjectHolder && rhs) noexcept : ptr(rhs.ptr) { rhs.ptr = nullptr; } + ObjectHolder & operator = (ObjectHolder && rhs) noexcept + { + if (ptr) + Dtor(ptr); + ptr = rhs.ptr; + rhs.ptr = nullptr; + } + + ~ObjectHolder() + { + if (ptr) + Dtor(ptr); + } + + /// For implicit conversion when passing object to driver library functions + operator CassT * () { return ptr; } + operator const CassT * () const { return ptr; } +}; + +} + +/// These object are created on pointer construction +using CassClusterPtr = Cassandra::ObjectHolder; +using CassSessionPtr = Cassandra::ObjectHolder; +using CassStatementPtr = Cassandra::ObjectHolder; + +/// The following objects are created inside Cassandra driver library, +/// but must be freed by user code +using CassFuturePtr = Cassandra::ObjectHolder; +using CassResultPtr = Cassandra::ObjectHolder; +using CassIteratorPtr = Cassandra::ObjectHolder; + +/// Checks return code, throws exception on error +void cassandraCheck(CassError code); +void cassandraWaitAndCheck(CassFuturePtr && future); + +} + +#endif diff --git a/src/Dictionaries/ya.make b/src/Dictionaries/ya.make index 5e1af27a165..3de623a9a8b 100644 --- a/src/Dictionaries/ya.make +++ b/src/Dictionaries/ya.make @@ -19,6 +19,7 @@ SRCS( CacheDictionary_generate3.cpp CassandraBlockInputStream.cpp CassandraDictionarySource.cpp + CassandraHelpers.cpp ClickHouseDictionarySource.cpp ComplexKeyCacheDictionary.cpp ComplexKeyCacheDictionary_createAttributeWithType.cpp From 73926b2cf9c71093dac41b9fadaaa866122add1a Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Jun 2020 14:54:26 +0300 Subject: [PATCH 14/38] support loading from multiple partitions with complex key --- .../CassandraDictionarySource.cpp | 161 +++++++++++------- src/Dictionaries/CassandraDictionarySource.h | 52 ++++-- src/Dictionaries/CassandraHelpers.cpp | 30 ++++ src/Dictionaries/CassandraHelpers.h | 6 + src/Dictionaries/ExternalQueryBuilder.cpp | 76 ++++++--- src/Dictionaries/ExternalQueryBuilder.h | 17 +- .../external_sources.py | 6 +- 7 files changed, 236 insertions(+), 112 deletions(-) diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index a58dd383b46..fec60fe3d83 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,8 +1,10 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include "ExternalQueryBuilder.h" #include +#include +#include +#include namespace DB { @@ -21,6 +23,7 @@ namespace DB bool /*check_config*/) -> DictionarySourcePtr { #if USE_CASSANDRA + setupCassandraDriverLibraryLogging(CASS_LOG_TRACE); return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", @@ -35,107 +38,149 @@ namespace DB #if USE_CASSANDRA #include +#include +#include #include "CassandraBlockInputStream.h" namespace DB { namespace ErrorCodes { - extern const int UNSUPPORTED_METHOD; - extern const int WRONG_PASSWORD; + extern const int LOGICAL_ERROR; + extern const int INVALID_CONFIG_PARAMETER; +} + +CassandraSettings::CassandraSettings( + const Poco::Util::AbstractConfiguration & config, + const String & 
config_prefix) + : host(config.getString(config_prefix + ".host")) + , port(config.getUInt(config_prefix + ".port", 0)) + , user(config.getString(config_prefix + ".user", "")) + , password(config.getString(config_prefix + ".password", "")) + , db(config.getString(config_prefix + ".keyspace", "")) + , table(config.getString(config_prefix + ".column_family")) + , allow_filtering(config.getBool(config_prefix + ".allow_filtering", false)) + , partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1)) + , max_threads(config.getUInt(config_prefix + ".max_threads", 8)) + , where(config.getString(config_prefix + ".where", "")) +{ + setConsistency(config.getString(config_prefix + ".consistency", "One")); +} + +void CassandraSettings::setConsistency(const String & config_str) +{ + if (config_str == "One") + consistency = CASS_CONSISTENCY_ONE; + else if (config_str == "Two") + consistency = CASS_CONSISTENCY_TWO; + else if (config_str == "Three") + consistency = CASS_CONSISTENCY_THREE; + else if (config_str == "All") + consistency = CASS_CONSISTENCY_ALL; + else if (config_str == "EachQuorum") + consistency = CASS_CONSISTENCY_EACH_QUORUM; + else if (config_str == "Quorum") + consistency = CASS_CONSISTENCY_QUORUM; + else if (config_str == "LocalQuorum") + consistency = CASS_CONSISTENCY_LOCAL_QUORUM; + else if (config_str == "LocalOne") + consistency = CASS_CONSISTENCY_LOCAL_ONE; + else if (config_str == "Serial") + consistency = CASS_CONSISTENCY_SERIAL; + else if (config_str == "LocalSerial") + consistency = CASS_CONSISTENCY_LOCAL_SERIAL; + else /// CASS_CONSISTENCY_ANY is only valid for writes + throw Exception("Unsupported consistency level: " + config_str, ErrorCodes::INVALID_CONFIG_PARAMETER); } static const size_t max_block_size = 8192; CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure & dict_struct_, - const String & host_, - UInt16 port_, - const String & user_, - const String & password_, - const String & db_, - const String & table_, - const DB::Block & sample_block_) + const DictionaryStructure & dict_struct_, + const CassandraSettings & settings_, + const Block & sample_block_) : log(&Poco::Logger::get("CassandraDictionarySource")) , dict_struct(dict_struct_) - , host(host_) - , port(port_) - , user(user_) - , password(password_) - , db(db_) - , table(table_) + , settings(settings_) , sample_block(sample_block_) + , query_builder(dict_struct, settings.db, settings.table, settings.where, IdentifierQuotingStyle::DoubleQuotes) { - cassandraCheck(cass_cluster_set_contact_points(cluster, host.c_str())); - if (port) - cassandraCheck(cass_cluster_set_port(cluster, port)); - cass_cluster_set_credentials(cluster, user.c_str(), password.c_str()); + cassandraCheck(cass_cluster_set_contact_points(cluster, settings.host.c_str())); + if (settings.port) + cassandraCheck(cass_cluster_set_port(cluster, settings.port)); + cass_cluster_set_credentials(cluster, settings.user.c_str(), settings.password.c_str()); + cassandraCheck(cass_cluster_set_consistency(cluster, settings.consistency)); } CassandraDictionarySource::CassandraDictionarySource( - const DB::DictionaryStructure & dict_struct_, + const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - DB::Block & sample_block_) + const String & config_prefix, + Block & sample_block_) : CassandraDictionarySource( dict_struct_, - config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port", 0), - 
config.getString(config_prefix + ".user", ""), - config.getString(config_prefix + ".password", ""), - config.getString(config_prefix + ".keyspace", ""), - config.getString(config_prefix + ".column_family"), + CassandraSettings(config, config_prefix), sample_block_) { } -CassandraDictionarySource::CassandraDictionarySource(const CassandraDictionarySource & other) - : CassandraDictionarySource{other.dict_struct, - other.host, - other.port, - other.user, - other.password, - other.db, - other.table, - other.sample_block} +void CassandraDictionarySource::maybeAllowFiltering(String & query) { + if (!settings.allow_filtering) + return; + query.pop_back(); /// remove semicolon + query += " ALLOW FILTERING;"; } BlockInputStreamPtr CassandraDictionarySource::loadAll() { - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadAllQuery(); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading all using query: ", query); + String query = query_builder.composeLoadAllQuery(); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading all using query: {}", query); return std::make_shared(cluster, query, sample_block, max_block_size); } std::string CassandraDictionarySource::toString() const { - return "Cassandra: " + db + '.' + table; + return "Cassandra: " + settings.db + '.' + settings.table; } BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector & ids) { - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadIdsQuery(ids); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading ids using query: ", query); + String query = query_builder.composeLoadIdsQuery(ids); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading ids using query: {}", query); return std::make_shared(cluster, query, sample_block, max_block_size); } BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) { - //FIXME split conditions on partition key and clustering key - ExternalQueryBuilder builder{dict_struct, db, table, "", IdentifierQuotingStyle::DoubleQuotes}; - String query = builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES); - query.pop_back(); - query += " ALLOW FILTERING;"; - LOG_INFO(log, "Loading keys using query: ", query); - return std::make_shared(cluster, query, sample_block, max_block_size); -} + if (requested_rows.empty()) + throw Exception("No rows requested", ErrorCodes::LOGICAL_ERROR); + /// TODO is there a better way to load data by complex keys? 
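    /// Illustrative sketch (hypothetical key values): with partition_key_prefix = 1 and a composite
    /// key (id, name), requested rows (1, 'a'), (1, 'b') and (2, 'c') are grouped below into two
    /// buckets by the SipHash of the "id" column, one SELECT is composed and executed per bucket,
    /// and the per-bucket streams are then unioned with up to settings.max_threads parallel readers.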
+ std::unordered_map> partitions; + for (const auto & row : requested_rows) + { + SipHash partition_key; + for (const auto i : ext::range(0, settings.partition_key_prefix)) + key_columns[i]->updateHashWithValue(row, partition_key); + partitions[partition_key.get64()].push_back(row); + } + + BlockInputStreams streams; + for (const auto & partition : partitions) + { + String query = query_builder.composeLoadKeysQuery(key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix); + maybeAllowFiltering(query); + LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query); + streams.push_back(std::make_shared(cluster, query, sample_block, max_block_size)); + } + + if (streams.size() == 1) + return streams.front(); + + return std::make_shared(streams, nullptr, settings.max_threads); +} } diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 564fa75c3a2..dff93fcd029 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -6,32 +6,47 @@ #include "DictionaryStructure.h" #include "IDictionarySource.h" +#include "ExternalQueryBuilder.h" #include #include namespace DB { +struct CassandraSettings +{ + String host; + UInt16 port; + String user; + String password; + String db; + String table; + + CassConsistency consistency; + bool allow_filtering; + /// TODO get information about key from the driver + size_t partition_key_prefix; + size_t max_threads; + String where; + + CassandraSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + + void setConsistency(const String & config_str); +}; + class CassandraDictionarySource final : public IDictionarySource { +public: CassandraDictionarySource( const DictionaryStructure & dict_struct, - const String & host, - UInt16 port, - const String & user, - const String & password, - const String & db, - const String & table, + const CassandraSettings & settings_, const Block & sample_block); -public: CassandraDictionarySource( const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, + const String & config_prefix, Block & sample_block); - CassandraDictionarySource(const CassandraDictionarySource & other); - BlockInputStreamPtr loadAll() override; bool supportsSelectiveLoad() const override { return true; } @@ -40,7 +55,10 @@ public: bool hasUpdateField() const override { return false; } - DictionarySourcePtr clone() const override { return std::make_unique(*this); } + DictionarySourcePtr clone() const override + { + return std::make_unique(dict_struct, settings, sample_block); + } BlockInputStreamPtr loadIds(const std::vector & ids) override; @@ -51,18 +69,16 @@ public: throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; } - std::string toString() const override; + String toString() const override; private: + void maybeAllowFiltering(String & query); + Poco::Logger * log; const DictionaryStructure dict_struct; - const String host; - const UInt16 port; - const String user; - const String password; - const String db; - const String table; + const CassandraSettings settings; Block sample_block; + ExternalQueryBuilder query_builder; CassClusterPtr cluster; }; diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp index 9c181abcf43..4f92a75a1f3 100644 --- 
a/src/Dictionaries/CassandraHelpers.cpp +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -2,6 +2,8 @@ #if USE_CASSANDRA #include +#include +#include namespace DB { @@ -33,6 +35,34 @@ void cassandraWaitAndCheck(CassFuturePtr && future) throw Exception(full_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR); } +static std::once_flag setup_logging_flag; + +void setupCassandraDriverLibraryLogging(CassLogLevel level) +{ + std::call_once(setup_logging_flag, [level]() + { + Poco::Logger * logger = &Poco::Logger::get("CassandraDriverLibrary"); + cass_log_set_level(level); + if (level != CASS_LOG_DISABLED) + cass_log_set_callback(cassandraLogCallback, logger); + }); +} + +void cassandraLogCallback(const CassLogMessage * message, void * data) +{ + Poco::Logger * logger = static_cast(data); + if (message->severity == CASS_LOG_CRITICAL || message->severity == CASS_LOG_ERROR) + LOG_ERROR(logger, message->message); + else if (message->severity == CASS_LOG_WARN) + LOG_WARNING(logger, message->message); + else if (message->severity == CASS_LOG_INFO) + LOG_INFO(logger, message->message); + else if (message->severity == CASS_LOG_DEBUG) + LOG_DEBUG(logger, message->message); + else if (message->severity == CASS_LOG_TRACE) + LOG_TRACE(logger, message->message); +} + } #endif diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 48573c1080f..2a91815e37d 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -67,6 +67,12 @@ using CassIteratorPtr = Cassandra::ObjectHolder std::string -ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method) +ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method, size_t partition_key_prefix) { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; @@ -307,25 +307,30 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st writeString(" OR ", out); first = false; - composeKeyCondition(key_columns, row, out); + + writeString("(", out); + composeKeyCondition(key_columns, row, out, 0, key_columns.size()); + writeString(")", out); } } - else /* if (method == IN_WITH_TUPLES) */ + else if (method == IN_WITH_TUPLES) { - composeKeyTupleDefinition(out); - writeString(" IN (", out); - - first = true; - for (const auto row : requested_rows) - { - if (!first) - writeString(", ", out); - - first = false; - composeKeyTuple(key_columns, row, out); - } - - writeString(")", out); + composeInWithTuples(key_columns, requested_rows, out, 0, key_columns.size()); + } + else /* if (method == CASSANDRA_SEPARATE_PARTITION_KEY) */ + { + /// CQL does not allow using OR conditions + /// and does not allow using multi-column IN expressions with partition key columns. + /// So we have to use multiple queries with conditions like + /// (partition_key_1 = val1 AND partition_key_2 = val2 ...) AND (clustering_key_1, ...) IN ((val3, ...), ...) + /// for each partition key. + /// `partition_key_prefix` is a number of columns from partition key. + /// All `requested_rows` must have the same values of partition key. 
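The comment block above explains why the Cassandra path cannot use a single OR chain or a multi-column IN over partition key columns: the builder instead emits, per partition, an equality condition on the partition-key prefix plus a tuple IN over the clustering columns. A hedged illustration of the shape of the generated queries; table and column names are made up, and quoting follows the DoubleQuotes style the builder already used:

```cpp
#include <iostream>

int main()
{
    /// Hypothetical dictionary key: partition key (shard), clustering key (id, version).
    /// For requested keys (1, 10, 1), (1, 20, 2) and (2, 10, 1) two queries are emitted,
    /// one per partition (plus ALLOW FILTERING when that setting is enabled):
    const char * partition_1 =
        "SELECT value FROM ks.cf "
        "WHERE \"shard\"=1 AND (\"id\", \"version\") IN ((10, 1), (20, 2));";
    const char * partition_2 =
        "SELECT value FROM ks.cf "
        "WHERE \"shard\"=2 AND (\"id\", \"version\") IN ((10, 1));";

    std::cout << partition_1 << '\n' << partition_2 << '\n';
    return 0;
}
```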
+ composeKeyCondition(key_columns, requested_rows.at(0), out, 0, partition_key_prefix); + if (partition_key_prefix && partition_key_prefix < key_columns.size()) + writeString(" AND ", out); + if (partition_key_prefix < key_columns.size()) + composeInWithTuples(key_columns, requested_rows, out, partition_key_prefix, key_columns.size()); } if (!where.empty()) @@ -339,13 +344,11 @@ ExternalQueryBuilder::composeLoadKeysQuery(const Columns & key_columns, const st } -void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out, + size_t beg, size_t end) const { - writeString("(", out); - - const auto keys_size = key_columns.size(); auto first = true; - for (const auto i : ext::range(0, keys_size)) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(" AND ", out); @@ -359,12 +362,30 @@ void ExternalQueryBuilder::composeKeyCondition(const Columns & key_columns, cons writeString("=", out); key_description.type->serializeAsTextQuoted(*key_columns[i], row, out, format_settings); } +} + + +void ExternalQueryBuilder::composeInWithTuples(const Columns & key_columns, const std::vector & requested_rows, + WriteBuffer & out, size_t beg, size_t end) +{ + composeKeyTupleDefinition(out, beg, end); + writeString(" IN (", out); + + bool first = true; + for (const auto row : requested_rows) + { + if (!first) + writeString(", ", out); + + first = false; + composeKeyTuple(key_columns, row, out, beg, end); + } writeString(")", out); } -void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const { if (!dict_struct.key) throw Exception{"Composite key required for method", ErrorCodes::UNSUPPORTED_METHOD}; @@ -372,26 +393,25 @@ void ExternalQueryBuilder::composeKeyTupleDefinition(WriteBuffer & out) const writeChar('(', out); auto first = true; - for (const auto & key : *dict_struct.key) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(", ", out); first = false; - writeQuoted(key.name, out); + writeQuoted((*dict_struct.key)[i].name, out); } writeChar(')', out); } -void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const +void ExternalQueryBuilder::composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const { writeString("(", out); - const auto keys_size = key_columns.size(); auto first = true; - for (const auto i : ext::range(0, keys_size)) + for (const auto i : ext::range(beg, end)) { if (!first) writeString(", ", out); diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 2ffc6a475ee..3011efbc895 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -42,17 +42,19 @@ struct ExternalQueryBuilder std::string composeLoadIdsQuery(const std::vector & ids); /** Generate a query to load data by set of composite keys. - * There are two methods of specification of composite keys in WHERE: + * There are three methods of specification of composite keys in WHERE: * 1. (x = c11 AND y = c12) OR (x = c21 AND y = c22) ... * 2. (x, y) IN ((c11, c12), (c21, c22), ...) + * 3. 
(x = c1 AND (y, z) IN ((c2, c3), ...)) */ enum LoadKeysMethod { AND_OR_CHAIN, IN_WITH_TUPLES, + CASSANDRA_SEPARATE_PARTITION_KEY, }; - std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method); + std::string composeLoadKeysQuery(const Columns & key_columns, const std::vector & requested_rows, LoadKeysMethod method, size_t partition_key_prefix = 0); private: @@ -60,14 +62,19 @@ private: void composeLoadAllQuery(WriteBuffer & out) const; + /// In the following methods `beg` and `end` specifies which columns to write in expression + /// Expression in form (x = c1 AND y = c2 ...) - void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out) const; + void composeKeyCondition(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const; + + /// Expression in form (x, y, ...) IN ((c1, c2, ...), ...) + void composeInWithTuples(const Columns & key_columns, const std::vector & requested_rows, WriteBuffer & out, size_t beg, size_t end); /// Expression in form (x, y, ...) - void composeKeyTupleDefinition(WriteBuffer & out) const; + void composeKeyTupleDefinition(WriteBuffer & out, size_t beg, size_t end) const; /// Expression in form (c1, c2, ...) - void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out) const; + void composeKeyTuple(const Columns & key_columns, const size_t row, WriteBuffer & out, size_t beg, size_t end) const; /// Write string with specified quoting style. void writeQuoted(const std::string & s, WriteBuffer & out) const; diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 336f3ddc28b..04fe83414e7 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -436,6 +436,7 @@ class SourceCassandra(ExternalSource): {port} test {table} + 1 '''.format( host=self.docker_hostname, @@ -451,9 +452,8 @@ class SourceCassandra(ExternalSource): self.structure[table_name] = structure columns = ['"' + col.name + '" ' + self.TYPE_MAPPING[col.field_type] for col in structure.get_all_fields()] keys = ['"' + col.name + '"' for col in structure.keys] - # FIXME use partition key - query = 'create table test."{name}" ({columns}, primary key ("{some_col}", {pk}));'.format( - name=table_name, columns=', '.join(columns), some_col=structure.ordinary_fields[0].name, pk=', '.join(keys)) + query = 'create table test."{name}" ({columns}, primary key ({pk}));'.format( + name=table_name, columns=', '.join(columns), pk=', '.join(keys)) self.session.execute(query) self.prepared = True From 83b6467308ad9cf0ca21d2873ee42bbf23ea6d9f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Jun 2020 22:50:11 +0300 Subject: [PATCH 15/38] Added RemoteSource. 
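As a complement to the partition-splitting example earlier, the enum comment documents three ways of expressing a composite-key lookup in WHERE. A hedged side-by-side illustration for two requested keys (x, y) = (1, 'a') and (2, 'b'); column names are illustrative and quoting is omitted for brevity:

```cpp
#include <iostream>

int main()
{
    const char * and_or_chain =                       // 1. AND_OR_CHAIN
        "WHERE (x=1 AND y='a') OR (x=2 AND y='b')";
    const char * in_with_tuples =                     // 2. IN_WITH_TUPLES
        "WHERE (x, y) IN ((1, 'a'), (2, 'b'))";
    const char * separate_partition_key =             // 3. CASSANDRA_SEPARATE_PARTITION_KEY
        "WHERE x=1 AND (y) IN (('a'))";               //    one such query per partition key value

    std::cout << and_or_chain << '\n' << in_with_tuples << '\n' << separate_partition_key << '\n';
    return 0;
}
```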
--- src/DataStreams/RemoteQueryExecutor.h | 4 +- src/Processors/QueryPipeline.cpp | 11 +- src/Processors/RowsBeforeLimitCounter.h | 6 + src/Processors/Sources/DelayedSource.cpp | 6 + src/Processors/Sources/DelayedSource.h | 23 ++++ src/Processors/Sources/RemoteSource.cpp | 107 ++++++++++++++++++ src/Processors/Sources/RemoteSource.h | 80 +++++++++++++ src/Processors/Sources/SourceWithProgress.cpp | 7 +- src/Processors/Sources/SourceWithProgress.h | 5 + src/Processors/ya.make | 2 + 10 files changed, 247 insertions(+), 4 deletions(-) create mode 100644 src/Processors/Sources/DelayedSource.cpp create mode 100644 src/Processors/Sources/DelayedSource.h create mode 100644 src/Processors/Sources/RemoteSource.cpp create mode 100644 src/Processors/Sources/RemoteSource.h diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index e39a7ccc94b..ce6c46d5a2a 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -61,8 +61,8 @@ public: void cancel(); /// Get totals and extremes if any. - Block getTotals() const { return totals; } - Block getExtremes() const { return extremes; } + Block getTotals() const { return std::move(totals); } + Block getExtremes() const { return std::move(extremes); } /// Set callback for progress. It will be called on Progress packet. void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 92c91a81b8a..5b6109440d5 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB { @@ -673,8 +674,10 @@ void QueryPipeline::initRowsBeforeLimit() { RowsBeforeLimitCounterPtr rows_before_limit_at_least; + /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
std::vector limits; std::vector sources; + std::vector remote_sources; std::unordered_set visited; @@ -705,6 +708,9 @@ void QueryPipeline::initRowsBeforeLimit() if (auto * source = typeid_cast(processor)) sources.emplace_back(source); + + if (auto * source = typeid_cast(processor)) + remote_sources.emplace_back(source); } else if (auto * sorting = typeid_cast(processor)) { @@ -735,7 +741,7 @@ void QueryPipeline::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); @@ -744,6 +750,9 @@ void QueryPipeline::initRowsBeforeLimit() for (auto & source : sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + for (auto & source : remote_sources) + source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } /// If there is a limit, then enable rows_before_limit_at_least diff --git a/src/Processors/RowsBeforeLimitCounter.h b/src/Processors/RowsBeforeLimitCounter.h index 36ea4a557a8..f5eb40ff84a 100644 --- a/src/Processors/RowsBeforeLimitCounter.h +++ b/src/Processors/RowsBeforeLimitCounter.h @@ -15,6 +15,12 @@ public: rows_before_limit.fetch_add(rows, std::memory_order_release); } + void set(uint64_t rows) + { + setAppliedLimit(); + rows_before_limit.store(rows, std::memory_order_release); + } + uint64_t get() const { return rows_before_limit.load(std::memory_order_acquire); } void setAppliedLimit() { has_applied_limit.store(true, std::memory_order_release); } diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp new file mode 100644 index 00000000000..e5931c75489 --- /dev/null +++ b/src/Processors/Sources/DelayedSource.cpp @@ -0,0 +1,6 @@ +#include + +namespace DB +{ + +} diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h new file mode 100644 index 00000000000..28cad6bc816 --- /dev/null +++ b/src/Processors/Sources/DelayedSource.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +class DelayedSource : public IProcessor +{ +public: + using Creator = std::function; + + DelayedSource(Block header, Creator processors_creator); + String getName() const override { return "Delayed"; } + + Status prepare() override; + void work() override; + +private: + Creator creator; +}; + +} diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp new file mode 100644 index 00000000000..090f3743709 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.cpp @@ -0,0 +1,107 @@ +#include +#include +#include +#include + +namespace DB +{ + +RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_) + : SourceWithProgress(executor->getHeader(), false) + , add_aggregation_info(add_aggregation_info_), query_executor(std::move(executor)) +{ + /// Add AggregatedChunkInfo if we expect DataTypeAggregateFunction as a result. + const auto & sample = getPort().getHeader(); + for (auto & type : sample.getDataTypes()) + if (typeid_cast(type.get())) + add_aggregation_info = true; +} + +RemoteSource::~RemoteSource() = default; + +Chunk RemoteSource::generate() +{ + if (!was_query_sent) + { + /// Progress method will be called on Progress packet. + query_executor->setProgressCallback([this](const Progress & value) { progress(value); }); + + /// Get rows_before_limit result for remote query from ProfileInfo packet. 
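initRowsBeforeLimit now attaches a shared counter to remote sources as well, and the counter gains set() in addition to add(): local limit transforms accumulate row counts, while a remote server reports an already-final total via the ProfileInfo packet. A simplified stand-in illustrating the two update paths (not the actual RowsBeforeLimitCounter class, but the same atomics and memory orders):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>
#include <memory>

/// Local sources accumulate with add(); a remote query that has already applied
/// LIMIT reports its absolute total with set().
class RowsBeforeLimit
{
public:
    void add(uint64_t rows) { applied.store(true, std::memory_order_release); value.fetch_add(rows, std::memory_order_release); }
    void set(uint64_t rows) { applied.store(true, std::memory_order_release); value.store(rows, std::memory_order_release); }
    uint64_t get() const { return value.load(std::memory_order_acquire); }
    bool hasAppliedLimit() const { return applied.load(std::memory_order_acquire); }

private:
    std::atomic<uint64_t> value{0};
    std::atomic<bool> applied{false};
};

int main()
{
    auto counter = std::make_shared<RowsBeforeLimit>();
    counter->add(100);    // rows counted by a local LimitTransform
    counter->set(1000);   // absolute value reported by a remote server via ProfileInfo
    assert(counter->get() == 1000 && counter->hasAppliedLimit());
    return 0;
}
```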
+ query_executor->setProfileInfoCallback([this](const BlockStreamProfileInfo & info) + { + if (rows_before_limit && info.hasAppliedLimit()) + rows_before_limit->set(info.getRowsBeforeLimit()); + }); + + query_executor->sendQuery(); + + was_query_sent = true; + } + + auto block = query_executor->read(); + + if (!block) + { + query_executor->finish(); + return {}; + } + + UInt64 num_rows = block.rows(); + Chunk chunk(block.getColumns(), num_rows); + + if (add_aggregation_info) + { + auto info = std::make_shared(); + info->bucket_num = block.info.bucket_num; + info->is_overflows = block.info.is_overflows; + chunk.setChunkInfo(std::move(info)); + } + + return chunk; +} + +void RemoteSource::onCancel() +{ + query_executor->cancel(); +} + + +RemoteTotalsSource::RemoteTotalsSource(Block header) : ISource(std::move(header)) {} +RemoteTotalsSource::~RemoteTotalsSource() = default; + +Chunk RemoteTotalsSource::generate() +{ + /// Check use_count instead of comparing with nullptr just in case. + /// setQueryExecutor() may be called from other thread, but there shouldn't be any race, + /// because totals end extremes are always read after main data. + if (query_executor.use_count()) + { + if (auto block = query_executor->getTotals()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + } + + return {}; +} + + +RemoteExtremesSource::RemoteExtremesSource(Block header) : ISource(std::move(header)) {} +RemoteExtremesSource::~RemoteExtremesSource() = default; + +Chunk RemoteExtremesSource::generate() +{ + if (query_executor.use_count()) + { + if (auto block = query_executor->getExtremes()) + { + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); + } + } + + return {}; +} + +} diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h new file mode 100644 index 00000000000..9cc3ea9c459 --- /dev/null +++ b/src/Processors/Sources/RemoteSource.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class RemoteQueryExecutor; +using RemoteQueryExecutorPtr = std::shared_ptr; + +/// Source from RemoteQueryExecutor. Executes remote query and returns query result chunks. +class RemoteSource : public SourceWithProgress +{ +public: + /// Flag add_aggregation_info tells if AggregatedChunkInfo should be added to result chunk. + /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. + /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. + RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); + ~RemoteSource(); + + String getName() const override { return "Remote"; } + + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + + /// Stop reading from stream if output port is finished. + void onUpdatePorts() override + { + if (getPort().isFinished()) + cancel(); + } + +protected: + Chunk generate() override; + void onCancel() override; + +private: + bool was_query_sent = false; + bool add_aggregation_info = false; + RemoteQueryExecutorPtr query_executor; + RowsBeforeLimitCounterPtr rows_before_limit; +}; + +/// Totals source from RemoteQueryExecutor. 
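RemoteSource defers sending the query until the pipeline first pulls a chunk, and finishes the executor once an empty block signals end of data, so a source that is never read never touches the network. A standalone sketch of that lazy-submission pattern, with plain strings standing in for blocks and lambdas standing in for the RemoteQueryExecutor calls (an assumption for illustration only):

```cpp
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// Sketch of the lazy query submission used in generate(): send on first pull,
/// finish when the read returns nothing.
class LazyRemoteSource
{
public:
    explicit LazyRemoteSource(std::function<std::optional<std::string>()> read_one)
        : read(std::move(read_one)) {}

    std::optional<std::string> generate()
    {
        if (!query_sent)
        {
            std::cout << "sending query\n";          // stands in for query_executor->sendQuery()
            query_sent = true;
        }
        auto block = read();                         // stands in for query_executor->read()
        if (!block)
            std::cout << "finishing executor\n";     // stands in for query_executor->finish()
        return block;
    }

private:
    bool query_sent = false;
    std::function<std::optional<std::string>()> read;
};

int main()
{
    std::vector<std::string> blocks = {"chunk 1", "chunk 2"};
    size_t pos = 0;
    LazyRemoteSource source([&]() -> std::optional<std::string>
    {
        if (pos < blocks.size())
            return blocks[pos++];
        return std::nullopt;
    });
    while (auto chunk = source.generate())
        std::cout << *chunk << '\n';
    return 0;
}
```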
+class RemoteTotalsSource : public ISource +{ +public: + explicit RemoteTotalsSource(Block header); + ~RemoteTotalsSource(); + + String getName() const override { return "RemoteTotals"; } + + void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +/// Extremes source from RemoteQueryExecutor. +class RemoteExtremesSource : public ISource +{ +public: + explicit RemoteExtremesSource(Block header); + ~RemoteExtremesSource(); + + String getName() const override { return "RemoteExtremes"; } + + void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } + +protected: + Chunk generate() override; + +private: + RemoteQueryExecutorPtr query_executor; +}; + +} diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 8d7a0a3d946..6488289d5ce 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -12,6 +12,11 @@ namespace ErrorCodes extern const int TOO_MANY_BYTES; } +SourceWithProgress::SourceWithProgress(Block header, bool enable_auto_progress) + : ISourceWithProgress(header), auto_progress(enable_auto_progress) +{ +} + void SourceWithProgress::work() { if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode)) @@ -24,7 +29,7 @@ void SourceWithProgress::work() ISourceWithProgress::work(); - if (!was_progress_called && has_input) + if (auto_progress && !was_progress_called && has_input) progress({ current_chunk.chunk.getNumRows(), current_chunk.chunk.bytes() }); } } diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index 4778c50e49d..34810045143 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -44,6 +44,8 @@ class SourceWithProgress : public ISourceWithProgress { public: using ISourceWithProgress::ISourceWithProgress; + /// If enable_auto_progress flag is set, progress() will be automatically called on each generated chunk. + SourceWithProgress(Block header, bool enable_auto_progress); using LocalLimits = IBlockInputStream::LocalLimits; using LimitsMode = IBlockInputStream::LimitsMode; @@ -76,6 +78,9 @@ private: /// This flag checks if progress() was manually called at generate() call. /// If not, it will be called for chunk after generate() was finished. bool was_progress_called = false; + + /// If enabled, progress() will be automatically called on each generated chunk. 
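SourceWithProgress gains an opt-out flag: progress() is reported automatically per generated chunk unless the source disables it, which RemoteSource does because progress already arrives in Progress packets from the remote server and would otherwise be counted twice. A tiny sketch of that guard, assuming simplified names for illustration:

```cpp
#include <cstdint>
#include <iostream>

struct ChunkStats { uint64_t rows = 0; uint64_t bytes = 0; };

/// Mirrors the decision taken after generate(): report automatic progress only
/// when the source did not opt out and did not already report it manually.
void afterGenerate(bool auto_progress, bool was_progress_called, bool has_input, const ChunkStats & chunk)
{
    if (auto_progress && !was_progress_called && has_input)
        std::cout << "progress: " << chunk.rows << " rows, " << chunk.bytes << " bytes\n";
}

int main()
{
    afterGenerate(/*auto_progress=*/ true,  /*was_progress_called=*/ false, /*has_input=*/ true, {8192, 65536});
    afterGenerate(/*auto_progress=*/ false, /*was_progress_called=*/ false, /*has_input=*/ true, {8192, 65536});  // RemoteSource case
    return 0;
}
```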
+ bool auto_progress = true; }; } diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 62320f1c147..af5ebbcf3e9 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -106,9 +106,11 @@ SRCS( Port.cpp QueryPipeline.cpp ResizeProcessor.cpp + Sources/DelayedSource.cpp Sources/SinkToOutputStream.cpp Sources/SourceFromInputStream.cpp Sources/SourceWithProgress.cpp + Sources/RemoteSource.cpp Transforms/AddingMissedTransform.cpp Transforms/AddingSelectorTransform.cpp Transforms/AggregatingTransform.cpp From e67837bc4ae82dcb5d7c23067f952d24d60252fb Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 3 Jun 2020 18:07:37 +0300 Subject: [PATCH 16/38] cleenup --- .gitmodules | 6 +- CMakeLists.txt | 1 + cmake/find/cassandra.cmake | 12 +- contrib/CMakeLists.txt | 6 +- contrib/cassandra | 2 +- contrib/cassandra-cmake/CMakeLists.txt | 0 contrib/libuv | 2 +- contrib/libuv-cmake/CMakeLists.txt | 441 ------------------ .../compose}/docker_compose_cassandra.yml | 0 .../external-dicts-dict-sources.md | 28 +- .../CassandraBlockInputStream.cpp | 390 +++++++++------- src/Dictionaries/CassandraBlockInputStream.h | 43 +- .../CassandraDictionarySource.cpp | 86 ++-- src/Dictionaries/CassandraDictionarySource.h | 13 +- src/Dictionaries/CassandraHelpers.cpp | 2 +- src/Dictionaries/CassandraHelpers.h | 10 +- src/Dictionaries/ExternalQueryBuilder.cpp | 10 +- src/Dictionaries/registerDictionaries.h | 1 - tests/integration/helpers/cluster.py | 4 +- .../external_sources.py | 1 + .../test.py | 5 +- 21 files changed, 381 insertions(+), 682 deletions(-) delete mode 100644 contrib/cassandra-cmake/CMakeLists.txt delete mode 100644 contrib/libuv-cmake/CMakeLists.txt rename {tests/integration/helpers => docker/test/integration/compose}/docker_compose_cassandra.yml (100%) diff --git a/.gitmodules b/.gitmodules index a4c84301fc9..c05da0c9ff9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -159,11 +159,11 @@ url = https://github.com/openldap/openldap.git [submodule "contrib/cassandra"] path = contrib/cassandra - url = https://github.com/tavplubix/cpp-driver.git - branch = ch-tmp + url = https://github.com/ClickHouse-Extras/cpp-driver.git + branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/tavplubix/libuv.git + url = https://github.com/ClickHouse-Extras/libuv.git branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib diff --git a/CMakeLists.txt b/CMakeLists.txt index 54a88404579..27005bd8d87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -328,6 +328,7 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include (GNUInstallDirs) include (cmake/contrib_finder.cmake) include (cmake/lib_name.cmake) + find_contrib_lib(double-conversion) # Must be before parquet include (cmake/find/ssl.cmake) include (cmake/find/ldap.cmake) # after ssl diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake index b1d76702cfa..f41e0f645f4 100644 --- a/cmake/find/cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -1,8 +1,10 @@ -if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) +option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES}) + +if (ENABLE_CASSANDRA) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/libuv is missing. 
to fix try run: \n git submodule update --init --recursive") elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message (WARNING "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") else() set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv") set (CASSANDRA_INCLUDE_DIR @@ -17,6 +19,8 @@ if (NOT DEFINED ENABLE_CASSANDRA OR ENABLE_CASSANDRA) set (USE_CASSANDRA 1) set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message(STATUS "Using cassandra: ${CASSANDRA_LIBRARY}") endif() endif() + +message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}") +message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0a907f01573..99b94d04473 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -291,7 +291,7 @@ if (USE_INTERNAL_AWS_S3_LIBRARY) endif () if (USE_BASE64) - add_subdirectory(base64-cmake) + add_subdirectory (base64-cmake) endif() if (USE_INTERNAL_HYPERSCAN_LIBRARY) @@ -315,8 +315,8 @@ if (USE_FASTOPS) endif() if (USE_CASSANDRA) - add_subdirectory(libuv-cmake) - add_subdirectory(cassandra) + add_subdirectory (libuv) + add_subdirectory (cassandra) endif() add_subdirectory (fmtlib-cmake) diff --git a/contrib/cassandra b/contrib/cassandra index 58a71947d9d..a49b4e0e269 160000 --- a/contrib/cassandra +++ b/contrib/cassandra @@ -1 +1 @@ -Subproject commit 58a71947d9dd8412f5aeb38275fa81417ea27ee0 +Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509 diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/contrib/libuv b/contrib/libuv index 379988fef9b..84438304f41 160000 --- a/contrib/libuv +++ b/contrib/libuv @@ -1 +1 @@ -Subproject commit 379988fef9b0c6ac706a624dbac6be8924a3a0da +Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28 diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt deleted file mode 100644 index 65aeabdd9cb..00000000000 --- a/contrib/libuv-cmake/CMakeLists.txt +++ /dev/null @@ -1,441 +0,0 @@ -cmake_minimum_required(VERSION 3.4) -project(libuv LANGUAGES C) - -include(CMakePackageConfigHelpers) -include(CMakeDependentOption) -include(GNUInstallDirs) -include(CTest) - -#cmake_dependent_option(LIBUV_BUILD_TESTS -# "Build the unit tests when BUILD_TESTING is enabled and we are the root project" ON -# "BUILD_TESTING;CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR" OFF) - -if(MSVC) - list(APPEND uv_cflags /W4) -elseif(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU") - list(APPEND uv_cflags -fvisibility=hidden --std=gnu89) - list(APPEND uv_cflags -Wall -Wextra -Wstrict-prototypes) - list(APPEND uv_cflags -Wno-unused-parameter) -endif() - -set(uv_sources - src/fs-poll.c - src/idna.c - src/inet.c - src/random.c - src/strscpy.c - src/threadpool.c - src/timer.c - src/uv-common.c - src/uv-data-getter-setters.c - src/version.c) - -set(uv_test_sources - test/blackhole-server.c - test/echo-server.c - test/run-tests.c - test/runner.c - test/test-active.c - test/test-async-null-cb.c - test/test-async.c - test/test-barrier.c - test/test-callback-order.c - test/test-callback-stack.c - test/test-close-fd.c - test/test-close-order.c - test/test-condvar.c - test/test-connect-unspecified.c - test/test-connection-fail.c - 
test/test-cwd-and-chdir.c - test/test-default-loop-close.c - test/test-delayed-accept.c - test/test-dlerror.c - test/test-eintr-handling.c - test/test-embed.c - test/test-emfile.c - test/test-env-vars.c - test/test-error.c - test/test-fail-always.c - test/test-fork.c - test/test-fs-copyfile.c - test/test-fs-event.c - test/test-fs-poll.c - test/test-fs.c - test/test-fs-readdir.c - test/test-fs-fd-hash.c - test/test-fs-open-flags.c - test/test-get-currentexe.c - test/test-get-loadavg.c - test/test-get-memory.c - test/test-get-passwd.c - test/test-getaddrinfo.c - test/test-gethostname.c - test/test-getnameinfo.c - test/test-getsockname.c - test/test-getters-setters.c - test/test-gettimeofday.c - test/test-handle-fileno.c - test/test-homedir.c - test/test-hrtime.c - test/test-idle.c - test/test-idna.c - test/test-ip4-addr.c - test/test-ip6-addr.c - test/test-ipc-heavy-traffic-deadlock-bug.c - test/test-ipc-send-recv.c - test/test-ipc.c - test/test-loop-alive.c - test/test-loop-close.c - test/test-loop-configure.c - test/test-loop-handles.c - test/test-loop-stop.c - test/test-loop-time.c - test/test-multiple-listen.c - test/test-mutexes.c - test/test-osx-select.c - test/test-pass-always.c - test/test-ping-pong.c - test/test-pipe-bind-error.c - test/test-pipe-close-stdout-read-stdin.c - test/test-pipe-connect-error.c - test/test-pipe-connect-multiple.c - test/test-pipe-connect-prepare.c - test/test-pipe-getsockname.c - test/test-pipe-pending-instances.c - test/test-pipe-sendmsg.c - test/test-pipe-server-close.c - test/test-pipe-set-fchmod.c - test/test-pipe-set-non-blocking.c - test/test-platform-output.c - test/test-poll-close-doesnt-corrupt-stack.c - test/test-poll-close.c - test/test-poll-closesocket.c - test/test-poll-oob.c - test/test-poll.c - test/test-process-priority.c - test/test-process-title-threadsafe.c - test/test-process-title.c - test/test-queue-foreach-delete.c - test/test-random.c - test/test-ref.c - test/test-run-nowait.c - test/test-run-once.c - test/test-semaphore.c - test/test-shutdown-close.c - test/test-shutdown-eof.c - test/test-shutdown-twice.c - test/test-signal-multiple-loops.c - test/test-signal-pending-on-close.c - test/test-signal.c - test/test-socket-buffer-size.c - test/test-spawn.c - test/test-stdio-over-pipes.c - test/test-strscpy.c - test/test-tcp-alloc-cb-fail.c - test/test-tcp-bind-error.c - test/test-tcp-bind6-error.c - test/test-tcp-close-accept.c - test/test-tcp-close-while-connecting.c - test/test-tcp-close.c - test/test-tcp-close-reset.c - test/test-tcp-connect-error-after-write.c - test/test-tcp-connect-error.c - test/test-tcp-connect-timeout.c - test/test-tcp-connect6-error.c - test/test-tcp-create-socket-early.c - test/test-tcp-flags.c - test/test-tcp-oob.c - test/test-tcp-open.c - test/test-tcp-read-stop.c - test/test-tcp-shutdown-after-write.c - test/test-tcp-try-write.c - test/test-tcp-try-write-error.c - test/test-tcp-unexpected-read.c - test/test-tcp-write-after-connect.c - test/test-tcp-write-fail.c - test/test-tcp-write-queue-order.c - test/test-tcp-write-to-half-open-connection.c - test/test-tcp-writealot.c - test/test-thread-equal.c - test/test-thread.c - test/test-threadpool-cancel.c - test/test-threadpool.c - test/test-timer-again.c - test/test-timer-from-check.c - test/test-timer.c - test/test-tmpdir.c - test/test-tty-duplicate-key.c - test/test-tty.c - test/test-udp-alloc-cb-fail.c - test/test-udp-bind.c - test/test-udp-connect.c - test/test-udp-create-socket-early.c - test/test-udp-dgram-too-big.c - test/test-udp-ipv6.c - 
test/test-udp-multicast-interface.c - test/test-udp-multicast-interface6.c - test/test-udp-multicast-join.c - test/test-udp-multicast-join6.c - test/test-udp-multicast-ttl.c - test/test-udp-open.c - test/test-udp-options.c - test/test-udp-send-and-recv.c - test/test-udp-send-hang-loop.c - test/test-udp-send-immediate.c - test/test-udp-send-unreachable.c - test/test-udp-try-send.c - test/test-uname.c - test/test-walk-handles.c - test/test-watcher-cross-stop.c) - -#if(WIN32) -# list(APPEND uv_defines WIN32_LEAN_AND_MEAN _WIN32_WINNT=0x0600) -# list(APPEND uv_libraries -# advapi32 -# iphlpapi -# psapi -# shell32 -# user32 -# userenv -# ws2_32) -# list(APPEND uv_sources -# src/win/async.c -# src/win/core.c -# src/win/detect-wakeup.c -# src/win/dl.c -# src/win/error.c -# src/win/fs.c -# src/win/fs-event.c -# src/win/getaddrinfo.c -# src/win/getnameinfo.c -# src/win/handle.c -# src/win/loop-watcher.c -# src/win/pipe.c -# src/win/thread.c -# src/win/poll.c -# src/win/process.c -# src/win/process-stdio.c -# src/win/signal.c -# src/win/snprintf.c -# src/win/stream.c -# src/win/tcp.c -# src/win/tty.c -# src/win/udp.c -# src/win/util.c -# src/win/winapi.c -# src/win/winsock.c) -# list(APPEND uv_test_libraries ws2_32) -# list(APPEND uv_test_sources src/win/snprintf.c test/runner-win.c) -#else() - -if(CMAKE_SIZEOF_VOID_P EQUAL 4) - list(APPEND uv_defines _FILE_OFFSET_BITS=64 _LARGEFILE_SOURCE) -endif() - -if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android") - # Android has pthread as part of its c library, not as a separate - # libpthread.so. - list(APPEND uv_libraries pthread) -endif() -list(APPEND uv_sources - src/unix/async.c - src/unix/core.c - src/unix/dl.c - src/unix/fs.c - src/unix/getaddrinfo.c - src/unix/getnameinfo.c - src/unix/loop-watcher.c - src/unix/loop.c - src/unix/pipe.c - src/unix/poll.c - src/unix/process.c - src/unix/random-devurandom.c - src/unix/signal.c - src/unix/stream.c - src/unix/tcp.c - src/unix/thread.c - src/unix/tty.c - src/unix/udp.c) -list(APPEND uv_test_sources test/runner-unix.c) -#endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "AIX") - list(APPEND uv_defines - _ALL_SOURCE - _LINUX_SOURCE_COMPAT - _THREAD_SAFE - _XOPEN_SOURCE=500) - list(APPEND uv_libraries perfstat) - list(APPEND uv_sources src/unix/aix.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "Android") - list(APPEND uv_libs dl) - list(APPEND uv_sources - src/unix/android-ifaddrs.c - src/unix/linux-core.c - src/unix/linux-inotify.c - src/unix/linux-syscalls.c - src/unix/procfs-exepath.c - src/unix/pthread-fixes.c - src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c - src/unix/sysinfo-loadavg.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "Android|Linux|OS/390") - list(APPEND uv_sources src/unix/proctitle.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD") - list(APPEND uv_sources src/unix/freebsd.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") - list(APPEND uv_sources src/unix/posix-hrtime.c src/unix/bsd-proctitle.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|NetBSD|OpenBSD") - list(APPEND uv_sources src/unix/bsd-ifaddrs.c src/unix/kqueue.c) -endif() - -if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") - list(APPEND uv_sources src/unix/random-getrandom.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") - list(APPEND uv_sources src/unix/random-getentropy.c) -endif() - -if(APPLE) - list(APPEND uv_defines _DARWIN_UNLIMITED_SELECT=1 _DARWIN_USE_64_BIT_INODE=1) - list(APPEND uv_sources - src/unix/darwin-proctitle.c - src/unix/darwin.c - 
src/unix/fsevents.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "Linux") - list(APPEND uv_defines _GNU_SOURCE _POSIX_C_SOURCE=200112) - list(APPEND uv_libraries dl rt) - list(APPEND uv_sources - src/unix/linux-core.c - src/unix/linux-inotify.c - src/unix/linux-syscalls.c - src/unix/procfs-exepath.c - src/unix/random-getrandom.c - src/unix/random-sysctl-linux.c - src/unix/sysinfo-loadavg.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") - list(APPEND uv_sources src/unix/netbsd.c) - list(APPEND uv_libraries kvm) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") - list(APPEND uv_sources src/unix/openbsd.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "OS/390") - list(APPEND uv_defines PATH_MAX=255) - list(APPEND uv_defines _AE_BIMODAL) - list(APPEND uv_defines _ALL_SOURCE) - list(APPEND uv_defines _LARGE_TIME_API) - list(APPEND uv_defines _OPEN_MSGQ_EXT) - list(APPEND uv_defines _OPEN_SYS_FILE_EXT) - list(APPEND uv_defines _OPEN_SYS_IF_EXT) - list(APPEND uv_defines _OPEN_SYS_SOCK_EXT3) - list(APPEND uv_defines _OPEN_SYS_SOCK_IPV6) - list(APPEND uv_defines _UNIX03_SOURCE) - list(APPEND uv_defines _UNIX03_THREADS) - list(APPEND uv_defines _UNIX03_WITHDRAWN) - list(APPEND uv_defines _XOPEN_SOURCE_EXTENDED) - list(APPEND uv_sources - src/unix/pthread-fixes.c - src/unix/pthread-barrier.c - src/unix/os390.c - src/unix/os390-syscalls.c) -endif() - -if(CMAKE_SYSTEM_NAME STREQUAL "SunOS") - list(APPEND uv_defines __EXTENSIONS__ _XOPEN_SOURCE=500) - list(APPEND uv_libraries kstat nsl sendfile socket) - list(APPEND uv_sources src/unix/no-proctitle.c src/unix/sunos.c) -endif() - -if(APPLE OR CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD|Linux|NetBSD|OpenBSD") - list(APPEND uv_test_libraries util) -endif() - -set(uv_sources_tmp "") -foreach(file ${uv_sources}) - list(APPEND uv_sources_tmp "${LIBUV_ROOT_DIR}/${file}") -endforeach(file) -set(uv_sources "${uv_sources_tmp}") - -list(APPEND uv_defines CLICKHOUSE_GLIBC_COMPATIBILITY) - -add_library(uv SHARED ${uv_sources}) -target_compile_definitions(uv - INTERFACE USING_UV_SHARED=1 - PRIVATE ${uv_defines} BUILDING_UV_SHARED=1) -target_compile_options(uv PRIVATE ${uv_cflags}) -target_include_directories(uv PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) -target_link_libraries(uv ${uv_libraries}) - -add_library(uv_a STATIC ${uv_sources}) -target_compile_definitions(uv_a PRIVATE ${uv_defines}) -target_compile_options(uv_a PRIVATE ${uv_cflags}) -target_include_directories(uv_a PUBLIC ${LIBUV_ROOT_DIR}/include PRIVATE ${LIBUV_ROOT_DIR}/src) -target_link_libraries(uv_a ${uv_libraries}) - -#if(LIBUV_BUILD_TESTS) -# add_executable(uv_run_tests ${uv_test_sources}) -# target_compile_definitions(uv_run_tests -# PRIVATE ${uv_defines} USING_UV_SHARED=1) -# target_compile_options(uv_run_tests PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_tests uv ${uv_test_libraries}) -# add_test(NAME uv_test -# COMMAND uv_run_tests -# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -# add_executable(uv_run_tests_a ${uv_test_sources}) -# target_compile_definitions(uv_run_tests_a PRIVATE ${uv_defines}) -# target_compile_options(uv_run_tests_a PRIVATE ${uv_cflags}) -# target_link_libraries(uv_run_tests_a uv_a ${uv_test_libraries}) -# add_test(NAME uv_test_a -# COMMAND uv_run_tests_a -# WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) -#endif() - -if(UNIX) - # Now for some gibbering horrors from beyond the stars... 
- foreach(x ${uv_libraries}) - set(LIBS "${LIBS} -l${x}") - endforeach(x) - file(STRINGS ${LIBUV_ROOT_DIR}/configure.ac configure_ac REGEX ^AC_INIT) - string(REGEX MATCH [0-9]+[.][0-9]+[.][0-9]+ PACKAGE_VERSION "${configure_ac}") - string(REGEX MATCH ^[0-9]+ UV_VERSION_MAJOR "${PACKAGE_VERSION}") - # The version in the filename is mirroring the behaviour of autotools. - set_target_properties(uv PROPERTIES VERSION ${UV_VERSION_MAJOR}.0.0 - SOVERSION ${UV_VERSION_MAJOR}) - set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) - set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) - set(prefix ${CMAKE_INSTALL_PREFIX}) - configure_file(${LIBUV_ROOT_DIR}/libuv.pc.in ${LIBUV_ROOT_DIR}/libuv.pc @ONLY) - - install(DIRECTORY include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) - install(FILES LICENSE DESTINATION ${CMAKE_INSTALL_DOCDIR}) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libuv.pc - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) - install(TARGETS uv LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) - install(TARGETS uv_a ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() - -#if(WIN32) -# install(DIRECTORY include/ DESTINATION include) -# install(FILES LICENSE DESTINATION .) -# install(TARGETS uv uv_a -# RUNTIME DESTINATION lib/$ -# ARCHIVE DESTINATION lib/$) -#endif() diff --git a/tests/integration/helpers/docker_compose_cassandra.yml b/docker/test/integration/compose/docker_compose_cassandra.yml similarity index 100% rename from tests/integration/helpers/docker_compose_cassandra.yml rename to docker/test/integration/compose/docker_compose_cassandra.yml diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 650630701be..71b719ce996 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -633,9 +633,35 @@ Example of settings: localhost - 6349 + 9042 + username + qwerty123 + database_name + table_name + 1 + 1 + One + "SomeColumn" = 42 + 8 ``` +Setting fields: +- `host` – The Cassandra host or comma-separated list of hosts. +- `port` – The port on the Cassandra servers. If not specified, default port is used. +- `user` – Name of the Cassandra user. +- `password` – Password of the Cassandra user. +- `keyspace` – Name of the keyspace (database). +- `column_family` – Name of the column family (table). +- `allow_filering` – Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1. +- `partition_key_prefix` – Number of partition key columns in primary key of the Cassandra table. + Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. + Default value is 1 (the first key column is a partition key and other key columns are clustering key). +- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, + `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`. +- `where` – Optional selection criteria. +- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries. 
+ + [Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index e00fd5ec3e9..8fb9eb3f93c 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -16,189 +16,261 @@ namespace DB { + namespace ErrorCodes { - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; extern const int CASSANDRA_INTERNAL_ERROR; } CassandraBlockInputStream::CassandraBlockInputStream( - const CassClusterPtr & cluster, + const CassSessionShared & session_, const String & query_str, const Block & sample_block, - const size_t max_block_size_) - : statement(query_str.c_str(), /*parameters count*/ 0) + size_t max_block_size_) + : session(session_) + , statement(query_str.c_str(), /*parameters count*/ 0) , max_block_size(max_block_size_) , has_more_pages(cass_true) { description.init(sample_block); cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); - cassandraWaitAndCheck(cass_session_connect(session, cluster)); } -namespace +void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const { - using ValueType = ExternalResultDescription::ValueType; - - void insertValue(IColumn & column, const ValueType type, const CassValue * cass_value) + switch (type) { - /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) - switch (type) + case ValueType::vtUInt8: { - case ValueType::vtUInt8: - { - cass_int8_t value; - cass_value_get_int8(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt16: - { - cass_int16_t value; - cass_value_get_int16(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt32: - { - cass_int32_t value; - cass_value_get_int32(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtUInt64: - { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt8: - { - cass_int8_t value; - cass_value_get_int8(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt16: - { - cass_int16_t value; - cass_value_get_int16(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt32: - { - cass_int32_t value; - cass_value_get_int32(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtInt64: - { - cass_int64_t value; - cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtFloat32: - { - cass_float_t value; - cass_value_get_float(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtFloat64: - { - cass_double_t value; - cass_value_get_double(cass_value, &value); - assert_cast(column).insertValue(value); - break; - } - case ValueType::vtString: - { - const char * value; - size_t value_length; - cass_value_get_string(cass_value, &value, &value_length); - assert_cast(column).insertData(value, value_length); - break; - } - case ValueType::vtDate: - { - cass_uint32_t value; - cass_value_get_uint32(cass_value, &value); - assert_cast(column).insertValue(static_cast(value)); - break; - } - case ValueType::vtDateTime: - { - cass_int64_t value; - 
cass_value_get_int64(cass_value, &value); - assert_cast(column).insertValue(static_cast(value / 1000)); - break; - } - case ValueType::vtUUID: - { - CassUuid value; - cass_value_get_uuid(cass_value, &value); - std::array uuid_str; - cass_uuid_string(value, uuid_str.data()); - assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); - break; - } + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt16: + { + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt32: + { + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtUInt64: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtInt8: + { + cass_int8_t value; + cass_value_get_int8(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt16: + { + cass_int16_t value; + cass_value_get_int16(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt32: + { + cass_int32_t value; + cass_value_get_int32(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtInt64: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtFloat32: + { + cass_float_t value; + cass_value_get_float(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtFloat64: + { + cass_double_t value; + cass_value_get_double(cass_value, &value); + assert_cast(column).insertValue(value); + break; + } + case ValueType::vtString: + { + const char * value = nullptr; + size_t value_length; + cass_value_get_string(cass_value, &value, &value_length); + assert_cast(column).insertData(value, value_length); + break; + } + case ValueType::vtDate: + { + cass_uint32_t value; + cass_value_get_uint32(cass_value, &value); + assert_cast(column).insertValue(static_cast(value)); + break; + } + case ValueType::vtDateTime: + { + cass_int64_t value; + cass_value_get_int64(cass_value, &value); + assert_cast(column).insertValue(static_cast(value / 1000)); + break; + } + case ValueType::vtUUID: + { + CassUuid value; + cass_value_get_uuid(cass_value, &value); + std::array uuid_str; + cass_uuid_string(value, uuid_str.data()); + assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); + break; } } } - Block CassandraBlockInputStream::readImpl() +void CassandraBlockInputStream::readPrefix() +{ + result_future = cass_session_execute(*session, statement); +} + +Block CassandraBlockInputStream::readImpl() +{ + if (!has_more_pages) + return {}; + + MutableColumns columns = description.sample_block.cloneEmptyColumns(); + + cassandraWaitAndCheck(result_future); + CassResultPtr result = cass_future_get_result(result_future); + + assert(cass_result_column_count(result) == columns.size()); + + assertTypes(result); + + has_more_pages = cass_result_has_more_pages(result); + if (has_more_pages) { - if (!has_more_pages) - return {}; - - MutableColumns columns = description.sample_block.cloneEmptyColumns(); - CassFuturePtr query_future = cass_session_execute(session, statement); - - CassResultPtr result = cass_future_get_result(query_future); - - 
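The reworked readImpl requests the next page (by forwarding the paging state and re-executing the statement) before decoding the rows of the current page, overlapping the network round trip with row conversion. A standalone sketch of that prefetching pattern using std::async instead of the driver's futures (an assumption for illustration; the real code uses cass_session_execute and cass_statement_set_paging_state):

```cpp
#include <future>
#include <iostream>
#include <numeric>
#include <vector>

/// Fake page fetch: three pages of data, then an empty page marking the end.
std::future<std::vector<int>> fetchPage(int page, int page_size)
{
    return std::async(std::launch::async, [page, page_size]
    {
        std::vector<int> rows(page < 3 ? page_size : 0);
        std::iota(rows.begin(), rows.end(), page * page_size);
        return rows;
    });
}

int main()
{
    const int page_size = 4;
    int page = 0;
    auto pending = fetchPage(page, page_size);            // readPrefix(): issue the first request
    while (true)
    {
        auto rows = pending.get();                        // wait for the current page
        bool has_more_pages = !rows.empty();
        if (has_more_pages)
            pending = fetchPage(++page, page_size);       // request the next page before decoding
        for (int row : rows)                              // decode the current page
            std::cout << row << ' ';
        std::cout << '\n';
        if (!has_more_pages)
            break;
    }
    return 0;
}
```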
if (!result) { - const char* error_message; - size_t error_message_length; - cass_future_error_message(query_future, &error_message, &error_message_length); - - throw Exception{error_message, ErrorCodes::CASSANDRA_INTERNAL_ERROR}; - } - - [[maybe_unused]] size_t row_count = 0; - assert(cass_result_column_count(result) == columns.size()); - CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] - while (cass_iterator_next(rows_iter)) - { - const CassRow * row = cass_iterator_get_row(rows_iter); - for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) - { - const CassValue * val = cass_row_get_column(row, col_idx); - if (cass_value_is_null(val)) - columns[col_idx]->insertDefault(); - else if (description.types[col_idx].second) - { - ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); - insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); - column_nullable.getNullMapData().emplace_back(0); - } - else - insertValue(*columns[col_idx], description.types[col_idx].first, val); - } - ++row_count; - } - assert(cass_result_row_count(result) == row_count); - - has_more_pages = cass_result_has_more_pages(result); - - if (has_more_pages) - cassandraCheck(cass_statement_set_paging_state(statement, result)); - - return description.sample_block.cloneWithColumns(std::move(columns)); + cassandraCheck(cass_statement_set_paging_state(statement, result)); + result_future = cass_session_execute(*session, statement); } + CassIteratorPtr rows_iter = cass_iterator_from_result(result); /// Points to rows[-1] + while (cass_iterator_next(rows_iter)) + { + const CassRow * row = cass_iterator_get_row(rows_iter); + for (size_t col_idx = 0; col_idx < columns.size(); ++col_idx) + { + const CassValue * val = cass_row_get_column(row, col_idx); + if (cass_value_is_null(val)) + columns[col_idx]->insertDefault(); + else if (description.types[col_idx].second) + { + ColumnNullable & column_nullable = assert_cast(*columns[col_idx]); + insertValue(column_nullable.getNestedColumn(), description.types[col_idx].first, val); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[col_idx], description.types[col_idx].first, val); + } + } + + assert(cass_result_row_count(result) == columns.front()->size()); + + return description.sample_block.cloneWithColumns(std::move(columns)); +} + +void CassandraBlockInputStream::assertTypes(const CassResultPtr & result) +{ + if (!assert_types) + return; + + size_t column_count = cass_result_column_count(result); + for (size_t i = 0; i < column_count; ++i) + { + CassValueType expected; + String expected_text; + + /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) + switch (description.types[i].first) + { + case ExternalResultDescription::ValueType::vtInt8: + case ExternalResultDescription::ValueType::vtUInt8: + expected = CASS_VALUE_TYPE_TINY_INT; + expected_text = "tinyint"; + break; + case ExternalResultDescription::ValueType::vtInt16: + case ExternalResultDescription::ValueType::vtUInt16: + expected = CASS_VALUE_TYPE_SMALL_INT; + expected_text = "smallint"; + break; + case ExternalResultDescription::ValueType::vtUInt32: + case ExternalResultDescription::ValueType::vtInt32: + expected = CASS_VALUE_TYPE_INT; + expected_text = "int"; + break; + case ExternalResultDescription::ValueType::vtInt64: + case ExternalResultDescription::ValueType::vtUInt64: + expected = CASS_VALUE_TYPE_BIGINT; + expected_text = "bigint"; + break; + case 
ExternalResultDescription::ValueType::vtFloat32: + expected = CASS_VALUE_TYPE_FLOAT; + expected_text = "float"; + break; + case ExternalResultDescription::ValueType::vtFloat64: + expected = CASS_VALUE_TYPE_DOUBLE; + expected_text = "double"; + break; + case ExternalResultDescription::ValueType::vtString: + expected = CASS_VALUE_TYPE_TEXT; + expected_text = "text, ascii or varchar"; + break; + case ExternalResultDescription::ValueType::vtDate: + expected = CASS_VALUE_TYPE_DATE; + expected_text = "date"; + break; + case ExternalResultDescription::ValueType::vtDateTime: + expected = CASS_VALUE_TYPE_TIMESTAMP; + expected_text = "timestamp"; + break; + case ExternalResultDescription::ValueType::vtUUID: + expected = CASS_VALUE_TYPE_UUID; + expected_text = "uuid"; + break; + } + + CassValueType got = cass_result_column_type(result, i); + + if (got != expected) + { + if (expected == CASS_VALUE_TYPE_TEXT && (got == CASS_VALUE_TYPE_ASCII || got == CASS_VALUE_TYPE_VARCHAR)) + continue; + + const auto & column_name = description.sample_block.getColumnsWithTypeAndName()[i].name; + throw Exception("Type mismatch for column " + column_name + ": expected Cassandra type " + expected_text, + ErrorCodes::TYPE_MISMATCH); + } + } + + assert_types = false; +} + } #endif diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 700211ebb3e..5208e516a0e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -9,30 +9,35 @@ namespace DB { +class CassandraBlockInputStream final : public IBlockInputStream +{ +public: + CassandraBlockInputStream( + const CassSessionShared & session_, + const String & query_str, + const Block & sample_block, + size_t max_block_size); + String getName() const override { return "Cassandra"; } -/// Allows processing results of a Cassandra query as a sequence of Blocks, simplifies chaining - class CassandraBlockInputStream final : public IBlockInputStream - { - public: - CassandraBlockInputStream( - const CassClusterPtr & cluster, - const String & query_str, - const Block & sample_block, - const size_t max_block_size); + Block getHeader() const override { return description.sample_block.cloneEmpty(); } - String getName() const override { return "Cassandra"; } + void readPrefix() override; - Block getHeader() const override { return description.sample_block.cloneEmpty(); } +private: + using ValueType = ExternalResultDescription::ValueType; - private: - Block readImpl() override; + Block readImpl() override; + void insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const; + void assertTypes(const CassResultPtr & result); - CassSessionPtr session; - CassStatementPtr statement; - const size_t max_block_size; - ExternalResultDescription description; - cass_bool_t has_more_pages; - }; + CassSessionShared session; + CassStatementPtr statement; + CassFuturePtr result_future; + const size_t max_block_size; + ExternalResultDescription description; + cass_bool_t has_more_pages; + bool assert_types = true; +}; } diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index fec60fe3d83..c41f528db91 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -1,37 +1,35 @@ #include "CassandraDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include -#include -#include -#include namespace DB { - namespace ErrorCodes - { - 
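assertTypes performs a one-time schema check: each ClickHouse column type is expected to map onto one Cassandra type (text also accepting ascii and varchar), and a mismatch raises TYPE_MISMATCH. A minimal standalone sketch of that mapping and check, with plain strings standing in for the ValueType and CassValueType enums (an assumption for illustration):

```cpp
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

/// Cassandra has no unsigned integers, so unsigned ClickHouse types map onto the
/// signed Cassandra type of the same width.
const std::map<std::string, std::vector<std::string>> expected_cassandra_types =
{
    {"UInt8/Int8",   {"tinyint"}},
    {"UInt16/Int16", {"smallint"}},
    {"UInt32/Int32", {"int"}},
    {"UInt64/Int64", {"bigint"}},
    {"Float32",      {"float"}},
    {"Float64",      {"double"}},
    {"String",       {"text", "ascii", "varchar"}},
    {"Date",         {"date"}},
    {"DateTime",     {"timestamp"}},
    {"UUID",         {"uuid"}},
};

void assertColumnType(const std::string & clickhouse_type, const std::string & cassandra_type)
{
    const auto & allowed = expected_cassandra_types.at(clickhouse_type);
    for (const auto & type : allowed)
        if (type == cassandra_type)
            return;
    throw std::runtime_error("Type mismatch for " + clickhouse_type + ": got Cassandra type " + cassandra_type);
}

int main()
{
    assertColumnType("String", "varchar");                 // accepted alias of text
    try { assertColumnType("UInt64/Int64", "int"); }
    catch (const std::exception &) { /* expected: bigint is required for 64-bit integers */ }
    return 0;
}
```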
extern const int SUPPORT_IS_DISABLED; - } - void registerDictionarySourceCassandra(DictionarySourceFactory & factory) +namespace ErrorCodes +{ + extern const int SUPPORT_IS_DISABLED; + extern const int NOT_IMPLEMENTED; +} + +void registerDictionarySourceCassandra(DictionarySourceFactory & factory) +{ + auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct, + [[maybe_unused]] const Poco::Util::AbstractConfiguration & config, + [[maybe_unused]] const std::string & config_prefix, + [[maybe_unused]] Block & sample_block, + const Context & /* context */, + bool /*check_config*/) -> DictionarySourcePtr { - auto create_table_source = [=]([[maybe_unused]] const DictionaryStructure & dict_struct, - [[maybe_unused]] const Poco::Util::AbstractConfiguration & config, - [[maybe_unused]] const std::string & config_prefix, - [[maybe_unused]] Block & sample_block, - const Context & /* context */, - bool /*check_config*/) -> DictionarySourcePtr - { #if USE_CASSANDRA - setupCassandraDriverLibraryLogging(CASS_LOG_TRACE); - return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); + setupCassandraDriverLibraryLogging(CASS_LOG_INFO); + return std::make_unique(dict_struct, config, config_prefix + ".cassandra", sample_block); #else - throw Exception{"Dictionary source of type `cassandra` is disabled because library was built without cassandra support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception{"Dictionary source of type `cassandra` is disabled because ClickHouse was built without cassandra support.", + ErrorCodes::SUPPORT_IS_DISABLED}; #endif - }; - factory.registerSource("cassandra", create_table_source); - } + }; + factory.registerSource("cassandra", create_table_source); +} } @@ -39,8 +37,9 @@ namespace DB #include #include -#include #include "CassandraBlockInputStream.h" +#include +#include namespace DB { @@ -57,7 +56,7 @@ CassandraSettings::CassandraSettings( , port(config.getUInt(config_prefix + ".port", 0)) , user(config.getString(config_prefix + ".user", "")) , password(config.getString(config_prefix + ".password", "")) - , db(config.getString(config_prefix + ".keyspace", "")) + , db(config.getString(config_prefix + ".keyspace")) , table(config.getString(config_prefix + ".column_family")) , allow_filtering(config.getBool(config_prefix + ".allow_filtering", false)) , partition_key_prefix(config.getUInt(config_prefix + ".partition_key_prefix", 1)) @@ -124,7 +123,7 @@ CassandraDictionarySource::CassandraDictionarySource( { } -void CassandraDictionarySource::maybeAllowFiltering(String & query) +void CassandraDictionarySource::maybeAllowFiltering(String & query) const { if (!settings.allow_filtering) return; @@ -137,10 +136,11 @@ BlockInputStreamPtr CassandraDictionarySource::loadAll() String query = query_builder.composeLoadAllQuery(); maybeAllowFiltering(query); LOG_INFO(log, "Loading all using query: {}", query); - return std::make_shared(cluster, query, sample_block, max_block_size); + return std::make_shared(getSession(), query, sample_block, max_block_size); } -std::string CassandraDictionarySource::toString() const { +std::string CassandraDictionarySource::toString() const +{ return "Cassandra: " + settings.db + '.' 
+ settings.table; } @@ -149,7 +149,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadIds(const std::vector String query = query_builder.composeLoadIdsQuery(ids); maybeAllowFiltering(query); LOG_INFO(log, "Loading ids using query: {}", query); - return std::make_shared(cluster, query, sample_block, max_block_size); + return std::make_shared(getSession(), query, sample_block, max_block_size); } BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) @@ -162,7 +162,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu for (const auto & row : requested_rows) { SipHash partition_key; - for (const auto i : ext::range(0, settings.partition_key_prefix)) + for (size_t i = 0; i < settings.partition_key_prefix; ++i) key_columns[i]->updateHashWithValue(row, partition_key); partitions[partition_key.get64()].push_back(row); } @@ -173,7 +173,7 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu String query = query_builder.composeLoadKeysQuery(key_columns, partition.second, ExternalQueryBuilder::CASSANDRA_SEPARATE_PARTITION_KEY, settings.partition_key_prefix); maybeAllowFiltering(query); LOG_INFO(log, "Loading keys for partition hash {} using query: {}", partition.first, query); - streams.push_back(std::make_shared(cluster, query, sample_block, max_block_size)); + streams.push_back(std::make_shared(getSession(), query, sample_block, max_block_size)); } if (streams.size() == 1) @@ -182,6 +182,30 @@ BlockInputStreamPtr CassandraDictionarySource::loadKeys(const Columns & key_colu return std::make_shared(streams, nullptr, settings.max_threads); } +BlockInputStreamPtr CassandraDictionarySource::loadUpdatedAll() +{ + throw Exception("Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED); +} + +CassSessionShared CassandraDictionarySource::getSession() +{ + /// Reuse connection if exists, create new one if not + auto session = maybe_session.lock(); + if (session) + return session; + + std::lock_guard lock(connect_mutex); + session = maybe_session.lock(); + if (session) + return session; + + session = std::make_shared(); + CassFuturePtr future = cass_session_connect(*session, cluster); + cassandraWaitAndCheck(future); + maybe_session = session; + return session; +} + } #endif diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index dff93fcd029..18db66b94c2 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -34,7 +34,8 @@ struct CassandraSettings void setConsistency(const String & config_str); }; -class CassandraDictionarySource final : public IDictionarySource { +class CassandraDictionarySource final : public IDictionarySource +{ public: CassandraDictionarySource( const DictionaryStructure & dict_struct, @@ -64,15 +65,13 @@ public: BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; - BlockInputStreamPtr loadUpdatedAll() override - { - throw Exception{"Method loadUpdatedAll is unsupported for CassandraDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; - } + BlockInputStreamPtr loadUpdatedAll() override; String toString() const override; private: - void maybeAllowFiltering(String & query); + void maybeAllowFiltering(String & query) const; + CassSessionShared getSession(); Poco::Logger * log; const DictionaryStructure dict_struct; @@ -80,7 +79,9 @@ private: Block sample_block; 
ExternalQueryBuilder query_builder; + std::mutex connect_mutex; CassClusterPtr cluster; + CassSessionWeak maybe_session; }; } diff --git a/src/Dictionaries/CassandraHelpers.cpp b/src/Dictionaries/CassandraHelpers.cpp index 4f92a75a1f3..6de80a455c7 100644 --- a/src/Dictionaries/CassandraHelpers.cpp +++ b/src/Dictionaries/CassandraHelpers.cpp @@ -21,7 +21,7 @@ void cassandraCheck(CassError code) } -void cassandraWaitAndCheck(CassFuturePtr && future) +void cassandraWaitAndCheck(CassFuturePtr & future) { auto code = cass_future_error_code(future); /// Waits if not ready if (code == CASS_OK) diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 2a91815e37d..70b38acf15c 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -7,6 +7,7 @@ #if USE_CASSANDRA #include #include +#include namespace DB { @@ -37,6 +38,7 @@ public: Dtor(ptr); ptr = rhs.ptr; rhs.ptr = nullptr; + return *this; } ~ObjectHolder() @@ -54,8 +56,12 @@ public: /// These object are created on pointer construction using CassClusterPtr = Cassandra::ObjectHolder; -using CassSessionPtr = Cassandra::ObjectHolder; using CassStatementPtr = Cassandra::ObjectHolder; +using CassSessionPtr = Cassandra::ObjectHolder; + +/// Share connections between streams. Executing statements in one session object is thread-safe +using CassSessionShared = std::shared_ptr; +using CassSessionWeak = std::weak_ptr; /// The following objects are created inside Cassandra driver library, /// but must be freed by user code @@ -65,7 +71,7 @@ using CassIteratorPtr = Cassandra::ObjectHoldertest {table} 1 + "Int64_" < 1000000000000000000 '''.format( host=self.docker_hostname, diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 98ba191c948..8cd6940d587 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -3,12 +3,10 @@ import os from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout - from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed -from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra from external_sources import SourceMongo, SourceMongoURI, SourceHTTP, SourceHTTPS, SourceRedis, SourceCassandra import math -import time + SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') @@ -212,7 +210,6 @@ def get_dictionaries(fold, total_folds, all_dicts): return all_dicts[fold * chunk_len : (fold + 1) * chunk_len] -#@pytest.mark.timeout(3000) @pytest.mark.parametrize("fold", list(range(10))) def test_simple_dictionaries(started_cluster, fold): fields = FIELDS["simple"] From 64c013610a852211148e5a9ef1694a7fa13c7a28 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jun 2020 01:03:49 +0300 Subject: [PATCH 17/38] fix --- src/Dictionaries/CassandraBlockInputStream.cpp | 2 -- src/Dictionaries/CassandraBlockInputStream.h | 4 ++++ .../test_dictionaries_all_layouts_and_sources/test.py | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 8fb9eb3f93c..8a14add868e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ 
b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include "CassandraBlockInputStream.h" @@ -20,7 +19,6 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; - extern const int CASSANDRA_INTERNAL_ERROR; } CassandraBlockInputStream::CassandraBlockInputStream( diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 5208e516a0e..667d686fd31 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -1,6 +1,8 @@ #pragma once #include + +#if USE_CASSANDRA #include #include #include @@ -41,3 +43,5 @@ private: }; } + +#endif diff --git a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 8cd6940d587..0a812ea2a8b 100644 --- a/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -225,8 +225,6 @@ def test_simple_dictionaries(started_cluster, fold): node.query("system reload dictionaries") - #time.sleep(3000) - queries_with_answers = [] for dct in simple_dicts: for row in data: From f54f9481621b8d9deb8f36f3333220c3be725347 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 15:17:35 +0300 Subject: [PATCH 18/38] Added DelayedSource. --- .../ClusterProxy/SelectStreamFactory.cpp | 53 +++----- src/Processors/Sources/DelayedSource.cpp | 113 ++++++++++++++++++ src/Processors/Sources/DelayedSource.h | 26 +++- src/Processors/Sources/RemoteSource.cpp | 59 ++++++--- src/Processors/Sources/RemoteSource.h | 14 ++- 5 files changed, 205 insertions(+), 60 deletions(-) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 5d41b0e87ce..bfa6fae0977 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -1,8 +1,6 @@ #include #include #include -#include -#include #include #include #include @@ -13,9 +11,8 @@ #include #include #include -#include -#include -#include +#include +#include namespace ProfileEvents { @@ -118,13 +115,13 @@ void SelectStreamFactory::createForShard( const SelectQueryInfo &, Pipes & res) { - bool force_add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - bool add_totals_port = false; - bool add_extremes_port = false; + bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + bool add_totals = false; + bool add_extremes = false; if (processed_stage == QueryProcessingStage::Complete) { - add_totals_port = query_ast->as().group_by_with_totals; - add_extremes_port = context.getSettingsRef().extremes; + add_totals = query_ast->as().group_by_with_totals; + add_extremes = context.getSettingsRef().extremes; } auto modified_query_ast = query_ast->clone(); @@ -140,20 +137,13 @@ void SelectStreamFactory::createForShard( auto emplace_remote_stream = [&]() { - auto stream = std::make_shared( + auto remote_query_executor = std::make_shared( shard_info.pool, modified_query, header, context, nullptr, throttler, scalars, external_tables, processed_stage); - stream->setPoolMode(PoolMode::GET_MANY); + remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - stream->setMainTable(main_table); + remote_query_executor->setMainTable(main_table); - auto source = std::make_shared(std::move(stream), force_add_agg_info); - - 
if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes)); }; const auto & settings = context.getSettingsRef(); @@ -246,8 +236,8 @@ void SelectStreamFactory::createForShard( auto lazily_create_stream = [ pool = shard_info.pool, shard_num = shard_info.shard_num, modified_query, header = header, modified_query_ast, context, throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, - stage = processed_stage, local_delay]() - -> BlockInputStreamPtr + stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes]() + -> Pipe { auto current_settings = context.getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover( @@ -277,8 +267,7 @@ void SelectStreamFactory::createForShard( } if (try_results.empty() || local_delay < max_remote_delay) - return std::make_shared( - createLocalStream(modified_query_ast, header, context, stage)); + return createLocalStream(modified_query_ast, header, context, stage).getPipe(); else { std::vector connections; @@ -286,20 +275,14 @@ void SelectStreamFactory::createForShard( for (auto & try_result : try_results) connections.emplace_back(std::move(try_result.entry)); - return std::make_shared( + auto remote_query_executor = std::make_shared( std::move(connections), modified_query, header, context, nullptr, throttler, scalars, external_tables, stage); + + return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes); } }; - auto lazy_stream = std::make_shared("LazyShardWithLocalReplica", header, lazily_create_stream); - auto source = std::make_shared(std::move(lazy_stream), force_add_agg_info); - - if (add_totals_port) - source->addTotalsPort(); - if (add_extremes_port) - source->addExtremesPort(); - - res.emplace_back(std::move(source)); + res.emplace_back(createDelayedPipe(header, lazily_create_stream)); } else emplace_remote_stream(); diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index e5931c75489..9f7f26ab141 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -1,6 +1,119 @@ #include +#include "NullSource.h" namespace DB { +DelayedSource::DelayedSource(const Block & header, Creator processors_creator) + : IProcessor({}, OutputPorts(3, header)) + , creator(std::move(processors_creator)) +{ +} + +IProcessor::Status DelayedSource::prepare() +{ + /// At first, wait for main input is needed and expand pipeline. + if (inputs.empty()) + { + auto & first_output = outputs.front(); + + /// If main port was finished before callback was called, stop execution. + if (first_output.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + if (!first_output.isNeeded()) + return Status::PortFull; + + /// Call creator callback to get processors. 
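+        /// (prepare() first returns Ready so that the executor calls work(); work()
+        /// runs the creator and fills `processors`, and the next prepare() call
+        /// returns ExpandPipeline to splice them into the pipeline.)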
+ if (processors.empty()) + return Status::Ready; + + return Status::ExpandPipeline; + } + + /// Process ports in order: main, totals, extremes + auto output = outputs.begin(); + for (auto & input : inputs) + { + if (output->isFinished()) + { + input.close(); + continue; + } + + if (!output->isNeeded()) + return Status::PortFull; + + if (input.isFinished()) + { + output->finish(); + continue; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::PortFull; + + output->pushData(input.pullData(true)); + return Status::PortFull; + } + + return Status::Finished; +} + +void DelayedSource::work() +{ + auto pipe = creator(); + + main_output = &pipe.getPort(); + totals_output = pipe.getTotalsPort(); + extremes_output = pipe.getExtremesPort(); + + processors = std::move(pipe).detachProcessors(); + + if (!totals_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + totals_output = &processors.back()->getOutputs().back(); + } + + if (!extremes_output) + { + processors.emplace_back(std::make_shared(main_output->getHeader())); + extremes_output = &processors.back()->getOutputs().back(); + } +} + +Processors DelayedSource::expandPipeline() +{ + /// Add new inputs. They must have the same header as output. + for (const auto & output : {main_output, totals_output, extremes_output}) + { + inputs.emplace_back(outputs.front().getHeader(), this); + /// Connect checks that header is same for ports. + connect(*output, inputs.back()); + inputs.back().setNeeded(); + } + + /// Executor will check that all processors are connected. + return std::move(processors); +} + +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator) +{ + auto source = std::make_shared(header, std::move(processors_creator)); + + Pipe pipe(&source->getPort(DelayedSource::Main)); + pipe.setTotalsPort(&source->getPort(DelayedSource::Totals)); + pipe.setExtremesPort(&source->getPort(DelayedSource::Extremes)); + + pipe.addProcessors({std::move(source)}); + return pipe; +} + } diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index 28cad6bc816..31ec1e054fe 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -1,23 +1,45 @@ #pragma once #include +#include namespace DB { +/// DelayedSource delays pipeline calculation until it starts execution. +/// It accepts callback which creates a new pipe. +/// +/// First time when DelayedSource's main output port needs data, callback is called. +/// Then, DelayedSource expands pipeline: adds new inputs and connects pipe with it. +/// Then, DelayedSource just move data from inputs to outputs until finished. +/// +/// It main output port of DelayedSource is never needed, callback won't be called. class DelayedSource : public IProcessor { public: - using Creator = std::function; + using Creator = std::function; - DelayedSource(Block header, Creator processors_creator); + DelayedSource(const Block & header, Creator processors_creator); String getName() const override { return "Delayed"; } Status prepare() override; void work() override; + Processors expandPipeline() override; + + enum PortKind { Main = 0, Totals = 1, Extremes = 2 }; + OutputPort & getPort(PortKind kind) { return *std::next(outputs.begin(), kind); } private: Creator creator; + Processors processors; + + /// Outputs from returned pipe. 
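+    /// Filled by work() from the pipe returned by the creator; expandPipeline()
+    /// then connects them to the newly added input ports.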
+ OutputPort * main_output = nullptr; + OutputPort * totals_output = nullptr; + OutputPort * extremes_output = nullptr; }; +/// Creates pipe from DelayedSource. +Pipe createDelayedPipe(const Block & header, DelayedSource::Creator processors_creator); + } diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 090f3743709..2f76e0c87d4 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -66,42 +66,67 @@ void RemoteSource::onCancel() } -RemoteTotalsSource::RemoteTotalsSource(Block header) : ISource(std::move(header)) {} +RemoteTotalsSource::RemoteTotalsSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + RemoteTotalsSource::~RemoteTotalsSource() = default; Chunk RemoteTotalsSource::generate() { - /// Check use_count instead of comparing with nullptr just in case. - /// setQueryExecutor() may be called from other thread, but there shouldn't be any race, - /// because totals end extremes are always read after main data. - if (query_executor.use_count()) + if (auto block = query_executor->getTotals()) { - if (auto block = query_executor->getTotals()) - { - UInt64 num_rows = block.rows(); - return Chunk(block.getColumns(), num_rows); - } + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); } return {}; } -RemoteExtremesSource::RemoteExtremesSource(Block header) : ISource(std::move(header)) {} +RemoteExtremesSource::RemoteExtremesSource(RemoteQueryExecutorPtr executor) + : ISource(executor->getHeader()) + , query_executor(std::move(executor)) +{ +} + RemoteExtremesSource::~RemoteExtremesSource() = default; Chunk RemoteExtremesSource::generate() { - if (query_executor.use_count()) + if (auto block = query_executor->getExtremes()) { - if (auto block = query_executor->getExtremes()) - { - UInt64 num_rows = block.rows(); - return Chunk(block.getColumns(), num_rows); - } + UInt64 num_rows = block.rows(); + return Chunk(block.getColumns(), num_rows); } return {}; } + +Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes) +{ + Pipe pipe(std::make_shared(query_executor, add_aggregation_info)); + + if (add_totals) + { + auto totals_source = std::make_shared(query_executor); + pipe.setTotalsPort(&totals_source->getPort()); + pipe.addProcessors({std::move(totals_source)}); + } + + if (add_extremes) + { + auto extremes_source = std::make_shared(query_executor); + pipe.setExtremesPort(&extremes_source->getPort()); + pipe.addProcessors({std::move(extremes_source)}); + } + + return pipe; +} + } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 9cc3ea9c459..85ac1d756c8 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -45,13 +46,11 @@ private: class RemoteTotalsSource : public ISource { public: - explicit RemoteTotalsSource(Block header); + explicit RemoteTotalsSource(RemoteQueryExecutorPtr executor); ~RemoteTotalsSource(); String getName() const override { return "RemoteTotals"; } - void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } - protected: Chunk generate() override; @@ -63,13 +62,11 @@ private: class RemoteExtremesSource : public ISource { public: - explicit RemoteExtremesSource(Block header); + explicit RemoteExtremesSource(RemoteQueryExecutorPtr 
executor); ~RemoteExtremesSource(); String getName() const override { return "RemoteExtremes"; } - void setQueryExecutor(RemoteQueryExecutorPtr executor) { query_executor.swap(executor); } - protected: Chunk generate() override; @@ -77,4 +74,9 @@ private: RemoteQueryExecutorPtr query_executor; }; +/// Create pipe with remote sources. +Pipe createRemoteSourcePipe( + RemoteQueryExecutorPtr query_executor, + bool add_aggregation_info, bool add_totals, bool add_extremes); + } From 4dae169216c7721739bf843780d777af4c8bae16 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jun 2020 16:03:06 +0300 Subject: [PATCH 19/38] fix gcc warnings --- src/Dictionaries/CassandraBlockInputStream.cpp | 4 ++-- src/Dictionaries/CassandraBlockInputStream.h | 2 +- src/Dictionaries/CassandraDictionarySource.h | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp b/src/Dictionaries/CassandraBlockInputStream.cpp index 8a14add868e..4f6a62a0eea 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -35,7 +35,7 @@ CassandraBlockInputStream::CassandraBlockInputStream( cassandraCheck(cass_statement_set_paging_size(statement, max_block_size)); } -void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const +void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, const CassValue * cass_value) { switch (type) { @@ -202,7 +202,7 @@ void CassandraBlockInputStream::assertTypes(const CassResultPtr & result) size_t column_count = cass_result_column_count(result); for (size_t i = 0; i < column_count; ++i) { - CassValueType expected; + CassValueType expected = CASS_VALUE_TYPE_UNKNOWN; String expected_text; /// Cassandra does not support unsigned integers (cass_uint32_t is for Date) diff --git a/src/Dictionaries/CassandraBlockInputStream.h b/src/Dictionaries/CassandraBlockInputStream.h index 667d686fd31..3b0e583e3ad 100644 --- a/src/Dictionaries/CassandraBlockInputStream.h +++ b/src/Dictionaries/CassandraBlockInputStream.h @@ -30,7 +30,7 @@ private: using ValueType = ExternalResultDescription::ValueType; Block readImpl() override; - void insertValue(IColumn & column, ValueType type, const CassValue * cass_value) const; + static void insertValue(IColumn & column, ValueType type, const CassValue * cass_value); void assertTypes(const CassResultPtr & result); CassSessionShared session; diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index 18db66b94c2..c0a4e774d23 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -9,6 +9,7 @@ #include "ExternalQueryBuilder.h" #include #include +#include namespace DB { From dcf9b9ef39f6106c69feb69d76325e4e9168eb6b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:08:09 +0300 Subject: [PATCH 20/38] Fix build. 
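The change below marks the destructors in RemoteSource.h with `override`. As a rough standalone illustration (the exact compiler flags are not shown in this series, so assume a build that promotes override-related warnings to errors via -Werror), a derived destructor that overrides a virtual base destructor only compiles cleanly when the intent is spelled out:

```cpp
#include <iostream>
#include <memory>

struct ISource
{
    virtual ~ISource() = default;
};

struct DerivedSource : ISource
{
    /// A plain `~DerivedSource();` declaration still overrides the virtual base
    /// destructor, but with override-related warnings treated as errors the
    /// missing `override` keyword fails the build; spelling it out keeps it green.
    ~DerivedSource() override = default;
};

int main()
{
    std::unique_ptr<ISource> source = std::make_unique<DerivedSource>();
    std::cout << "derived destructor is marked override" << std::endl;
    return 0;
}
```
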
--- src/Processors/Sources/RemoteSource.cpp | 2 ++ src/Processors/Sources/RemoteSource.h | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 2f76e0c87d4..29946e7322c 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -79,6 +79,7 @@ Chunk RemoteTotalsSource::generate() if (auto block = query_executor->getTotals()) { UInt64 num_rows = block.rows(); +std::cerr << "Got toals " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } @@ -99,6 +100,7 @@ Chunk RemoteExtremesSource::generate() if (auto block = query_executor->getExtremes()) { UInt64 num_rows = block.rows(); +std::cerr << "Got extrees " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 85ac1d756c8..0b4405a0905 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -18,7 +18,7 @@ public: /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); - ~RemoteSource(); + ~RemoteSource() override; String getName() const override { return "Remote"; } @@ -47,7 +47,7 @@ class RemoteTotalsSource : public ISource { public: explicit RemoteTotalsSource(RemoteQueryExecutorPtr executor); - ~RemoteTotalsSource(); + ~RemoteTotalsSource() override; String getName() const override { return "RemoteTotals"; } @@ -63,7 +63,7 @@ class RemoteExtremesSource : public ISource { public: explicit RemoteExtremesSource(RemoteQueryExecutorPtr executor); - ~RemoteExtremesSource(); + ~RemoteExtremesSource() override; String getName() const override { return "RemoteExtremes"; } From b419d73880776f831bdf0dcb3ce8cfa3d4ab9642 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:16:58 +0300 Subject: [PATCH 21/38] Fix build. --- src/DataStreams/RemoteQueryExecutor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index ce6c46d5a2a..0db0e0218be 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -61,8 +61,8 @@ public: void cancel(); /// Get totals and extremes if any. - Block getTotals() const { return std::move(totals); } - Block getExtremes() const { return std::move(extremes); } + Block getTotals() { return std::move(totals); } + Block getExtremes() { return std::move(extremes); } /// Set callback for progress. It will be called on Progress packet. void setProgressCallback(ProgressCallback callback) { progress_callback = std::move(callback); } From 563fe4ea359295f9aef30abb234ecc150483f3fe Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:45:23 +0300 Subject: [PATCH 22/38] Fix DelayedSource. 
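The bug addressed below: DelayedSource::prepare() walked the input ports with a range-for while keeping a single `output` iterator that was never advanced, so the totals and extremes inputs were always matched against the main output. A minimal sketch of the corrected lockstep traversal, using plain std::list stand-ins rather than the actual port types:

```cpp
#include <iostream>
#include <list>
#include <string>

int main()
{
    /// Stand-ins for the main/totals/extremes input and output ports.
    std::list<std::string> inputs{"main", "totals", "extremes"};
    std::list<std::string> outputs{"main", "totals", "extremes"};

    /// Advance both iterators together; a range-for over `inputs` alone would
    /// keep forwarding every input into outputs.front().
    auto output = outputs.begin();
    for (auto input = inputs.begin(); input != inputs.end(); ++input, ++output)
        std::cout << *input << " -> " << *output << '\n';

    return 0;
}
```
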
--- src/Processors/Sources/DelayedSource.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 9f7f26ab141..267eb78f77b 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -38,25 +38,25 @@ IProcessor::Status DelayedSource::prepare() /// Process ports in order: main, totals, extremes auto output = outputs.begin(); - for (auto & input : inputs) + for (auto input = inputs.begin(); input != inputs.end(); ++input, ++output) { if (output->isFinished()) { - input.close(); + input->close(); continue; } if (!output->isNeeded()) return Status::PortFull; - if (input.isFinished()) + if (input->isFinished()) { output->finish(); continue; } - input.setNeeded(); - if (!input.hasData()) + input->setNeeded(); + if (!input->hasData()) return Status::PortFull; output->pushData(input.pullData(true)); From 18516ba09f9f51156fb6cc8400e91ec426ab278b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 16:46:27 +0300 Subject: [PATCH 23/38] Fix DelayedSource. --- src/Processors/Sources/DelayedSource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 267eb78f77b..42a33d00196 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -59,7 +59,7 @@ IProcessor::Status DelayedSource::prepare() if (!input->hasData()) return Status::PortFull; - output->pushData(input.pullData(true)); + output->pushData(input->pullData(true)); return Status::PortFull; } From 1c982d00e38cc04d3cf83a612fb7c9bae7a662b3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 4 Jun 2020 16:59:12 +0300 Subject: [PATCH 24/38] try fix sync --- src/Dictionaries/CassandraHelpers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 70b38acf15c..8a00e372c96 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -5,7 +5,7 @@ #endif #if USE_CASSANDRA -#include +#include // Y_IGNORE #include #include From 31ad5d7e5d224ba1df8e33f6a14a93e1100e70b8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 23:42:03 +0300 Subject: [PATCH 25/38] Remove debug output. --- src/Processors/Sources/RemoteSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 29946e7322c..c6fc45d2296 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -100,7 +100,6 @@ Chunk RemoteExtremesSource::generate() if (auto block = query_executor->getExtremes()) { UInt64 num_rows = block.rows(); -std::cerr << "Got extrees " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } From d87b4746656bec285ac098e40457b690cdf5851e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 4 Jun 2020 23:42:59 +0300 Subject: [PATCH 26/38] Remove debug output. 
--- src/Processors/Sources/RemoteSource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index c6fc45d2296..2f76e0c87d4 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -79,7 +79,6 @@ Chunk RemoteTotalsSource::generate() if (auto block = query_executor->getTotals()) { UInt64 num_rows = block.rows(); -std::cerr << "Got toals " << num_rows << " rows " << std::endl; return Chunk(block.getColumns(), num_rows); } From 75c97ca82aa6de897d41e14267425b67bb0fed1c Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Jun 2020 00:19:23 +0200 Subject: [PATCH 27/38] Adding support for PREWHERE in live view tables. --- src/Storages/LiveView/StorageBlocks.h | 5 +++++ src/Storages/LiveView/StorageLiveView.h | 1 + .../00973_live_view_select_prewhere.reference | 4 ++++ .../00973_live_view_select_prewhere.sql | 20 +++++++++++++++++++ 4 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.reference create mode 100644 tests/queries/0_stateless/00973_live_view_select_prewhere.sql diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index a21a9374137..2a9d7766fd7 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -26,6 +26,11 @@ public: return std::make_shared(table_id, columns, std::move(pipes), to_stage); } std::string getName() const override { return "Blocks"; } + /// It is passed inside the query and solved at its level. + bool supportsPrewhere() const override { return true; } + bool supportsSampling() const override { return true; } + bool supportsFinal() const override { return true; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override { return to_stage; } Pipes read( diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fe62de224da..85e3d0cee62 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -65,6 +65,7 @@ public: ASTPtr getInnerBlocksQuery(); /// It is passed inside the query and solved at its level. 
+ bool supportsPrewhere() const override { return true; } bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference new file mode 100644 index 00000000000..a2a88e78c97 --- /dev/null +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference @@ -0,0 +1,4 @@ +5 1 +5 1 +10 2 +10 2 diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql new file mode 100644 index 00000000000..e0e2d342f9e --- /dev/null +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql @@ -0,0 +1,20 @@ +SET allow_experimental_live_view = 1; + +DROP TABLE IF EXISTS lv; +DROP TABLE IF EXISTS mt; + +CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); +CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt PREWHERE a > 1; + +INSERT INTO mt VALUES (1),(2),(3); + +SELECT *,_version FROM lv; +SELECT *,_version FROM lv; + +INSERT INTO mt VALUES (1),(2),(3); + +SELECT *,_version FROM lv; +SELECT *,_version FROM lv; + +DROP TABLE lv; +DROP TABLE mt; From 5fe67c429264614e1a199aeba162421e05fb345d Mon Sep 17 00:00:00 2001 From: Vitaliy Zakaznikov Date: Mon, 8 Jun 2020 13:12:40 +0200 Subject: [PATCH 28/38] * Removing supportsPrewhere() from StorageLiveView.h as it is not valid. * Updating test to check using PREWHERE in query against live view table. * Updating test to check using PREWHERE in the stored query against the table that does not support PREWHWERE. --- src/Storages/LiveView/StorageLiveView.h | 1 - .../00973_live_view_select_prewhere.reference | 2 -- .../0_stateless/00973_live_view_select_prewhere.sql | 12 +++++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 85e3d0cee62..fe62de224da 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -65,7 +65,6 @@ public: ASTPtr getInnerBlocksQuery(); /// It is passed inside the query and solved at its level. 
- bool supportsPrewhere() const override { return true; } bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference index a2a88e78c97..3a6fe59ae6d 100644 --- a/tests/queries/0_stateless/00973_live_view_select_prewhere.reference +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.reference @@ -1,4 +1,2 @@ 5 1 -5 1 -10 2 10 2 diff --git a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql index e0e2d342f9e..df3b7cb505a 100644 --- a/tests/queries/0_stateless/00973_live_view_select_prewhere.sql +++ b/tests/queries/0_stateless/00973_live_view_select_prewhere.sql @@ -1,20 +1,26 @@ SET allow_experimental_live_view = 1; DROP TABLE IF EXISTS lv; +DROP TABLE IF EXISTS lv2; DROP TABLE IF EXISTS mt; CREATE TABLE mt (a Int32) Engine=MergeTree order by tuple(); -CREATE LIVE VIEW lv AS SELECT sum(a) FROM mt PREWHERE a > 1; +CREATE LIVE VIEW lv AS SELECT sum(a) AS sum_a FROM mt PREWHERE a > 1; +CREATE LIVE VIEW lv2 AS SELECT sum(number) AS sum_number FROM system.numbers PREWHERE number > 1; INSERT INTO mt VALUES (1),(2),(3); SELECT *,_version FROM lv; -SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 5; -- { serverError 182 } INSERT INTO mt VALUES (1),(2),(3); SELECT *,_version FROM lv; -SELECT *,_version FROM lv; +SELECT *,_version FROM lv PREWHERE sum_a > 10; -- { serverError 182 } + +SELECT *,_version FROM lv2; -- { serverError 182 } +SELECT *,_version FROM lv2 PREWHERE sum_number > 10; -- { serverError 182 } DROP TABLE lv; +DROP TABLE lv2; DROP TABLE mt; From 6ee6b751be55154edd8c3132c666876933e9a75e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 9 Jun 2020 03:38:47 +0300 Subject: [PATCH 29/38] Added failing test #11539 --- .../0_stateless/01304_direct_io.reference | 1 + tests/queries/0_stateless/01304_direct_io.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/01304_direct_io.reference create mode 100755 tests/queries/0_stateless/01304_direct_io.sh diff --git a/tests/queries/0_stateless/01304_direct_io.reference b/tests/queries/0_stateless/01304_direct_io.reference new file mode 100644 index 00000000000..ec7a223ddc2 --- /dev/null +++ b/tests/queries/0_stateless/01304_direct_io.reference @@ -0,0 +1 @@ +Loaded 1 queries. diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh new file mode 100755 index 00000000000..0b6af15aa3b --- /dev/null +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery --query " + DROP TABLE IF EXISTS bug; + CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date; + INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); + OPTIMIZE TABLE bug FINAL;" + +$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" >/dev/null 2>$CLICKHOUSE_TMP/err +cat $CLICKHOUSE_TMP/err + +$CLICKHOUSE_CLIENT --multiquery --query " + DROP TABLE bug;" From 1a6c1d179655e2cc970cc99313a1bf92e5161098 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 11:14:02 +0300 Subject: [PATCH 30/38] Fix 01293_system_distribution_queue test flackiness --- .../queries/0_stateless/01293_system_distribution_queue.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index c0ff6a21e8e..4c9c690af09 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -10,6 +10,11 @@ select * from system.distribution_queue; select 'INSERT'; system stop distributed sends dist_01293; insert into dist_01293 select * from numbers(10); +-- metrics updated only after distributed_directory_monitor_sleep_time_ms +set distributed_directory_monitor_sleep_time_ms=10; +-- 1 second should guarantee metrics update +-- XXX: but this is kind of quirk, way more better will be account this metrics without any delays. +select sleep(1) format Null; select is_blocked, error_count, data_files, data_compressed_bytes>100 from system.distribution_queue; system flush distributed dist_01293; From ba4d96438863ada5e9d9f3c0f1596e16822dcf51 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Jun 2020 11:17:38 +0300 Subject: [PATCH 31/38] Add comments for 01281_group_by_limit_memory_tracking test --- .../0_stateless/01281_group_by_limit_memory_tracking.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index 5922b8d74d2..2115530a450 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -20,6 +20,14 @@ function execute_null() function execute_group_by() { + # Peak memory usage for the main query (with GROUP BY) is ~100MiB (with + # max_threads=2 as here). + # So set max_memory_usage_for_user to 150MiB and if the memory tracking + # accounting will be incorrect then the second query will fail + # + # Note that we also need one running query for the user (sleep(3)), since + # max_memory_usage_for_user is installed to 0 once there are no more + # queries for user. local opts=( --max_memory_usage_for_user=$((150<<20)) --max_threads=2 From 32772073ea8655298394fef45f2b5d16679c1f88 Mon Sep 17 00:00:00 2001 From: Ivan Starkov Date: Tue, 9 Jun 2020 13:52:21 +0300 Subject: [PATCH 32/38] Fix docker-entrypoint-initdb.d wait Fixes 'Address family not supported by protocol' for any docker-entrypoint-initdb.d script. wget uses 'localhost' which resolves for both ipv4 and ipv6 with current config (/etc/hosts) and so the wget fails _(Address family not supported by protocol)_ and does not retry. 
Forcing it to use IPv4 fixes the issue --- docker/server/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 2af8a377b92..6111b0057ed 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # check if clickhouse is ready to accept connections # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) - if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + if ! wget --spider --quiet -4 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then echo >&2 'ClickHouse init process failed.' exit 1 fi From bdcf1170ced2f334d9a273815b9ac8064f4046ba Mon Sep 17 00:00:00 2001 From: Ivan Starkov Date: Tue, 9 Jun 2020 16:32:49 +0300 Subject: [PATCH 33/38] Instead of forcing IPv4 protocol force wget to use protocols in order --- docker/server/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 6111b0057ed..059f3cb631b 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # check if clickhouse is ready to accept connections # will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay) - if ! wget --spider --quiet -4 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then + if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then echo >&2 'ClickHouse init process failed.' exit 1 fi From e466c6b38724effdb2c93bbb052cfa1f2b2f28fb Mon Sep 17 00:00:00 2001 From: Vxider Date: Tue, 9 Jun 2020 22:38:04 +0800 Subject: [PATCH 34/38] Fix spelling and improve translation --- docs/zh/operations/configuration-files.md | 28 +++++++++++++---------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/docs/zh/operations/configuration-files.md b/docs/zh/operations/configuration-files.md index 16a903b6528..aa0da86e8d0 100644 --- a/docs/zh/operations/configuration-files.md +++ b/docs/zh/operations/configuration-files.md @@ -1,27 +1,31 @@ # 配置文件 {#configuration_files} -主服务器配置文件是 `config.xml`. 它驻留在 `/etc/clickhouse-server/` 目录。 +ClickHouse支持多配置文件管理。主配置文件是`/etc/clickhouse-server/config.xml`。其余文件须在目录`/etc/clickhouse-server/config.d`。 -单个设置可以在复盖 `*.xml` 和 `*.conf` 在文件 `conf.d` 和 `config.d` 配置文件旁边的目录。 +!!! 注意: + 所有配置文件必须是XML格式。此外,配置文件须有相同的跟元素,通常是``。 -该 `replace` 或 `remove` 可以为这些配置文件的元素指定属性。 +主配置文件中的一些配置可以通过`replace`或`remove`属性被配置文件覆盖。 -如果两者都未指定,则递归组合元素的内容,替换重复子项的值。 +如果两者都未指定,则递归组合配置的内容,替换重复子项的值。 -如果 `replace` 如果指定,则将整个元素替换为指定的元素。 +如果指定`replace`属性,则将整个元素替换为指定的元素。 -如果 `remove` 如果指定,则删除该元素。 +如果指定`remove`属性,则删除该元素。 -The config can also define «substitutions». If an element has the `incl` 属性时,从文件中的相应替换将被用作该值。 默认情况下,具有替换的文件的路径为 `/etc/metrika.xml`. 
这可以在改变 [包括\_从](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) 服务器配置中的元素。 替换值在指定 `/yandex/substitution_name` 这个文件中的元素。 如果在指定的替换 `incl` 不存在,则将其记录在日志中。 要防止ClickHouse记录丢失的替换,请指定 `optional="true"` 属性(例如,设置 [宏](#macros) server\_settings/settings.md))。 +此外,配置文件还可指定"substitutions"。如果一个元素有`incl`属性,则文件中的相应替换值将被使用。默认情况下,具有替换的文件的路径为`/etc/metrika.xml`。这可以在服务配置中的[include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from)元素中被修改。替换值在这个文件的`/yandex/substitution_name`元素中被指定。如果`incl`中指定的替换值不存在,则将其记录在日志中。为防止ClickHouse记录丢失的替换,请指定`optional="true"`属性(例如,[宏](server-configuration-parameters/settings.md)设置)。 -替换也可以从ZooKeeper执行。 为此,请指定属性 `from_zk = "/path/to/node"`. 元素值被替换为节点的内容 `/path/to/node` 在动物园管理员。 您还可以将整个XML子树放在ZooKeeper节点上,并将其完全插入到源元素中。 +替换也可以从ZooKeeper执行。为此,请指定属性`from_zk = "/path/to/node"`。元素值被替换为ZooKeeper节点`/path/to/node`的内容。您还可以将整个XML子树放在ZooKeeper节点上,并将其完全插入到源元素中。 -该 `config.xml` 文件可以指定具有用户设置、配置文件和配额的单独配置。 这个配置的相对路径在 ‘users\_config’ 元素。 默认情况下,它是 `users.xml`. 如果 `users_config` 被省略,用户设置,配置文件和配额直接在指定 `config.xml`. +`config.xml` 文件可以指定单独的配置文件用于配置用户设置、配置文件及配额。可在`users_config`元素中指定其配置文件相对路径。其默认值是`users.xml`。如果`users_config`被省略,用户设置,配置文件和配额则直接在`config.xml`中指定。 -此外, `users_config` 可以从文件中复盖 `users_config.d` 目录(例如, `users.d`)和替换。 例如,您可以为每个用户提供单独的配置文件,如下所示: +用户配置可以分为如`config.xml`和`config.d/`等形式的单独配置文件。目录名称为配置`user_config`的值,去掉`.xml`后缀并与添加`.d`。由于`users_config`配置默认值为`users.xml`,所以目录名默认使用`users.d`。例如,您可以为每个用户有单独的配置文件,如下所示: + +``` bash +$ cat /etc/clickhouse-server/users.d/alice.xml +``` ``` xml -$ cat /etc/clickhouse-server/users.d/alice.xml @@ -36,7 +40,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ``` -对于每个配置文件,服务器还会生成 `file-preprocessed.xml` 启动时的文件。 这些文件包含所有已完成的替换和复盖,并且它们旨在提供信息。 如果zookeeper替换在配置文件中使用,但ZooKeeper在服务器启动时不可用,则服务器将从预处理的文件中加载配置。 +对于每个配置文件,服务器还会在启动时生成 `file-preprocessed.xml` 文件。这些文件包含所有已完成的替换和复盖,并且它们旨在提供信息。如果zookeeper替换在配置文件中使用,但ZooKeeper在服务器启动时不可用,则服务器将从预处理的文件中加载配置。 服务器跟踪配置文件中的更改,以及执行替换和复盖时使用的文件和ZooKeeper节点,并动态重新加载用户和集群的设置。 这意味着您可以在不重新启动服务器的情况下修改群集、用户及其设置。 From 981462db041e001573ffff35c0809aa0d8482ef2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:01:34 +0300 Subject: [PATCH 35/38] Remove dump flag from ReadBufferAIO --- src/IO/ReadBufferAIO.cpp | 11 ++++------- src/IO/ReadBufferAIO.h | 2 -- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/IO/ReadBufferAIO.cpp b/src/IO/ReadBufferAIO.cpp index ffe8183f005..4a52b1825f7 100644 --- a/src/IO/ReadBufferAIO.cpp +++ b/src/IO/ReadBufferAIO.cpp @@ -95,11 +95,8 @@ bool ReadBufferAIO::nextImpl() if (profile_callback) watch.emplace(clock_type); - if (!is_aio) - { + if (!is_pending_read) synchronousRead(); - is_aio = true; - } else receive(); @@ -215,7 +212,9 @@ void ReadBufferAIO::synchronousRead() void ReadBufferAIO::receive() { if (!waitForAIOCompletion()) - return; + { + throw Exception("Trying to receive data from AIO, but nothing was queued. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + } finalize(); } @@ -224,8 +223,6 @@ void ReadBufferAIO::skip() if (!waitForAIOCompletion()) return; - is_aio = false; - /// @todo I presume this assignment is redundant since waitForAIOCompletion() performs a similar one // bytes_read = future_bytes_read.get(); if ((bytes_read < 0) || (static_cast(bytes_read) < region_left_padding)) diff --git a/src/IO/ReadBufferAIO.h b/src/IO/ReadBufferAIO.h index 77274c47073..5b2cf247a45 100644 --- a/src/IO/ReadBufferAIO.h +++ b/src/IO/ReadBufferAIO.h @@ -100,8 +100,6 @@ private: bool is_eof = false; /// At least one read request was sent. bool is_started = false; - /// Is the operation asynchronous? - bool is_aio = false; /// Did the asynchronous operation fail? bool aio_failed = false; From e3ae0734c9bbf234ed7cb8542e49f3b3778c8069 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:04:56 +0300 Subject: [PATCH 36/38] Better test --- tests/queries/0_stateless/01304_direct_io.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh index 0b6af15aa3b..32091acd5eb 100755 --- a/tests/queries/0_stateless/01304_direct_io.sh +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -9,8 +9,9 @@ $CLICKHOUSE_CLIENT --multiquery --query " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" -$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" >/dev/null 2>$CLICKHOUSE_TMP/err -cat $CLICKHOUSE_TMP/err +$CLICKHOUSE_BENCHMARK --database $CLICKHOUSE_DATABASE --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>$CLICKHOUSE_TMP/err +cat $CLICKHOUSE_TMP/err | grep Exception +cat $CLICKHOUSE_TMP/err | grep Loaded $CLICKHOUSE_CLIENT --multiquery --query " DROP TABLE bug;" From a6feb81af194376b8d6cdcdc46798418b425e368 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 9 Jun 2020 18:11:11 +0300 Subject: [PATCH 37/38] Fix under msan --- src/IO/ReadBufferAIO.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/IO/ReadBufferAIO.cpp b/src/IO/ReadBufferAIO.cpp index 4a52b1825f7..8b01b67c0c0 100644 --- a/src/IO/ReadBufferAIO.cpp +++ b/src/IO/ReadBufferAIO.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -271,6 +272,9 @@ void ReadBufferAIO::prepare() region_aligned_size = region_aligned_end - region_aligned_begin; buffer_begin = fill_buffer.internalBuffer().begin(); + + /// Unpoison because msan doesn't instrument linux AIO + __msan_unpoison(buffer_begin, fill_buffer.internalBuffer().size()); } void ReadBufferAIO::finalize() From e0de3aa7c98659f546761d579cedb74f38b41dd2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Jun 2020 19:39:26 +0300 Subject: [PATCH 38/38] Revert "Addition to #11184" --- docker/test/stateful/Dockerfile | 2 ++ docker/test/stateful_with_coverage/run.sh | 2 ++ docker/test/stateless/Dockerfile | 2 ++ docker/test/stateless_with_coverage/run.sh | 2 ++ docker/test/stress/Dockerfile | 1 + tests/config/log_queries.xml | 7 +++++++ tests/config/metric_log.xml | 8 ++++++++ 7 files changed, 24 insertions(+) create mode 100644 tests/config/log_queries.xml create mode 100644 tests/config/metric_log.xml diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index d751a2532bc..3aff49bf5a1 
100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -24,6 +24,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateful_with_coverage/run.sh b/docker/test/stateful_with_coverage/run.sh index 5530aadb4ca..b946f5b187d 100755 --- a/docker/test/stateful_with_coverage/run.sh +++ b/docker/test/stateful_with_coverage/run.sh @@ -59,7 +59,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 8fbaffe88bc..41a53f8a3f5 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -62,7 +62,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stateless_with_coverage/run.sh b/docker/test/stateless_with_coverage/run.sh index 12ed7a25b75..185dc95c783 100755 --- a/docker/test/stateless_with_coverage/run.sh +++ b/docker/test/stateless_with_coverage/run.sh @@ -50,7 +50,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \ ln -s 
/usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 66f5135a4a4..a5aa3bbf004 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -31,6 +31,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ dpkg -i package_folder/clickhouse-client_*.deb; \ dpkg -i package_folder/clickhouse-test_*.deb; \ + ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \ ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \ echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \ diff --git a/tests/config/log_queries.xml b/tests/config/log_queries.xml new file mode 100644 index 00000000000..25261072ade --- /dev/null +++ b/tests/config/log_queries.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/config/metric_log.xml b/tests/config/metric_log.xml new file mode 100644 index 00000000000..0ca9f162416 --- /dev/null +++ b/tests/config/metric_log.xml @@ -0,0 +1,8 @@ + + + system + metric_log
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+    </metric_log>
+</yandex>