From 51384fd6917dda0f624b14b3deb09b20400436b6 Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 16 Jan 2019 01:08:56 +0300 Subject: [PATCH 01/49] Add basic functionality for dict --- ci/jobs/quick-build/run.sh | 2 +- cmake/find_poco.cmake | 15 +- dbms/src/Common/config.h.in | 1 + dbms/src/Dictionaries/CMakeLists.txt | 4 + .../Dictionaries/RedisBlockInputStream.cpp | 252 ++++++++++++++++++ dbms/src/Dictionaries/RedisBlockInputStream.h | 44 +++ .../Dictionaries/RedisDictionarySource.cpp | 194 ++++++++++++++ dbms/src/Dictionaries/RedisDictionarySource.h | 84 ++++++ 8 files changed, 592 insertions(+), 4 deletions(-) create mode 100644 dbms/src/Dictionaries/RedisBlockInputStream.cpp create mode 100644 dbms/src/Dictionaries/RedisBlockInputStream.h create mode 100644 dbms/src/Dictionaries/RedisDictionarySource.cpp create mode 100644 dbms/src/Dictionaries/RedisDictionarySource.h diff --git a/ci/jobs/quick-build/run.sh b/ci/jobs/quick-build/run.sh index 6a948c560ee..eb31802e79b 100755 --- a/ci/jobs/quick-build/run.sh +++ b/ci/jobs/quick-build/run.sh @@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse BUILD_TYPE=Debug ENABLE_EMBEDDED_COMPILER=0 -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0" +CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_JEMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_REDIS=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0" [[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index 012f269d48d..4c9cb16e729 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -15,6 +15,9 @@ endif () if (NOT DEFINED ENABLE_POCO_MONGODB OR ENABLE_POCO_MONGODB) list (APPEND POCO_COMPONENTS MongoDB) endif () +if (NOT DEFINED ENABLE_POCO_REDIS OR ENABLE_POCO_REDIS) + list (APPEND POCO_COMPONENTS Redis) +endif () # TODO: after new poco release with SQL library rename ENABLE_POCO_ODBC -> ENABLE_POCO_SQLODBC if (NOT DEFINED ENABLE_POCO_ODBC OR ENABLE_POCO_ODBC) list (APPEND POCO_COMPONENTS DataODBC) @@ -32,7 +35,6 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) set (ENABLE_ZIP 0 CACHE BOOL "") set (ENABLE_PAGECOMPILER 0 CACHE BOOL "") set (ENABLE_PAGECOMPILER_FILE2PAGE 0 CACHE BOOL "") - set (ENABLE_REDIS 0 CACHE BOOL "") set (ENABLE_DATA_SQLITE 0 CACHE BOOL "") set (ENABLE_DATA_MYSQL 0 CACHE BOOL "") set (ENABLE_DATA_POSTGRESQL 0 CACHE BOOL "") @@ -40,7 +42,6 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) set (POCO_ENABLE_ZIP 0 CACHE BOOL "") set (POCO_ENABLE_PAGECOMPILER 0 CACHE BOOL "") set (POCO_ENABLE_PAGECOMPILER_FILE2PAGE 0 CACHE BOOL "") - set (POCO_ENABLE_REDIS 0 CACHE BOOL "") set (POCO_ENABLE_SQL_SQLITE 0 CACHE BOOL "") set (POCO_ENABLE_SQL_MYSQL 0 CACHE BOOL "") set (POCO_ENABLE_SQL_POSTGRESQL 0 CACHE BOOL "") @@ -63,6 +64,11 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) set (Poco_MongoDB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/poco/MongoDB/include/") endif () + if (NOT DEFINED ENABLE_POCO_REDIS OR ENABLE_POCO_REDIS) + set (Poco_Redis_LIBRARY PocoRedis) + set (Poco_Redis_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/poco/Redis/include/") + endif () + if (EXISTS "${ClickHouse_SOURCE_DIR}/contrib/poco/SQL/ODBC/include/") set (Poco_SQL_FOUND 1) set (Poco_SQL_LIBRARY PocoSQL) @@ -116,6 +122,9 @@ endif () if (Poco_MongoDB_LIBRARY) set (USE_POCO_MONGODB 1) endif () +if (Poco_Redis_LIBRARY) + set (USE_POCO_REDIS 1) +endif () if (Poco_DataODBC_LIBRARY AND ODBC_FOUND) set (USE_POCO_DATAODBC 1) endif () @@ -123,7 +132,7 @@ if (Poco_SQLODBC_LIBRARY AND ODBC_FOUND) set (USE_POCO_SQLODBC 1) endif () -message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") +message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") # How to make sutable poco: # use branch: diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 09c2eadde29..d3a61037119 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -12,6 +12,7 @@ #cmakedefine01 USE_POCO_SQLODBC #cmakedefine01 USE_POCO_DATAODBC #cmakedefine01 USE_POCO_MONGODB +#cmakedefine01 USE_POCO_REDIS #cmakedefine01 USE_POCO_NETSSL #cmakedefine01 USE_BASE64 #cmakedefine01 USE_HDFS diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index d7f85a5c7eb..2e8219f2170 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -36,4 +36,8 @@ if(USE_POCO_MONGODB) target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_MongoDB_LIBRARY}) endif() +if(USE_POCO_REDIS) + target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_Redis_LIBRARY}) +endif() + add_subdirectory(Embedded) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp new file mode 100644 index 00000000000..dfbb03a0034 --- /dev/null +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -0,0 +1,252 @@ +#include +#if USE_POCO_REDIS + +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include "DictionaryStructure.h" +# include "RedisBlockInputStream.h" + + +namespace DB +{ + namespace ErrorCodes + { + extern const int TYPE_MISMATCH; + } + + + RedisBlockInputStream::RedisBlockInputStream( + std::shared_ptr client_, + const DB::Block & sample_block, + const size_t max_block_size) + : client(client_), max_block_size{max_block_size} + { + description.init(sample_block); + } + + RedisBlockInputStream::~RedisBlockInputStream() = default; + + + namespace + { + using ValueType = ExternalResultDescription::ValueType; + using RedisArray = Poco::Redis::Array; + + template + void insertNumber(IColumn & column, const Poco::Redis::RedisType::Ptr & value, const std::string & name) + { + switch (value->type()) + { + case Poco::Redis::RedisTypeTraits::TypeId: + static_cast &>(column).getData().push_back( + static_cast *>(value.get())->value()); + break; + case Poco::Redis::RedisTypeTraits::TypeId: + static_cast &>(column).getData().push_back( + parse(static_cast *>(value.get())->value())); + break; + case Poco::Redis::RedisTypeTraits::TypeId: + { + const auto &bs = + static_cast *>(value.get())->value(); + if (bs.isNull()) + static_cast &>(column).getData().emplace_back(); + else + static_cast &>(column).getData().push_back(parse(bs.value())); + break; + } + default: + throw Exception( + "Type mismatch, expected a number, got type id = " + toString(value->type()) + " for column " + name, + ErrorCodes::TYPE_MISMATCH); + } + } + + void insertValue(IColumn & column, const ValueType type, const Poco::Redis::RedisType::Ptr & value, const std::string & name) + { + auto getStringIfCould = [&value, &name]() + { + switch (value->type()) + { + case Poco::Redis::RedisTypeTraits::TypeId: + { + const auto & bs = static_cast *>(value.get())->value(); + if (bs.isNull()) + throw Exception{"Type mismatch, expected not null String for column " + name, + ErrorCodes::TYPE_MISMATCH}; + return bs.value(); + } + case Poco::Redis::RedisTypeTraits::TypeId: + return static_cast *>(value.get())->value(); + default: + throw Exception{"Type mismatch, expected String, got type id = " + toString(value->type()) + " for column " + name, + ErrorCodes::TYPE_MISMATCH}; + } + }; + switch (type) + { + case ValueType::UInt8: + insertNumber(column, value, name); + break; + case ValueType::UInt16: + insertNumber(column, value, name); + break; + case ValueType::UInt32: + insertNumber(column, value, name); + break; + case ValueType::UInt64: + insertNumber(column, value, name); + break; + case ValueType::Int8: + insertNumber(column, value, name); + break; + case ValueType::Int16: + insertNumber(column, value, name); + break; + case ValueType::Int32: + insertNumber(column, value, name); + break; + case ValueType::Int64: + insertNumber(column, value, name); + break; + case ValueType::Float32: + insertNumber(column, value, name); + break; + case ValueType::Float64: + insertNumber(column, value, name); + break; + + case ValueType::String: + { + String string = getStringIfCould(); + static_cast(column).insertDataWithTerminatingZero(string.data(), string.size() + 1); + break; + } + + case ValueType::Date: + { + if (value->type() != Poco::Redis::RedisTypeTraits::TypeId) + throw Exception{"Type mismatch, expected Int64 (Timestamp), got type id = " + toString(value->type()) + " for column " + name, + ErrorCodes::TYPE_MISMATCH}; + + static_cast(column).getData().push_back(UInt16{DateLUT::instance().toDayNum( + static_cast( + static_cast *>(value.get())->value()).epochTime())}); + break; + } + + case ValueType::DateTime: + { + if (value->type() != Poco::Redis::RedisTypeTraits::TypeId) + throw Exception{"Type mismatch, expected Int64 (Timestamp), got type id = " + toString(value->type()) + " for column " + name, + ErrorCodes::TYPE_MISMATCH}; + + static_cast(column).getData().push_back( + static_cast( + static_cast *>(value.get())->value()).epochTime()); + break; + } + case ValueType::UUID: + { + String string = getStringIfCould(); + static_cast(column).getData().push_back(parse(string)); + break; + } + } + } + + void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } + } + + + Block RedisBlockInputStream::readImpl() + { + if (all_read) + return {}; + + const size_t size = 2; + assert(size == description.sample_block.columns()); + MutableColumns columns(description.sample_block.columns()); + + for (const auto i : ext::range(0, size)) + columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty(); + + size_t num_rows = 0; + while (num_rows < max_block_size) + { + RedisArray commandForKeys; + commandForKeys << "SCAN" << cursor; + + auto replyForKeys = client->execute(commandForKeys); + if (cursor = replyForKeys.get(0); cursor == 0) + { + all_read = true; + break; + } + + auto response = replyForKeys.get(1); + if (response.isNull()) + continue; + + Poco::Redis::Array commandForValues; + commandForValues << "MGET"; + + const auto insertValueByIdx = [this, &columns](size_t idx, const auto & value) + { + const auto & name = description.sample_block.getByPosition(idx).name; + if (description.types[idx].second) + { + ColumnNullable & column_nullable = static_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value, name); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], description.types[idx].first, value, name); + }; + + for (const auto & key : response) + { + ++num_rows; + String keyS = static_cast *>(key.get())->value(); + commandForValues << keyS; + insertValueByIdx(0, key); + } + + auto replyForValues = client->execute(commandForValues); + for (const auto & value : replyForValues) + { + if (value.isNull()) + insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); + else + insertValueByIdx(1, value); + } + } + + if (num_rows == 0) + return {}; + + return description.sample_block.cloneWithColumns(std::move(columns)); + } + +} + +#endif diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h new file mode 100644 index 00000000000..7e32b3ff8ff --- /dev/null +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -0,0 +1,44 @@ +#pragma once + +#include +#include +#include "ExternalResultDescription.h" + + +namespace Poco +{ + namespace Redis + { + class Client; + } +} + + +namespace DB +{ +/// Converts Redis Cursor to a stream of Blocks + class RedisBlockInputStream final : public IProfilingBlockInputStream + { + public: + RedisBlockInputStream( + std::shared_ptr client_, + const Block & sample_block, + const size_t max_block_size); + + ~RedisBlockInputStream() override; + + String getName() const override { return "Redis"; } + + Block getHeader() const override { return description.sample_block.cloneEmpty(); } + + private: + Block readImpl() override; + + std::shared_ptr client; + const size_t max_block_size; + ExternalResultDescription description; + int64_t cursor = 0; + bool all_read = false; + }; + +} diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp new file mode 100644 index 00000000000..1fb5472b48b --- /dev/null +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -0,0 +1,194 @@ +#include "RedisDictionarySource.h" +#include "DictionarySourceFactory.h" +#include "DictionaryStructure.h" + +namespace DB +{ + namespace ErrorCodes + { + extern const int SUPPORT_IS_DISABLED; + } + + void registerDictionarySourceRedis(DictionarySourceFactory & factory) + { + auto createTableSource = [=](const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block, + const Context & /* context */) -> DictionarySourcePtr { +#if USE_POCO_REDIS + return std::make_unique(dict_struct, config, config_prefix + ".redis", sample_block); +#else + (void)dict_struct; + (void)config; + (void)config_prefix; + (void)sample_block; + throw Exception{"Dictionary source of type `redis` is disabled because poco library was built without redis support.", + ErrorCodes::SUPPORT_IS_DISABLED}; +#endif + }; + factory.registerSource("redis", createTableSource); + } + +} + + +#if USE_POCO_REDIS + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include "RedisBlockInputStream.h" + + +namespace DB +{ + namespace ErrorCodes + { + extern const int UNSUPPORTED_METHOD; + } + + + static const size_t max_block_size = 8192; + + + RedisDictionarySource::RedisDictionarySource( + const DictionaryStructure & dict_struct, + const std::string & host, + UInt16 port, + const Block & sample_block) + : dict_struct{dict_struct} + , host{host} + , port{port} + , sample_block{sample_block} + , client{std::make_shared(host, port)} + { + } + + + RedisDictionarySource::RedisDictionarySource( + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block) + : RedisDictionarySource( + dict_struct, + config.getString(config_prefix + ".host"), + config.getUInt(config_prefix + ".port"), + sample_block) + { + } + + + RedisDictionarySource::RedisDictionarySource(const RedisDictionarySource & other) + : RedisDictionarySource{other.dict_struct, + other.host, + other.port, + other.sample_block} + { + } + + + RedisDictionarySource::~RedisDictionarySource() = default; + + + BlockInputStreamPtr RedisDictionarySource::loadAll() + { + return std::make_shared(client, sample_block, max_block_size); + } + +/* + BlockInputStreamPtr RedisDictionarySource::loadIds(const std::vector & ids) + { + if (!dict_struct.id) + throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; + + Poco::Redis::Array ids_array(new Poco::Redis::Array); + for (const UInt64 id : ids) + ids_array->add(DB::toString(id), Int32(id)); + + cursor->query().selector().addNewDocument(dict_struct.id->name).add("$in", ids_array); + + return std::make_shared(connection, sample_block, max_block_size); + } + + + BlockInputStreamPtr RedisDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) + { + if (!dict_struct.key) + throw Exception{"'key' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; + + Poco::Redis::Array::Ptr keys_array(new Poco::Redis::Array); + + for (const auto row_idx : requested_rows) + { + auto & key = keys_array->addNewDocument(DB::toString(row_idx)); + + for (const auto attr : ext::enumerate(*dict_struct.key)) + { + switch (attr.second.underlying_type) + { + case AttributeUnderlyingType::UInt8: + case AttributeUnderlyingType::UInt16: + case AttributeUnderlyingType::UInt32: + case AttributeUnderlyingType::UInt64: + case AttributeUnderlyingType::UInt128: + case AttributeUnderlyingType::Int8: + case AttributeUnderlyingType::Int16: + case AttributeUnderlyingType::Int32: + case AttributeUnderlyingType::Int64: + case AttributeUnderlyingType::Decimal32: + case AttributeUnderlyingType::Decimal64: + case AttributeUnderlyingType::Decimal128: + key.add(attr.second.name, Int32(key_columns[attr.first]->get64(row_idx))); + break; + + case AttributeUnderlyingType::Float32: + case AttributeUnderlyingType::Float64: + key.add(attr.second.name, applyVisitor(FieldVisitorConvertToNumber(), (*key_columns[attr.first])[row_idx])); + break; + + case AttributeUnderlyingType::String: + String _str(get((*key_columns[attr.first])[row_idx])); + /// Convert string to ObjectID + if (attr.second.is_object_id) + { + Poco::Redis::ObjectId::Ptr _id(new Poco::Redis::ObjectId(_str)); + key.add(attr.second.name, _id); + } + else + { + key.add(attr.second.name, _str); + } + break; + } + } + } + + /// If more than one key we should use $or + cursor->query().selector().add("$or", keys_array); + + return std::make_shared(connection, sample_block, max_block_size); + } +*/ + + std::string RedisDictionarySource::toString() const + { + return "Redis: " + host + ':' + DB::toString(port); + } + +} + +#endif diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h new file mode 100644 index 00000000000..61417fac393 --- /dev/null +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#if USE_POCO_REDIS + +# include "DictionaryStructure.h" +# include "IDictionarySource.h" + +namespace Poco +{ + namespace Util + { + class AbstractConfiguration; + } + + namespace Redis + { + class Client; + } +} + + +namespace DB +{ +/// Allows loading dictionaries from a Redis collection + class RedisDictionarySource final : public IDictionarySource + { + RedisDictionarySource( + const DictionaryStructure & dict_struct, + const std::string & host, + UInt16 port, + const Block & sample_block); + + public: + RedisDictionarySource( + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Block & sample_block); + + RedisDictionarySource(const RedisDictionarySource & other); + + ~RedisDictionarySource() override; + + BlockInputStreamPtr loadAll() override; + + BlockInputStreamPtr loadUpdatedAll() override + { + throw Exception{"Method loadUpdatedAll is unsupported for RedisDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; + } + + bool supportsSelectiveLoad() const override { return true; } + + BlockInputStreamPtr loadIds(const std::vector & /* ids */) override {throw 1;}; + + BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override {throw 1;}; + + /// @todo: for Redis, modification date can somehow be determined from the `_id` object field + bool isModified() const override { return true; } + + ///Not yet supported + bool hasUpdateField() const override { return false; } + + DictionarySourcePtr clone() const override { return std::make_unique(*this); } + + std::string toString() const override; + + private: + const DictionaryStructure dict_struct; + const std::string host; + const UInt16 port; + Block sample_block; + + std::shared_ptr client; + }; + +} +#endif + +/*namespace DB +{ +class DictionarySourceFactory; +void registerDictionarySourceRedis(DictionarySourceFactory & factory); +}*/ From 8472b26f07778c516000a80e90ffabf945743595 Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 16 Jan 2019 04:05:40 +0300 Subject: [PATCH 02/49] Fix code highlighting --- cmake/find_poco.cmake | 2 +- dbms/src/Dictionaries/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index 4c9cb16e729..ade020141bb 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -132,7 +132,7 @@ if (Poco_SQLODBC_LIBRARY AND ODBC_FOUND) set (USE_POCO_SQLODBC 1) endif () -message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") +message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_INCLUDE_DIR}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") # How to make sutable poco: # use branch: diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index 2e8219f2170..de6bdd6b915 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -37,7 +37,7 @@ if(USE_POCO_MONGODB) endif() if(USE_POCO_REDIS) - target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_Redis_LIBRARY}) + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${Poco_Redis_INCLUDE_DIR}) endif() add_subdirectory(Embedded) From 741f630141f714ad39b39d769dacfd1a1daa884b Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 27 Jan 2019 16:14:02 +0300 Subject: [PATCH 03/49] Support loadIds --- .../Dictionaries/RedisBlockInputStream.cpp | 71 +++++------ dbms/src/Dictionaries/RedisBlockInputStream.h | 7 +- .../Dictionaries/RedisDictionarySource.cpp | 113 ++++++++---------- dbms/src/Dictionaries/RedisDictionarySource.h | 9 +- 4 files changed, 87 insertions(+), 113 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index dfbb03a0034..a7d0b27bd09 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -35,10 +35,10 @@ namespace DB RedisBlockInputStream::RedisBlockInputStream( - std::shared_ptr client_, + const Poco::Redis::Array & reply_array_, const DB::Block & sample_block, const size_t max_block_size) - : client(client_), max_block_size{max_block_size} + : reply_array(reply_array_), max_block_size{max_block_size} { description.init(sample_block); } @@ -190,55 +190,42 @@ namespace DB for (const auto i : ext::range(0, size)) columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty(); + const auto insertValueByIdx = [this, &columns](size_t idx, const auto & value) + { + const auto & name = description.sample_block.getByPosition(idx).name; + if (description.types[idx].second) + { + ColumnNullable & column_nullable = static_cast(*columns[idx]); + insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value, name); + column_nullable.getNullMapData().emplace_back(0); + } + else + insertValue(*columns[idx], description.types[idx].first, value, name); + }; + size_t num_rows = 0; + + const auto & keys = reply_array.get(0); + const auto & values = reply_array.get(1); + while (num_rows < max_block_size) { - RedisArray commandForKeys; - commandForKeys << "SCAN" << cursor; - - auto replyForKeys = client->execute(commandForKeys); - if (cursor = replyForKeys.get(0); cursor == 0) - { + if (cursor == keys.size()) { all_read = true; break; } - auto response = replyForKeys.get(1); - if (response.isNull()) - continue; + ++num_rows; + ++cursor; - Poco::Redis::Array commandForValues; - commandForValues << "MGET"; + const auto & key = *(keys.begin() + cursor); + insertValueByIdx(0, key); - const auto insertValueByIdx = [this, &columns](size_t idx, const auto & value) - { - const auto & name = description.sample_block.getByPosition(idx).name; - if (description.types[idx].second) - { - ColumnNullable & column_nullable = static_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value, name); - column_nullable.getNullMapData().emplace_back(0); - } - else - insertValue(*columns[idx], description.types[idx].first, value, name); - }; - - for (const auto & key : response) - { - ++num_rows; - String keyS = static_cast *>(key.get())->value(); - commandForValues << keyS; - insertValueByIdx(0, key); - } - - auto replyForValues = client->execute(commandForValues); - for (const auto & value : replyForValues) - { - if (value.isNull()) - insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); - else - insertValueByIdx(1, value); - } + const auto & value = *(values.begin() + cursor); + if (value.isNull()) + insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); + else + insertValueByIdx(1, value); } if (num_rows == 0) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 7e32b3ff8ff..1884ce7a0f6 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -9,6 +9,7 @@ namespace Poco { namespace Redis { + class Array; class Client; } } @@ -21,7 +22,7 @@ namespace DB { public: RedisBlockInputStream( - std::shared_ptr client_, + const Poco::Redis::Array & reply_array_, const Block & sample_block, const size_t max_block_size); @@ -34,10 +35,10 @@ namespace DB private: Block readImpl() override; - std::shared_ptr client; + Poco::Redis::Array reply_array; const size_t max_block_size; ExternalResultDescription description; - int64_t cursor = 0; + size_t cursor = 0; bool all_read = false; }; diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 1fb5472b48b..90229c087dd 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -53,6 +53,17 @@ namespace DB # include "RedisBlockInputStream.h" +namespace +{ + template + Poco::Redis::Array makeResult(const K & keys, const V & values) { + Poco::Redis::Array result; + result << keys << values; + return result; + } +} + + namespace DB { namespace ErrorCodes @@ -106,83 +117,57 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadAll() { - return std::make_shared(client, sample_block, max_block_size); + Int64 cursor = 0; + Poco::Redis::Array keys; + + do + { + Poco::Redis::Array commandForKeys; + commandForKeys << "SCAN" << cursor << "COUNT 1000"; + + Poco::Redis::Array replyForKeys = client->execute(commandForKeys); + cursor = replyForKeys.get(0); + + Poco::Redis::Array response = replyForKeys.get(1); + if (response.isNull()) + continue; + + for (const Poco::Redis::RedisType::Ptr & key : response) + keys.addRedisType(key); + } + while (cursor != 0); + + Poco::Redis::Array commandForValues; + commandForValues << "MGET"; + for (const Poco::Redis::RedisType::Ptr & key : keys) + commandForValues.addRedisType(key); + + Poco::Redis::Array values = client->execute(commandForValues); + + return std::make_shared(makeResult(keys, values), sample_block, max_block_size); } -/* + BlockInputStreamPtr RedisDictionarySource::loadIds(const std::vector & ids) { if (!dict_struct.id) throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; - Poco::Redis::Array ids_array(new Poco::Redis::Array); + Poco::Redis::Array keys; + Poco::Redis::Array command; + command << "MGET"; + for (const UInt64 id : ids) - ids_array->add(DB::toString(id), Int32(id)); - - cursor->query().selector().addNewDocument(dict_struct.id->name).add("$in", ids_array); - - return std::make_shared(connection, sample_block, max_block_size); - } - - - BlockInputStreamPtr RedisDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) - { - if (!dict_struct.key) - throw Exception{"'key' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; - - Poco::Redis::Array::Ptr keys_array(new Poco::Redis::Array); - - for (const auto row_idx : requested_rows) { - auto & key = keys_array->addNewDocument(DB::toString(row_idx)); - - for (const auto attr : ext::enumerate(*dict_struct.key)) - { - switch (attr.second.underlying_type) - { - case AttributeUnderlyingType::UInt8: - case AttributeUnderlyingType::UInt16: - case AttributeUnderlyingType::UInt32: - case AttributeUnderlyingType::UInt64: - case AttributeUnderlyingType::UInt128: - case AttributeUnderlyingType::Int8: - case AttributeUnderlyingType::Int16: - case AttributeUnderlyingType::Int32: - case AttributeUnderlyingType::Int64: - case AttributeUnderlyingType::Decimal32: - case AttributeUnderlyingType::Decimal64: - case AttributeUnderlyingType::Decimal128: - key.add(attr.second.name, Int32(key_columns[attr.first]->get64(row_idx))); - break; - - case AttributeUnderlyingType::Float32: - case AttributeUnderlyingType::Float64: - key.add(attr.second.name, applyVisitor(FieldVisitorConvertToNumber(), (*key_columns[attr.first])[row_idx])); - break; - - case AttributeUnderlyingType::String: - String _str(get((*key_columns[attr.first])[row_idx])); - /// Convert string to ObjectID - if (attr.second.is_object_id) - { - Poco::Redis::ObjectId::Ptr _id(new Poco::Redis::ObjectId(_str)); - key.add(attr.second.name, _id); - } - else - { - key.add(attr.second.name, _str); - } - break; - } - } + keys << static_cast(id); + command << static_cast(id); } - /// If more than one key we should use $or - cursor->query().selector().add("$or", keys_array); + Poco::Redis::Array values = client->execute(command); - return std::make_shared(connection, sample_block, max_block_size); + return std::make_shared(makeResult(keys, values), sample_block, max_block_size); } -*/ + std::string RedisDictionarySource::toString() const { diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index 61417fac393..e3566731f06 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -51,14 +51,15 @@ namespace DB bool supportsSelectiveLoad() const override { return true; } - BlockInputStreamPtr loadIds(const std::vector & /* ids */) override {throw 1;}; + BlockInputStreamPtr loadIds(const std::vector & ids) override; - BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override {throw 1;}; + BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override + { + throw Exception{"Method loadKeys is unsupported for RedisDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; + }; - /// @todo: for Redis, modification date can somehow be determined from the `_id` object field bool isModified() const override { return true; } - ///Not yet supported bool hasUpdateField() const override { return false; } DictionarySourcePtr clone() const override { return std::make_unique(*this); } From b455708eab688c695853eeda694d1d55de74a8d2 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 27 Jan 2019 18:30:51 +0300 Subject: [PATCH 04/49] Use batch query for reading keys --- .../Dictionaries/RedisDictionarySource.cpp | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 90229c087dd..b3ec940a8d1 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -117,25 +117,10 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadAll() { - Int64 cursor = 0; - Poco::Redis::Array keys; - - do - { - Poco::Redis::Array commandForKeys; - commandForKeys << "SCAN" << cursor << "COUNT 1000"; - - Poco::Redis::Array replyForKeys = client->execute(commandForKeys); - cursor = replyForKeys.get(0); - - Poco::Redis::Array response = replyForKeys.get(1); - if (response.isNull()) - continue; - - for (const Poco::Redis::RedisType::Ptr & key : response) - keys.addRedisType(key); - } - while (cursor != 0); + Poco::Redis::Array commandForKeys; + commandForKeys << "KEYS" << "*"; + + Poco::Redis::Array keys = client->execute(commandForKeys); Poco::Redis::Array commandForValues; commandForValues << "MGET"; From 933906403ac3ecf98ba32b7d4f453380a0a8878a Mon Sep 17 00:00:00 2001 From: comunodi Date: Mon, 28 Jan 2019 01:22:18 +0300 Subject: [PATCH 05/49] Optimize memory consumption --- .../Dictionaries/RedisBlockInputStream.cpp | 22 ++++++++------ dbms/src/Dictionaries/RedisBlockInputStream.h | 10 ++++--- .../Dictionaries/RedisDictionarySource.cpp | 29 ++----------------- 3 files changed, 21 insertions(+), 40 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index a7d0b27bd09..32d9abc71a8 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -35,10 +35,11 @@ namespace DB RedisBlockInputStream::RedisBlockInputStream( - const Poco::Redis::Array & reply_array_, + const std::shared_ptr & client_, + const Poco::Redis::Array & keys_, const DB::Block & sample_block, const size_t max_block_size) - : reply_array(reply_array_), max_block_size{max_block_size} + : client(client_), keys(keys_), max_block_size{max_block_size} { description.init(sample_block); } @@ -102,6 +103,7 @@ namespace DB ErrorCodes::TYPE_MISMATCH}; } }; + switch (type) { case ValueType::UInt8: @@ -204,9 +206,7 @@ namespace DB }; size_t num_rows = 0; - - const auto & keys = reply_array.get(0); - const auto & values = reply_array.get(1); + Poco::Redis::Command commandForValues("MGET"); while (num_rows < max_block_size) { @@ -220,17 +220,21 @@ namespace DB const auto & key = *(keys.begin() + cursor); insertValueByIdx(0, key); + commandForValues.addRedisType(key); + } - const auto & value = *(values.begin() + cursor); + if (num_rows == 0) + return {}; + + Poco::Redis::Array values = client->execute(commandForValues); + for (size_t i = 0; i < num_rows; ++i) { + const Poco::Redis::RedisType::Ptr & value = *(values.begin() + i); if (value.isNull()) insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); else insertValueByIdx(1, value); } - if (num_rows == 0) - return {}; - return description.sample_block.cloneWithColumns(std::move(columns)); } diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 1884ce7a0f6..d1c3ad157e9 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include "ExternalResultDescription.h" @@ -18,11 +18,12 @@ namespace Poco namespace DB { /// Converts Redis Cursor to a stream of Blocks - class RedisBlockInputStream final : public IProfilingBlockInputStream + class RedisBlockInputStream final : public IBlockInputStream { public: RedisBlockInputStream( - const Poco::Redis::Array & reply_array_, + const std::shared_ptr & client_, + const Poco::Redis::Array & keys_, const Block & sample_block, const size_t max_block_size); @@ -35,7 +36,8 @@ namespace DB private: Block readImpl() override; - Poco::Redis::Array reply_array; + std::shared_ptr client; + Poco::Redis::Array keys; const size_t max_block_size; ExternalResultDescription description; size_t cursor = 0; diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index b3ec940a8d1..b4c1ac97330 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -53,17 +53,6 @@ namespace DB # include "RedisBlockInputStream.h" -namespace -{ - template - Poco::Redis::Array makeResult(const K & keys, const V & values) { - Poco::Redis::Array result; - result << keys << values; - return result; - } -} - - namespace DB { namespace ErrorCodes @@ -122,14 +111,7 @@ namespace DB Poco::Redis::Array keys = client->execute(commandForKeys); - Poco::Redis::Array commandForValues; - commandForValues << "MGET"; - for (const Poco::Redis::RedisType::Ptr & key : keys) - commandForValues.addRedisType(key); - - Poco::Redis::Array values = client->execute(commandForValues); - - return std::make_shared(makeResult(keys, values), sample_block, max_block_size); + return std::make_shared(client, std::move(keys), sample_block, max_block_size); } @@ -139,18 +121,11 @@ namespace DB throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; Poco::Redis::Array keys; - Poco::Redis::Array command; - command << "MGET"; for (const UInt64 id : ids) - { keys << static_cast(id); - command << static_cast(id); - } - Poco::Redis::Array values = client->execute(command); - - return std::make_shared(makeResult(keys, values), sample_block, max_block_size); + return std::make_shared(client, std::move(keys), sample_block, max_block_size); } From 6e28c22876578ac2b375d999d695bb1382b76e7e Mon Sep 17 00:00:00 2001 From: comunodi Date: Tue, 12 Feb 2019 12:23:22 +0300 Subject: [PATCH 06/49] Add tests --- cmake/find_poco.cmake | 2 +- dbms/src/Dictionaries/CMakeLists.txt | 6 +- .../src/Dictionaries/registerDictionaries.cpp | 2 + .../generate_and_test.py | 76 +++++++++++++++++++ dbms/tests/external_dictionaries/run.sh | 29 +++++++ .../dicts/external_dicts_dict_sources.md | 20 +++++ 6 files changed, 133 insertions(+), 2 deletions(-) diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index ade020141bb..4583fafac54 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -132,7 +132,7 @@ if (Poco_SQLODBC_LIBRARY AND ODBC_FOUND) set (USE_POCO_SQLODBC 1) endif () -message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_INCLUDE_DIR}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") +message(STATUS "Using Poco: ${Poco_INCLUDE_DIRS} : ${Poco_Foundation_LIBRARY},${Poco_Util_LIBRARY},${Poco_Net_LIBRARY},${Poco_NetSSL_LIBRARY},${Poco_Crypto_LIBRARY},${Poco_XML_LIBRARY},${Poco_Data_LIBRARY},${Poco_DataODBC_LIBRARY},${Poco_SQL_LIBRARY},${Poco_SQLODBC_LIBRARY},${Poco_MongoDB_LIBRARY},${Poco_Redis_LIBRARY}; MongoDB=${USE_POCO_MONGODB}, Redis=${USE_POCO_REDIS}, DataODBC=${USE_POCO_DATAODBC}, NetSSL=${USE_POCO_NETSSL}") # How to make sutable poco: # use branch: diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index de6bdd6b915..0a5d198dd8c 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -37,7 +37,11 @@ if(USE_POCO_MONGODB) endif() if(USE_POCO_REDIS) - target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${Poco_Redis_INCLUDE_DIR}) + # for code highlighting in CLion + # target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${Poco_Redis_INCLUDE_DIR}) + + # for build + target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_Redis_LIBRARY}) endif() add_subdirectory(Embedded) diff --git a/dbms/src/Dictionaries/registerDictionaries.cpp b/dbms/src/Dictionaries/registerDictionaries.cpp index 1a8c5a7be7b..ee320d7177b 100644 --- a/dbms/src/Dictionaries/registerDictionaries.cpp +++ b/dbms/src/Dictionaries/registerDictionaries.cpp @@ -7,6 +7,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & source_factory); void registerDictionarySourceMysql(DictionarySourceFactory & source_factory); void registerDictionarySourceClickHouse(DictionarySourceFactory & source_factory); void registerDictionarySourceMongoDB(DictionarySourceFactory & source_factory); +void registerDictionarySourceRedis(DictionarySourceFactory & source_factory); void registerDictionarySourceXDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceJDBC(DictionarySourceFactory & source_factory); void registerDictionarySourceExecutable(DictionarySourceFactory & source_factory); @@ -30,6 +31,7 @@ void registerDictionaries() registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); registerDictionarySourceMongoDB(source_factory); + registerDictionarySourceRedis(source_factory); registerDictionarySourceXDBC(source_factory); registerDictionarySourceJDBC(source_factory); registerDictionarySourceExecutable(source_factory); diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index 2c72d29de9d..f4891424c21 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -119,6 +119,17 @@ def generate_structure(args): [ 'mongodb_user_flat', 0, True ], ]) + if not args.no_redis: + dictionaries.extend([ + [ 'redis_flat', 0, True ], + [ 'redis_hashed', 0, True ], + [ 'redis_cache', 0, True ], + [ 'redis_complex_integers_key_hashed', 1, False ], + [ 'redis_complex_integers_key_cache', 1, False ], + [ 'redis_complex_mixed_key_hashed', 2, False ], + [ 'redis_complex_mixed_key_cache', 2, False ], + ]) + if args.use_lib: dictionaries.extend([ # [ 'library_flat', 0, True ], @@ -382,6 +393,51 @@ def generate_data(args): print 'Could not create MongoDB collection' exit(-1) + # create Redis storage from complete_query via JSON file + if not args.no_redis: + print 'Creating Redis storage' + table_rows = json.loads(subprocess.check_output([ + args.client, + '--port', + args.port, + '--output_format_json_quote_64bit_integers', + '0', + '--query', + "select * from test.dictionary_source where not ignore(" \ + "concat('new Date(\\'', toString(Date_), '\\')') as Date_, " \ + "concat('new ISODate(\\'', replaceOne(toString(DateTime_, 'UTC'), ' ', 'T'), 'Z\\')') as DateTime_" \ + ") format JSON" + ]))['data'] + + # print json.dumps(table_rows) + + # For Integers the first byte of the reply is ":" + # For Bulk Strings the first byte of the reply is "$" + + proto_for_redis = "" + for counter, collection in enumerate(table_rows): + proto_for_redis += "SELECT " + str(counter) + "\r\n" + proto_for_redis += "FLUSHDB\r\n" + for key, value in collection.iteritems(): + value_type = "$" + if isinstance(value, int): + value_type = ":" + else: + value = str(value) + if "Date" in value: + value = value[value.find("'") + 1:-2] + + proto_for_redis += "SET " + "$" + key + " " + value_type + str(value) + "\r\n" + + # with open("clickhouse_redis.log", "w") as f: + # f.write(json.dumps(table_rows) + "\n" + proto_for_redis + "\n") + + open('generated/full.json', 'w').write(proto_for_redis) + result = system('cat {0}/full.json | redis-cli > \\dev\\null'.format(args.generated)) + if result != 0: + print 'Could not create Redis storage' + exit(-1) + def generate_dictionaries(args): dictionary_skeleton = ''' @@ -482,6 +538,13 @@ def generate_dictionaries(args): '''.format(mongo_host=args.mongo_host) + source_redis = ''' + + {redis_host} + 6379 + + '''.format(redis_host=args.redis_host) + source_executable = ''' cat %s @@ -668,6 +731,17 @@ def generate_dictionaries(args): [ source_mongodb_user, layout_flat ], ]) + if not args.no_redis: + sources_and_layouts.extend([ + [ source_redis, layout_flat ], + [ source_redis, layout_hashed ], + [ source_redis, layout_cache ], + [ source_redis, layout_complex_key_cache ], + [ source_redis, layout_complex_key_hashed ], + [ source_redis, layout_complex_key_hashed ], + [ source_redis, layout_complex_key_cache ], + ]) + if args.use_lib: sources_and_layouts.extend([ #[ source_library, layout_flat ], @@ -947,6 +1021,8 @@ if __name__ == '__main__': parser.add_argument('--no_mongo', action='store_true', help = 'Dont use mongodb dictionaries') parser.add_argument('--mongo_host', default = 'localhost', help = 'mongo server host') parser.add_argument('--use_mongo_user', action='store_true', help = 'Test mongodb with user-pass') + parser.add_argument('--no_redis', action='store_true', help = 'Dont use redis dictionaries') + parser.add_argument('--redis_host', default = 'localhost', help = 'redis server host') parser.add_argument('--no_http', action='store_true', help = 'Dont use http dictionaries') parser.add_argument('--http_port', default = 58000, help = 'http server port') diff --git a/dbms/tests/external_dictionaries/run.sh b/dbms/tests/external_dictionaries/run.sh index a04be3080a9..4560e167c57 100755 --- a/dbms/tests/external_dictionaries/run.sh +++ b/dbms/tests/external_dictionaries/run.sh @@ -9,6 +9,7 @@ fi NO_MYSQL=0 NO_MONGO=0 +NO_REDIS=0 for arg in "$@"; do if [ "$arg" = "--no_mysql" ]; then @@ -17,6 +18,9 @@ for arg in "$@"; do if [ "$arg" == "--no_mongo" ]; then NO_MONGO=1 fi + if [ "$arg" == "--no_redis" ]; then + NO_REDIS=1 + fi done # MySQL @@ -101,6 +105,31 @@ else fi fi +# Redis +if [ $NO_REDIS -eq 1 ]; then + echo "Not using Redis" +else + if [ -z $(which redis-cli) ]; then + echo 'Installing Redis' + + sudo apt-get update &>/dev/null + sudo apt-get install redis-server + + which redis-server >/dev/null + if [ $? -ne 0 ]; then + echo 'Failed installing redis-server' + exit -1 + fi + fi + + echo | redis-cli &>/dev/null + if [ $? -ne 0 ]; then + sudo systemctl start redis.service + else + echo 'Redis already started' + fi +fi + # ClickHouse clickhouse-server &> clickhouse.log & sleep 3 diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index f26967c2d0f..67d2d980b75 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -30,6 +30,7 @@ Types of sources (`source_type`): - [MySQL](#dicts-external_dicts_dict_sources-mysql) - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) - [MongoDB](#dicts-external_dicts_dict_sources-mongodb) + - [Redis](#dicts-external_dicts_dict_sources-redis) - [ODBC](#dicts-external_dicts_dict_sources-odbc) @@ -421,4 +422,23 @@ Setting fields: - `db` – Name of the database. - `collection` – Name of the collection. + +### Redis {#dicts-external_dicts_dict_sources-redis} + +Example of settings: + +```xml + + + localhost + 6379 + + +``` + +Setting fields: + +- `host` – The Redis host. +- `port` – The port on the Redis server. + [Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_sources/) From f2eadcfe49606362e6e49f19c385212050012423 Mon Sep 17 00:00:00 2001 From: comunodi Date: Tue, 12 Feb 2019 14:27:49 +0300 Subject: [PATCH 07/49] Remove some escaped lines --- dbms/src/Dictionaries/RedisBlockInputStream.h | 1 - dbms/src/Dictionaries/RedisDictionarySource.h | 7 ------- dbms/tests/external_dictionaries/generate_and_test.py | 5 ----- 3 files changed, 13 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index d1c3ad157e9..f5117ec6a9c 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -17,7 +17,6 @@ namespace Poco namespace DB { -/// Converts Redis Cursor to a stream of Blocks class RedisBlockInputStream final : public IBlockInputStream { public: diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index e3566731f06..d41e557ce24 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -22,7 +22,6 @@ namespace Poco namespace DB { -/// Allows loading dictionaries from a Redis collection class RedisDictionarySource final : public IDictionarySource { RedisDictionarySource( @@ -77,9 +76,3 @@ namespace DB } #endif - -/*namespace DB -{ -class DictionarySourceFactory; -void registerDictionarySourceRedis(DictionarySourceFactory & factory); -}*/ diff --git a/dbms/tests/external_dictionaries/generate_and_test.py b/dbms/tests/external_dictionaries/generate_and_test.py index ebcd62ece5c..90426962189 100755 --- a/dbms/tests/external_dictionaries/generate_and_test.py +++ b/dbms/tests/external_dictionaries/generate_and_test.py @@ -409,8 +409,6 @@ def generate_data(args): ") format JSON" ]))['data'] - # print json.dumps(table_rows) - # For Integers the first byte of the reply is ":" # For Bulk Strings the first byte of the reply is "$" @@ -429,9 +427,6 @@ def generate_data(args): proto_for_redis += "SET " + "$" + key + " " + value_type + str(value) + "\r\n" - # with open("clickhouse_redis.log", "w") as f: - # f.write(json.dumps(table_rows) + "\n" + proto_for_redis + "\n") - open('generated/full.json', 'w').write(proto_for_redis) result = system('cat {0}/full.json | redis-cli > \\dev\\null'.format(args.generated)) if result != 0: From 572463f9c141b79c6e0e4112fa0c2f0bc0e5d8c1 Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 13 Feb 2019 03:05:43 +0300 Subject: [PATCH 08/49] Style fix --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 6 ++++-- dbms/src/Dictionaries/RedisDictionarySource.cpp | 2 +- dbms/src/Dictionaries/RedisDictionarySource.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 32d9abc71a8..85c92aad638 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -210,7 +210,8 @@ namespace DB while (num_rows < max_block_size) { - if (cursor == keys.size()) { + if (cursor == keys.size()) + { all_read = true; break; } @@ -227,7 +228,8 @@ namespace DB return {}; Poco::Redis::Array values = client->execute(commandForValues); - for (size_t i = 0; i < num_rows; ++i) { + for (size_t i = 0; i < num_rows; ++i) + { const Poco::Redis::RedisType::Ptr & value = *(values.begin() + i); if (value.isNull()) insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index b4c1ac97330..7d546d39cf0 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -108,7 +108,7 @@ namespace DB { Poco::Redis::Array commandForKeys; commandForKeys << "KEYS" << "*"; - + Poco::Redis::Array keys = client->execute(commandForKeys); return std::make_shared(client, std::move(keys), sample_block, max_block_size); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index d41e557ce24..f50a85ca10e 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -55,7 +55,7 @@ namespace DB BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override { throw Exception{"Method loadKeys is unsupported for RedisDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; - }; + } bool isModified() const override { return true; } From 162b26fe07626971ebd34cfcd3bd0c7e9200a7b6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 21 Mar 2019 21:10:55 +0300 Subject: [PATCH 09/49] Add integration test for redis --- dbms/tests/integration/helpers/cluster.py | 20 ++++++++++-- .../external_sources.py | 32 +++++++++++++++++++ .../test_external_dictionaries/test.py | 5 +-- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 240cc2c8695..bf0abdff2b0 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -101,6 +101,7 @@ class ClickHouseCluster: self.with_odbc_drivers = False self.with_hdfs = False self.with_mongo = False + self.with_redis = False self.docker_client = None self.is_up = False @@ -112,7 +113,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -130,7 +131,7 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, self.base_configs_dir, self.server_bin_path, + self.zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, self.base_configs_dir, self.server_bin_path, self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image, stay_alive=stay_alive, ipv4_address=ipv4_address, ipv6_address=ipv6_address) @@ -185,6 +186,13 @@ class ClickHouseCluster: self.base_mongo_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mongo.yml')] + if with_redis and not self.with_redis: + self.with_redis = True + self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_redis.yml')]) + self.base_redis_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_redis.yml')] + + return instance @@ -316,6 +324,11 @@ class ClickHouseCluster: subprocess_check_call(self.base_mongo_cmd + ['up', '-d', '--force-recreate']) self.wait_mongo_to_start(30) + if self.with_redis and self.base_redis_cmd: + subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate']) + time.sleep(10) + + subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) start_deadline = time.time() + 20.0 # seconds @@ -414,7 +427,7 @@ class ClickHouseInstance: def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, base_configs_dir, server_bin_path, odbc_bridge_bin_path, + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_mongo, with_redis, base_configs_dir, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): @@ -439,6 +452,7 @@ class ClickHouseInstance: self.with_mysql = with_mysql self.with_kafka = with_kafka self.with_mongo = with_mongo + self.with_redis = with_redis self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, 'docker_compose.yml') diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 71dc05ca78c..e0489acf7cf 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -2,6 +2,7 @@ import warnings import pymysql.cursors import pymongo +import redis from tzlocal import get_localzone import datetime import os @@ -372,3 +373,34 @@ class SourceHTTP(SourceHTTPBase): class SourceHTTPS(SourceHTTPBase): def _get_schema(self): return "https" + + +class SourceRedis(ExternalSource): + def get_source_str(self, table_name): + return ''' + + {host} + {port} + + '''.format( + host=self.docker_hostname, + port=self.docker_port, + ) + + def prepare(self, structure, table_name, cluster): + self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port) + self.prepared = True + + def load_data(self, data, table_name): + for row_num, row in enumerate(data): + self.client.execute_command("SELECT " + str(row_num)) + self.client.execute_command("FLUSHDB") + for cell_name, cell_value in row.data.items(): + value_type = "$" + if isinstance(cell_value, int): + value_type = ":" + else: + cell_value = '"' + str(cell_value).replace(' ', '\s') + '"' + cmd = "SET " + "$" + cell_name + " " + value_type + str(cell_value) + print(cmd) + self.client.execute_command(cmd) diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index 314ec26a106..752b37cd760 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -5,7 +5,7 @@ import time from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed, SourceMongo -from external_sources import SourceHTTP, SourceHTTPS +from external_sources import SourceHTTP, SourceHTTPS, SourceRedis SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -79,6 +79,7 @@ LAYOUTS = [ ] SOURCES = [ + SourceRedis("Redis", "localhost", "6380", "redis1", "6379", "", ""), SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), @@ -120,7 +121,7 @@ def setup_module(module): for fname in os.listdir(dict_configs_path): main_configs.append(os.path.join(dict_configs_path, fname)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True) + node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_redis=True) cluster.add_instance('clickhouse1') @pytest.fixture(scope="module") From 09a130372e2b364c763fee1ddf96715c8ab37c44 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 30 Mar 2019 16:51:59 +0300 Subject: [PATCH 10/49] Missed yml file --- dbms/tests/integration/helpers/docker_compose_redis.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 dbms/tests/integration/helpers/docker_compose_redis.yml diff --git a/dbms/tests/integration/helpers/docker_compose_redis.yml b/dbms/tests/integration/helpers/docker_compose_redis.yml new file mode 100644 index 00000000000..205409b3a21 --- /dev/null +++ b/dbms/tests/integration/helpers/docker_compose_redis.yml @@ -0,0 +1,7 @@ +version: '2.2' +services: + redis1: + image: redis + restart: always + ports: + - 6380:6379 From 8abffd4f602deaf7e7dfdf4fbd0670d47f7eefbb Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 31 Mar 2019 00:42:13 +0300 Subject: [PATCH 11/49] Fix build --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 2 +- dbms/src/Dictionaries/RedisBlockInputStream.h | 2 +- dbms/src/Dictionaries/RedisDictionarySource.cpp | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 85c92aad638..0375e420430 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -210,7 +210,7 @@ namespace DB while (num_rows < max_block_size) { - if (cursor == keys.size()) + if (cursor >= keys.size()) { all_read = true; break; diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index f5117ec6a9c..95a563cee80 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -2,7 +2,7 @@ #include #include -#include "ExternalResultDescription.h" +#include namespace Poco diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 7d546d39cf0..07027d24e24 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -17,9 +17,9 @@ namespace DB Block & sample_block, const Context & /* context */) -> DictionarySourcePtr { #if USE_POCO_REDIS - return std::make_unique(dict_struct, config, config_prefix + ".redis", sample_block); + return std::make_unique(dict_struct, config, config_prefix + ".redis", sample_block); #else - (void)dict_struct; + (void)dict_struct; (void)config; (void)config_prefix; (void)sample_block; @@ -122,7 +122,7 @@ namespace DB Poco::Redis::Array keys; - for (const UInt64 id : ids) + for (UInt64 id : ids) keys << static_cast(id); return std::make_shared(client, std::move(keys), sample_block, max_block_size); From 411fcb19dbf0285b32a5005ffef9a36377d2fd07 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 31 Mar 2019 02:07:40 +0300 Subject: [PATCH 12/49] Missed python package --- dbms/tests/integration/image/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/image/Dockerfile b/dbms/tests/integration/image/Dockerfile index 1dd5c1713b2..9aada808356 100644 --- a/dbms/tests/integration/image/Dockerfile +++ b/dbms/tests/integration/image/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes - ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal +RUN pip install pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal redis ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce From d7771b8a07a3133cc88ba05c13c61ecbeb1ce565 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 7 Apr 2019 12:51:53 +0300 Subject: [PATCH 13/49] Throw exception instead if number of columns mismatch --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 0375e420430..7b5b68dc9cc 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -31,6 +31,7 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; + extern const int LOGICAL_ERROR; } @@ -182,11 +183,20 @@ namespace DB Block RedisBlockInputStream::readImpl() { + if (description.sample_block.rows() == 0) + all_read = true; + if (all_read) return {}; const size_t size = 2; - assert(size == description.sample_block.columns()); + if (size != description.sample_block.columns()) { + throw Exception{"Unsupported number of columns for key-value storage: " + + std::to_string(description.sample_block.columns()) + + " (expected: " + std::to_string(size) + ")", + ErrorCodes::LOGICAL_ERROR}; + } + MutableColumns columns(description.sample_block.columns()); for (const auto i : ext::range(0, size)) From 562f48ea96dfaace489296bd7debaa89791cd84c Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 14 Apr 2019 20:05:50 +0300 Subject: [PATCH 14/49] Optional select db before usage. Use only one column in tests --- .../Dictionaries/RedisDictionarySource.cpp | 16 ++++++++++ dbms/src/Dictionaries/RedisDictionarySource.h | 2 ++ .../test_external_dictionaries/dictionary.py | 5 +++- .../external_sources.py | 8 +++-- .../test_external_dictionaries/test.py | 30 +++++++++++-------- 5 files changed, 45 insertions(+), 16 deletions(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 07027d24e24..2b1536f1b02 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -58,6 +58,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; + extern const int SELECT_DB_FAILURE; } @@ -68,13 +69,26 @@ namespace DB const DictionaryStructure & dict_struct, const std::string & host, UInt16 port, + UInt8 db_index, const Block & sample_block) : dict_struct{dict_struct} , host{host} , port{port} + , db_index{db_index} , sample_block{sample_block} , client{std::make_shared(host, port)} { + if (db_index != 0) + { + Poco::Redis::Array command; + command << "SELECT" << db_index; + String reply = client->execute(command); + if (reply != "+OK\r\n") + { + throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, + ErrorCodes::SELECT_DB_FAILURE}; + } + } } @@ -87,6 +101,7 @@ namespace DB dict_struct, config.getString(config_prefix + ".host"), config.getUInt(config_prefix + ".port"), + config.getUInt(config_prefix + ".db_index", 0), sample_block) { } @@ -96,6 +111,7 @@ namespace DB : RedisDictionarySource{other.dict_struct, other.host, other.port, + other.db_index, other.sample_block} { } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index f50a85ca10e..1e528ce40e9 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -28,6 +28,7 @@ namespace DB const DictionaryStructure & dict_struct, const std::string & host, UInt16 port, + UInt8 db_index, const Block & sample_block); public: @@ -69,6 +70,7 @@ namespace DB const DictionaryStructure dict_struct; const std::string host; const UInt16 port; + const UInt8 db_index; // [0..15] Block sample_block; std::shared_ptr client; diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index bdddc7a9604..49f0ffc1c0b 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -87,12 +87,14 @@ class Field(object): class DictionaryStructure(object): - def __init__(self, layout, fields): + def __init__(self, layout, fields, is_kv=False): self.layout = layout self.keys = [] self.range_key = None self.ordinary_fields = [] self.range_fields = [] + self.is_kv = is_kv + for field in fields: if field.is_key: self.keys.append(field) @@ -286,6 +288,7 @@ class Dictionary(object): self.source = copy.deepcopy(source) self.config_path = config_path self.table_name = table_name + self.is_kv = source.is_kv def generate_config(self): with open(self.config_path, 'w') as result: diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index e0489acf7cf..57c862cbfe6 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -10,7 +10,7 @@ import os class ExternalSource(object): def __init__(self, name, internal_hostname, internal_port, - docker_hostname, docker_port, user, password): + docker_hostname, docker_port, user, password, is_kv): self.name = name self.internal_hostname = internal_hostname self.internal_port = int(internal_port) @@ -18,6 +18,7 @@ class ExternalSource(object): self.docker_port = int(docker_port) self.user = user self.password = password + self.is_kv = is_kv def get_source_str(self, table_name): raise NotImplementedError("Method {} is not implemented for {}".format( @@ -381,6 +382,7 @@ class SourceRedis(ExternalSource): {host} {port} + 0 '''.format( host=self.docker_hostname, @@ -392,8 +394,7 @@ class SourceRedis(ExternalSource): self.prepared = True def load_data(self, data, table_name): - for row_num, row in enumerate(data): - self.client.execute_command("SELECT " + str(row_num)) + for row_num, row in enumerate(data): # FIXME: yield self.client.execute_command("FLUSHDB") for cell_name, cell_value in row.data.items(): value_type = "$" @@ -404,3 +405,4 @@ class SourceRedis(ExternalSource): cmd = "SET " + "$" + cell_name + " " + value_type + str(cell_value) print(cmd) self.client.execute_command(cmd) + return diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index 752b37cd760..93e1db2ce70 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -79,16 +79,16 @@ LAYOUTS = [ ] SOURCES = [ - SourceRedis("Redis", "localhost", "6380", "redis1", "6379", "", ""), - SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), - SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), - SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), - SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", ""), - SourceFile("File", "localhost", "9000", "node", "9000", "", ""), - SourceExecutableHashed("ExecutableHashed", "localhost", "9000", "node", "9000", "", ""), - SourceExecutableCache("ExecutableCache", "localhost", "9000", "node", "9000", "", ""), - SourceHTTP("SourceHTTP", "localhost", "9000", "clickhouse1", "9000", "", ""), - SourceHTTPS("SourceHTTPS", "localhost", "9000", "clickhouse1", "9000", "", ""), + SourceRedis("Redis", "localhost", "6380", "redis1", "6379", "", "", True), + SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse", False), + SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse", False), + SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", "", False), + SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", "", False), + SourceFile("File", "localhost", "9000", "node", "9000", "", "", False), + SourceExecutableHashed("ExecutableHashed", "localhost", "9000", "node", "9000", "", "", False), + SourceExecutableCache("ExecutableCache", "localhost", "9000", "node", "9000", "", "", False), + SourceHTTP("SourceHTTP", "localhost", "9000", "clickhouse1", "9000", "", "", False), + SourceHTTPS("SourceHTTPS", "localhost", "9000", "clickhouse1", "9000", "", "", False), ] DICTIONARIES = [] @@ -108,9 +108,9 @@ def setup_module(module): for layout in LAYOUTS: for source in SOURCES: if source.compatible_with_layout(layout): - structure = DictionaryStructure(layout, FIELDS[layout.layout_type]) + structure = DictionaryStructure(layout, FIELDS[layout.layout_type], source.is_kv) dict_name = source.name + "_" + layout.name - dict_path = os.path.join(dict_configs_path, dict_name + '.xml') + dict_path = os.path.join(dict_configs_path, dict_name + '.xml') # FIXME: single xml config for every column dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name) dictionary.generate_config() DICTIONARIES.append(dictionary) @@ -171,6 +171,8 @@ def test_simple_dictionaries(started_cluster): for query in dct.get_select_get_or_default_queries(field, row): queries_with_answers.append((query, field.default_value_for_get)) + if dct.is_kv: + break for query in dct.get_hierarchical_queries(data[0]): queries_with_answers.append((query, [1])) @@ -223,6 +225,8 @@ def test_complex_dictionaries(started_cluster): for query in dct.get_select_get_or_default_queries(field, row): queries_with_answers.append((query, field.default_value_for_get)) + if dct.is_kv: + break for query, answer in queries_with_answers: print query @@ -258,6 +262,8 @@ def test_ranged_dictionaries(started_cluster): if not field.is_key and not field.is_range: for query in dct.get_select_get_queries(field, row): queries_with_answers.append((query, row.get_value_by_name(field.name))) + if dct.is_kv: + break for query, answer in queries_with_answers: print query From f3ead9fe5b8628a35f1030310288bbd2de034a9f Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 14 Apr 2019 20:09:33 +0300 Subject: [PATCH 15/49] Style fix --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 3 +-- dbms/src/Dictionaries/RedisDictionarySource.cpp | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 7b5b68dc9cc..e705ee01474 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -190,12 +190,11 @@ namespace DB return {}; const size_t size = 2; - if (size != description.sample_block.columns()) { + if (size != description.sample_block.columns()) throw Exception{"Unsupported number of columns for key-value storage: " + std::to_string(description.sample_block.columns()) + " (expected: " + std::to_string(size) + ")", ErrorCodes::LOGICAL_ERROR}; - } MutableColumns columns(description.sample_block.columns()); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 2b1536f1b02..717010ac11c 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -84,10 +84,8 @@ namespace DB command << "SELECT" << db_index; String reply = client->execute(command); if (reply != "+OK\r\n") - { throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, ErrorCodes::SELECT_DB_FAILURE}; - } } } From 80827b5a9ff29058dec1b166473012a48a250f3a Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 14 Apr 2019 20:44:44 +0300 Subject: [PATCH 16/49] Build fix --- dbms/src/Dictionaries/RedisDictionarySource.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 717010ac11c..3ef6358c2dd 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -81,8 +81,8 @@ namespace DB if (db_index != 0) { Poco::Redis::Array command; - command << "SELECT" << db_index; - String reply = client->execute(command); + command << "SELECT" << static_cast(db_index); + std::string reply = client->execute(command); if (reply != "+OK\r\n") throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, ErrorCodes::SELECT_DB_FAILURE}; From 5849d669753f0d49b2902393490513021ebc3480 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 14 Apr 2019 20:50:05 +0300 Subject: [PATCH 17/49] Use existing ErrorCode to indicate SELECT failure --- dbms/src/Dictionaries/RedisDictionarySource.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 3ef6358c2dd..d4584e0d568 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -58,7 +58,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int SELECT_DB_FAILURE; + extern const int CANNOT_SELECT; } @@ -85,7 +85,7 @@ namespace DB std::string reply = client->execute(command); if (reply != "+OK\r\n") throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, - ErrorCodes::SELECT_DB_FAILURE}; + ErrorCodes::CANNOT_SELECT}; } } From 27d138818d96a505c8034386c545091c69674b40 Mon Sep 17 00:00:00 2001 From: Gleb-Tretyakov Date: Mon, 15 Apr 2019 00:21:11 +0300 Subject: [PATCH 18/49] fix invalid memory dereference --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index e705ee01474..9b11fcb85ae 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -225,12 +225,13 @@ namespace DB break; } - ++num_rows; - ++cursor; const auto & key = *(keys.begin() + cursor); insertValueByIdx(0, key); commandForValues.addRedisType(key); + + ++num_rows; + ++cursor; } if (num_rows == 0) From 9778f7c2f3e4cfe2efd0650acebfc1d6adcf31f9 Mon Sep 17 00:00:00 2001 From: comunodi Date: Mon, 15 Apr 2019 04:34:10 +0300 Subject: [PATCH 19/49] More logs --- .../Dictionaries/RedisBlockInputStream.cpp | 28 +++++++++++++++---- .../Dictionaries/RedisDictionarySource.cpp | 13 +++++++++ .../test_external_dictionaries/dictionary.py | 3 ++ .../test_external_dictionaries/test.py | 6 ++-- 4 files changed, 41 insertions(+), 9 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 9b11fcb85ae..004f223e723 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -25,6 +25,9 @@ # include "DictionaryStructure.h" # include "RedisBlockInputStream.h" +# include "Poco/Logger.h" +# include "common/logger_useful.h" + namespace DB { @@ -56,6 +59,12 @@ namespace DB template void insertNumber(IColumn & column, const Poco::Redis::RedisType::Ptr & value, const std::string & name) { + LOG_ERROR(&Logger::get("Redis"), "Got value: " + value->toString() + "with type=" + + ", isInteger=" + DB::toString(value->isInteger()) + + ", isSimpleString=" + DB::toString(value->isSimpleString()) + + ", isBulkString=" + DB::toString(value->isBulkString()) + + ", isArray=" + DB::toString(value->isArray()) + + ", isError=" + DB::toString(value->isError())); switch (value->type()) { case Poco::Redis::RedisTypeTraits::TypeId: @@ -68,7 +77,7 @@ namespace DB break; case Poco::Redis::RedisTypeTraits::TypeId: { - const auto &bs = + const auto & bs = static_cast *>(value.get())->value(); if (bs.isNull()) static_cast &>(column).getData().emplace_back(); @@ -78,7 +87,8 @@ namespace DB } default: throw Exception( - "Type mismatch, expected a number, got type id = " + toString(value->type()) + " for column " + name, + "Type mismatch, expected a number, got " + value->toString() + + " with type id = " + toString(value->type()) + " for column " + name, ErrorCodes::TYPE_MISMATCH); } } @@ -189,6 +199,9 @@ namespace DB if (all_read) return {}; + for (size_t i = 0; i < 3; ++i) + if (description.sample_block.columns() >= i + 1) + LOG_ERROR(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); const size_t size = 2; if (size != description.sample_block.columns()) throw Exception{"Unsupported number of columns for key-value storage: " @@ -225,21 +238,27 @@ namespace DB break; } - + LOG_ERROR(&Logger::get("Redis"), "Get key: " + DB::toString(cursor)); const auto & key = *(keys.begin() + cursor); insertValueByIdx(0, key); commandForValues.addRedisType(key); - + LOG_ERROR(&Logger::get("Redis"), "Key has read: " + DB::toString(cursor)); + ++num_rows; ++cursor; } + LOG_ERROR(&Logger::get("Redis"), "All " + DB::toString(num_rows) + " rows added"); + if (num_rows == 0) return {}; + LOG_ERROR(&Logger::get("Redis"), "Req to get values"); Poco::Redis::Array values = client->execute(commandForValues); + LOG_ERROR(&Logger::get("Redis"), "Req executed"); for (size_t i = 0; i < num_rows; ++i) { + LOG_ERROR(&Logger::get("Redis"), "Get value from : " + DB::toString(i)); const Poco::Redis::RedisType::Ptr & value = *(values.begin() + i); if (value.isNull()) insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); @@ -249,7 +268,6 @@ namespace DB return description.sample_block.cloneWithColumns(std::move(columns)); } - } #endif diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index d4584e0d568..d32d45d8ed5 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -52,6 +52,9 @@ namespace DB # include # include "RedisBlockInputStream.h" +# include "Poco/Logger.h" +# include "common/logger_useful.h" + namespace DB { @@ -120,17 +123,25 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadAll() { + LOG_ERROR(&Logger::get("Redis"), "Redis in loadAll"); + Poco::Redis::Array commandForKeys; commandForKeys << "KEYS" << "*"; + LOG_ERROR(&Logger::get("Redis"), "Command for keys: " + commandForKeys.toString()); Poco::Redis::Array keys = client->execute(commandForKeys); + LOG_ERROR(&Logger::get("Redis"), "Command for keys executed"); + LOG_ERROR(&Logger::get("Redis"), "KEYS: " + keys.toString()); + return std::make_shared(client, std::move(keys), sample_block, max_block_size); } BlockInputStreamPtr RedisDictionarySource::loadIds(const std::vector & ids) { + LOG_ERROR(&Logger::get("Redis"), "Redis in loadIds"); + if (!dict_struct.id) throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; @@ -139,6 +150,8 @@ namespace DB for (UInt64 id : ids) keys << static_cast(id); + LOG_ERROR(&Logger::get("Redis"), "KEYS: " + keys.toString()); + return std::make_shared(client, std::move(keys), sample_block, max_block_size); } diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index 49f0ffc1c0b..7e44aef455c 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -118,6 +118,9 @@ class DictionaryStructure(object): fields_strs = [] for field in self.ordinary_fields: fields_strs.append(field.get_attribute_str()) + if self.is_kv: + break + key_strs = [] if self.layout.is_complex: for key_field in self.keys: diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index 93e1db2ce70..d8b92f4e542 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -206,7 +206,7 @@ def test_complex_dictionaries(started_cluster): 'my', 255.543, 3332221.44]), ] - complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex" and not d.is_kv] for dct in complex_dicts: dct.load_data(data) @@ -225,8 +225,6 @@ def test_complex_dictionaries(started_cluster): for query in dct.get_select_get_or_default_queries(field, row): queries_with_answers.append((query, field.default_value_for_get)) - if dct.is_kv: - break for query, answer in queries_with_answers: print query @@ -249,7 +247,7 @@ def test_ranged_dictionaries(started_cluster): 32.543, 3332543.4]), ] - ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] + ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged" and not d.is_kv] for dct in ranged_dicts: dct.load_data(data) From d2427227dd89f32bf9cfb8aaf2a6ecf22bce8c4c Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 02:13:07 +0300 Subject: [PATCH 20/49] Support complex key with 1 or 2 parts --- .../Dictionaries/RedisBlockInputStream.cpp | 135 ++++++++++++------ .../Dictionaries/RedisDictionarySource.cpp | 64 ++++++++- dbms/src/Dictionaries/RedisDictionarySource.h | 25 ++++ 3 files changed, 174 insertions(+), 50 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 004f223e723..8e9aece3670 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -35,6 +35,7 @@ namespace DB { extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; + extern const int LIMIT_EXCEEDED; } @@ -107,10 +108,10 @@ namespace DB ErrorCodes::TYPE_MISMATCH}; return bs.value(); } - case Poco::Redis::RedisTypeTraits::TypeId: - return static_cast *>(value.get())->value(); + case Poco::Redis::RedisTypeTraits::TypeId: + return static_cast *>(value.get())->value(); default: - throw Exception{"Type mismatch, expected String, got type id = " + toString(value->type()) + " for column " + name, + throw Exception{"Type mismatch, expected std::string, got type id = " + toString(value->type()) + " for column " + name, ErrorCodes::TYPE_MISMATCH}; } }; @@ -193,21 +194,23 @@ namespace DB Block RedisBlockInputStream::readImpl() { - if (description.sample_block.rows() == 0) + if (description.sample_block.rows() == 0 || keys.size() == 0) all_read = true; if (all_read) return {}; - for (size_t i = 0; i < 3; ++i) + for (size_t i = 0; i < 5; ++i) if (description.sample_block.columns() >= i + 1) LOG_ERROR(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); - const size_t size = 2; - if (size != description.sample_block.columns()) - throw Exception{"Unsupported number of columns for key-value storage: " - + std::to_string(description.sample_block.columns()) - + " (expected: " + std::to_string(size) + ")", - ErrorCodes::LOGICAL_ERROR}; + + const size_t size = description.sample_block.columns(); +// const size_t size = 2; +// if (size != description.sample_block.columns()) +// throw Exception{"Unsupported number of columns for key-value storage: " +// + DB::toString(description.sample_block.columns()) +// + " (expected: " + DB::toString(size) + ")", +// ErrorCodes::LOGICAL_ERROR}; MutableColumns columns(description.sample_block.columns()); @@ -227,43 +230,89 @@ namespace DB insertValue(*columns[idx], description.types[idx].first, value, name); }; - size_t num_rows = 0; - Poco::Redis::Command commandForValues("MGET"); - - while (num_rows < max_block_size) + if (keys.begin()->get()->isArray()) { - if (cursor >= keys.size()) + size_t num_rows = 0; + while (num_rows < max_block_size) { - all_read = true; - break; + if (cursor >= keys.size()) + { + all_read = true; + break; + } + + const auto & primary_with_secondary = *(keys.begin() + cursor); + const auto & keys_array = + static_cast *>(primary_with_secondary.get())->value(); + if (keys_array.size() < 2) + { + throw Exception{"Too low keys in request to source: " + DB::toString(keys_array.size()) + + ", expected 2 or more", + ErrorCodes::LOGICAL_ERROR}; + } + if (num_rows + keys_array.size() - 1 > max_block_size) + { + if (num_rows == 0) + throw Exception{"Too many (" + DB::toString(keys_array.size()) + ") key attributes", + ErrorCodes::LIMIT_EXCEEDED}; + break; + } + + Poco::Redis::Command commandForValues("HMGET"); + const auto & primary_key = *keys_array.begin(); + for (size_t i = 1; i < keys_array.size(); ++i) + { + const auto & secondary_key = *(keys_array.begin() + i); + insertValueByIdx(0, primary_key); + insertValueByIdx(1, secondary_key); + commandForValues.addRedisType(secondary_key); + } + + Poco::Redis::Array values = client->execute(commandForValues); + for (const auto & value : values) + { + if (value.isNull()) + insertDefaultValue(*columns[2], *description.sample_block.getByPosition(2).column); + else + insertValueByIdx(2, value); + } + + num_rows += keys_array.size() - 1; + cursor += keys_array.size() - 1; + } + } + else + { + size_t num_rows = 0; + Poco::Redis::Command commandForValues("MGET"); + + while (num_rows < max_block_size) + { + if (cursor >= keys.size()) + { + all_read = true; + break; + } + + const auto & key = *(keys.begin() + cursor); + insertValueByIdx(0, key); + commandForValues.addRedisType(key); + + ++num_rows; + ++cursor; } - LOG_ERROR(&Logger::get("Redis"), "Get key: " + DB::toString(cursor)); - const auto & key = *(keys.begin() + cursor); - insertValueByIdx(0, key); - commandForValues.addRedisType(key); - LOG_ERROR(&Logger::get("Redis"), "Key has read: " + DB::toString(cursor)); + if (num_rows == 0) + return {}; - ++num_rows; - ++cursor; - } - - LOG_ERROR(&Logger::get("Redis"), "All " + DB::toString(num_rows) + " rows added"); - - if (num_rows == 0) - return {}; - - LOG_ERROR(&Logger::get("Redis"), "Req to get values"); - Poco::Redis::Array values = client->execute(commandForValues); - LOG_ERROR(&Logger::get("Redis"), "Req executed"); - for (size_t i = 0; i < num_rows; ++i) - { - LOG_ERROR(&Logger::get("Redis"), "Get value from : " + DB::toString(i)); - const Poco::Redis::RedisType::Ptr & value = *(values.begin() + i); - if (value.isNull()) - insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); - else - insertValueByIdx(1, value); + Poco::Redis::Array values = client->execute(commandForValues); + for (const auto & value : values) + { + if (value.isNull()) + insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); + else + insertValueByIdx(1, value); + } } return description.sample_block.cloneWithColumns(std::move(columns)); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index d32d45d8ed5..ce9c1e6f408 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -62,6 +62,7 @@ namespace DB { extern const int UNSUPPORTED_METHOD; extern const int CANNOT_SELECT; + extern const int INVALID_CONFIG_PARAMETER; } @@ -73,18 +74,36 @@ namespace DB const std::string & host, UInt16 port, UInt8 db_index, + RedisStorageType::Id storage_type, const Block & sample_block) : dict_struct{dict_struct} , host{host} , port{port} , db_index{db_index} + , storage_type{storage_type} , sample_block{sample_block} , client{std::make_shared(host, port)} { + if (dict_struct.attributes.size() != 1) + throw Exception{"Invalid number of non key columns for Redis source: " + + DB::toString(dict_struct.attributes.size()) + ", expected 1", + ErrorCodes::INVALID_CONFIG_PARAMETER}; + + if (storage_type == RedisStorageType::HASH_MAP) + { + if (!dict_struct.key.has_value()) + throw Exception{"Redis source with storage type \'hash_map\' mush have key", + ErrorCodes::INVALID_CONFIG_PARAMETER}; + if (dict_struct.key.value().size() > 2) + throw Exception{"Redis source with complex keys having more than 2 attributes are unsupported", + ErrorCodes::INVALID_CONFIG_PARAMETER}; + // suppose key[0] is primary key, key[1] is secondary key + } + if (db_index != 0) { - Poco::Redis::Array command; - command << "SELECT" << static_cast(db_index); + Poco::Redis::Command command("SELECT"); + command << static_cast(db_index); std::string reply = client->execute(command); if (reply != "+OK\r\n") throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, @@ -103,6 +122,7 @@ namespace DB config.getString(config_prefix + ".host"), config.getUInt(config_prefix + ".port"), config.getUInt(config_prefix + ".db_index", 0), + parseStorageType(config.getString(config_prefix + ".storage_type", "")), sample_block) { } @@ -113,6 +133,7 @@ namespace DB other.host, other.port, other.db_index, + other.storage_type, other.sample_block} { } @@ -125,15 +146,35 @@ namespace DB { LOG_ERROR(&Logger::get("Redis"), "Redis in loadAll"); - Poco::Redis::Array commandForKeys; - commandForKeys << "KEYS" << "*"; - LOG_ERROR(&Logger::get("Redis"), "Command for keys: " + commandForKeys.toString()); + Poco::Redis::Command command_for_keys("KEYS"); + command_for_keys << "*"; + LOG_ERROR(&Logger::get("Redis"), "Command for keys: " + command_for_keys.toString()); - Poco::Redis::Array keys = client->execute(commandForKeys); + Poco::Redis::Array keys = client->execute(command_for_keys); LOG_ERROR(&Logger::get("Redis"), "Command for keys executed"); LOG_ERROR(&Logger::get("Redis"), "KEYS: " + keys.toString()); + if (storage_type == RedisStorageType::HASH_MAP && dict_struct.key->size() == 2) + { + Poco::Redis::Array hkeys; + for (const auto & key : keys) + { + Poco::Redis::Command command_for_secondary_keys("HKEYS"); + command_for_secondary_keys.addRedisType(key); + Poco::Redis::Array reply_for_primary_key = client->execute(command_for_secondary_keys); + LOG_ERROR(&Logger::get("Redis"), "Command for hkeys executed"); + + Poco::SharedPtr primary_with_secondary; + primary_with_secondary->addRedisType(key); + for (const auto & secondary_key : reply_for_primary_key) + primary_with_secondary->addRedisType(secondary_key); + LOG_ERROR(&Logger::get("Redis"), "HKEYS: " + primary_with_secondary->toString()); + hkeys.addRedisType(primary_with_secondary); + } + keys = hkeys; + } + return std::make_shared(client, std::move(keys), sample_block, max_block_size); } @@ -142,6 +183,9 @@ namespace DB { LOG_ERROR(&Logger::get("Redis"), "Redis in loadIds"); + if (storage_type != RedisStorageType::SIMPLE) + throw Exception{"Cannot use loadIds with \'simple\' storage type", ErrorCodes::UNSUPPORTED_METHOD}; + if (!dict_struct.id) throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; @@ -155,12 +199,18 @@ namespace DB return std::make_shared(client, std::move(keys), sample_block, max_block_size); } - std::string RedisDictionarySource::toString() const { return "Redis: " + host + ':' + DB::toString(port); } + RedisStorageType::Id RedisDictionarySource::parseStorageType(const std::string & storage_type) { + RedisStorageType::Id storage_type_id = RedisStorageType::valueOf(storage_type); + if (storage_type_id == RedisStorageType::UNKNOWN) { + storage_type_id = RedisStorageType::SIMPLE; + } + return storage_type_id; + } } #endif diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index 1e528ce40e9..37014e76360 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -22,6 +22,25 @@ namespace Poco namespace DB { + namespace RedisStorageType + { + enum Id + { + SIMPLE, + HASH_MAP, + UNKNOWN + }; + + Id valueOf(const std::string& value) + { + if (value == "simple") + return SIMPLE; + if (value == "hash_map") + return HASH_MAP; + return UNKNOWN; + } + } + class RedisDictionarySource final : public IDictionarySource { RedisDictionarySource( @@ -29,6 +48,7 @@ namespace DB const std::string & host, UInt16 port, UInt8 db_index, + RedisStorageType::Id storage_type, const Block & sample_block); public: @@ -55,6 +75,7 @@ namespace DB BlockInputStreamPtr loadKeys(const Columns & /* key_columns */, const std::vector & /* requested_rows */) override { + // Redis does not support native indexing throw Exception{"Method loadKeys is unsupported for RedisDictionarySource", ErrorCodes::NOT_IMPLEMENTED}; } @@ -66,11 +87,15 @@ namespace DB std::string toString() const override; + private: + static RedisStorageType::Id parseStorageType(const std::string& storage_type); + private: const DictionaryStructure dict_struct; const std::string host; const UInt16 port; const UInt8 db_index; // [0..15] + const RedisStorageType::Id storage_type; Block sample_block; std::shared_ptr client; From 20235753442a98a192e441ae7adf54b10743ac8f Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 02:26:57 +0300 Subject: [PATCH 21/49] Fix build --- dbms/src/Dictionaries/RedisDictionarySource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index ce9c1e6f408..e55e5549b27 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -170,7 +170,7 @@ namespace DB for (const auto & secondary_key : reply_for_primary_key) primary_with_secondary->addRedisType(secondary_key); LOG_ERROR(&Logger::get("Redis"), "HKEYS: " + primary_with_secondary->toString()); - hkeys.addRedisType(primary_with_secondary); + hkeys.add(*primary_with_secondary); } keys = hkeys; } From 5bc446befe77403bdd166acd29972e5a6932011a Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 04:11:40 +0300 Subject: [PATCH 22/49] Parse date and datetime from Int64 --- .../Dictionaries/RedisBlockInputStream.cpp | 49 +++++++++++++------ 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 8e9aece3670..9da5a92160b 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -72,9 +72,9 @@ namespace DB static_cast &>(column).getData().push_back( static_cast *>(value.get())->value()); break; - case Poco::Redis::RedisTypeTraits::TypeId: + case Poco::Redis::RedisTypeTraits::TypeId: static_cast &>(column).getData().push_back( - parse(static_cast *>(value.get())->value())); + parse(static_cast *>(value.get())->value())); break; case Poco::Redis::RedisTypeTraits::TypeId: { @@ -116,6 +116,35 @@ namespace DB } }; + auto getInt64IfCould = [&value]() + { + switch (value->type()) + { + case Poco::Redis::RedisTypeTraits::TypeId: + { + return static_cast *>(value.get())->value(); + } + case Poco::Redis::RedisTypeTraits::TypeId: + { + return parse( + static_cast *>(value.get())->value()); + } + case Poco::Redis::RedisTypeTraits::TypeId: + { + const auto & bs = static_cast *>( + value.get())->value(); + if (bs.isNull()) + throw Exception{"Unexpected null value", ErrorCodes::TYPE_MISMATCH}; + return parse(bs.value()); + } + default: + { + throw Exception{"Type mismatch, cannot convert to Int64, got type id = " + toString(value->type()), + ErrorCodes::TYPE_MISMATCH}; + } + } + }; + switch (type) { case ValueType::UInt8: @@ -158,25 +187,17 @@ namespace DB case ValueType::Date: { - if (value->type() != Poco::Redis::RedisTypeTraits::TypeId) - throw Exception{"Type mismatch, expected Int64 (Timestamp), got type id = " + toString(value->type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; - + Int64 int_value = getInt64IfCould(); static_cast(column).getData().push_back(UInt16{DateLUT::instance().toDayNum( - static_cast( - static_cast *>(value.get())->value()).epochTime())}); + static_cast(int_value).epochTime())}); break; } case ValueType::DateTime: { - if (value->type() != Poco::Redis::RedisTypeTraits::TypeId) - throw Exception{"Type mismatch, expected Int64 (Timestamp), got type id = " + toString(value->type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; - + Int64 int_value = getInt64IfCould(); static_cast(column).getData().push_back( - static_cast( - static_cast *>(value.get())->value()).epochTime()); + static_cast(int_value).epochTime()); break; } case ValueType::UUID: From 1265646dbfa817e04abe8eeeaa1e67ad9181429e Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 13:11:38 +0300 Subject: [PATCH 23/49] Cast types to expected in tests --- .../test_external_dictionaries/dictionary.py | 3 + .../external_sources.py | 18 +++++ .../test_external_dictionaries/test.py | 78 +++++++++++-------- 3 files changed, 65 insertions(+), 34 deletions(-) diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index 7e44aef455c..c468c2bfc67 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -46,6 +46,9 @@ class Row(object): def get_value_by_name(self, name): return self.data[name] + def set_value(self, name, value): + self.data[name] = value + class Field(object): def __init__(self, name, field_type, is_key=False, is_range_key=False, default=None, hierarchical=False, range_hash_type=None, default_value_for_get=None): diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 57c862cbfe6..f7ab5315526 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -6,6 +6,8 @@ import redis from tzlocal import get_localzone import datetime import os +import dateutil.parser +import time class ExternalSource(object): @@ -36,6 +38,9 @@ class ExternalSource(object): def compatible_with_layout(self, layout): return True + def prepare_value_for_type(self, field, value): + return value + class SourceMySQL(ExternalSource): TYPE_MAPPING = { @@ -406,3 +411,16 @@ class SourceRedis(ExternalSource): print(cmd) self.client.execute_command(cmd) return + + def prepare_value_for_type(self, field, value): + if field.field_type == "Date": + dt = dateutil.parser.parse(value) + return int(time.mktime(dt.timetuple()) // 86400) + if field.field_type == "DateTime": + dt = dateutil.parser.parse(value) + return int(time.mktime(dt.timetuple())) + if field.field_type == "Float32": + return str(value) + if field.field_type == "Float64": + return str(value) + return value diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index d8b92f4e542..c42727c76a8 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -1,6 +1,5 @@ import pytest import os -import time from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout @@ -138,28 +137,39 @@ def started_cluster(): finally: cluster.shutdown() +def prepare_row(dct, fields, values): + prepared_values = [] + for field, value in zip(fields, values): + prepared_values.append(dct.source.prepare_value_for_type(field, value)) + return Row(fields, prepared_values) + +def prepare_data(dct, fields, values_by_row): + data = [] + for row in values_by_row: + data.append(prepare_row(dct, fields, row)) + return data def test_simple_dictionaries(started_cluster): fields = FIELDS["simple"] - data = [ - Row(fields, - [1, 22, 333, 4444, 55555, -6, -77, - -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0]), - Row(fields, - [2, 3, 4, 5, 6, -7, -8, - -9, -10, '550e8400-e29b-41d4-a716-446655440002', - '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1]), + values_by_row = [ + [1, 22, 333, 4444, 55555, -6, -77, + -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0], + [2, 3, 4, 5, 6, -7, -8, + -9, -10, '550e8400-e29b-41d4-a716-446655440002', + '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1], ] simple_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"] for dct in simple_dicts: + data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in simple_dicts: + data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key: @@ -193,27 +203,27 @@ def test_simple_dictionaries(started_cluster): def test_complex_dictionaries(started_cluster): fields = FIELDS["complex"] - data = [ - Row(fields, - [1, 'world', 22, 333, 4444, 55555, -6, - -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', - 'hello', 22.543, 3332154213.4]), - Row(fields, - [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, - -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', - '1975-09-28', '2000-02-28 23:33:24', - 'my', 255.543, 3332221.44]), + values_by_row = [ + [1, 'world', 22, 333, 4444, 55555, -6, + -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', + 'hello', 22.543, 3332154213.4], + [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, + -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', + '1975-09-28', '2000-02-28 23:33:24', + 'my', 255.543, 3332221.44], ] complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex" and not d.is_kv] for dct in complex_dicts: + data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in complex_dicts: + data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key: @@ -232,29 +242,29 @@ def test_complex_dictionaries(started_cluster): def test_ranged_dictionaries(started_cluster): fields = FIELDS["ranged"] - data = [ - Row(fields, - [1, '2019-02-10', '2019-02-01', '2019-02-28', - 22, 333, 4444, 55555, -6, -77, -888, -999, - '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', - 22.543, 3332154213.4]), - Row(fields, - [2, '2019-04-10', '2019-04-01', '2019-04-28', - 11, 3223, 41444, 52515, -65, -747, -8388, -9099, - '550e8400-e29b-41d4-a716-446655440004', - '1973-06-29', '2002-02-28 23:23:25', '!!!!', - 32.543, 3332543.4]), + values_by_row = [ + [1, '2019-02-10', '2019-02-01', '2019-02-28', + 22, 333, 4444, 55555, -6, -77, -888, -999, + '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', + 22.543, 3332154213.4], + [2, '2019-04-10', '2019-04-01', '2019-04-28', + 11, 3223, 41444, 52515, -65, -747, -8388, -9099, + '550e8400-e29b-41d4-a716-446655440004', + '1973-06-29', '2002-02-28 23:23:25', '!!!!', + 32.543, 3332543.4], ] ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged" and not d.is_kv] for dct in ranged_dicts: + data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in ranged_dicts: + data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key and not field.is_range: From b05113188c0adb964141b89eadb69670aaa1bc13 Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 13:14:07 +0300 Subject: [PATCH 24/49] Style fix --- dbms/src/Dictionaries/RedisDictionarySource.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index e55e5549b27..4d511cd569c 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -204,11 +204,11 @@ namespace DB return "Redis: " + host + ':' + DB::toString(port); } - RedisStorageType::Id RedisDictionarySource::parseStorageType(const std::string & storage_type) { + RedisStorageType::Id RedisDictionarySource::parseStorageType(const std::string & storage_type) + { RedisStorageType::Id storage_type_id = RedisStorageType::valueOf(storage_type); - if (storage_type_id == RedisStorageType::UNKNOWN) { + if (storage_type_id == RedisStorageType::UNKNOWN) storage_type_id = RedisStorageType::SIMPLE; - } return storage_type_id; } } From f5806e4fb263e9287f81ee3bcdf2547c3735250b Mon Sep 17 00:00:00 2001 From: comunodi Date: Wed, 17 Apr 2019 14:35:02 +0300 Subject: [PATCH 25/49] Disable unsupported sources in tests --- .../test_external_dictionaries/external_sources.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index f7ab5315526..6830f9500c8 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -412,6 +412,11 @@ class SourceRedis(ExternalSource): self.client.execute_command(cmd) return + def compatible_with_layout(self, layout): + if not layout.is_simple: + return False + return True + def prepare_value_for_type(self, field, value): if field.field_type == "Date": dt = dateutil.parser.parse(value) From ccf89f4be6d6dee496c91da369b9579bc4754820 Mon Sep 17 00:00:00 2001 From: comunodi Date: Fri, 24 May 2019 02:42:21 +0300 Subject: [PATCH 26/49] Change LOG_ERROR to LOG_INFO --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 4 ++-- dbms/src/Dictionaries/RedisDictionarySource.cpp | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 9da5a92160b..ed000f1c1a7 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -60,7 +60,7 @@ namespace DB template void insertNumber(IColumn & column, const Poco::Redis::RedisType::Ptr & value, const std::string & name) { - LOG_ERROR(&Logger::get("Redis"), "Got value: " + value->toString() + "with type=" + + LOG_INFO(&Logger::get("Redis"), "Got value: " + value->toString() + "with type=" + ", isInteger=" + DB::toString(value->isInteger()) + ", isSimpleString=" + DB::toString(value->isSimpleString()) + ", isBulkString=" + DB::toString(value->isBulkString()) + @@ -223,7 +223,7 @@ namespace DB for (size_t i = 0; i < 5; ++i) if (description.sample_block.columns() >= i + 1) - LOG_ERROR(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); + LOG_INFO(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); const size_t size = description.sample_block.columns(); // const size_t size = 2; diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 4d511cd569c..a691161c968 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -144,16 +144,16 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadAll() { - LOG_ERROR(&Logger::get("Redis"), "Redis in loadAll"); + LOG_INFO(&Logger::get("Redis"), "Redis in loadAll"); Poco::Redis::Command command_for_keys("KEYS"); command_for_keys << "*"; - LOG_ERROR(&Logger::get("Redis"), "Command for keys: " + command_for_keys.toString()); + LOG_INFO(&Logger::get("Redis"), "Command for keys: " + command_for_keys.toString()); Poco::Redis::Array keys = client->execute(command_for_keys); - LOG_ERROR(&Logger::get("Redis"), "Command for keys executed"); - LOG_ERROR(&Logger::get("Redis"), "KEYS: " + keys.toString()); + LOG_INFO(&Logger::get("Redis"), "Command for keys executed"); + LOG_INFO(&Logger::get("Redis"), "KEYS: " + keys.toString()); if (storage_type == RedisStorageType::HASH_MAP && dict_struct.key->size() == 2) { @@ -163,13 +163,13 @@ namespace DB Poco::Redis::Command command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); Poco::Redis::Array reply_for_primary_key = client->execute(command_for_secondary_keys); - LOG_ERROR(&Logger::get("Redis"), "Command for hkeys executed"); + LOG_INFO(&Logger::get("Redis"), "Command for hkeys executed"); Poco::SharedPtr primary_with_secondary; primary_with_secondary->addRedisType(key); for (const auto & secondary_key : reply_for_primary_key) primary_with_secondary->addRedisType(secondary_key); - LOG_ERROR(&Logger::get("Redis"), "HKEYS: " + primary_with_secondary->toString()); + LOG_INFO(&Logger::get("Redis"), "HKEYS: " + primary_with_secondary->toString()); hkeys.add(*primary_with_secondary); } keys = hkeys; @@ -181,7 +181,7 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadIds(const std::vector & ids) { - LOG_ERROR(&Logger::get("Redis"), "Redis in loadIds"); + LOG_INFO(&Logger::get("Redis"), "Redis in loadIds"); if (storage_type != RedisStorageType::SIMPLE) throw Exception{"Cannot use loadIds with \'simple\' storage type", ErrorCodes::UNSUPPORTED_METHOD}; @@ -194,7 +194,7 @@ namespace DB for (UInt64 id : ids) keys << static_cast(id); - LOG_ERROR(&Logger::get("Redis"), "KEYS: " + keys.toString()); + LOG_INFO(&Logger::get("Redis"), "KEYS: " + keys.toString()); return std::make_shared(client, std::move(keys), sample_block, max_block_size); } From 1f0afdcf6893c02ad62d3ad2ad035f96a3ef096f Mon Sep 17 00:00:00 2001 From: comunodi Date: Sat, 25 May 2019 03:28:09 +0300 Subject: [PATCH 27/49] Parse all args as strings --- .../Dictionaries/RedisBlockInputStream.cpp | 137 +++++------------- .../Dictionaries/RedisDictionarySource.cpp | 12 +- 2 files changed, 44 insertions(+), 105 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index ed000f1c1a7..507d36b7b16 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -57,153 +57,88 @@ namespace DB using ValueType = ExternalResultDescription::ValueType; using RedisArray = Poco::Redis::Array; - template - void insertNumber(IColumn & column, const Poco::Redis::RedisType::Ptr & value, const std::string & name) + std::string getStringOrThrow(const Poco::Redis::RedisType::Ptr & value, const std::string & column_name) { - LOG_INFO(&Logger::get("Redis"), "Got value: " + value->toString() + "with type=" + - ", isInteger=" + DB::toString(value->isInteger()) + - ", isSimpleString=" + DB::toString(value->isSimpleString()) + - ", isBulkString=" + DB::toString(value->isBulkString()) + - ", isArray=" + DB::toString(value->isArray()) + - ", isError=" + DB::toString(value->isError())); + LOG_INFO(&Logger::get("Redis"), + "isNullableString=" + DB::toString(value->isBulkString()) + + ", isSimpleString=" + DB::toString(value->isSimpleString())); switch (value->type()) { - case Poco::Redis::RedisTypeTraits::TypeId: - static_cast &>(column).getData().push_back( - static_cast *>(value.get())->value()); - break; - case Poco::Redis::RedisTypeTraits::TypeId: - static_cast &>(column).getData().push_back( - parse(static_cast *>(value.get())->value())); - break; case Poco::Redis::RedisTypeTraits::TypeId: { - const auto & bs = - static_cast *>(value.get())->value(); + const auto & bs = static_cast *>(value.get())->value(); if (bs.isNull()) - static_cast &>(column).getData().emplace_back(); - else - static_cast &>(column).getData().push_back(parse(bs.value())); - break; + throw Exception{"Type mismatch, expected not null String for column " + column_name, + ErrorCodes::TYPE_MISMATCH}; + return bs.value(); } + case Poco::Redis::RedisTypeTraits::TypeId: + return static_cast *>(value.get())->value(); default: - throw Exception( - "Type mismatch, expected a number, got " + value->toString() + - " with type id = " + toString(value->type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH); + throw Exception{"Type mismatch, expected std::string, got type id = " + toString(value->type()) + " for column " + column_name, + ErrorCodes::TYPE_MISMATCH}; } } + template + inline void insert(IColumn & column, const String & stringValue) + { + static_cast &>(column).insertValue(parse(stringValue)); + } + void insertValue(IColumn & column, const ValueType type, const Poco::Redis::RedisType::Ptr & value, const std::string & name) { - auto getStringIfCould = [&value, &name]() - { - switch (value->type()) - { - case Poco::Redis::RedisTypeTraits::TypeId: - { - const auto & bs = static_cast *>(value.get())->value(); - if (bs.isNull()) - throw Exception{"Type mismatch, expected not null String for column " + name, - ErrorCodes::TYPE_MISMATCH}; - return bs.value(); - } - case Poco::Redis::RedisTypeTraits::TypeId: - return static_cast *>(value.get())->value(); - default: - throw Exception{"Type mismatch, expected std::string, got type id = " + toString(value->type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; - } - }; - - auto getInt64IfCould = [&value]() - { - switch (value->type()) - { - case Poco::Redis::RedisTypeTraits::TypeId: - { - return static_cast *>(value.get())->value(); - } - case Poco::Redis::RedisTypeTraits::TypeId: - { - return parse( - static_cast *>(value.get())->value()); - } - case Poco::Redis::RedisTypeTraits::TypeId: - { - const auto & bs = static_cast *>( - value.get())->value(); - if (bs.isNull()) - throw Exception{"Unexpected null value", ErrorCodes::TYPE_MISMATCH}; - return parse(bs.value()); - } - default: - { - throw Exception{"Type mismatch, cannot convert to Int64, got type id = " + toString(value->type()), - ErrorCodes::TYPE_MISMATCH}; - } - } - }; + String stringValue = getStringOrThrow(value, name); switch (type) { case ValueType::UInt8: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::UInt16: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::UInt32: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::UInt64: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Int8: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Int16: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Int32: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Int64: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Float32: - insertNumber(column, value, name); + insert(column, stringValue); break; case ValueType::Float64: - insertNumber(column, value, name); + insert(column, stringValue); break; - case ValueType::String: - { - String string = getStringIfCould(); - static_cast(column).insertDataWithTerminatingZero(string.data(), string.size() + 1); + insert(column, stringValue); break; - } - case ValueType::Date: { - Int64 int_value = getInt64IfCould(); - static_cast(column).getData().push_back(UInt16{DateLUT::instance().toDayNum( - static_cast(int_value).epochTime())}); + static_cast(column).insertValue(parse(stringValue).getDayNum()); break; } case ValueType::DateTime: { - Int64 int_value = getInt64IfCould(); - static_cast(column).getData().push_back( - static_cast(int_value).epochTime()); + static_cast(column).insertValue(static_cast(parse(stringValue))); break; } case ValueType::UUID: { - String string = getStringIfCould(); - static_cast(column).getData().push_back(parse(string)); + static_cast(column).insertValue(parse(stringValue)); break; } } @@ -226,12 +161,6 @@ namespace DB LOG_INFO(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); const size_t size = description.sample_block.columns(); -// const size_t size = 2; -// if (size != description.sample_block.columns()) -// throw Exception{"Unsupported number of columns for key-value storage: " -// + DB::toString(description.sample_block.columns()) -// + " (expected: " + DB::toString(size) + ")", -// ErrorCodes::LOGICAL_ERROR}; MutableColumns columns(description.sample_block.columns()); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index a691161c968..0c99c785887 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -84,15 +84,20 @@ namespace DB , sample_block{sample_block} , client{std::make_shared(host, port)} { + LOG_INFO(&Logger::get("Redis"), "in ctor"); + LOG_INFO(&Logger::get("Redis"), dict_struct.attributes.size()); if (dict_struct.attributes.size() != 1) throw Exception{"Invalid number of non key columns for Redis source: " + DB::toString(dict_struct.attributes.size()) + ", expected 1", ErrorCodes::INVALID_CONFIG_PARAMETER}; + LOG_INFO(&Logger::get("Redis"), "After first check"); + if (storage_type == RedisStorageType::HASH_MAP) { + LOG_INFO(&Logger::get("Redis"), "SET STORAGE_TYPE"); if (!dict_struct.key.has_value()) - throw Exception{"Redis source with storage type \'hash_map\' mush have key", + throw Exception{"Redis source with storage type \'hash_map\' must have key", ErrorCodes::INVALID_CONFIG_PARAMETER}; if (dict_struct.key.value().size() > 2) throw Exception{"Redis source with complex keys having more than 2 attributes are unsupported", @@ -100,8 +105,11 @@ namespace DB // suppose key[0] is primary key, key[1] is secondary key } + LOG_INFO(&Logger::get("Redis"), "After second check"); + if (db_index != 0) { + LOG_INFO(&Logger::get("Redis"), "SET DB_INDEX"); Poco::Redis::Command command("SELECT"); command << static_cast(db_index); std::string reply = client->execute(command); @@ -109,6 +117,8 @@ namespace DB throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, ErrorCodes::CANNOT_SELECT}; } + + LOG_INFO(&Logger::get("Redis"), "After third check"); } From ba879d95f7be0bd4f40089905a79dc7a3f142ebb Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 26 May 2019 01:53:31 +0300 Subject: [PATCH 28/49] Unify keys handling --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 9 +-------- dbms/src/Dictionaries/RedisDictionarySource.cpp | 2 +- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 507d36b7b16..afa411ce7f4 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -123,24 +123,17 @@ namespace DB insert(column, stringValue); break; case ValueType::String: - insert(column, stringValue); + static_cast(column).insert(parse(stringValue)); break; case ValueType::Date: - { static_cast(column).insertValue(parse(stringValue).getDayNum()); break; - } - case ValueType::DateTime: - { static_cast(column).insertValue(static_cast(parse(stringValue))); break; - } case ValueType::UUID: - { static_cast(column).insertValue(parse(stringValue)); break; - } } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 0c99c785887..d77019cb423 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -202,7 +202,7 @@ namespace DB Poco::Redis::Array keys; for (UInt64 id : ids) - keys << static_cast(id); + keys << DB::toString(id); LOG_INFO(&Logger::get("Redis"), "KEYS: " + keys.toString()); From 179ad928746e4185d8818c5a2976d28f2e08cf64 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 26 May 2019 15:58:40 +0300 Subject: [PATCH 29/49] Fix diff with master --- dbms/src/Dictionaries/RedisBlockInputStream.h | 2 +- dbms/src/Dictionaries/RedisDictionarySource.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 95a563cee80..dc64ee0fdd4 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include namespace Poco diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index 37014e76360..7a0ffaaceb7 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -1,6 +1,7 @@ #pragma once #include +#include #if USE_POCO_REDIS # include "DictionaryStructure.h" @@ -31,7 +32,7 @@ namespace DB UNKNOWN }; - Id valueOf(const std::string& value) + Id valueOf(const std::string & value) { if (value == "simple") return SIMPLE; From b3d8ec3e0444b74c1e558a9c859952b5c7ddd16d Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 26 May 2019 18:55:09 +0300 Subject: [PATCH 30/49] Handle Null keys --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index afa411ce7f4..bed8846cff5 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -57,6 +57,15 @@ namespace DB using ValueType = ExternalResultDescription::ValueType; using RedisArray = Poco::Redis::Array; + bool isNull(const Poco::Redis::RedisType::Ptr & value) + { + if (value.isNull()) + return true; + if (value->isBulkString()) + return static_cast *>(value.get())->value().isNull(); + return false; + } + std::string getStringOrThrow(const Poco::Redis::RedisType::Ptr & value, const std::string & column_name) { LOG_INFO(&Logger::get("Redis"), @@ -214,7 +223,7 @@ namespace DB Poco::Redis::Array values = client->execute(commandForValues); for (const auto & value : values) { - if (value.isNull()) + if (isNull(value)) insertDefaultValue(*columns[2], *description.sample_block.getByPosition(2).column); else insertValueByIdx(2, value); @@ -251,7 +260,7 @@ namespace DB Poco::Redis::Array values = client->execute(commandForValues); for (const auto & value : values) { - if (value.isNull()) + if (isNull(value)) insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); else insertValueByIdx(1, value); From a8ce7530c9558a94a8b864c7bbb3f89b3bb9ca89 Mon Sep 17 00:00:00 2001 From: comunodi Date: Tue, 28 May 2019 23:06:06 +0300 Subject: [PATCH 31/49] Put keys in result block only if value exists --- .../Dictionaries/RedisBlockInputStream.cpp | 41 ++++++++++--------- .../Dictionaries/RedisDictionarySource.cpp | 26 +----------- dbms/src/Dictionaries/RedisDictionarySource.h | 3 +- 3 files changed, 24 insertions(+), 46 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index bed8846cff5..639b1360c74 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -36,6 +36,7 @@ namespace DB extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; extern const int LIMIT_EXCEEDED; + extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; } @@ -59,11 +60,8 @@ namespace DB bool isNull(const Poco::Redis::RedisType::Ptr & value) { - if (value.isNull()) - return true; - if (value->isBulkString()) - return static_cast *>(value.get())->value().isNull(); - return false; + return value->isBulkString() && + static_cast *>(value.get())->value().isNull(); } std::string getStringOrThrow(const Poco::Redis::RedisType::Ptr & value, const std::string & column_name) @@ -158,10 +156,6 @@ namespace DB if (all_read) return {}; - for (size_t i = 0; i < 5; ++i) - if (description.sample_block.columns() >= i + 1) - LOG_INFO(&Logger::get("Redis"), description.sample_block.getByPosition(i).dumpStructure()); - const size_t size = description.sample_block.columns(); MutableColumns columns(description.sample_block.columns()); @@ -220,6 +214,7 @@ namespace DB commandForValues.addRedisType(secondary_key); } + // FIXME: fix insert Poco::Redis::Array values = client->execute(commandForValues); for (const auto & value : values) { @@ -235,10 +230,10 @@ namespace DB } else { - size_t num_rows = 0; Poco::Redis::Command commandForValues("MGET"); - while (num_rows < max_block_size) + // keys.size() > 0 + for (size_t num_rows = 0; num_rows < max_block_size; ++num_rows) { if (cursor >= keys.size()) { @@ -247,23 +242,29 @@ namespace DB } const auto & key = *(keys.begin() + cursor); - insertValueByIdx(0, key); commandForValues.addRedisType(key); - - ++num_rows; ++cursor; } - if (num_rows == 0) - return {}; - Poco::Redis::Array values = client->execute(commandForValues); - for (const auto & value : values) + if (commandForValues.size() != values.size() + 1) + throw Exception{"Inconsistent sizes of keys and values in Redis request", + ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT}; + + for (size_t num_rows = 0; num_rows < values.size(); ++num_rows) { - if (isNull(value)) + const auto & key = *(keys.begin() + cursor - num_rows - 1); + const auto & value = *(values.begin() + values.size() - num_rows - 1); + if (value.isNull()) + { + insertValueByIdx(0, key); insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); - else + } + else if (!isNull(value)) // null string means 'no value for requested key' + { + insertValueByIdx(0, key); insertValueByIdx(1, value); + } } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index d77019cb423..051f6dfaf34 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -52,9 +52,6 @@ namespace DB # include # include "RedisBlockInputStream.h" -# include "Poco/Logger.h" -# include "common/logger_useful.h" - namespace DB { @@ -84,18 +81,13 @@ namespace DB , sample_block{sample_block} , client{std::make_shared(host, port)} { - LOG_INFO(&Logger::get("Redis"), "in ctor"); - LOG_INFO(&Logger::get("Redis"), dict_struct.attributes.size()); if (dict_struct.attributes.size() != 1) throw Exception{"Invalid number of non key columns for Redis source: " + DB::toString(dict_struct.attributes.size()) + ", expected 1", ErrorCodes::INVALID_CONFIG_PARAMETER}; - LOG_INFO(&Logger::get("Redis"), "After first check"); - if (storage_type == RedisStorageType::HASH_MAP) { - LOG_INFO(&Logger::get("Redis"), "SET STORAGE_TYPE"); if (!dict_struct.key.has_value()) throw Exception{"Redis source with storage type \'hash_map\' must have key", ErrorCodes::INVALID_CONFIG_PARAMETER}; @@ -105,11 +97,8 @@ namespace DB // suppose key[0] is primary key, key[1] is secondary key } - LOG_INFO(&Logger::get("Redis"), "After second check"); - if (db_index != 0) { - LOG_INFO(&Logger::get("Redis"), "SET DB_INDEX"); Poco::Redis::Command command("SELECT"); command << static_cast(db_index); std::string reply = client->execute(command); @@ -117,8 +106,6 @@ namespace DB throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, ErrorCodes::CANNOT_SELECT}; } - - LOG_INFO(&Logger::get("Redis"), "After third check"); } @@ -154,17 +141,11 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadAll() { - LOG_INFO(&Logger::get("Redis"), "Redis in loadAll"); - Poco::Redis::Command command_for_keys("KEYS"); command_for_keys << "*"; - LOG_INFO(&Logger::get("Redis"), "Command for keys: " + command_for_keys.toString()); Poco::Redis::Array keys = client->execute(command_for_keys); - LOG_INFO(&Logger::get("Redis"), "Command for keys executed"); - LOG_INFO(&Logger::get("Redis"), "KEYS: " + keys.toString()); - if (storage_type == RedisStorageType::HASH_MAP && dict_struct.key->size() == 2) { Poco::Redis::Array hkeys; @@ -173,13 +154,12 @@ namespace DB Poco::Redis::Command command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); Poco::Redis::Array reply_for_primary_key = client->execute(command_for_secondary_keys); - LOG_INFO(&Logger::get("Redis"), "Command for hkeys executed"); Poco::SharedPtr primary_with_secondary; primary_with_secondary->addRedisType(key); for (const auto & secondary_key : reply_for_primary_key) primary_with_secondary->addRedisType(secondary_key); - LOG_INFO(&Logger::get("Redis"), "HKEYS: " + primary_with_secondary->toString()); + hkeys.add(*primary_with_secondary); } keys = hkeys; @@ -191,8 +171,6 @@ namespace DB BlockInputStreamPtr RedisDictionarySource::loadIds(const std::vector & ids) { - LOG_INFO(&Logger::get("Redis"), "Redis in loadIds"); - if (storage_type != RedisStorageType::SIMPLE) throw Exception{"Cannot use loadIds with \'simple\' storage type", ErrorCodes::UNSUPPORTED_METHOD}; @@ -204,8 +182,6 @@ namespace DB for (UInt64 id : ids) keys << DB::toString(id); - LOG_INFO(&Logger::get("Redis"), "KEYS: " + keys.toString()); - return std::make_shared(client, std::move(keys), sample_block, max_block_size); } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index 7a0ffaaceb7..d56de626a9a 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -2,6 +2,7 @@ #include #include + #if USE_POCO_REDIS # include "DictionaryStructure.h" @@ -95,7 +96,7 @@ namespace DB const DictionaryStructure dict_struct; const std::string host; const UInt16 port; - const UInt8 db_index; // [0..15] + const UInt8 db_index; const RedisStorageType::Id storage_type; Block sample_block; From 67059d8ed1b86234611b5e2cb02cd7df5332c06e Mon Sep 17 00:00:00 2001 From: comunodi Date: Tue, 28 May 2019 23:17:30 +0300 Subject: [PATCH 32/49] Add tests only for kv storages --- dbms/tests/integration/pytest.ini | 2 +- .../test_external_dictionaries/dictionary.py | 27 +- .../external_sources.py | 111 ++++-- .../test_external_dictionaries/test.py | 118 +++---- .../test_external_dictionaries/test_kv.py | 321 ++++++++++++++++++ 5 files changed, 474 insertions(+), 105 deletions(-) create mode 100644 dbms/tests/integration/test_external_dictionaries/test_kv.py diff --git a/dbms/tests/integration/pytest.ini b/dbms/tests/integration/pytest.ini index e51d0efad3d..dc5bb603b63 100644 --- a/dbms/tests/integration/pytest.ini +++ b/dbms/tests/integration/pytest.ini @@ -1,3 +1,3 @@ [pytest] -python_files = test.py +python_files = test*.py norecursedirs = _instances diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index c468c2bfc67..05aa9bfa59d 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -1,4 +1,4 @@ -#-*- coding: utf-8 -*- +# -*- coding: utf-8 -*- import copy @@ -9,7 +9,7 @@ class Layout(object): 'cache': '128', 'complex_key_hashed': '', 'complex_key_cache': '128', - 'range_hashed': '' + 'range_hashed': '', } def __init__(self, name): @@ -18,13 +18,13 @@ class Layout(object): self.is_simple = False self.is_ranged = False if self.name.startswith('complex'): - self.layout_type = "complex" + self.layout_type = 'complex' self.is_complex = True - elif name.startswith("range"): - self.layout_type = "ranged" + elif name.startswith('range'): + self.layout_type = 'ranged' self.is_ranged = True else: - self.layout_type = "simple" + self.layout_type = 'simple' self.is_simple = True def get_str(self): @@ -33,8 +33,7 @@ class Layout(object): def get_key_block_name(self): if self.is_complex: return 'key' - else: - return 'id' + return 'id' class Row(object): @@ -90,13 +89,12 @@ class Field(object): class DictionaryStructure(object): - def __init__(self, layout, fields, is_kv=False): + def __init__(self, layout, fields): self.layout = layout self.keys = [] self.range_key = None self.ordinary_fields = [] self.range_fields = [] - self.is_kv = is_kv for field in fields: if field.is_key: @@ -121,14 +119,12 @@ class DictionaryStructure(object): fields_strs = [] for field in self.ordinary_fields: fields_strs.append(field.get_attribute_str()) - if self.is_kv: - break key_strs = [] if self.layout.is_complex: for key_field in self.keys: key_strs.append(key_field.get_attribute_str()) - else: # same for simple and ranged + else: # same for simple and ranged for key_field in self.keys: key_strs.append(key_field.get_simple_index_str()) @@ -288,13 +284,14 @@ class DictionaryStructure(object): class Dictionary(object): - def __init__(self, name, structure, source, config_path, table_name): + def __init__(self, name, structure, source, config_path, table_name, fields=None, values=None): self.name = name self.structure = copy.deepcopy(structure) self.source = copy.deepcopy(source) self.config_path = config_path self.table_name = table_name - self.is_kv = source.is_kv + self.fields = fields + self.values = values def generate_config(self): with open(self.config_path, 'w') as result: diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 6830f9500c8..a22cc6e024f 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -3,6 +3,7 @@ import warnings import pymysql.cursors import pymongo import redis +import aerospike from tzlocal import get_localzone import datetime import os @@ -12,7 +13,7 @@ import time class ExternalSource(object): def __init__(self, name, internal_hostname, internal_port, - docker_hostname, docker_port, user, password, is_kv): + docker_hostname, docker_port, user, password, storage_type=None): self.name = name self.internal_hostname = internal_hostname self.internal_port = int(internal_port) @@ -20,7 +21,7 @@ class ExternalSource(object): self.docker_port = int(docker_port) self.user = user self.password = password - self.is_kv = is_kv + self.storage_type = storage_type def get_source_str(self, table_name): raise NotImplementedError("Method {} is not implemented for {}".format( @@ -38,9 +39,6 @@ class ExternalSource(object): def compatible_with_layout(self, layout): return True - def prepare_value_for_type(self, field, value): - return value - class SourceMySQL(ExternalSource): TYPE_MAPPING = { @@ -388,10 +386,12 @@ class SourceRedis(ExternalSource): {host} {port} 0 + {storage_type} '''.format( host=self.docker_hostname, port=self.docker_port, + storage_type=self.storage_type, # simple or hash_map ) def prepare(self, structure, table_name, cluster): @@ -399,33 +399,96 @@ class SourceRedis(ExternalSource): self.prepared = True def load_data(self, data, table_name): - for row_num, row in enumerate(data): # FIXME: yield - self.client.execute_command("FLUSHDB") + self.client.flushdb() + for row in data: for cell_name, cell_value in row.data.items(): value_type = "$" if isinstance(cell_value, int): value_type = ":" else: cell_value = '"' + str(cell_value).replace(' ', '\s') + '"' - cmd = "SET " + "$" + cell_name + " " + value_type + str(cell_value) + cmd = "SET ${} {}{}".format(cell_name, value_type, cell_value) print(cmd) self.client.execute_command(cmd) - return + + def load_kv_data(self, values): + self.client.flushdb() + if len(values[0]) == 2: + self.client.mset({value[0]: value[1] for value in values}) + else: + for value in values: + self.client.hset(value[0], value[1], value[2]) def compatible_with_layout(self, layout): - if not layout.is_simple: - return False - return True + if layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "simple": + return True + return False - def prepare_value_for_type(self, field, value): - if field.field_type == "Date": - dt = dateutil.parser.parse(value) - return int(time.mktime(dt.timetuple()) // 86400) - if field.field_type == "DateTime": - dt = dateutil.parser.parse(value) - return int(time.mktime(dt.timetuple())) - if field.field_type == "Float32": - return str(value) - if field.field_type == "Float64": - return str(value) - return value + +class SourceAerospike(ExternalSource): + def __init__(self, name, internal_hostname, internal_port, + docker_hostname, docker_port, user, password, storage_type=None): + ExternalSource.__init__(self, name, internal_hostname, internal_port, + docker_hostname, docker_port, user, password, storage_type) + self.namespace = "test" + self.set = "test_set" + + def get_source_str(self, table_name): + print("AEROSPIKE get source str") + return ''' + + {host} + {port} + + '''.format( + host=self.docker_hostname, + port=self.docker_port, + storage_type=self.storage_type, # simple or hash_map + ) + + def prepare(self, structure, table_name, cluster): + config = { + 'hosts': [ (self.internal_hostname, self.internal_port) ] + } + self.client = aerospike.client(config).connect() + self.prepared = True + print("PREPARED AEROSPIKE") + print(config) + + def compatible_with_layout(self, layout): + print("compatible AEROSPIKE") + return layout.is_simple + + def _flush_aerospike_db(self): + keys = [] + + def handle_record((key, metadata, record)): + print("Handle record {} {}".format(key, record)) + keys.append(key) + + def print_record((key, metadata, record)): + print("Print record {} {}".format(key, record)) + + scan = self.client.scan(self.namespace, self.set) + scan.foreach(handle_record) + + [self.client.remove(key) for key in keys] + + def load_kv_data(self, values): + self._flush_aerospike_db() + + print("Load KV Data Aerospike") + if len(values[0]) == 2: + for value in values: + key = (self.namespace, self.set, value[0]) + print(key) + self.client.put(key, {"bin_value": value[1]}, policy={"key": aerospike.POLICY_KEY_SEND}) + assert self.client.exists(key) + else: + assert("VALUES SIZE != 2") + + # print(values) + + def load_data(self, data, table_name): + print("Load Data Aerospike") + # print(data) diff --git a/dbms/tests/integration/test_external_dictionaries/test.py b/dbms/tests/integration/test_external_dictionaries/test.py index c42727c76a8..841a9124af0 100644 --- a/dbms/tests/integration/test_external_dictionaries/test.py +++ b/dbms/tests/integration/test_external_dictionaries/test.py @@ -3,8 +3,8 @@ import os from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed, SourceMongo -from external_sources import SourceHTTP, SourceHTTPS, SourceRedis +from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed +from external_sources import SourceMongo, SourceHTTP, SourceHTTPS SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) @@ -78,16 +78,15 @@ LAYOUTS = [ ] SOURCES = [ - SourceRedis("Redis", "localhost", "6380", "redis1", "6379", "", "", True), - SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse", False), - SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse", False), - SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", "", False), - SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", "", False), - SourceFile("File", "localhost", "9000", "node", "9000", "", "", False), - SourceExecutableHashed("ExecutableHashed", "localhost", "9000", "node", "9000", "", "", False), - SourceExecutableCache("ExecutableCache", "localhost", "9000", "node", "9000", "", "", False), - SourceHTTP("SourceHTTP", "localhost", "9000", "clickhouse1", "9000", "", "", False), - SourceHTTPS("SourceHTTPS", "localhost", "9000", "clickhouse1", "9000", "", "", False), + SourceMongo("MongoDB", "localhost", "27018", "mongo1", "27017", "root", "clickhouse"), + SourceMySQL("MySQL", "localhost", "3308", "mysql1", "3306", "root", "clickhouse"), + SourceClickHouse("RemoteClickHouse", "localhost", "9000", "clickhouse1", "9000", "default", ""), + SourceClickHouse("LocalClickHouse", "localhost", "9000", "node", "9000", "default", ""), + SourceFile("File", "localhost", "9000", "node", "9000", "", ""), + SourceExecutableHashed("ExecutableHashed", "localhost", "9000", "node", "9000", "", ""), + SourceExecutableCache("ExecutableCache", "localhost", "9000", "node", "9000", "", ""), + SourceHTTP("SourceHTTP", "localhost", "9000", "clickhouse1", "9000", "", ""), + SourceHTTPS("SourceHTTPS", "localhost", "9000", "clickhouse1", "9000", "", ""), ] DICTIONARIES = [] @@ -95,6 +94,7 @@ DICTIONARIES = [] cluster = None node = None + def setup_module(module): global DICTIONARIES global cluster @@ -107,9 +107,9 @@ def setup_module(module): for layout in LAYOUTS: for source in SOURCES: if source.compatible_with_layout(layout): - structure = DictionaryStructure(layout, FIELDS[layout.layout_type], source.is_kv) + structure = DictionaryStructure(layout, FIELDS[layout.layout_type]) dict_name = source.name + "_" + layout.name - dict_path = os.path.join(dict_configs_path, dict_name + '.xml') # FIXME: single xml config for every column + dict_path = os.path.join(dict_configs_path, dict_name + '.xml') dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name) dictionary.generate_config() DICTIONARIES.append(dictionary) @@ -120,9 +120,10 @@ def setup_module(module): for fname in os.listdir(dict_configs_path): main_configs.append(os.path.join(dict_configs_path, fname)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_redis=True) + node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True) cluster.add_instance('clickhouse1') + @pytest.fixture(scope="module") def started_cluster(): try: @@ -137,39 +138,28 @@ def started_cluster(): finally: cluster.shutdown() -def prepare_row(dct, fields, values): - prepared_values = [] - for field, value in zip(fields, values): - prepared_values.append(dct.source.prepare_value_for_type(field, value)) - return Row(fields, prepared_values) - -def prepare_data(dct, fields, values_by_row): - data = [] - for row in values_by_row: - data.append(prepare_row(dct, fields, row)) - return data def test_simple_dictionaries(started_cluster): fields = FIELDS["simple"] - values_by_row = [ - [1, 22, 333, 4444, 55555, -6, -77, - -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0], - [2, 3, 4, 5, 6, -7, -8, - -9, -10, '550e8400-e29b-41d4-a716-446655440002', - '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1], + data = [ + Row(fields, + [1, 22, 333, 4444, 55555, -6, -77, + -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0]), + Row(fields, + [2, 3, 4, 5, 6, -7, -8, + -9, -10, '550e8400-e29b-41d4-a716-446655440002', + '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1]), ] simple_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"] for dct in simple_dicts: - data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in simple_dicts: - data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key: @@ -181,8 +171,6 @@ def test_simple_dictionaries(started_cluster): for query in dct.get_select_get_or_default_queries(field, row): queries_with_answers.append((query, field.default_value_for_get)) - if dct.is_kv: - break for query in dct.get_hierarchical_queries(data[0]): queries_with_answers.append((query, [1])) @@ -201,29 +189,30 @@ def test_simple_dictionaries(started_cluster): answer = str(answer).replace(' ', '') assert node.query(query) == str(answer) + '\n' + def test_complex_dictionaries(started_cluster): fields = FIELDS["complex"] - values_by_row = [ - [1, 'world', 22, 333, 4444, 55555, -6, - -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', - 'hello', 22.543, 3332154213.4], - [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, - -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', - '1975-09-28', '2000-02-28 23:33:24', - 'my', 255.543, 3332221.44], + data = [ + Row(fields, + [1, 'world', 22, 333, 4444, 55555, -6, + -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', + 'hello', 22.543, 3332154213.4]), + Row(fields, + [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, + -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', + '1975-09-28', '2000-02-28 23:33:24', + 'my', 255.543, 3332221.44]), ] - complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex" and not d.is_kv] + complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] for dct in complex_dicts: - data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in complex_dicts: - data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key: @@ -240,38 +229,37 @@ def test_complex_dictionaries(started_cluster): print query assert node.query(query) == str(answer) + '\n' + def test_ranged_dictionaries(started_cluster): fields = FIELDS["ranged"] - values_by_row = [ - [1, '2019-02-10', '2019-02-01', '2019-02-28', - 22, 333, 4444, 55555, -6, -77, -888, -999, - '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', - 22.543, 3332154213.4], - [2, '2019-04-10', '2019-04-01', '2019-04-28', - 11, 3223, 41444, 52515, -65, -747, -8388, -9099, - '550e8400-e29b-41d4-a716-446655440004', - '1973-06-29', '2002-02-28 23:23:25', '!!!!', - 32.543, 3332543.4], + data = [ + Row(fields, + [1, '2019-02-10', '2019-02-01', '2019-02-28', + 22, 333, 4444, 55555, -6, -77, -888, -999, + '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', + 22.543, 3332154213.4]), + Row(fields, + [2, '2019-04-10', '2019-04-01', '2019-04-28', + 11, 3223, 41444, 52515, -65, -747, -8388, -9099, + '550e8400-e29b-41d4-a716-446655440004', + '1973-06-29', '2002-02-28 23:23:25', '!!!!', + 32.543, 3332543.4]), ] - ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged" and not d.is_kv] + ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] for dct in ranged_dicts: - data = prepare_data(dct, fields, values_by_row) dct.load_data(data) node.query("system reload dictionaries") queries_with_answers = [] for dct in ranged_dicts: - data = prepare_data(dct, fields, values_by_row) for row in data: for field in fields: if not field.is_key and not field.is_range: for query in dct.get_select_get_queries(field, row): queries_with_answers.append((query, row.get_value_by_name(field.name))) - if dct.is_kv: - break for query, answer in queries_with_answers: print query diff --git a/dbms/tests/integration/test_external_dictionaries/test_kv.py b/dbms/tests/integration/test_external_dictionaries/test_kv.py new file mode 100644 index 00000000000..b085e89b7d9 --- /dev/null +++ b/dbms/tests/integration/test_external_dictionaries/test_kv.py @@ -0,0 +1,321 @@ +import os + +import pytest +from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout +from external_sources import SourceRedis, SourceAerospike + +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +FIELDS = { + "simple": [ + Field("KeyField", 'UInt64', is_key=True, default_value_for_get=9999999), + Field("UInt8_", 'UInt8', default_value_for_get=55), + Field("UInt16_", 'UInt16', default_value_for_get=66), + Field("UInt32_", 'UInt32', default_value_for_get=77), + Field("UInt64_", 'UInt64', default_value_for_get=88), + Field("Int8_", 'Int8', default_value_for_get=-55), + Field("Int16_", 'Int16', default_value_for_get=-66), + Field("Int32_", 'Int32', default_value_for_get=-77), + Field("Int64_", 'Int64', default_value_for_get=-88), + Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), + Field("Date_", 'Date', default_value_for_get='2018-12-30'), + Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), + Field("String_", 'String', default_value_for_get='hi'), + Field("Float32_", 'Float32', default_value_for_get=555.11), + Field("Float64_", 'Float64', default_value_for_get=777.11), + Field("ParentKeyField", "UInt64", default_value_for_get=444, hierarchical=True), + ], + "complex": [ + Field("KeyField1", 'UInt64', is_key=True, default_value_for_get=9999999), + Field("KeyField2", 'String', is_key=True, default_value_for_get='xxxxxxxxx'), + Field("UInt8_", 'UInt8', default_value_for_get=55), + Field("UInt16_", 'UInt16', default_value_for_get=66), + Field("UInt32_", 'UInt32', default_value_for_get=77), + Field("UInt64_", 'UInt64', default_value_for_get=88), + Field("Int8_", 'Int8', default_value_for_get=-55), + Field("Int16_", 'Int16', default_value_for_get=-66), + Field("Int32_", 'Int32', default_value_for_get=-77), + Field("Int64_", 'Int64', default_value_for_get=-88), + Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), + Field("Date_", 'Date', default_value_for_get='2018-12-30'), + Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), + Field("String_", 'String', default_value_for_get='hi'), + Field("Float32_", 'Float32', default_value_for_get=555.11), + Field("Float64_", 'Float64', default_value_for_get=777.11), + ], + "ranged": [ + Field("KeyField1", 'UInt64', is_key=True), + Field("KeyField2", 'Date', is_range_key=True), + Field("StartDate", 'Date', range_hash_type='min'), + Field("EndDate", 'Date', range_hash_type='max'), + Field("UInt8_", 'UInt8', default_value_for_get=55), + Field("UInt16_", 'UInt16', default_value_for_get=66), + Field("UInt32_", 'UInt32', default_value_for_get=77), + Field("UInt64_", 'UInt64', default_value_for_get=88), + Field("Int8_", 'Int8', default_value_for_get=-55), + Field("Int16_", 'Int16', default_value_for_get=-66), + Field("Int32_", 'Int32', default_value_for_get=-77), + Field("Int64_", 'Int64', default_value_for_get=-88), + Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), + Field("Date_", 'Date', default_value_for_get='2018-12-30'), + Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), + Field("String_", 'String', default_value_for_get='hi'), + Field("Float32_", 'Float32', default_value_for_get=555.11), + Field("Float64_", 'Float64', default_value_for_get=777.11), + ], +} + +VALUES = { + "simple": [ + [ + 1, 22, 333, 4444, 55555, -6, -77, + -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0, + ], + [ + 2, 3, 4, 5, 6, -7, -8, + -9, -10, '550e8400-e29b-41d4-a716-446655440002', + '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1, + ], + ], + "complex": [ + [ + 1, 'world', 22, 333, 4444, 55555, -6, + -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', + 'hello', 22.543, 3332154213.4, + ], + [ + 2, 'qwerty2', 52, 2345, 6544, 9191991, -2, + -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', + '1975-09-28', '2000-02-28 23:33:24', + 'my', 255.543, 3332221.44, + ], + ], + "ranged": [ + [ + 1, '2019-02-10', '2019-02-01', '2019-02-28', + 22, 333, 4444, 55555, -6, -77, -888, -999, + '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', + 22.543, 3332154213.4, + ], + [ + 2, '2019-04-10', '2019-04-01', '2019-04-28', + 11, 3223, 41444, 52515, -65, -747, -8388, -9099, + '550e8400-e29b-41d4-a716-446655440004', + '1973-06-29', '2002-02-28 23:23:25', '!!!!', + 32.543, 3332543.4, + ], + ], +} + +LAYOUTS = [ + Layout("flat"), + Layout("hashed"), + Layout("cache"), + Layout("complex_key_hashed"), + Layout("complex_key_cache"), + Layout("range_hashed"), +] + +SOURCES = [ + SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), + # SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), + # SourceAerospike("Aerospike", "localhost", "3000", "aerospike1", "3000", "", ""), +] + +DICTIONARIES = [] + +cluster = None +node = None + + +def setup_kv_dict(suffix, layout, fields, kv_source, dict_configs_path, values): + global DICTIONARIES + + structure = DictionaryStructure(layout, fields) + dict_name = "{}_{}_{}".format(kv_source.name, layout.name, suffix) + dict_path = os.path.join(dict_configs_path, dict_name + '.xml') + dictionary = Dictionary(dict_name, structure, kv_source, dict_path, "table_" + dict_name, fields, values) + dictionary.generate_config() + DICTIONARIES.append(dictionary) + + +def setup_module(module): + global DICTIONARIES + global cluster + global node + + dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') + for f in os.listdir(dict_configs_path): + os.remove(os.path.join(dict_configs_path, f)) + + for layout in LAYOUTS: + for source in SOURCES: + if source.compatible_with_layout(layout): + if layout.layout_type == "simple": + fields_len = len(FIELDS["simple"]) + for i in range(fields_len - 1): + local_fields = [FIELDS["simple"][0], FIELDS["simple"][i + 1]] + local_values = [[value[0], value[i + 1]] for value in VALUES["simple"]] + setup_kv_dict(i + 1, layout, local_fields, source, dict_configs_path, local_values) + elif layout.layout_type == "complex": + fields_len = len(FIELDS["complex"]) + for i in range(fields_len - 2): + local_fields = [FIELDS['complex'][1], FIELDS['complex'][i + 2]] + local_values = [[value[1], value[i + 2]] for value in VALUES["complex"]] + setup_kv_dict(i + 2, layout, local_fields, source, dict_configs_path, local_values) + elif layout.layout_type == "ranged": + fields_len = len(FIELDS["ranged"]) + local_fields = FIELDS["ranged"][0:5] + local_values = VALUES["ranged"][0:5] + for i in range(fields_len - 4): + local_fields[4] = FIELDS["ranged"][i + 4] + for j, value in enumerate(VALUES["ranged"]): + local_values[j][4] = value[i + 4] + setup_kv_dict(i + 2, layout, local_fields, source, dict_configs_path, local_values) + else: + print "Source", source.name, "incompatible with layout", layout.name + + main_configs = [] + for fname in os.listdir(dict_configs_path): + main_configs.append(os.path.join(dict_configs_path, fname)) + cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) + # TODO: add your kv source flag below + node = cluster.add_instance('node', main_configs=main_configs, with_redis=True) + cluster.add_instance('clickhouse1') + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for dictionary in DICTIONARIES: + print "Preparing", dictionary.name + dictionary.prepare_source(cluster) + print "Prepared" + + yield cluster + + finally: + cluster.shutdown() + + +def prepare_data(fields, values_by_row): + return [Row(fields, values) for values in values_by_row] + + +def test_simple_kv_dictionaries(started_cluster): + simple_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"] + + for dct in simple_kv_dicts: + queries_with_answers = [] + fields = dct.fields + print("FIELDS AND VALUES FOR " + dct.name) + print(fields) + print(dct.values) + data = prepare_data(fields, dct.values) + dct.source.load_kv_data(dct.values) + + try: + node.query("system reload dictionary '{}'".format(dct.name)) + except Exception: + print(dct.name) + raise + + for row in data: + for field in fields: + if not field.is_key: + for query in dct.get_select_get_queries(field, row): + queries_with_answers.append((query, row.get_value_by_name(field.name))) + + for query in dct.get_select_has_queries(field, row): + queries_with_answers.append((query, 1)) + + for query in dct.get_select_get_or_default_queries(field, row): + queries_with_answers.append((query, field.default_value_for_get)) + if dct.fields[1].hierarchical: + for query in dct.get_hierarchical_queries(data[0]): + queries_with_answers.append((query, [1])) + + for query in dct.get_hierarchical_queries(data[1]): + queries_with_answers.append((query, [2, 1])) + + for query in dct.get_is_in_queries(data[0], data[1]): + queries_with_answers.append((query, 0)) + + for query in dct.get_is_in_queries(data[1], data[0]): + queries_with_answers.append((query, 1)) + + for query, answer in queries_with_answers: + if isinstance(answer, list): + answer = str(answer).replace(' ', '') + print query + assert node.query(query) == str(answer) + '\n', query + + +def test_complex_dictionaries(started_cluster): + complex_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + + for dct in complex_kv_dicts: + queries_with_answers = [] + fields = dct.fields + print("FIELDS AND VALUES FOR " + dct.name) + print(fields) + print(dct.values) + data = prepare_data(fields, dct.values) + dct.source.load_kv_data(dct.values) + + try: + node.query("system reload dictionary '{}'".format(dct.name)) + except Exception: + print(dct.name) + raise + + for row in data: + for field in fields: + if not field.is_key: + for query in dct.get_select_get_queries(field, row): + queries_with_answers.append((query, row.get_value_by_name(field.name))) + + for query in dct.get_select_has_queries(field, row): + queries_with_answers.append((query, 1)) + + for query in dct.get_select_get_or_default_queries(field, row): + queries_with_answers.append((query, field.default_value_for_get)) + + for query, answer in queries_with_answers: + print query + assert node.query(query) == str(answer) + '\n' + + +def xtest_ranged_dictionaries(started_cluster): + complex_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] + + for dct in complex_kv_dicts: + queries_with_answers = [] + fields = dct.fields + print("FIELDS AND VALUES FOR " + dct.name) + print(fields) + print(dct.values) + data = prepare_data(fields, dct.values) + dct.source.load_kv_data(dct.values) + + try: + node.query("system reload dictionary '{}'".format(dct.name)) + except Exception: + print(dct.name) + raise + + for row in data: + for field in fields: + if not field.is_key and not field.is_range: + for query in dct.get_select_get_queries(field, row): + queries_with_answers.append((query, row.get_value_by_name(field.name))) + + for query, answer in queries_with_answers: + print query + assert node.query(query) == str(answer) + '\n' From 61a9e6c448dcffbef47eecdd5700cf7fa97810bd Mon Sep 17 00:00:00 2001 From: comunodi Date: Thu, 30 May 2019 22:44:40 +0300 Subject: [PATCH 33/49] Fix test for hashed dict --- dbms/tests/integration/test_external_dictionaries/dictionary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index 05aa9bfa59d..c20afbe7840 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -183,7 +183,7 @@ class DictionaryStructure(object): if isinstance(val, str): val = "'" + val + "'" key_exprs_strs.append('to{type}({value})'.format(type=key.field_type, value=val)) - key_expr = ', (' + ','.join(key_exprs_strs) + ')' + key_expr = ', tuple(' + ','.join(key_exprs_strs) + ')' date_expr = '' if self.layout.is_ranged: From 4947a0cfa977920d761e2fd62538bf8a9a53b7d9 Mon Sep 17 00:00:00 2001 From: comunodi Date: Thu, 30 May 2019 23:24:23 +0300 Subject: [PATCH 34/49] Disable redundant tests --- .../test_external_dictionaries/external_sources.py | 5 ++++- dbms/tests/integration/test_external_dictionaries/test_kv.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index a22cc6e024f..f26609637b8 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -420,7 +420,10 @@ class SourceRedis(ExternalSource): self.client.hset(value[0], value[1], value[2]) def compatible_with_layout(self, layout): - if layout.is_simple and self.storage_type == "simple" or layout.is_complex and self.storage_type == "simple": + if ( + layout.is_simple and self.storage_type == "simple" or + layout.is_complex and self.storage_type == "simple" and layout.name != "complex_key_cache" + ): return True return False diff --git a/dbms/tests/integration/test_external_dictionaries/test_kv.py b/dbms/tests/integration/test_external_dictionaries/test_kv.py index b085e89b7d9..2ac6f0e714d 100644 --- a/dbms/tests/integration/test_external_dictionaries/test_kv.py +++ b/dbms/tests/integration/test_external_dictionaries/test_kv.py @@ -292,7 +292,7 @@ def test_complex_dictionaries(started_cluster): assert node.query(query) == str(answer) + '\n' -def xtest_ranged_dictionaries(started_cluster): +def test_ranged_dictionaries(started_cluster): complex_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] for dct in complex_kv_dicts: From 2f74c0db70aa85273bd37ab67c906428fe02ef52 Mon Sep 17 00:00:00 2001 From: comunodi Date: Thu, 30 May 2019 23:39:56 +0300 Subject: [PATCH 35/49] Delete useless import --- .../integration/test_external_dictionaries/external_sources.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index f26609637b8..20516e5c997 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -7,7 +7,6 @@ import aerospike from tzlocal import get_localzone import datetime import os -import dateutil.parser import time From c58effc2af382192d389852ecf1bb0373bd85f3b Mon Sep 17 00:00:00 2001 From: comunodi Date: Fri, 31 May 2019 00:06:39 +0300 Subject: [PATCH 36/49] Fix inserting keys with hash map --- .../Dictionaries/RedisBlockInputStream.cpp | 107 ++++++++++-------- .../Dictionaries/RedisDictionarySource.cpp | 6 - 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 639b1360c74..c12418ab087 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -6,13 +6,8 @@ # include # include -# include # include # include -# include -# include -# include -# include # include # include @@ -25,9 +20,6 @@ # include "DictionaryStructure.h" # include "RedisBlockInputStream.h" -# include "Poco/Logger.h" -# include "common/logger_useful.h" - namespace DB { @@ -36,7 +28,7 @@ namespace DB extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; extern const int LIMIT_EXCEEDED; - extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; + extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } @@ -66,9 +58,6 @@ namespace DB std::string getStringOrThrow(const Poco::Redis::RedisType::Ptr & value, const std::string & column_name) { - LOG_INFO(&Logger::get("Redis"), - "isNullableString=" + DB::toString(value->isBulkString()) + - ", isSimpleString=" + DB::toString(value->isSimpleString())); switch (value->type()) { case Poco::Redis::RedisTypeTraits::TypeId: @@ -179,7 +168,7 @@ namespace DB if (keys.begin()->get()->isArray()) { size_t num_rows = 0; - while (num_rows < max_block_size) + while (num_rows < max_block_size && !all_read) { if (cursor >= keys.size()) { @@ -206,6 +195,7 @@ namespace DB Poco::Redis::Command commandForValues("HMGET"); const auto & primary_key = *keys_array.begin(); + commandForValues.addRedisType(primary_key); for (size_t i = 1; i < keys_array.size(); ++i) { const auto & secondary_key = *(keys_array.begin() + i); @@ -213,57 +203,76 @@ namespace DB insertValueByIdx(1, secondary_key); commandForValues.addRedisType(secondary_key); } + ++cursor; - // FIXME: fix insert Poco::Redis::Array values = client->execute(commandForValues); - for (const auto & value : values) - { - if (isNull(value)) - insertDefaultValue(*columns[2], *description.sample_block.getByPosition(2).column); - else - insertValueByIdx(2, value); - } + if (commandForValues.size() != values.size() + 2) // 'HMGET' primary_key secondary_keys + throw Exception{"Inconsistent sizes of keys and values in Redis request", + ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; - num_rows += keys_array.size() - 1; - cursor += keys_array.size() - 1; + for (size_t i = 0; i < values.size(); ++i) + { + const auto & secondary_key = *(keys_array.begin() + i + 1); + const auto & value = *(values.begin() + i); + if (value.isNull()) + { + insertValueByIdx(0, primary_key); + insertValueByIdx(1, secondary_key); + insertDefaultValue(*columns[2], *description.sample_block.getByPosition(2).column); + ++num_rows; + } + else if (!isNull(value)) // null string means 'no value for requested key' + { + insertValueByIdx(0, primary_key); + insertValueByIdx(1, secondary_key); + insertValueByIdx(2, value); + ++num_rows; + } + } } } else { - Poco::Redis::Command commandForValues("MGET"); - - // keys.size() > 0 - for (size_t num_rows = 0; num_rows < max_block_size; ++num_rows) + size_t num_rows = 0; + while (num_rows < max_block_size && !all_read) { - if (cursor >= keys.size()) + Poco::Redis::Command commandForValues("MGET"); + + // keys.size() > 0 + for (size_t i = 0; i < max_block_size && cursor < keys.size(); ++i) + { + const auto & key = *(keys.begin() + cursor); + commandForValues.addRedisType(key); + ++cursor; + } + + if (commandForValues.size() == 1) // only 'MGET' { all_read = true; break; } - const auto & key = *(keys.begin() + cursor); - commandForValues.addRedisType(key); - ++cursor; - } + Poco::Redis::Array values = client->execute(commandForValues); + if (commandForValues.size() != values.size() + 1) // 'MGET' keys + throw Exception{"Inconsistent sizes of keys and values in Redis request", + ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; - Poco::Redis::Array values = client->execute(commandForValues); - if (commandForValues.size() != values.size() + 1) - throw Exception{"Inconsistent sizes of keys and values in Redis request", - ErrorCodes::SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT}; - - for (size_t num_rows = 0; num_rows < values.size(); ++num_rows) - { - const auto & key = *(keys.begin() + cursor - num_rows - 1); - const auto & value = *(values.begin() + values.size() - num_rows - 1); - if (value.isNull()) + for (size_t i = 0; i < values.size(); ++i) { - insertValueByIdx(0, key); - insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); - } - else if (!isNull(value)) // null string means 'no value for requested key' - { - insertValueByIdx(0, key); - insertValueByIdx(1, value); + const auto & key = *(keys.begin() + cursor - i - 1); + const auto & value = *(values.begin() + values.size() - i - 1); + if (value.isNull()) + { + insertValueByIdx(0, key); + insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); + ++num_rows; + } + else if (!isNull(value)) // null string means 'no value for requested key' + { + insertValueByIdx(0, key); + insertValueByIdx(1, value); + ++num_rows; + } } } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 051f6dfaf34..d0256f3272e 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -36,16 +36,10 @@ namespace DB #if USE_POCO_REDIS # include -# include # include # include -# include -# include -# include -# include # include # include -# include # include # include From a964af386cb3dc91a7495ebdbf926640bed4cf6f Mon Sep 17 00:00:00 2001 From: comunodi Date: Fri, 31 May 2019 00:16:12 +0300 Subject: [PATCH 37/49] Optimize includes --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 6 ++---- dbms/src/Dictionaries/RedisDictionarySource.cpp | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index c12418ab087..56f12c74822 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -1,7 +1,6 @@ #include #if USE_POCO_REDIS -# include # include # include @@ -15,8 +14,8 @@ # include # include # include -# include # include + # include "DictionaryStructure.h" # include "RedisBlockInputStream.h" @@ -146,8 +145,7 @@ namespace DB return {}; const size_t size = description.sample_block.columns(); - - MutableColumns columns(description.sample_block.columns()); + MutableColumns columns(size); for (const auto i : ext::range(0, size)) columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty(); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index d0256f3272e..282f4187a45 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -41,9 +41,9 @@ namespace DB # include # include -# include # include -# include +# include + # include "RedisBlockInputStream.h" From 08c2f183dd4d6ba6db785cd2de9398aff4fb4041 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 2 Jun 2019 04:22:06 +0300 Subject: [PATCH 38/49] Fix complex dict with two keys --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 9 +++------ dbms/src/Dictionaries/RedisDictionarySource.cpp | 9 +++++---- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 56f12c74822..7f23a421989 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -192,22 +192,19 @@ namespace DB } Poco::Redis::Command commandForValues("HMGET"); - const auto & primary_key = *keys_array.begin(); - commandForValues.addRedisType(primary_key); - for (size_t i = 1; i < keys_array.size(); ++i) + for (size_t i = 0; i < keys_array.size(); ++i) { const auto & secondary_key = *(keys_array.begin() + i); - insertValueByIdx(0, primary_key); - insertValueByIdx(1, secondary_key); commandForValues.addRedisType(secondary_key); } ++cursor; Poco::Redis::Array values = client->execute(commandForValues); - if (commandForValues.size() != values.size() + 2) // 'HMGET' primary_key secondary_keys + if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception{"Inconsistent sizes of keys and values in Redis request", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; + const auto & primary_key = *keys_array.begin(); for (size_t i = 0; i < values.size(); ++i) { const auto & secondary_key = *(keys_array.begin() + i + 1); diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 282f4187a45..8def8abcf0e 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -147,14 +147,15 @@ namespace DB { Poco::Redis::Command command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); + Poco::Redis::Array reply_for_primary_key = client->execute(command_for_secondary_keys); - Poco::SharedPtr primary_with_secondary; - primary_with_secondary->addRedisType(key); + Poco::Redis::Array primary_with_secondary; + primary_with_secondary.addRedisType(key); for (const auto & secondary_key : reply_for_primary_key) - primary_with_secondary->addRedisType(secondary_key); + primary_with_secondary.addRedisType(secondary_key); - hkeys.add(*primary_with_secondary); + hkeys.add(primary_with_secondary); } keys = hkeys; } From 12af7869cc692195500ee1eeec69f64e5842050b Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 2 Jun 2019 04:30:06 +0300 Subject: [PATCH 39/49] Add tests for complex dict with two keys --- .../test_external_dictionaries/dictionary.py | 2 ++ .../external_sources.py | 28 ++++++++----------- .../test_external_dictionaries/test_kv.py | 14 ++++++---- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/dbms/tests/integration/test_external_dictionaries/dictionary.py b/dbms/tests/integration/test_external_dictionaries/dictionary.py index c20afbe7840..ca07ea27037 100644 --- a/dbms/tests/integration/test_external_dictionaries/dictionary.py +++ b/dbms/tests/integration/test_external_dictionaries/dictionary.py @@ -8,6 +8,8 @@ class Layout(object): 'hashed': '', 'cache': '128', 'complex_key_hashed': '', + 'complex_key_hashed_one_key': '', + 'complex_key_hashed_two_keys': '', 'complex_key_cache': '128', 'range_hashed': '', } diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 20516e5c997..2232bcc92a4 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -7,12 +7,11 @@ import aerospike from tzlocal import get_localzone import datetime import os -import time class ExternalSource(object): def __init__(self, name, internal_hostname, internal_port, - docker_hostname, docker_port, user, password, storage_type=None): + docker_hostname, docker_port, user, password): self.name = name self.internal_hostname = internal_hostname self.internal_port = int(internal_port) @@ -20,7 +19,6 @@ class ExternalSource(object): self.docker_port = int(docker_port) self.user = user self.password = password - self.storage_type = storage_type def get_source_str(self, table_name): raise NotImplementedError("Method {} is not implemented for {}".format( @@ -379,6 +377,14 @@ class SourceHTTPS(SourceHTTPBase): class SourceRedis(ExternalSource): + def __init__( + self, name, internal_hostname, internal_port, docker_hostname, docker_port, user, password, storage_type + ): + super(SourceRedis, self).__init__( + name, internal_hostname, internal_port, docker_hostname, docker_port, user, password + ) + self.storage_type = storage_type + def get_source_str(self, table_name): return ''' @@ -397,19 +403,6 @@ class SourceRedis(ExternalSource): self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port) self.prepared = True - def load_data(self, data, table_name): - self.client.flushdb() - for row in data: - for cell_name, cell_value in row.data.items(): - value_type = "$" - if isinstance(cell_value, int): - value_type = ":" - else: - cell_value = '"' + str(cell_value).replace(' ', '\s') + '"' - cmd = "SET ${} {}{}".format(cell_name, value_type, cell_value) - print(cmd) - self.client.execute_command(cmd) - def load_kv_data(self, values): self.client.flushdb() if len(values[0]) == 2: @@ -421,7 +414,8 @@ class SourceRedis(ExternalSource): def compatible_with_layout(self, layout): if ( layout.is_simple and self.storage_type == "simple" or - layout.is_complex and self.storage_type == "simple" and layout.name != "complex_key_cache" + layout.is_complex and self.storage_type == "simple" and layout.name == "complex_key_hashed_one_key" or + layout.is_complex and self.storage_type == "hash_map" and layout.name == "complex_key_hashed_two_keys" ): return True return False diff --git a/dbms/tests/integration/test_external_dictionaries/test_kv.py b/dbms/tests/integration/test_external_dictionaries/test_kv.py index 2ac6f0e714d..69fa48d5e2e 100644 --- a/dbms/tests/integration/test_external_dictionaries/test_kv.py +++ b/dbms/tests/integration/test_external_dictionaries/test_kv.py @@ -116,14 +116,15 @@ LAYOUTS = [ Layout("flat"), Layout("hashed"), Layout("cache"), - Layout("complex_key_hashed"), + Layout('complex_key_hashed_one_key'), + Layout('complex_key_hashed_two_keys'), Layout("complex_key_cache"), Layout("range_hashed"), ] SOURCES = [ SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), - # SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), + SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), # SourceAerospike("Aerospike", "localhost", "3000", "aerospike1", "3000", "", ""), ] @@ -165,8 +166,12 @@ def setup_module(module): elif layout.layout_type == "complex": fields_len = len(FIELDS["complex"]) for i in range(fields_len - 2): - local_fields = [FIELDS['complex'][1], FIELDS['complex'][i + 2]] - local_values = [[value[1], value[i + 2]] for value in VALUES["complex"]] + if layout.name == 'complex_key_hashed_two_keys': + local_fields = [FIELDS['complex'][0], FIELDS['complex'][1], FIELDS['complex'][i + 2]] + local_values = [[value[0], value[1], value[i + 2]] for value in VALUES["complex"]] + else: + local_fields = [FIELDS['complex'][1], FIELDS['complex'][i + 2]] + local_values = [[value[1], value[i + 2]] for value in VALUES["complex"]] setup_kv_dict(i + 2, layout, local_fields, source, dict_configs_path, local_values) elif layout.layout_type == "ranged": fields_len = len(FIELDS["ranged"]) @@ -184,7 +189,6 @@ def setup_module(module): for fname in os.listdir(dict_configs_path): main_configs.append(os.path.join(dict_configs_path, fname)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - # TODO: add your kv source flag below node = cluster.add_instance('node', main_configs=main_configs, with_redis=True) cluster.add_instance('clickhouse1') From aed927b6df8a6bfacce2ac22221f5f30107f77f5 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 2 Jun 2019 04:37:35 +0300 Subject: [PATCH 40/49] Remove unused param --- .../test_external_dictionaries/external_sources.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_external_dictionaries/external_sources.py b/dbms/tests/integration/test_external_dictionaries/external_sources.py index 2232bcc92a4..f6802a87c57 100644 --- a/dbms/tests/integration/test_external_dictionaries/external_sources.py +++ b/dbms/tests/integration/test_external_dictionaries/external_sources.py @@ -423,9 +423,9 @@ class SourceRedis(ExternalSource): class SourceAerospike(ExternalSource): def __init__(self, name, internal_hostname, internal_port, - docker_hostname, docker_port, user, password, storage_type=None): + docker_hostname, docker_port, user, password): ExternalSource.__init__(self, name, internal_hostname, internal_port, - docker_hostname, docker_port, user, password, storage_type) + docker_hostname, docker_port, user, password) self.namespace = "test" self.set = "test_set" @@ -439,7 +439,6 @@ class SourceAerospike(ExternalSource): '''.format( host=self.docker_hostname, port=self.docker_port, - storage_type=self.storage_type, # simple or hash_map ) def prepare(self, structure, table_name, cluster): From 102967015e8813129384dcd0f6e377e5b730f167 Mon Sep 17 00:00:00 2001 From: comunodi Date: Sun, 2 Jun 2019 16:29:43 +0300 Subject: [PATCH 41/49] Add RU docs for Redis --- .../dicts/external_dicts_dict_sources.md | 6 ++- .../dicts/external_dicts_dict_sources.md | 47 ++++++++++++++----- 2 files changed, 40 insertions(+), 13 deletions(-) diff --git a/docs/en/query_language/dicts/external_dicts_dict_sources.md b/docs/en/query_language/dicts/external_dicts_dict_sources.md index 029bad304c7..f78b67d6495 100644 --- a/docs/en/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/en/query_language/dicts/external_dicts_dict_sources.md @@ -27,11 +27,11 @@ Types of sources (`source_type`): - [Executable file](#dicts-external_dicts_dict_sources-executable) - [HTTP(s)](#dicts-external_dicts_dict_sources-http) - DBMS + - [ODBC](#dicts-external_dicts_dict_sources-odbc) - [MySQL](#dicts-external_dicts_dict_sources-mysql) - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) - [MongoDB](#dicts-external_dicts_dict_sources-mongodb) - [Redis](#dicts-external_dicts_dict_sources-redis) - - [ODBC](#dicts-external_dicts_dict_sources-odbc) ## Local File {#dicts-external_dicts_dict_sources-local_file} @@ -434,6 +434,8 @@ Example of settings: localhost 6379 + simple + 0 ``` @@ -442,5 +444,7 @@ Setting fields: - `host` – The Redis host. - `port` – The port on the Redis server. +- `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. +- `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. [Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_sources/) diff --git a/docs/ru/query_language/dicts/external_dicts_dict_sources.md b/docs/ru/query_language/dicts/external_dicts_dict_sources.md index c9e419eb09c..436c4e95daf 100644 --- a/docs/ru/query_language/dicts/external_dicts_dict_sources.md +++ b/docs/ru/query_language/dicts/external_dicts_dict_sources.md @@ -1,5 +1,5 @@ -# Источники внешних словарей +# Источники внешних словарей {#dicts-external_dicts_dict_sources} Внешний словарь можно подключить из множества источников. @@ -24,17 +24,18 @@ Типы источников (`source_type`): -- [Локальный файл](#ispolniaemyi-fail) -- [Исполняемый файл](#ispolniaemyi-fail) -- [HTTP(s)](#http-s) +- [Локальный файл](#dicts-external_dicts_dict_sources-local_file) +- [Исполняемый файл](#dicts-external_dicts_dict_sources-executable) +- [HTTP(s)](#dicts-external_dicts_dict_sources-http) - СУБД: - [ODBC](#dicts-external_dicts_dict_sources-odbc) - - [MySQL](#mysql) - - [ClickHouse](#clickhouse) - - [MongoDB](#mongodb) + - [MySQL](#dicts-external_dicts_dict_sources-mysql) + - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse) + - [MongoDB](#dicts-external_dicts_dict_sources-mongodb) + - [Redis](#dicts-external_dicts_dict_sources-redis) -## Локальный файл +## Локальный файл {#dicts-external_dicts_dict_sources-local_file} Пример настройки: @@ -53,7 +54,7 @@ - `format` - Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)". -## Исполняемый файл +## Исполняемый файл {#dicts-external_dicts_dict_sources-executable} Работа с исполняемым файлом зависит от [размещения словаря в памяти](external_dicts_dict_layout.md). Если тип размещения словаря `cache` и `complex_key_cache`, то ClickHouse запрашивает необходимые ключи, отправляя запрос в `STDIN` исполняемого файла. @@ -74,7 +75,7 @@ - `format` - Формат файла. Поддерживаются все форматы, описанные в разделе "[Форматы](../../interfaces/formats.md#formats)". -## HTTP(s) +## HTTP(s) {#dicts-external_dicts_dict_sources-http} Работа с HTTP(s) сервером зависит от [размещения словаря в памяти](external_dicts_dict_layout.md). Если тип размещения словаря `cache` и `complex_key_cache`, то ClickHouse запрашивает необходимые ключи, отправляя запрос методом `POST`. @@ -360,7 +361,7 @@ MySQL можно подключить на локальном хосте чер ``` -### ClickHouse +### ClickHouse {#dicts-external_dicts_dict_sources-clickhouse} Пример настройки: @@ -390,7 +391,7 @@ MySQL можно подключить на локальном хосте чер - `invalidate_query` - запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external_dicts_dict_lifetime.md). -### MongoDB +### MongoDB {#dicts-external_dicts_dict_sources-mongodb} Пример настройки: @@ -416,4 +417,26 @@ MySQL можно подключить на локальном хосте чер - `db` - имя базы данных. - `collection` - имя коллекции. +### Redis {#dicts-external_dicts_dict_sources-redis} + +Пример настройки: + +```xml + + + localhost + 6379 + simple + 0 + + +``` + +Поля настройки: + +- `host` – хост Redis. +- `port` – порт сервера Redis. +- `storage_type` – способ хранения ключей. Необходимо использовать `simple` для источников с одним столбцом ключей, `hash_map` -- для источников с двумя столбцами ключей. Источники с более, чем двумя столбцами ключей, не поддерживаются. Может отсутствовать, значение по умолчанию `simple`. +- `db_index` – номер базы данных. Может отсутствовать, значение по умолчанию 0. + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/dicts/external_dicts_dict_sources/) From fa416dc94149cb891bc1ff2ac47a2a97d782f1ee Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Thu, 12 Sep 2019 17:48:28 +0300 Subject: [PATCH 42/49] fix Redis dictionary --- dbms/src/Common/config.h.in | 12 +---- dbms/src/Core/config_core.h.in | 1 + .../Dictionaries/RedisBlockInputStream.cpp | 38 +++++++------ .../Dictionaries/RedisDictionarySource.cpp | 53 ++++++++++--------- 4 files changed, 52 insertions(+), 52 deletions(-) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 8630954f205..ad017d3bf6b 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -3,17 +3,6 @@ // .h autogenerated by cmake! #cmakedefine01 USE_RE2_ST -<<<<<<< HEAD -======= -#cmakedefine01 USE_VECTORCLASS -#cmakedefine01 USE_RDKAFKA -#cmakedefine01 USE_CAPNP -#cmakedefine01 USE_EMBEDDED_COMPILER -#cmakedefine01 USE_POCO_SQLODBC -#cmakedefine01 USE_POCO_DATAODBC -#cmakedefine01 USE_POCO_MONGODB -#cmakedefine01 USE_POCO_REDIS ->>>>>>> 102967015e8813129384dcd0f6e377e5b730f167 #cmakedefine01 USE_POCO_NETSSL #cmakedefine01 USE_HDFS #cmakedefine01 USE_CPUID @@ -21,3 +10,4 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 CLICKHOUSE_SPLIT_BINARY +#cmakedefine01 USE_POCO_REDIS diff --git a/dbms/src/Core/config_core.h.in b/dbms/src/Core/config_core.h.in index 840a96413df..15402294f83 100644 --- a/dbms/src/Core/config_core.h.in +++ b/dbms/src/Core/config_core.h.in @@ -9,6 +9,7 @@ #cmakedefine01 USE_POCO_SQLODBC #cmakedefine01 USE_POCO_DATAODBC #cmakedefine01 USE_POCO_MONGODB +#cmakedefine01 USE_POCO_REDIS #cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 USE_SSL diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 7f23a421989..31ae9162141 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -35,8 +35,8 @@ namespace DB const std::shared_ptr & client_, const Poco::Redis::Array & keys_, const DB::Block & sample_block, - const size_t max_block_size) - : client(client_), keys(keys_), max_block_size{max_block_size} + const size_t max_block_size_) + : client(client_), keys(keys_), max_block_size{max_block_size_} { description.init(sample_block); } @@ -87,46 +87,46 @@ namespace DB switch (type) { - case ValueType::UInt8: + case ValueType::vtUInt8: insert(column, stringValue); break; - case ValueType::UInt16: + case ValueType::vtUInt16: insert(column, stringValue); break; - case ValueType::UInt32: + case ValueType::vtUInt32: insert(column, stringValue); break; - case ValueType::UInt64: + case ValueType::vtUInt64: insert(column, stringValue); break; - case ValueType::Int8: + case ValueType::vtInt8: insert(column, stringValue); break; - case ValueType::Int16: + case ValueType::vtInt16: insert(column, stringValue); break; - case ValueType::Int32: + case ValueType::vtInt32: insert(column, stringValue); break; - case ValueType::Int64: + case ValueType::vtInt64: insert(column, stringValue); break; - case ValueType::Float32: + case ValueType::vtFloat32: insert(column, stringValue); break; - case ValueType::Float64: + case ValueType::vtFloat64: insert(column, stringValue); break; - case ValueType::String: + case ValueType::vtString: static_cast(column).insert(parse(stringValue)); break; - case ValueType::Date: + case ValueType::vtDate: static_cast(column).insertValue(parse(stringValue).getDayNum()); break; - case ValueType::DateTime: + case ValueType::vtDateTime: static_cast(column).insertValue(static_cast(parse(stringValue))); break; - case ValueType::UUID: + case ValueType::vtUUID: static_cast(column).insertValue(parse(stringValue)); break; } @@ -138,7 +138,7 @@ namespace DB Block RedisBlockInputStream::readImpl() { - if (description.sample_block.rows() == 0 || keys.size() == 0) + if (keys.isNull() || description.sample_block.rows() == 0 || keys.size() == 0) all_read = true; if (all_read) @@ -163,6 +163,8 @@ namespace DB insertValue(*columns[idx], description.types[idx].first, value, name); }; + std::cerr << "keys: " << keys.toString() << "\n"; + if (keys.begin()->get()->isArray()) { size_t num_rows = 0; @@ -199,6 +201,8 @@ namespace DB } ++cursor; + std::cerr << "Redis command: " << commandForValues.toString() << "\n"; + Poco::Redis::Array values = client->execute(commandForValues); if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception{"Inconsistent sizes of keys and values in Redis request", diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 8def8abcf0e..fc1593b339e 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -61,18 +61,18 @@ namespace DB RedisDictionarySource::RedisDictionarySource( - const DictionaryStructure & dict_struct, - const std::string & host, - UInt16 port, - UInt8 db_index, - RedisStorageType::Id storage_type, - const Block & sample_block) - : dict_struct{dict_struct} - , host{host} - , port{port} - , db_index{db_index} - , storage_type{storage_type} - , sample_block{sample_block} + const DictionaryStructure & dict_struct_, + const std::string & host_, + UInt16 port_, + UInt8 db_index_, + RedisStorageType::Id storage_type_, + const Block & sample_block_) + : dict_struct{dict_struct_} + , host{host_} + , port{port_} + , db_index{db_index_} + , storage_type{storage_type_} + , sample_block{sample_block_} , client{std::make_shared(host, port)} { if (dict_struct.attributes.size() != 1) @@ -80,8 +80,8 @@ namespace DB DB::toString(dict_struct.attributes.size()) + ", expected 1", ErrorCodes::INVALID_CONFIG_PARAMETER}; - if (storage_type == RedisStorageType::HASH_MAP) { + if (storage_type == RedisStorageType::HASH_MAP) if (!dict_struct.key.has_value()) throw Exception{"Redis source with storage type \'hash_map\' must have key", ErrorCodes::INVALID_CONFIG_PARAMETER}; @@ -104,17 +104,17 @@ namespace DB RedisDictionarySource::RedisDictionarySource( - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, - Block & sample_block) + const DictionaryStructure & dict_struct_, + const Poco::Util::AbstractConfiguration & config_, + const std::string & config_prefix_, + Block & sample_block_) : RedisDictionarySource( - dict_struct, - config.getString(config_prefix + ".host"), - config.getUInt(config_prefix + ".port"), - config.getUInt(config_prefix + ".db_index", 0), - parseStorageType(config.getString(config_prefix + ".storage_type", "")), - sample_block) + dict_struct_, + config_.getString(config_prefix_ + ".host"), + config_.getUInt(config_prefix_ + ".port"), + config_.getUInt(config_prefix_ + ".db_index", 0), + parseStorageType(config_.getString(config_prefix_ + ".storage_type", "")), + sample_block_) { } @@ -140,11 +140,16 @@ namespace DB Poco::Redis::Array keys = client->execute(command_for_keys); - if (storage_type == RedisStorageType::HASH_MAP && dict_struct.key->size() == 2) + if (storage_type == RedisStorageType::HASH_MAP && !keys.isNull()) { Poco::Redis::Array hkeys; for (const auto & key : keys) { + Poco::Redis::Command command_for_type("TYPE"); + auto type_reply = client->execute(command_for_type.addRedisType(key)); + if (type_reply != "hash") + continue; + Poco::Redis::Command command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); From ad986f285ea9ec3a7f23de9007f2582fc33e42d0 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Fri, 13 Sep 2019 20:38:56 +0300 Subject: [PATCH 43/49] fix redis with mixed keys --- .../Dictionaries/RedisBlockInputStream.cpp | 4 --- .../Dictionaries/RedisDictionarySource.cpp | 35 +++++++++++++------ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 31ae9162141..cc8f1d005de 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -163,8 +163,6 @@ namespace DB insertValue(*columns[idx], description.types[idx].first, value, name); }; - std::cerr << "keys: " << keys.toString() << "\n"; - if (keys.begin()->get()->isArray()) { size_t num_rows = 0; @@ -201,8 +199,6 @@ namespace DB } ++cursor; - std::cerr << "Redis command: " << commandForValues.toString() << "\n"; - Poco::Redis::Array values = client->execute(commandForValues); if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception{"Inconsistent sizes of keys and values in Redis request", diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index fc1593b339e..92d7644db1f 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -132,37 +132,52 @@ namespace DB RedisDictionarySource::~RedisDictionarySource() = default; + static std::string storageTypeToKeyType(RedisStorageType::Id type) + { + switch (type) + { + case RedisStorageType::Id::SIMPLE: + return "string"; + case RedisStorageType::Id::HASH_MAP: + return "hash"; + default: + return "none"; + } + + __builtin_unreachable(); + } BlockInputStreamPtr RedisDictionarySource::loadAll() { Poco::Redis::Command command_for_keys("KEYS"); command_for_keys << "*"; - Poco::Redis::Array keys = client->execute(command_for_keys); + /// Get only keys for specified storage type. + auto all_keys = client->execute(command_for_keys); + Poco::Redis::Array keys; + auto key_type = storageTypeToKeyType(storage_type); + for (auto & key : all_keys) + if (key_type == client->execute(Poco::Redis::Command("TYPE").addRedisType(key))) + keys.addRedisType(std::move(key)); if (storage_type == RedisStorageType::HASH_MAP && !keys.isNull()) { Poco::Redis::Array hkeys; for (const auto & key : keys) { - Poco::Redis::Command command_for_type("TYPE"); - auto type_reply = client->execute(command_for_type.addRedisType(key)); - if (type_reply != "hash") - continue; - Poco::Redis::Command command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); - Poco::Redis::Array reply_for_primary_key = client->execute(command_for_secondary_keys); + auto secondary_keys = client->execute(command_for_secondary_keys); Poco::Redis::Array primary_with_secondary; primary_with_secondary.addRedisType(key); - for (const auto & secondary_key : reply_for_primary_key) + for (const auto & secondary_key : secondary_keys) primary_with_secondary.addRedisType(secondary_key); - hkeys.add(primary_with_secondary); + hkeys.add(std::move(primary_with_secondary)); } - keys = hkeys; + keys = std::move(hkeys); } return std::make_shared(client, std::move(keys), sample_block, max_block_size); From e9336c9166166832138a819b9c2ddfd9a997a62b Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Mon, 16 Sep 2019 19:17:56 +0300 Subject: [PATCH 44/49] improvements of redis external dictionary --- .../Dictionaries/RedisBlockInputStream.cpp | 116 +++++++----------- dbms/src/Dictionaries/RedisBlockInputStream.h | 9 +- .../Dictionaries/RedisDictionarySource.cpp | 65 ++++++---- dbms/src/Dictionaries/RedisDictionarySource.h | 27 ++-- 4 files changed, 101 insertions(+), 116 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index cc8f1d005de..016a13cf9e0 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -33,10 +33,11 @@ namespace DB RedisBlockInputStream::RedisBlockInputStream( const std::shared_ptr & client_, - const Poco::Redis::Array & keys_, + const RedisArray & keys_, + const RedisStorageType & storage_type_, const DB::Block & sample_block, const size_t max_block_size_) - : client(client_), keys(keys_), max_block_size{max_block_size_} + : client(client_), keys(keys_), storage_type(storage_type_), max_block_size{max_block_size_} { description.init(sample_block); } @@ -47,9 +48,8 @@ namespace DB namespace { using ValueType = ExternalResultDescription::ValueType; - using RedisArray = Poco::Redis::Array; - bool isNull(const Poco::Redis::RedisType::Ptr & value) + bool isNullString(const Poco::Redis::RedisType::Ptr & value) { return value->isBulkString() && static_cast *>(value.get())->value().isNull(); @@ -131,14 +131,12 @@ namespace DB break; } } - - void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } } Block RedisBlockInputStream::readImpl() { - if (keys.isNull() || description.sample_block.rows() == 0 || keys.size() == 0) + if (keys.isNull() || description.sample_block.rows() == 0 || cursor >= keys.size()) all_read = true; if (all_read) @@ -163,43 +161,31 @@ namespace DB insertValue(*columns[idx], description.types[idx].first, value, name); }; - if (keys.begin()->get()->isArray()) + if (storage_type == RedisStorageType::HASH_MAP) { size_t num_rows = 0; while (num_rows < max_block_size && !all_read) { if (cursor >= keys.size()) - { - all_read = true; break; - } - const auto & primary_with_secondary = *(keys.begin() + cursor); - const auto & keys_array = - static_cast *>(primary_with_secondary.get())->value(); + const auto & keys_array = keys.get(cursor); if (keys_array.size() < 2) { throw Exception{"Too low keys in request to source: " + DB::toString(keys_array.size()) - + ", expected 2 or more", - ErrorCodes::LOGICAL_ERROR}; + + ", expected 2 or more", ErrorCodes::LOGICAL_ERROR}; } + if (num_rows + keys_array.size() - 1 > max_block_size) - { - if (num_rows == 0) - throw Exception{"Too many (" + DB::toString(keys_array.size()) + ") key attributes", - ErrorCodes::LIMIT_EXCEEDED}; break; - } - Poco::Redis::Command commandForValues("HMGET"); - for (size_t i = 0; i < keys_array.size(); ++i) - { - const auto & secondary_key = *(keys_array.begin() + i); - commandForValues.addRedisType(secondary_key); - } + Poco::Redis::Command command_for_values("HMGET"); + for (auto it = keys_array.begin(); it != keys_array.end(); ++it) + command_for_values.addRedisType(*it); + ++cursor; + auto values = client->execute(command_for_values); - Poco::Redis::Array values = client->execute(commandForValues); if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception{"Inconsistent sizes of keys and values in Redis request", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; @@ -209,14 +195,12 @@ namespace DB { const auto & secondary_key = *(keys_array.begin() + i + 1); const auto & value = *(values.begin() + i); + if (value.isNull()) - { - insertValueByIdx(0, primary_key); - insertValueByIdx(1, secondary_key); - insertDefaultValue(*columns[2], *description.sample_block.getByPosition(2).column); - ++num_rows; - } - else if (!isNull(value)) // null string means 'no value for requested key' + throw Exception("Got NULL value in response from Redis", ErrorCodes::LOGICAL_ERROR); + + /// null string means 'no value for requested key' + if (!isNullString(value)) { insertValueByIdx(0, primary_key); insertValueByIdx(1, secondary_key); @@ -228,46 +212,34 @@ namespace DB } else { - size_t num_rows = 0; - while (num_rows < max_block_size && !all_read) + Poco::Redis::Command command_for_values("MGET"); + + // keys.size() > 0 + for (size_t i = 0; i < max_block_size && cursor < keys.size(); ++i) { - Poco::Redis::Command commandForValues("MGET"); + const auto & key = *(keys.begin() + cursor); + command_for_values.addRedisType(key); + ++cursor; + } - // keys.size() > 0 - for (size_t i = 0; i < max_block_size && cursor < keys.size(); ++i) + auto values = client->execute(command_for_values); + if (command_for_values.size() != values.size() + 1) // 'MGET' keys + throw Exception{"Inconsistent sizes of keys and values in Redis request", + ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; + + for (size_t i = 0; i < values.size(); ++i) + { + const auto & key = *(keys.begin() + cursor - i - 1); + const auto & value = *(values.begin() + values.size() - i - 1); + + if (value.isNull()) + throw Exception("Got NULL value in response from Redis", ErrorCodes::LOGICAL_ERROR); + + /// null string means 'no value for requested key' + if (!isNullString(value)) { - const auto & key = *(keys.begin() + cursor); - commandForValues.addRedisType(key); - ++cursor; - } - - if (commandForValues.size() == 1) // only 'MGET' - { - all_read = true; - break; - } - - Poco::Redis::Array values = client->execute(commandForValues); - if (commandForValues.size() != values.size() + 1) // 'MGET' keys - throw Exception{"Inconsistent sizes of keys and values in Redis request", - ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; - - for (size_t i = 0; i < values.size(); ++i) - { - const auto & key = *(keys.begin() + cursor - i - 1); - const auto & value = *(values.begin() + values.size() - i - 1); - if (value.isNull()) - { - insertValueByIdx(0, key); - insertDefaultValue(*columns[1], *description.sample_block.getByPosition(1).column); - ++num_rows; - } - else if (!isNull(value)) // null string means 'no value for requested key' - { - insertValueByIdx(0, key); - insertValueByIdx(1, value); - ++num_rows; - } + insertValueByIdx(0, key); + insertValueByIdx(1, value); } } } diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index dc64ee0fdd4..5034e16080b 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -3,7 +3,7 @@ #include #include #include - +#include "RedisDictionarySource.h" namespace Poco { @@ -11,6 +11,7 @@ namespace Poco { class Array; class Client; + class RedisType; } } @@ -20,9 +21,14 @@ namespace DB class RedisBlockInputStream final : public IBlockInputStream { public: + using RedisArray = Poco::Redis::Array; + using RedisTypePtr = Poco::Redis::RedisType::Ptr; + using RedisBulkString = Poco::Redis::BulkString; + RedisBlockInputStream( const std::shared_ptr & client_, const Poco::Redis::Array & keys_, + const RedisStorageType & storage_type_, const Block & sample_block, const size_t max_block_size); @@ -37,6 +43,7 @@ namespace DB std::shared_ptr client; Poco::Redis::Array keys; + RedisStorageType storage_type; const size_t max_block_size; ExternalResultDescription description; size_t cursor = 0; diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 92d7644db1f..5d67dd6ae92 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -57,7 +57,7 @@ namespace DB } - static const size_t max_block_size = 8192; + static const size_t max_block_size = 4; RedisDictionarySource::RedisDictionarySource( @@ -65,7 +65,7 @@ namespace DB const std::string & host_, UInt16 port_, UInt8 db_index_, - RedisStorageType::Id storage_type_, + RedisStorageType storage_type_, const Block & sample_block_) : dict_struct{dict_struct_} , host{host_} @@ -80,11 +80,12 @@ namespace DB DB::toString(dict_struct.attributes.size()) + ", expected 1", ErrorCodes::INVALID_CONFIG_PARAMETER}; - { if (storage_type == RedisStorageType::HASH_MAP) + { if (!dict_struct.key.has_value()) throw Exception{"Redis source with storage type \'hash_map\' must have key", ErrorCodes::INVALID_CONFIG_PARAMETER}; + if (dict_struct.key.value().size() > 2) throw Exception{"Redis source with complex keys having more than 2 attributes are unsupported", ErrorCodes::INVALID_CONFIG_PARAMETER}; @@ -93,7 +94,7 @@ namespace DB if (db_index != 0) { - Poco::Redis::Command command("SELECT"); + RedisCommand command("SELECT"); command << static_cast(db_index); std::string reply = client->execute(command); if (reply != "+OK\r\n") @@ -132,55 +133,65 @@ namespace DB RedisDictionarySource::~RedisDictionarySource() = default; - static std::string storageTypeToKeyType(RedisStorageType::Id type) + static std::string storageTypeToKeyType(RedisStorageType type) { switch (type) { - case RedisStorageType::Id::SIMPLE: + case RedisStorageType::SIMPLE: return "string"; - case RedisStorageType::Id::HASH_MAP: + case RedisStorageType::HASH_MAP: return "hash"; default: return "none"; } - __builtin_unreachable(); + __builtin_unreachable(); } BlockInputStreamPtr RedisDictionarySource::loadAll() { - Poco::Redis::Command command_for_keys("KEYS"); + RedisCommand command_for_keys("KEYS"); command_for_keys << "*"; /// Get only keys for specified storage type. - auto all_keys = client->execute(command_for_keys); - Poco::Redis::Array keys; + auto all_keys = client->execute(command_for_keys); + RedisArray keys; auto key_type = storageTypeToKeyType(storage_type); for (auto & key : all_keys) - if (key_type == client->execute(Poco::Redis::Command("TYPE").addRedisType(key))) + if (key_type == client->execute(RedisCommand("TYPE").addRedisType(key))) keys.addRedisType(std::move(key)); if (storage_type == RedisStorageType::HASH_MAP && !keys.isNull()) { - Poco::Redis::Array hkeys; + RedisArray hkeys; for (const auto & key : keys) { - Poco::Redis::Command command_for_secondary_keys("HKEYS"); + RedisCommand command_for_secondary_keys("HKEYS"); command_for_secondary_keys.addRedisType(key); - auto secondary_keys = client->execute(command_for_secondary_keys); + auto secondary_keys = client->execute(command_for_secondary_keys); - Poco::Redis::Array primary_with_secondary; + RedisArray primary_with_secondary; primary_with_secondary.addRedisType(key); for (const auto & secondary_key : secondary_keys) + { primary_with_secondary.addRedisType(secondary_key); - - hkeys.add(std::move(primary_with_secondary)); + /// Do not store more than max_block_size values for one request. + if (primary_with_secondary.size() == max_block_size + 1) + { + hkeys.add(std::move(primary_with_secondary)); + primary_with_secondary.clear(); + primary_with_secondary.addRedisType(key); + } + } + if (primary_with_secondary.size() > 1) + hkeys.add(std::move(primary_with_secondary)); } + keys = std::move(hkeys); } - return std::make_shared(client, std::move(keys), sample_block, max_block_size); + return std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size); } @@ -192,12 +203,12 @@ namespace DB if (!dict_struct.id) throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD}; - Poco::Redis::Array keys; + RedisArray keys; for (UInt64 id : ids) keys << DB::toString(id); - return std::make_shared(client, std::move(keys), sample_block, max_block_size); + return std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size); } std::string RedisDictionarySource::toString() const @@ -205,12 +216,14 @@ namespace DB return "Redis: " + host + ':' + DB::toString(port); } - RedisStorageType::Id RedisDictionarySource::parseStorageType(const std::string & storage_type) + RedisStorageType RedisDictionarySource::parseStorageType(const std::string & storage_type_str) { - RedisStorageType::Id storage_type_id = RedisStorageType::valueOf(storage_type); - if (storage_type_id == RedisStorageType::UNKNOWN) - storage_type_id = RedisStorageType::SIMPLE; - return storage_type_id; + if (storage_type_str == "hash_map") + return RedisStorageType::HASH_MAP; + else if (!storage_type_str.empty() && storage_type_str != "simple") + throw Exception("Unknown storage type " + storage_type_str + " for Redis dictionary", ErrorCodes::INVALID_CONFIG_PARAMETER); + + return RedisStorageType::SIMPLE; } } diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index d56de626a9a..19ba0a00e5f 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -18,30 +18,20 @@ namespace Poco namespace Redis { class Client; + class Array; + class Command; } } namespace DB { - namespace RedisStorageType + enum class RedisStorageType { - enum Id - { SIMPLE, HASH_MAP, UNKNOWN - }; - - Id valueOf(const std::string & value) - { - if (value == "simple") - return SIMPLE; - if (value == "hash_map") - return HASH_MAP; - return UNKNOWN; - } - } + }; class RedisDictionarySource final : public IDictionarySource { @@ -50,10 +40,13 @@ namespace DB const std::string & host, UInt16 port, UInt8 db_index, - RedisStorageType::Id storage_type, + RedisStorageType storage_type, const Block & sample_block); public: + using RedisArray = Poco::Redis::Array; + using RedisCommand = Poco::Redis::Command; + RedisDictionarySource( const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, @@ -90,14 +83,14 @@ namespace DB std::string toString() const override; private: - static RedisStorageType::Id parseStorageType(const std::string& storage_type); + static RedisStorageType parseStorageType(const std::string& storage_type); private: const DictionaryStructure dict_struct; const std::string host; const UInt16 port; const UInt8 db_index; - const RedisStorageType::Id storage_type; + const RedisStorageType storage_type; Block sample_block; std::shared_ptr client; From 4df1f1bb9a217e3d15b5b7ed69f91cb415bdeceb Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 17 Sep 2019 16:35:19 +0300 Subject: [PATCH 45/49] better integration test for redis dictionary (but still bad) --- .../Dictionaries/RedisDictionarySource.cpp | 8 +- dbms/tests/integration/helpers/cluster.py | 2 +- dbms/tests/integration/pytest.ini | 2 +- .../dictionary.py | 13 +- .../external_sources.py | 18 +- .../test.py | 218 +++++++++--- .../test_external_dictionaries/test_kv.py | 325 ------------------ 7 files changed, 201 insertions(+), 385 deletions(-) delete mode 100644 dbms/tests/integration/test_external_dictionaries/test_kv.py diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 5d67dd6ae92..5957e891722 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -57,8 +57,7 @@ namespace DB } - static const size_t max_block_size = 4; - + static const size_t max_block_size = 8192; RedisDictionarySource::RedisDictionarySource( const DictionaryStructure & dict_struct_, @@ -155,13 +154,16 @@ namespace DB /// Get only keys for specified storage type. auto all_keys = client->execute(command_for_keys); + if (all_keys.isNull()) + return std::make_shared(client, RedisArray{}, storage_type, sample_block, max_block_size); + RedisArray keys; auto key_type = storageTypeToKeyType(storage_type); for (auto & key : all_keys) if (key_type == client->execute(RedisCommand("TYPE").addRedisType(key))) keys.addRedisType(std::move(key)); - if (storage_type == RedisStorageType::HASH_MAP && !keys.isNull()) + if (storage_type == RedisStorageType::HASH_MAP) { RedisArray hkeys; for (const auto & key : keys) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index d3e4789d09e..30552975639 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -115,7 +115,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. diff --git a/dbms/tests/integration/pytest.ini b/dbms/tests/integration/pytest.ini index de681b6e750..31364843b29 100644 --- a/dbms/tests/integration/pytest.ini +++ b/dbms/tests/integration/pytest.ini @@ -1,4 +1,4 @@ [pytest] -python_files = test*.py +python_files = test.py norecursedirs = _instances timeout = 600 diff --git a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py index 6d53a5dfdd1..18e13fde2ad 100644 --- a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py +++ b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/dictionary.py @@ -44,6 +44,9 @@ class Row(object): for field, value in zip(fields, values): self.data[field.name] = value + def has_field(self, name): + return name in self.data + def get_value_by_name(self, name): return self.data[name] @@ -97,6 +100,7 @@ class DictionaryStructure(object): self.range_key = None self.ordinary_fields = [] self.range_fields = [] + self.has_hierarchy = False for field in fields: if field.is_key: @@ -105,6 +109,9 @@ class DictionaryStructure(object): self.range_fields.append(field) else: self.ordinary_fields.append(field) + + if field.hierarchical: + self.has_hierarchy = True if field.is_range_key: if self.range_key is not None: @@ -286,14 +293,13 @@ class DictionaryStructure(object): class Dictionary(object): - def __init__(self, name, structure, source, config_path, table_name, fields=None, values=None): + def __init__(self, name, structure, source, config_path, table_name, fields): self.name = name self.structure = copy.deepcopy(structure) self.source = copy.deepcopy(source) self.config_path = config_path self.table_name = table_name self.fields = fields - self.values = values def generate_config(self): with open(self.config_path, 'w') as result: @@ -343,3 +349,6 @@ class Dictionary(object): def is_complex(self): return self.structure.layout.is_complex + + def get_fields(self): + return self.fields diff --git a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py index 58af8c6487b..d1503224e98 100644 --- a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py +++ b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/external_sources.py @@ -402,14 +402,20 @@ class SourceRedis(ExternalSource): def prepare(self, structure, table_name, cluster): self.client = redis.StrictRedis(host=self.internal_hostname, port=self.internal_port) self.prepared = True + self.ordered_names = structure.get_ordered_names() - def load_kv_data(self, values): + def load_data(self, data, table_name): self.client.flushdb() - if len(values[0]) == 2: - self.client.mset({value[0]: value[1] for value in values}) - else: - for value in values: - self.client.hset(value[0], value[1], value[2]) + for row in list(data): + values = [] + for name in self.ordered_names: + values.append(str(row.data[name])) + print 'values: ', values + if len(values) == 2: + self.client.set(*values) + print 'kek: ', self.client.get(values[0]) + else: + self.client.hset(*values) def compatible_with_layout(self, layout): if ( diff --git a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py index 841a9124af0..01f9b15b51f 100644 --- a/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py +++ b/dbms/tests/integration/test_dictionaries_all_layouts_and_sources/test.py @@ -4,9 +4,10 @@ import os from helpers.cluster import ClickHouseCluster from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout from external_sources import SourceMySQL, SourceClickHouse, SourceFile, SourceExecutableCache, SourceExecutableHashed -from external_sources import SourceMongo, SourceHTTP, SourceHTTPS +from external_sources import SourceMongo, SourceHTTP, SourceHTTPS, SourceRedis SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') FIELDS = { "simple": [ @@ -65,9 +66,44 @@ FIELDS = { Field("Float32_", 'Float32', default_value_for_get=555.11), Field("Float64_", 'Float64', default_value_for_get=777.11), ] - } +VALUES = { + "simple": [ + [1, 22, 333, 4444, 55555, -6, -77, + -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0], + [2, 3, 4, 5, 6, -7, -8, + -9, -10, '550e8400-e29b-41d4-a716-446655440002', + '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1] + ], + "complex": [ + [1, 'world', 22, 333, 4444, 55555, -6, + -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', + 'hello', 22.543, 3332154213.4], + [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, + -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', + '1975-09-28', '2000-02-28 23:33:24', + 'my', 255.543, 3332221.44] + + ], + "ranged": [ + [1, '2019-02-10', '2019-02-01', '2019-02-28', + 22, 333, 4444, 55555, -6, -77, -888, -999, + '550e8400-e29b-41d4-a716-446655440003', + '1973-06-28', '1985-02-28 23:43:25', 'hello', + 22.543, 3332154213.4], + [2, '2019-04-10', '2019-04-01', '2019-04-28', + 11, 3223, 41444, 52515, -65, -747, -8388, -9099, + '550e8400-e29b-41d4-a716-446655440004', + '1973-06-29', '2002-02-28 23:23:25', '!!!!', + 32.543, 3332543.4] + ] +} + + + LAYOUTS = [ Layout("hashed"), Layout("cache"), @@ -91,36 +127,59 @@ SOURCES = [ DICTIONARIES = [] +# Key-value dictionaries with onle one possible field for key +SOURCES_KV = [ + SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), + SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), +] + +DICTIONARIES_KV = [] + cluster = None node = None +def get_dict(source, layout, fields, suffix_name=''): + global dict_configs_path + + structure = DictionaryStructure(layout, fields) + dict_name = source.name + "_" + layout.name + '_' + suffix_name + dict_path = os.path.join(dict_configs_path, dict_name + '.xml') + dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name, fields) + dictionary.generate_config() + return dictionary def setup_module(module): global DICTIONARIES global cluster global node + global dict_configs_path - dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') for f in os.listdir(dict_configs_path): os.remove(os.path.join(dict_configs_path, f)) for layout in LAYOUTS: for source in SOURCES: if source.compatible_with_layout(layout): - structure = DictionaryStructure(layout, FIELDS[layout.layout_type]) - dict_name = source.name + "_" + layout.name - dict_path = os.path.join(dict_configs_path, dict_name + '.xml') - dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name) - dictionary.generate_config() - DICTIONARIES.append(dictionary) + DICTIONARIES.append(get_dict(source, layout, FIELDS[layout.layout_type])) else: print "Source", source.name, "incompatible with layout", layout.name + + for layout in LAYOUTS: + field_keys = list(filter(lambda x: x.is_key, FIELDS[layout.layout_type])) + for source in SOURCES_KV: + if not source.compatible_with_layout(layout): + print "Source", source.name, "incompatible with layout", layout.name + continue + + for field in FIELDS[layout.layout_type]: + if not (field.is_key or field.is_range or field.is_range_key): + DICTIONARIES_KV.append(get_dict(source, layout, field_keys + [field], field.name)) main_configs = [] for fname in os.listdir(dict_configs_path): main_configs.append(os.path.join(dict_configs_path, fname)) cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True) + node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True, with_redis=True) cluster.add_instance('clickhouse1') @@ -128,7 +187,7 @@ def setup_module(module): def started_cluster(): try: cluster.start() - for dictionary in DICTIONARIES: + for dictionary in DICTIONARIES + DICTIONARIES_KV: print "Preparing", dictionary.name dictionary.prepare_source(cluster) print "Prepared" @@ -141,16 +200,8 @@ def started_cluster(): def test_simple_dictionaries(started_cluster): fields = FIELDS["simple"] - data = [ - Row(fields, - [1, 22, 333, 4444, 55555, -6, -77, - -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0]), - Row(fields, - [2, 3, 4, 5, 6, -7, -8, - -9, -10, '550e8400-e29b-41d4-a716-446655440002', - '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1]), - ] + values = VALUES["simple"] + data = [Row(fields, vals) for vals in values] simple_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"] for dct in simple_dicts: @@ -192,18 +243,8 @@ def test_simple_dictionaries(started_cluster): def test_complex_dictionaries(started_cluster): fields = FIELDS["complex"] - data = [ - Row(fields, - [1, 'world', 22, 333, 4444, 55555, -6, - -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', - 'hello', 22.543, 3332154213.4]), - Row(fields, - [2, 'qwerty2', 52, 2345, 6544, 9191991, -2, - -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', - '1975-09-28', '2000-02-28 23:33:24', - 'my', 255.543, 3332221.44]), - ] + values = VALUES["complex"] + data = [Row(fields, vals) for vals in values] complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] for dct in complex_dicts: @@ -232,20 +273,8 @@ def test_complex_dictionaries(started_cluster): def test_ranged_dictionaries(started_cluster): fields = FIELDS["ranged"] - data = [ - Row(fields, - [1, '2019-02-10', '2019-02-01', '2019-02-28', - 22, 333, 4444, 55555, -6, -77, -888, -999, - '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', - 22.543, 3332154213.4]), - Row(fields, - [2, '2019-04-10', '2019-04-01', '2019-04-28', - 11, 3223, 41444, 52515, -65, -747, -8388, -9099, - '550e8400-e29b-41d4-a716-446655440004', - '1973-06-29', '2002-02-28 23:23:25', '!!!!', - 32.543, 3332543.4]), - ] + values = VALUES["ranged"] + data = [Row(fields, vals) for vals in values] ranged_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] for dct in ranged_dicts: @@ -264,3 +293,98 @@ def test_ranged_dictionaries(started_cluster): for query, answer in queries_with_answers: print query assert node.query(query) == str(answer) + '\n' + + +def test_key_value_simple_dictionaries(started_cluster): + fields = FIELDS["simple"] + values = VALUES["simple"] + data = [Row(fields, vals) for vals in values] + + simple_dicts = [d for d in DICTIONARIES_KV if d.structure.layout.layout_type == "simple"] + + for dct in simple_dicts: + queries_with_answers = [] + local_data = [] + for row in data: + local_fields = dct.get_fields() + local_values = [row.get_value_by_name(field.name) for field in local_fields if row.has_field(field.name)] + local_data.append(Row(local_fields, local_values)) + + dct.load_data(local_data) + + node.query("system reload dictionary {}".format(dct.name)) + + print 'name: ', dct.name + + for row in local_data: + print dct.get_fields() + for field in dct.get_fields(): + print field.name, field.is_key + if not field.is_key: + for query in dct.get_select_get_queries(field, row): + queries_with_answers.append((query, row.get_value_by_name(field.name))) + + for query in dct.get_select_has_queries(field, row): + queries_with_answers.append((query, 1)) + + for query in dct.get_select_get_or_default_queries(field, row): + queries_with_answers.append((query, field.default_value_for_get)) + + if dct.structure.has_hierarchy: + for query in dct.get_hierarchical_queries(data[0]): + queries_with_answers.append((query, [1])) + + for query in dct.get_hierarchical_queries(data[1]): + queries_with_answers.append((query, [2, 1])) + + for query in dct.get_is_in_queries(data[0], data[1]): + queries_with_answers.append((query, 0)) + + for query in dct.get_is_in_queries(data[1], data[0]): + queries_with_answers.append((query, 1)) + + for query, answer in queries_with_answers: + print query + if isinstance(answer, list): + answer = str(answer).replace(' ', '') + assert node.query(query) == str(answer) + '\n' + + +def test_key_value_complex_dictionaries(started_cluster): + fields = FIELDS["complex"] + values = VALUES["complex"] + data = [Row(fields, vals) for vals in values] + + complex_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] + for dct in complex_dicts: + dct.load_data(data) + + node.query("system reload dictionaries") + + for dct in complex_dicts: + queries_with_answers = [] + local_data = [] + for row in data: + local_fields = dct.get_fields() + local_values = [row.get_value_by_name(field.name) for field in local_fields if row.has_field(field.name)] + local_data.append(Row(local_fields, local_values)) + + dct.load_data(local_data) + + node.query("system reload dictionary {}".format(dct.name)) + + for row in local_data: + for field in dct.get_fields(): + if not field.is_key: + for query in dct.get_select_get_queries(field, row): + queries_with_answers.append((query, row.get_value_by_name(field.name))) + + for query in dct.get_select_has_queries(field, row): + queries_with_answers.append((query, 1)) + + for query in dct.get_select_get_or_default_queries(field, row): + queries_with_answers.append((query, field.default_value_for_get)) + + for query, answer in queries_with_answers: + print query + assert node.query(query) == str(answer) + '\n' diff --git a/dbms/tests/integration/test_external_dictionaries/test_kv.py b/dbms/tests/integration/test_external_dictionaries/test_kv.py deleted file mode 100644 index 69fa48d5e2e..00000000000 --- a/dbms/tests/integration/test_external_dictionaries/test_kv.py +++ /dev/null @@ -1,325 +0,0 @@ -import os - -import pytest -from dictionary import Field, Row, Dictionary, DictionaryStructure, Layout -from external_sources import SourceRedis, SourceAerospike - -from helpers.cluster import ClickHouseCluster - -SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) - -FIELDS = { - "simple": [ - Field("KeyField", 'UInt64', is_key=True, default_value_for_get=9999999), - Field("UInt8_", 'UInt8', default_value_for_get=55), - Field("UInt16_", 'UInt16', default_value_for_get=66), - Field("UInt32_", 'UInt32', default_value_for_get=77), - Field("UInt64_", 'UInt64', default_value_for_get=88), - Field("Int8_", 'Int8', default_value_for_get=-55), - Field("Int16_", 'Int16', default_value_for_get=-66), - Field("Int32_", 'Int32', default_value_for_get=-77), - Field("Int64_", 'Int64', default_value_for_get=-88), - Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), - Field("Date_", 'Date', default_value_for_get='2018-12-30'), - Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), - Field("String_", 'String', default_value_for_get='hi'), - Field("Float32_", 'Float32', default_value_for_get=555.11), - Field("Float64_", 'Float64', default_value_for_get=777.11), - Field("ParentKeyField", "UInt64", default_value_for_get=444, hierarchical=True), - ], - "complex": [ - Field("KeyField1", 'UInt64', is_key=True, default_value_for_get=9999999), - Field("KeyField2", 'String', is_key=True, default_value_for_get='xxxxxxxxx'), - Field("UInt8_", 'UInt8', default_value_for_get=55), - Field("UInt16_", 'UInt16', default_value_for_get=66), - Field("UInt32_", 'UInt32', default_value_for_get=77), - Field("UInt64_", 'UInt64', default_value_for_get=88), - Field("Int8_", 'Int8', default_value_for_get=-55), - Field("Int16_", 'Int16', default_value_for_get=-66), - Field("Int32_", 'Int32', default_value_for_get=-77), - Field("Int64_", 'Int64', default_value_for_get=-88), - Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), - Field("Date_", 'Date', default_value_for_get='2018-12-30'), - Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), - Field("String_", 'String', default_value_for_get='hi'), - Field("Float32_", 'Float32', default_value_for_get=555.11), - Field("Float64_", 'Float64', default_value_for_get=777.11), - ], - "ranged": [ - Field("KeyField1", 'UInt64', is_key=True), - Field("KeyField2", 'Date', is_range_key=True), - Field("StartDate", 'Date', range_hash_type='min'), - Field("EndDate", 'Date', range_hash_type='max'), - Field("UInt8_", 'UInt8', default_value_for_get=55), - Field("UInt16_", 'UInt16', default_value_for_get=66), - Field("UInt32_", 'UInt32', default_value_for_get=77), - Field("UInt64_", 'UInt64', default_value_for_get=88), - Field("Int8_", 'Int8', default_value_for_get=-55), - Field("Int16_", 'Int16', default_value_for_get=-66), - Field("Int32_", 'Int32', default_value_for_get=-77), - Field("Int64_", 'Int64', default_value_for_get=-88), - Field("UUID_", 'UUID', default_value_for_get='550e8400-0000-0000-0000-000000000000'), - Field("Date_", 'Date', default_value_for_get='2018-12-30'), - Field("DateTime_", 'DateTime', default_value_for_get='2018-12-30 00:00:00'), - Field("String_", 'String', default_value_for_get='hi'), - Field("Float32_", 'Float32', default_value_for_get=555.11), - Field("Float64_", 'Float64', default_value_for_get=777.11), - ], -} - -VALUES = { - "simple": [ - [ - 1, 22, 333, 4444, 55555, -6, -77, - -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', 22.543, 3332154213.4, 0, - ], - [ - 2, 3, 4, 5, 6, -7, -8, - -9, -10, '550e8400-e29b-41d4-a716-446655440002', - '1978-06-28', '1986-02-28 23:42:25', 'hello', 21.543, 3222154213.4, 1, - ], - ], - "complex": [ - [ - 1, 'world', 22, 333, 4444, 55555, -6, - -77, -888, -999, '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', - 'hello', 22.543, 3332154213.4, - ], - [ - 2, 'qwerty2', 52, 2345, 6544, 9191991, -2, - -717, -81818, -92929, '550e8400-e29b-41d4-a716-446655440007', - '1975-09-28', '2000-02-28 23:33:24', - 'my', 255.543, 3332221.44, - ], - ], - "ranged": [ - [ - 1, '2019-02-10', '2019-02-01', '2019-02-28', - 22, 333, 4444, 55555, -6, -77, -888, -999, - '550e8400-e29b-41d4-a716-446655440003', - '1973-06-28', '1985-02-28 23:43:25', 'hello', - 22.543, 3332154213.4, - ], - [ - 2, '2019-04-10', '2019-04-01', '2019-04-28', - 11, 3223, 41444, 52515, -65, -747, -8388, -9099, - '550e8400-e29b-41d4-a716-446655440004', - '1973-06-29', '2002-02-28 23:23:25', '!!!!', - 32.543, 3332543.4, - ], - ], -} - -LAYOUTS = [ - Layout("flat"), - Layout("hashed"), - Layout("cache"), - Layout('complex_key_hashed_one_key'), - Layout('complex_key_hashed_two_keys'), - Layout("complex_key_cache"), - Layout("range_hashed"), -] - -SOURCES = [ - SourceRedis("RedisSimple", "localhost", "6380", "redis1", "6379", "", "", storage_type="simple"), - SourceRedis("RedisHash", "localhost", "6380", "redis1", "6379", "", "", storage_type="hash_map"), - # SourceAerospike("Aerospike", "localhost", "3000", "aerospike1", "3000", "", ""), -] - -DICTIONARIES = [] - -cluster = None -node = None - - -def setup_kv_dict(suffix, layout, fields, kv_source, dict_configs_path, values): - global DICTIONARIES - - structure = DictionaryStructure(layout, fields) - dict_name = "{}_{}_{}".format(kv_source.name, layout.name, suffix) - dict_path = os.path.join(dict_configs_path, dict_name + '.xml') - dictionary = Dictionary(dict_name, structure, kv_source, dict_path, "table_" + dict_name, fields, values) - dictionary.generate_config() - DICTIONARIES.append(dictionary) - - -def setup_module(module): - global DICTIONARIES - global cluster - global node - - dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries') - for f in os.listdir(dict_configs_path): - os.remove(os.path.join(dict_configs_path, f)) - - for layout in LAYOUTS: - for source in SOURCES: - if source.compatible_with_layout(layout): - if layout.layout_type == "simple": - fields_len = len(FIELDS["simple"]) - for i in range(fields_len - 1): - local_fields = [FIELDS["simple"][0], FIELDS["simple"][i + 1]] - local_values = [[value[0], value[i + 1]] for value in VALUES["simple"]] - setup_kv_dict(i + 1, layout, local_fields, source, dict_configs_path, local_values) - elif layout.layout_type == "complex": - fields_len = len(FIELDS["complex"]) - for i in range(fields_len - 2): - if layout.name == 'complex_key_hashed_two_keys': - local_fields = [FIELDS['complex'][0], FIELDS['complex'][1], FIELDS['complex'][i + 2]] - local_values = [[value[0], value[1], value[i + 2]] for value in VALUES["complex"]] - else: - local_fields = [FIELDS['complex'][1], FIELDS['complex'][i + 2]] - local_values = [[value[1], value[i + 2]] for value in VALUES["complex"]] - setup_kv_dict(i + 2, layout, local_fields, source, dict_configs_path, local_values) - elif layout.layout_type == "ranged": - fields_len = len(FIELDS["ranged"]) - local_fields = FIELDS["ranged"][0:5] - local_values = VALUES["ranged"][0:5] - for i in range(fields_len - 4): - local_fields[4] = FIELDS["ranged"][i + 4] - for j, value in enumerate(VALUES["ranged"]): - local_values[j][4] = value[i + 4] - setup_kv_dict(i + 2, layout, local_fields, source, dict_configs_path, local_values) - else: - print "Source", source.name, "incompatible with layout", layout.name - - main_configs = [] - for fname in os.listdir(dict_configs_path): - main_configs.append(os.path.join(dict_configs_path, fname)) - cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) - node = cluster.add_instance('node', main_configs=main_configs, with_redis=True) - cluster.add_instance('clickhouse1') - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - for dictionary in DICTIONARIES: - print "Preparing", dictionary.name - dictionary.prepare_source(cluster) - print "Prepared" - - yield cluster - - finally: - cluster.shutdown() - - -def prepare_data(fields, values_by_row): - return [Row(fields, values) for values in values_by_row] - - -def test_simple_kv_dictionaries(started_cluster): - simple_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "simple"] - - for dct in simple_kv_dicts: - queries_with_answers = [] - fields = dct.fields - print("FIELDS AND VALUES FOR " + dct.name) - print(fields) - print(dct.values) - data = prepare_data(fields, dct.values) - dct.source.load_kv_data(dct.values) - - try: - node.query("system reload dictionary '{}'".format(dct.name)) - except Exception: - print(dct.name) - raise - - for row in data: - for field in fields: - if not field.is_key: - for query in dct.get_select_get_queries(field, row): - queries_with_answers.append((query, row.get_value_by_name(field.name))) - - for query in dct.get_select_has_queries(field, row): - queries_with_answers.append((query, 1)) - - for query in dct.get_select_get_or_default_queries(field, row): - queries_with_answers.append((query, field.default_value_for_get)) - if dct.fields[1].hierarchical: - for query in dct.get_hierarchical_queries(data[0]): - queries_with_answers.append((query, [1])) - - for query in dct.get_hierarchical_queries(data[1]): - queries_with_answers.append((query, [2, 1])) - - for query in dct.get_is_in_queries(data[0], data[1]): - queries_with_answers.append((query, 0)) - - for query in dct.get_is_in_queries(data[1], data[0]): - queries_with_answers.append((query, 1)) - - for query, answer in queries_with_answers: - if isinstance(answer, list): - answer = str(answer).replace(' ', '') - print query - assert node.query(query) == str(answer) + '\n', query - - -def test_complex_dictionaries(started_cluster): - complex_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "complex"] - - for dct in complex_kv_dicts: - queries_with_answers = [] - fields = dct.fields - print("FIELDS AND VALUES FOR " + dct.name) - print(fields) - print(dct.values) - data = prepare_data(fields, dct.values) - dct.source.load_kv_data(dct.values) - - try: - node.query("system reload dictionary '{}'".format(dct.name)) - except Exception: - print(dct.name) - raise - - for row in data: - for field in fields: - if not field.is_key: - for query in dct.get_select_get_queries(field, row): - queries_with_answers.append((query, row.get_value_by_name(field.name))) - - for query in dct.get_select_has_queries(field, row): - queries_with_answers.append((query, 1)) - - for query in dct.get_select_get_or_default_queries(field, row): - queries_with_answers.append((query, field.default_value_for_get)) - - for query, answer in queries_with_answers: - print query - assert node.query(query) == str(answer) + '\n' - - -def test_ranged_dictionaries(started_cluster): - complex_kv_dicts = [d for d in DICTIONARIES if d.structure.layout.layout_type == "ranged"] - - for dct in complex_kv_dicts: - queries_with_answers = [] - fields = dct.fields - print("FIELDS AND VALUES FOR " + dct.name) - print(fields) - print(dct.values) - data = prepare_data(fields, dct.values) - dct.source.load_kv_data(dct.values) - - try: - node.query("system reload dictionary '{}'".format(dct.name)) - except Exception: - print(dct.name) - raise - - for row in data: - for field in fields: - if not field.is_key and not field.is_range: - for query in dct.get_select_get_queries(field, row): - queries_with_answers.append((query, row.get_value_by_name(field.name))) - - for query, answer in queries_with_answers: - print query - assert node.query(query) == str(answer) + '\n' From 4480e97f9fbbe825b13a4eb19ecd3056e750b2d5 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 17 Sep 2019 17:16:07 +0300 Subject: [PATCH 46/49] fix build --- dbms/src/Common/config.h.in | 1 - dbms/src/Dictionaries/RedisBlockInputStream.cpp | 3 +-- dbms/src/Dictionaries/RedisBlockInputStream.h | 3 --- dbms/src/Dictionaries/RedisDictionarySource.h | 2 +- .../Storages/System/StorageSystemBuildOptions.generated.cpp.in | 1 + 5 files changed, 3 insertions(+), 7 deletions(-) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index ad017d3bf6b..7804068e5c4 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -10,4 +10,3 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 CLICKHOUSE_SPLIT_BINARY -#cmakedefine01 USE_POCO_REDIS diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 016a13cf9e0..5b680a965a3 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -1,4 +1,4 @@ -#include +#include "RedisBlockInputStream.h" #if USE_POCO_REDIS # include @@ -17,7 +17,6 @@ # include # include "DictionaryStructure.h" -# include "RedisBlockInputStream.h" namespace DB diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 5034e16080b..578e644c9f8 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -11,7 +11,6 @@ namespace Poco { class Array; class Client; - class RedisType; } } @@ -22,8 +21,6 @@ namespace DB { public: using RedisArray = Poco::Redis::Array; - using RedisTypePtr = Poco::Redis::RedisType::Ptr; - using RedisBulkString = Poco::Redis::BulkString; RedisBlockInputStream( const std::shared_ptr & client_, diff --git a/dbms/src/Dictionaries/RedisDictionarySource.h b/dbms/src/Dictionaries/RedisDictionarySource.h index 19ba0a00e5f..f63dd9545d2 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.h +++ b/dbms/src/Dictionaries/RedisDictionarySource.h @@ -1,6 +1,6 @@ #pragma once -#include +#include "config_core.h" #include #if USE_POCO_REDIS diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 63ddfe15649..1bb87068426 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -61,6 +61,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "USE_HYPERSCAN", "@USE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", + "USE_POCO_REDIS", "@USE_POCO_REDIS", nullptr, nullptr }; From df82e4bde869e9a16d9fe0fa3a888e9ca9114ad1 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 17 Sep 2019 17:55:09 +0300 Subject: [PATCH 47/49] fix build --- dbms/src/Dictionaries/RedisBlockInputStream.cpp | 1 + dbms/src/Dictionaries/RedisBlockInputStream.h | 13 ++++++++++--- dbms/src/Dictionaries/RedisDictionarySource.cpp | 8 ++++---- .../StorageSystemBuildOptions.generated.cpp.in | 2 +- 4 files changed, 16 insertions(+), 8 deletions(-) diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index 5b680a965a3..daaae7d1d00 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -1,4 +1,5 @@ #include "RedisBlockInputStream.h" + #if USE_POCO_REDIS # include diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 578e644c9f8..448005f1ef5 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -1,9 +1,13 @@ #pragma once +#include "config_core.h" #include -#include -#include -#include "RedisDictionarySource.h" + +#if USE_POCO_REDIS +# include +# include +# include "RedisDictionarySource.h" +# include namespace Poco { @@ -11,6 +15,7 @@ namespace Poco { class Array; class Client; + class RedisType; } } @@ -48,3 +53,5 @@ namespace DB }; } + +#endif diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index 5957e891722..ef901a4ea1b 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -19,10 +19,10 @@ namespace DB #if USE_POCO_REDIS return std::make_unique(dict_struct, config, config_prefix + ".redis", sample_block); #else - (void)dict_struct; - (void)config; - (void)config_prefix; - (void)sample_block; + UNUSED(dict_struct); + UNUSED(config); + UNUSED(config_prefix); + UNUSED(sample_block); throw Exception{"Dictionary source of type `redis` is disabled because poco library was built without redis support.", ErrorCodes::SUPPORT_IS_DISABLED}; #endif diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 1bb87068426..25e7086c1a6 100644 --- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -61,7 +61,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "USE_HYPERSCAN", "@USE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", - "USE_POCO_REDIS", "@USE_POCO_REDIS", + "USE_POCO_REDIS", "@USE_POCO_REDIS@", nullptr, nullptr }; From 7ccf04440a24e5f5f33a878a5c55b28310191d30 Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Tue, 17 Sep 2019 20:57:48 +0300 Subject: [PATCH 48/49] better code in Redis external dictionary --- dbms/src/Common/ErrorCodes.cpp | 2 +- .../Dictionaries/RedisBlockInputStream.cpp | 93 ++++++------------- dbms/src/Dictionaries/RedisBlockInputStream.h | 4 +- .../Dictionaries/RedisDictionarySource.cpp | 28 +++--- 4 files changed, 44 insertions(+), 83 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index c4aa1449e0f..06a967ecded 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -451,7 +451,7 @@ namespace ErrorCodes extern const int INVALID_TEMPLATE_FORMAT = 474; extern const int INVALID_WITH_FILL_EXPRESSION = 475; extern const int WITH_TIES_WITHOUT_ORDER_BY = 476; - extern const int INVALID_USAGE_OF_INPUT = 477; + extern const int INTERNAL_REDIS_ERROR = 477; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.cpp b/dbms/src/Dictionaries/RedisBlockInputStream.cpp index daaae7d1d00..ad3d9002b36 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.cpp +++ b/dbms/src/Dictionaries/RedisBlockInputStream.cpp @@ -26,8 +26,8 @@ namespace DB { extern const int TYPE_MISMATCH; extern const int LOGICAL_ERROR; - extern const int LIMIT_EXCEEDED; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int INTERNAL_REDIS_ERROR; } @@ -49,42 +49,18 @@ namespace DB { using ValueType = ExternalResultDescription::ValueType; - bool isNullString(const Poco::Redis::RedisType::Ptr & value) - { - return value->isBulkString() && - static_cast *>(value.get())->value().isNull(); - } - - std::string getStringOrThrow(const Poco::Redis::RedisType::Ptr & value, const std::string & column_name) - { - switch (value->type()) - { - case Poco::Redis::RedisTypeTraits::TypeId: - { - const auto & bs = static_cast *>(value.get())->value(); - if (bs.isNull()) - throw Exception{"Type mismatch, expected not null String for column " + column_name, - ErrorCodes::TYPE_MISMATCH}; - return bs.value(); - } - case Poco::Redis::RedisTypeTraits::TypeId: - return static_cast *>(value.get())->value(); - default: - throw Exception{"Type mismatch, expected std::string, got type id = " + toString(value->type()) + " for column " + column_name, - ErrorCodes::TYPE_MISMATCH}; - } - } - template inline void insert(IColumn & column, const String & stringValue) { - static_cast &>(column).insertValue(parse(stringValue)); + assert_cast &>(column).insertValue(parse(stringValue)); } - void insertValue(IColumn & column, const ValueType type, const Poco::Redis::RedisType::Ptr & value, const std::string & name) + void insertValue(IColumn & column, const ValueType type, const Poco::Redis::BulkString & bulk_string) { - String stringValue = getStringOrThrow(value, name); + if (bulk_string.isNull()) + throw Exception{"Type mismatch, expected not Null String", ErrorCodes::TYPE_MISMATCH}; + String stringValue = bulk_string.value(); switch (type) { case ValueType::vtUInt8: @@ -118,16 +94,16 @@ namespace DB insert(column, stringValue); break; case ValueType::vtString: - static_cast(column).insert(parse(stringValue)); + assert_cast(column).insert(parse(stringValue)); break; case ValueType::vtDate: - static_cast(column).insertValue(parse(stringValue).getDayNum()); + assert_cast(column).insertValue(parse(stringValue).getDayNum()); break; case ValueType::vtDateTime: - static_cast(column).insertValue(static_cast(parse(stringValue))); + assert_cast(column).insertValue(static_cast(parse(stringValue))); break; case ValueType::vtUUID: - static_cast(column).insertValue(parse(stringValue)); + assert_cast(column).insertValue(parse(stringValue)); break; } } @@ -150,25 +126,21 @@ namespace DB const auto insertValueByIdx = [this, &columns](size_t idx, const auto & value) { - const auto & name = description.sample_block.getByPosition(idx).name; if (description.types[idx].second) { ColumnNullable & column_nullable = static_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value, name); + insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); column_nullable.getNullMapData().emplace_back(0); } else - insertValue(*columns[idx], description.types[idx].first, value, name); + insertValue(*columns[idx], description.types[idx].first, value); }; if (storage_type == RedisStorageType::HASH_MAP) { size_t num_rows = 0; - while (num_rows < max_block_size && !all_read) + for (; cursor < keys.size(); ++cursor) { - if (cursor >= keys.size()) - break; - const auto & keys_array = keys.get(cursor); if (keys_array.size() < 2) { @@ -183,24 +155,20 @@ namespace DB for (auto it = keys_array.begin(); it != keys_array.end(); ++it) command_for_values.addRedisType(*it); - ++cursor; auto values = client->execute(command_for_values); if (keys_array.size() != values.size() + 1) // 'HMGET' primary_key secondary_keys throw Exception{"Inconsistent sizes of keys and values in Redis request", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; - const auto & primary_key = *keys_array.begin(); + const auto & primary_key = keys_array.get(0); for (size_t i = 0; i < values.size(); ++i) { - const auto & secondary_key = *(keys_array.begin() + i + 1); - const auto & value = *(values.begin() + i); - - if (value.isNull()) - throw Exception("Got NULL value in response from Redis", ErrorCodes::LOGICAL_ERROR); + const auto & secondary_key = keys_array.get(i + 1); + const auto & value = values.get(i); /// null string means 'no value for requested key' - if (!isNullString(value)) + if (!value.isNull()) { insertValueByIdx(0, primary_key); insertValueByIdx(1, secondary_key); @@ -214,34 +182,27 @@ namespace DB { Poco::Redis::Command command_for_values("MGET"); - // keys.size() > 0 - for (size_t i = 0; i < max_block_size && cursor < keys.size(); ++i) - { - const auto & key = *(keys.begin() + cursor); - command_for_values.addRedisType(key); - ++cursor; - } + size_t need_values = std::min(max_block_size, keys.size() - cursor); + for (size_t i = 0; i < need_values; ++i) + command_for_values.add(keys.get(cursor + i)); auto values = client->execute(command_for_values); - if (command_for_values.size() != values.size() + 1) // 'MGET' keys - throw Exception{"Inconsistent sizes of keys and values in Redis request", - ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH}; + if (values.size() != need_values) + throw Exception{"Inconsistent sizes of keys and values in Redis request", ErrorCodes::INTERNAL_REDIS_ERROR}; for (size_t i = 0; i < values.size(); ++i) { - const auto & key = *(keys.begin() + cursor - i - 1); - const auto & value = *(values.begin() + values.size() - i - 1); + const auto & key = keys.get(cursor + i); + const auto & value = values.get(i); - if (value.isNull()) - throw Exception("Got NULL value in response from Redis", ErrorCodes::LOGICAL_ERROR); - - /// null string means 'no value for requested key' - if (!isNullString(value)) + /// Null string means 'no value for requested key' + if (!value.isNull()) { insertValueByIdx(0, key); insertValueByIdx(1, value); } } + cursor += need_values; } return description.sample_block.cloneWithColumns(std::move(columns)); diff --git a/dbms/src/Dictionaries/RedisBlockInputStream.h b/dbms/src/Dictionaries/RedisBlockInputStream.h index 448005f1ef5..86448095787 100644 --- a/dbms/src/Dictionaries/RedisBlockInputStream.h +++ b/dbms/src/Dictionaries/RedisBlockInputStream.h @@ -8,14 +8,13 @@ # include # include "RedisDictionarySource.h" # include +# include namespace Poco { namespace Redis { - class Array; class Client; - class RedisType; } } @@ -26,6 +25,7 @@ namespace DB { public: using RedisArray = Poco::Redis::Array; + using RedisBulkString = Poco::Redis::BulkString; RedisBlockInputStream( const std::shared_ptr & client_, diff --git a/dbms/src/Dictionaries/RedisDictionarySource.cpp b/dbms/src/Dictionaries/RedisDictionarySource.cpp index ef901a4ea1b..905ae104dc0 100644 --- a/dbms/src/Dictionaries/RedisDictionarySource.cpp +++ b/dbms/src/Dictionaries/RedisDictionarySource.cpp @@ -13,7 +13,7 @@ namespace DB { auto createTableSource = [=](const DictionaryStructure & dict_struct, const Poco::Util::AbstractConfiguration & config, - const std::string & config_prefix, + const String & config_prefix, Block & sample_block, const Context & /* context */) -> DictionarySourcePtr { #if USE_POCO_REDIS @@ -52,8 +52,8 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int CANNOT_SELECT; extern const int INVALID_CONFIG_PARAMETER; + extern const int INTERNAL_REDIS_ERROR; } @@ -61,7 +61,7 @@ namespace DB RedisDictionarySource::RedisDictionarySource( const DictionaryStructure & dict_struct_, - const std::string & host_, + const String & host_, UInt16 port_, UInt8 db_index_, RedisStorageType storage_type_, @@ -81,12 +81,12 @@ namespace DB if (storage_type == RedisStorageType::HASH_MAP) { - if (!dict_struct.key.has_value()) + if (!dict_struct.key) throw Exception{"Redis source with storage type \'hash_map\' must have key", ErrorCodes::INVALID_CONFIG_PARAMETER}; - if (dict_struct.key.value().size() > 2) - throw Exception{"Redis source with complex keys having more than 2 attributes are unsupported", + if (dict_struct.key->size() != 2) + throw Exception{"Redis source with storage type \'hash_map\' requiers 2 keys", ErrorCodes::INVALID_CONFIG_PARAMETER}; // suppose key[0] is primary key, key[1] is secondary key } @@ -95,10 +95,10 @@ namespace DB { RedisCommand command("SELECT"); command << static_cast(db_index); - std::string reply = client->execute(command); + String reply = client->execute(command); if (reply != "+OK\r\n") - throw Exception{"Selecting db with index " + DB::toString(db_index) + " failed with reason " + reply, - ErrorCodes::CANNOT_SELECT}; + throw Exception{"Selecting database with index " + DB::toString(db_index) + + " failed with reason " + reply, ErrorCodes::INTERNAL_REDIS_ERROR}; } } @@ -106,7 +106,7 @@ namespace DB RedisDictionarySource::RedisDictionarySource( const DictionaryStructure & dict_struct_, const Poco::Util::AbstractConfiguration & config_, - const std::string & config_prefix_, + const String & config_prefix_, Block & sample_block_) : RedisDictionarySource( dict_struct_, @@ -132,7 +132,7 @@ namespace DB RedisDictionarySource::~RedisDictionarySource() = default; - static std::string storageTypeToKeyType(RedisStorageType type) + static String storageTypeToKeyType(RedisStorageType type) { switch (type) { @@ -160,7 +160,7 @@ namespace DB RedisArray keys; auto key_type = storageTypeToKeyType(storage_type); for (auto & key : all_keys) - if (key_type == client->execute(RedisCommand("TYPE").addRedisType(key))) + if (key_type == client->execute(RedisCommand("TYPE").addRedisType(key))) keys.addRedisType(std::move(key)); if (storage_type == RedisStorageType::HASH_MAP) @@ -213,12 +213,12 @@ namespace DB return std::make_shared(client, std::move(keys), storage_type, sample_block, max_block_size); } - std::string RedisDictionarySource::toString() const + String RedisDictionarySource::toString() const { return "Redis: " + host + ':' + DB::toString(port); } - RedisStorageType RedisDictionarySource::parseStorageType(const std::string & storage_type_str) + RedisStorageType RedisDictionarySource::parseStorageType(const String & storage_type_str) { if (storage_type_str == "hash_map") return RedisStorageType::HASH_MAP; From 651f5b0e9ff63fe521992eb0f46cfb37cb326d6d Mon Sep 17 00:00:00 2001 From: CurtizJ Date: Wed, 18 Sep 2019 13:21:10 +0300 Subject: [PATCH 49/49] merging with master --- dbms/src/Common/ErrorCodes.cpp | 5 +---- dbms/tests/integration/helpers/cluster.py | 6 +----- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 54b155c3872..0347f2c164a 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -451,16 +451,13 @@ namespace ErrorCodes extern const int INVALID_TEMPLATE_FORMAT = 474; extern const int INVALID_WITH_FILL_EXPRESSION = 475; extern const int WITH_TIES_WITHOUT_ORDER_BY = 476; -<<<<<<< HEAD - extern const int INTERNAL_REDIS_ERROR = 477; -======= extern const int INVALID_USAGE_OF_INPUT = 477; extern const int UNKNOWN_POLICY = 478; extern const int UNKNOWN_DISK = 479; extern const int UNKNOWN_PROTOCOL = 480; extern const int PATH_ACCESS_DENIED = 481; extern const int DICTIONARY_ACCESS_DENIED = 482; ->>>>>>> upstream/master + extern const int INTERNAL_REDIS_ERROR = 483; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 14139ba43a8..0ba48c32a8d 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -115,11 +115,7 @@ class ClickHouseCluster: cmd += " client" return cmd -<<<<<<< HEAD - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False): -======= - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=[]): ->>>>>>> upstream/master + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False, with_redis=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=[]): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.