From fcaa058c9271658a9e0bf180eaa2d6f0b298746d Mon Sep 17 00:00:00 2001
From: Kirill Nikiforov
Date: Sat, 16 Nov 2024 00:28:54 +0400
Subject: [PATCH 01/56] get rid of poco mongodb integration implementation

---
 base/poco/CMakeLists.txt | 5 -
 base/poco/MongoDB/CMakeLists.txt | 16 -
 .../poco/MongoDB/include/Poco/MongoDB/Array.h | 142 -----
 .../MongoDB/include/Poco/MongoDB/BSONReader.h | 88 ---
 .../MongoDB/include/Poco/MongoDB/BSONWriter.h | 76 ---
 .../MongoDB/include/Poco/MongoDB/Binary.h | 158 -----
 .../MongoDB/include/Poco/MongoDB/Connection.h | 191 ------
 .../MongoDB/include/Poco/MongoDB/Cursor.h | 80 ---
 .../MongoDB/include/Poco/MongoDB/Database.h | 233 ------
 .../include/Poco/MongoDB/DeleteRequest.h | 116 ----
 .../MongoDB/include/Poco/MongoDB/Document.h | 296 ---------
 .../MongoDB/include/Poco/MongoDB/Element.h | 393 ------------
 .../include/Poco/MongoDB/GetMoreRequest.h | 92 ---
 .../include/Poco/MongoDB/InsertRequest.h | 100 ---
 .../include/Poco/MongoDB/JavaScriptCode.h | 108 ----
 .../include/Poco/MongoDB/KillCursorsRequest.h | 65 --
 .../MongoDB/include/Poco/MongoDB/Message.h | 76 ---
 .../include/Poco/MongoDB/MessageHeader.h | 140 -----
 .../MongoDB/include/Poco/MongoDB/MongoDB.h | 64 --
 .../MongoDB/include/Poco/MongoDB/ObjectId.h | 151 -----
 .../include/Poco/MongoDB/OpMsgCursor.h | 96 ---
 .../include/Poco/MongoDB/OpMsgMessage.h | 163 -----
 .../Poco/MongoDB/PoolableConnectionFactory.h | 123 ----
 .../include/Poco/MongoDB/QueryRequest.h | 190 ------
 .../include/Poco/MongoDB/RegularExpression.h | 135 ----
 .../MongoDB/include/Poco/MongoDB/ReplicaSet.h | 61 --
 .../include/Poco/MongoDB/RequestMessage.h | 54 --
 .../include/Poco/MongoDB/ResponseMessage.h | 114 ----
 .../include/Poco/MongoDB/UpdateRequest.h | 117 ----
 base/poco/MongoDB/src/Array.cpp | 75 ---
 base/poco/MongoDB/src/Binary.cpp | 89 ---
 base/poco/MongoDB/src/Connection.cpp | 348 -----
 base/poco/MongoDB/src/Cursor.cpp | 83 ---
 base/poco/MongoDB/src/Database.cpp | 482 ---------
 base/poco/MongoDB/src/DeleteRequest.cpp | 54 --
 base/poco/MongoDB/src/Document.cpp | 227 -------
 base/poco/MongoDB/src/Element.cpp | 32 -
 base/poco/MongoDB/src/GetMoreRequest.cpp | 46 --
 base/poco/MongoDB/src/InsertRequest.cpp | 49 --
 base/poco/MongoDB/src/JavaScriptCode.cpp | 33 -
 base/poco/MongoDB/src/KillCursorsRequest.cpp | 44 --
 base/poco/MongoDB/src/Message.cpp | 33 -
 base/poco/MongoDB/src/MessageHeader.cpp | 63 --
 base/poco/MongoDB/src/ObjectId.cpp | 66 --
 base/poco/MongoDB/src/OpMsgCursor.cpp | 187 ------
 base/poco/MongoDB/src/OpMsgMessage.cpp | 412 -------
 base/poco/MongoDB/src/QueryRequest.cpp | 54 --
 base/poco/MongoDB/src/RegularExpression.cpp | 71 ---
 base/poco/MongoDB/src/ReplicaSet.cpp | 89 ---
 base/poco/MongoDB/src/RequestMessage.cpp | 51 --
 base/poco/MongoDB/src/ResponseMessage.cpp | 80 ---
 base/poco/MongoDB/src/UpdateRequest.cpp | 47 --
 .../table-engines/integrations/mongodb.md | 5 -
 .../settings.md | 8 -
 programs/format/Format.cpp | 4 +-
 programs/local/LocalServer.cpp | 7 +-
 programs/server/Server.cpp | 7 +-
 src/CMakeLists.txt | 4 -
 src/Core/ServerSettings.cpp | 1 -
 src/Core/Settings.cpp | 2 +-
 src/Dictionaries/CMakeLists.txt | 4 -
 .../MongoDBPocoLegacyDictionarySource.cpp | 305 ---------
 .../MongoDBPocoLegacyDictionarySource.h | 93 ---
 src/Dictionaries/registerDictionaries.cpp | 7 +-
 src/Dictionaries/registerDictionaries.h | 2 +-
 .../Sources/MongoDBPocoLegacySource.cpp | 578 ------------
 .../Sources/MongoDBPocoLegacySource.h | 92 ---
 src/Storages/StorageMongoDBPocoLegacy.cpp | 327 ----------
 src/Storages/StorageMongoDBPocoLegacy.h | 79 ---
 .../StorageMongoDBPocoLegacySocketFactory.cpp | 57 --
 .../StorageMongoDBPocoLegacySocketFactory.h | 24 -
 src/Storages/registerStorages.cpp | 7 +-
 src/Storages/registerStorages.h | 2 +-
 .../TableFunctionMongoDBPocoLegacy.cpp | 128 ----
 src/TableFunctions/registerTableFunctions.cpp | 7 +-
 src/TableFunctions/registerTableFunctions.h | 2 +-
 tests/integration/helpers/external_sources.py | 10 +-
 .../configs/mongo/legacy.xml | 3 -
 .../configs/mongo/new.xml | 3 -
 .../test_mongo.py | 37 +-
 .../test_mongo_uri.py | 30 +-
 .../configs/feature_flag.xml | 3 -
 .../integration/test_storage_mongodb/test.py | 1 -
 .../test_storage_mongodb_legacy/__init__.py | 0
 .../configs/feature_flag.xml | 3 -
 .../configs/named_collections.xml | 12 -
 .../configs/users.xml | 9 -
 .../mongo_secure_config/cert.crt | 24 -
 .../mongo_secure_config/config.d/ssl_conf.xml | 8 -
 .../mongo_secure_config/key.pem | 52 --
 .../mongo_secure_config/mongo_cert.pem | 52 --
 .../mongo_secure_config/mongo_secure.conf | 6 -
 .../test_storage_mongodb_legacy/test.py | 509 ---------
 .../configs/feature_flag.xml | 3 -
 .../test_table_function_mongodb/test.py | 1 -
 .../__init__.py | 0
 .../configs/feature_flag.xml | 3 -
 .../configs/users.xml | 9 -
 .../mongo_secure_config/cert.crt | 24 -
 .../mongo_secure_config/config.d/ssl_conf.xml | 8 -
 .../mongo_secure_config/key.pem | 52 --
 .../mongo_secure_config/mongo_secure.conf | 6 -
 .../test.py | 276 ---------
 ...new_table_functions_must_be_documented.sql | 1 -
 104 files changed, 36 insertions(+), 9219 deletions(-)
 delete mode 100644 base/poco/MongoDB/CMakeLists.txt
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Array.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/BSONReader.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/BSONWriter.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Binary.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Connection.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Cursor.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Database.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/DeleteRequest.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Document.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Element.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/GetMoreRequest.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/InsertRequest.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/JavaScriptCode.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/KillCursorsRequest.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/Message.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/MessageHeader.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/MongoDB.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/ObjectId.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/OpMsgCursor.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/OpMsgMessage.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/PoolableConnectionFactory.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/QueryRequest.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/RegularExpression.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/ReplicaSet.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/RequestMessage.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/ResponseMessage.h
 delete mode 100644 base/poco/MongoDB/include/Poco/MongoDB/UpdateRequest.h
 delete mode 100644 base/poco/MongoDB/src/Array.cpp
 delete mode 100644 base/poco/MongoDB/src/Binary.cpp
 delete mode 100644 base/poco/MongoDB/src/Connection.cpp
 delete mode 100644 base/poco/MongoDB/src/Cursor.cpp
 delete mode 100644 base/poco/MongoDB/src/Database.cpp
 delete mode 100644 base/poco/MongoDB/src/DeleteRequest.cpp
 delete mode 100644 base/poco/MongoDB/src/Document.cpp
 delete mode 100644 base/poco/MongoDB/src/Element.cpp
 delete mode 100644 base/poco/MongoDB/src/GetMoreRequest.cpp
 delete mode 100644 base/poco/MongoDB/src/InsertRequest.cpp
 delete mode 100644 base/poco/MongoDB/src/JavaScriptCode.cpp
 delete mode 100644 base/poco/MongoDB/src/KillCursorsRequest.cpp
 delete mode 100644 base/poco/MongoDB/src/Message.cpp
 delete mode 100644 base/poco/MongoDB/src/MessageHeader.cpp
 delete mode 100644 base/poco/MongoDB/src/ObjectId.cpp
 delete mode 100644 base/poco/MongoDB/src/OpMsgCursor.cpp
 delete mode 100644 base/poco/MongoDB/src/OpMsgMessage.cpp
 delete mode 100644 base/poco/MongoDB/src/QueryRequest.cpp
 delete mode 100644 base/poco/MongoDB/src/RegularExpression.cpp
 delete mode 100644 base/poco/MongoDB/src/ReplicaSet.cpp
 delete mode 100644 base/poco/MongoDB/src/RequestMessage.cpp
 delete mode 100644 base/poco/MongoDB/src/ResponseMessage.cpp
 delete mode 100644 base/poco/MongoDB/src/UpdateRequest.cpp
 delete mode 100644 src/Dictionaries/MongoDBPocoLegacyDictionarySource.cpp
 delete mode 100644 src/Dictionaries/MongoDBPocoLegacyDictionarySource.h
 delete mode 100644 src/Processors/Sources/MongoDBPocoLegacySource.cpp
 delete mode 100644 src/Processors/Sources/MongoDBPocoLegacySource.h
 delete mode 100644 src/Storages/StorageMongoDBPocoLegacy.cpp
 delete mode 100644 src/Storages/StorageMongoDBPocoLegacy.h
 delete mode 100644 src/Storages/StorageMongoDBPocoLegacySocketFactory.cpp
 delete mode 100644 src/Storages/StorageMongoDBPocoLegacySocketFactory.h
 delete mode 100644 src/TableFunctions/TableFunctionMongoDBPocoLegacy.cpp
 delete mode 100644 tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/legacy.xml
 delete mode 100644 tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/new.xml
 delete mode 100644 tests/integration/test_storage_mongodb/configs/feature_flag.xml
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/__init__.py
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/configs/feature_flag.xml
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/configs/named_collections.xml
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/configs/users.xml
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/mongo_secure_config/cert.crt
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/mongo_secure_config/key.pem
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_cert.pem
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_secure.conf
 delete mode 100644 tests/integration/test_storage_mongodb_legacy/test.py
 delete mode 100644 tests/integration/test_table_function_mongodb/configs/feature_flag.xml
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/__init__.py
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/configs/feature_flag.xml
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/configs/users.xml
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/cert.crt
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/key.pem
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/mongo_secure.conf
 delete mode 100644 tests/integration/test_table_function_mongodb_legacy/test.py

diff --git a/base/poco/CMakeLists.txt b/base/poco/CMakeLists.txt
index 434e24cf334..8cc66ff403c 100644
--- a/base/poco/CMakeLists.txt
+++ b/base/poco/CMakeLists.txt
@@ -3,11 +3,6 @@ add_subdirectory (Data)
 add_subdirectory (Data/ODBC)
 add_subdirectory (Foundation)
 add_subdirectory (JSON)
-
-if (USE_MONGODB)
-    add_subdirectory(MongoDB)
-endif()
-
 add_subdirectory (Net)
 add_subdirectory (NetSSL_OpenSSL)
 add_subdirectory (Redis)
diff --git a/base/poco/MongoDB/CMakeLists.txt b/base/poco/MongoDB/CMakeLists.txt
deleted file mode 100644
index bb6f90ed8f5..00000000000
--- a/base/poco/MongoDB/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-file (GLOB SRCS src/*.cpp)
-
-add_library (_poco_mongodb ${SRCS})
-add_library (Poco::MongoDB ALIAS _poco_mongodb)
-
-# TODO: remove these warning exclusions
-target_compile_options (_poco_mongodb
-    PRIVATE
-        -Wno-old-style-cast
-        -Wno-unused-parameter
-        -Wno-zero-as-null-pointer-constant
-)
-
-target_include_directories (_poco_mongodb SYSTEM PUBLIC "include")
-target_link_libraries (_poco_mongodb PUBLIC Poco::Net)
-
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Array.h b/base/poco/MongoDB/include/Poco/MongoDB/Array.h
deleted file mode 100644
index 8a30c785b2d..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Array.h
+++ /dev/null
@@ -1,142 +0,0 @@
-//
-// Array.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Array
-//
-// Definition of the Array class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Array_INCLUDED
-#define MongoDB_Array_INCLUDED
-
-
-#include "Poco/MongoDB/Document.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/NumberFormatter.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Array : public Document
-    /// This class represents a BSON Array.
-    {
-    public:
-        using Ptr = SharedPtr<Array>;
-
-        Array();
-        /// Creates an empty Array.
-
-        virtual ~Array();
-        /// Destroys the Array.
-
-        // Document template functions available for backward compatibility
-        using Document::add;
-        using Document::get;
-
-        template <typename T>
-        Document & add(T value)
-        /// Creates an element with the name from the current pos and value and
-        /// adds it to the array document.
-        ///
-        /// The active document is returned to allow chaining of the add methods.
-        {
-            return Document::add<T>(Poco::NumberFormatter::format(size()), value);
-        }
-
-        Document & add(const char * value)
-        /// Creates an element with a name from the current pos and value and
-        /// adds it to the array document.
-        ///
-        /// The active document is returned to allow chaining of the add methods.
-        {
-            return Document::add(Poco::NumberFormatter::format(size()), value);
-        }
-
-        template <typename T>
-        T get(std::size_t pos) const
-        /// Returns the element at the given index and tries to convert
-        /// it to the template type. If the element is not found, a
-        /// Poco::NotFoundException will be thrown. If the element cannot be
-        /// converted a BadCastException will be thrown.
-        {
-            return Document::get<T>(Poco::NumberFormatter::format(pos));
-        }
-
-        template <typename T>
-        T get(std::size_t pos, const T & deflt) const
-        /// Returns the element at the given index and tries to convert
-        /// it to the template type. If the element is not found, or
-        /// has the wrong type, the deflt argument will be returned.
-        {
-            return Document::get<T>(Poco::NumberFormatter::format(pos), deflt);
-        }
-
-        Element::Ptr get(std::size_t pos) const;
-        /// Returns the element at the given index.
-        /// An empty element will be returned if the element is not found.
-
-        template <typename T>
-        bool isType(std::size_t pos) const
-        /// Returns true if the type of the element equals the TypeId of ElementTrait,
-        /// otherwise false.
-        {
-            return Document::isType<T>(Poco::NumberFormatter::format(pos));
-        }
-
-        std::string toString(int indent = 0) const;
-        /// Returns a string representation of the Array.
-
-    private:
-        friend void BSONReader::read<Array::Ptr>(Array::Ptr & to);
-    };
-
-
-    // BSON Embedded Array
-    // spec: document
-    template <>
-    struct ElementTraits<Array::Ptr>
-    {
-        enum
-        {
-            TypeId = 0x04
-        };
-
-        static std::string toString(const Array::Ptr & value, int indent = 0)
-        {
-            //TODO:
-            return value.isNull() ? "null" : value->toString(indent);
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<Array::Ptr>(Array::Ptr & to)
-    {
-        to->read(_reader);
-    }
-
-
-    template <>
-    inline void BSONWriter::write<Array::Ptr>(Array::Ptr & from)
-    {
-        from->write(_writer);
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Array_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/BSONReader.h b/base/poco/MongoDB/include/Poco/MongoDB/BSONReader.h
deleted file mode 100644
index 5858226ee49..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/BSONReader.h
+++ /dev/null
@@ -1,88 +0,0 @@
-//
-// BSONReader.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: BSONReader
-//
-// Definition of the BSONReader class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_BSONReader_INCLUDED
-#define MongoDB_BSONReader_INCLUDED
-
-
-#include "Poco/BinaryReader.h"
-#include "Poco/MongoDB/MongoDB.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API BSONReader
-    /// Class for reading BSON using a Poco::BinaryReader
-    {
-    public:
-        BSONReader(const Poco::BinaryReader & reader) : _reader(reader)
-        /// Creates the BSONReader using the given BinaryWriter.
-        {
-        }
-
-        virtual ~BSONReader()
-        /// Destroys the BSONReader.
-        {
-        }
-
-        template <typename T>
-        void read(T & t)
-        /// Reads the value from the reader. The default implementation uses the >> operator to
-        /// the given argument. Special types can write their own version.
-        {
-            _reader >> t;
-        }
-
-        std::string readCString();
-        /// Reads a cstring from the reader.
-        /// A cstring is a string terminated with a 0x00.
-
-    private:
-        Poco::BinaryReader _reader;
-    };
-
-
-    //
-    // inlines
-    //
-    inline std::string BSONReader::readCString()
-    {
-        std::string val;
-        while (_reader.good())
-        {
-            char c;
-            _reader >> c;
-            if (_reader.good())
-            {
-                if (c == 0x00)
-                    return val;
-                else
-                    val += c;
-            }
-        }
-        return val;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_BSONReader_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/BSONWriter.h b/base/poco/MongoDB/include/Poco/MongoDB/BSONWriter.h
deleted file mode 100644
index 0a4e6e371b9..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/BSONWriter.h
+++ /dev/null
@@ -1,76 +0,0 @@
-//
-// BSONWriter.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: BSONWriter
-//
-// Definition of the BSONWriter class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_BSONWriter_INCLUDED
-#define MongoDB_BSONWriter_INCLUDED
-
-
-#include "Poco/BinaryWriter.h"
-#include "Poco/MongoDB/MongoDB.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API BSONWriter
-    /// Class for writing BSON using a Poco::BinaryWriter.
-    {
-    public:
-        BSONWriter(const Poco::BinaryWriter & writer) : _writer(writer)
-        /// Creates the BSONWriter.
-        {
-        }
-
-        virtual ~BSONWriter()
-        /// Destroys the BSONWriter.
-        {
-        }
-
-        template <typename T>
-        void write(T & t)
-        /// Writes the value to the writer. The default implementation uses
-        /// the << operator. Special types can write their own version.
-        {
-            _writer << t;
-        }
-
-        void writeCString(const std::string & value);
-        /// Writes a cstring to the writer. A cstring is a string
-        /// terminated a null character.
-
-    private:
-        Poco::BinaryWriter _writer;
-    };
-
-
-    //
-    // inlines
-    //
-    inline void BSONWriter::writeCString(const std::string & value)
-    {
-        _writer.writeRaw(value);
-        _writer << (unsigned char)0x00;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_BSONWriter_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Binary.h b/base/poco/MongoDB/include/Poco/MongoDB/Binary.h
deleted file mode 100644
index aad8736e8b6..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Binary.h
+++ /dev/null
@@ -1,158 +0,0 @@
-//
-// Binary.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Binary
-//
-// Definition of the Binary class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Binary_INCLUDED
-#define MongoDB_Binary_INCLUDED
-
-
-#include <sstream>
-#include "Poco/Base64Encoder.h"
-#include "Poco/Buffer.h"
-#include "Poco/MemoryStream.h"
-#include "Poco/MongoDB/Element.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/StreamCopier.h"
-#include "Poco/UUID.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Binary
-    /// Implements BSON Binary.
-    ///
-    /// A Binary stores its data in a Poco::Buffer.
-    {
-    public:
-        using Ptr = SharedPtr<Binary>;
-
-        Binary();
-        /// Creates an empty Binary with subtype 0.
-
-        Binary(Poco::Int32 size, unsigned char subtype);
-        /// Creates a Binary with a buffer of the given size and the given subtype.
-
-        Binary(const UUID & uuid);
-        /// Creates a Binary containing an UUID.
-
-        Binary(const std::string & data, unsigned char subtype = 0);
-        /// Creates a Binary with the contents of the given string and the given subtype.
-
-        Binary(const void * data, Poco::Int32 size, unsigned char subtype = 0);
-        /// Creates a Binary with the contents of the given buffer and the given subtype.
-
-        virtual ~Binary();
-        /// Destroys the Binary.
-
-        Buffer<unsigned char> & buffer();
-        /// Returns a reference to the internal buffer
-
-        unsigned char subtype() const;
-        /// Returns the subtype.
-
-        void subtype(unsigned char type);
-        /// Sets the subtype.
-
-        std::string toString(int indent = 0) const;
-        /// Returns the contents of the Binary as Base64-encoded string.
-
-        std::string toRawString() const;
-        /// Returns the raw content of the Binary as a string.
-
-        UUID uuid() const;
-        /// Returns the UUID when the binary subtype is 0x04.
-        /// Otherwise, throws a Poco::BadCastException.
-
-    private:
-        Buffer<unsigned char> _buffer;
-        unsigned char _subtype;
-    };
-
-
-    //
-    // inlines
-    //
-    inline unsigned char Binary::subtype() const
-    {
-        return _subtype;
-    }
-
-
-    inline void Binary::subtype(unsigned char type)
-    {
-        _subtype = type;
-    }
-
-
-    inline Buffer<unsigned char> & Binary::buffer()
-    {
-        return _buffer;
-    }
-
-
-    inline std::string Binary::toRawString() const
-    {
-        return std::string(reinterpret_cast<const char *>(_buffer.begin()), _buffer.size());
-    }
-
-
-    // BSON Embedded Document
-    // spec: binary
-    template <>
-    struct ElementTraits<Binary::Ptr>
-    {
-        enum
-        {
-            TypeId = 0x05
-        };
-
-        static std::string toString(const Binary::Ptr & value, int indent = 0) { return value.isNull() ? "" : value->toString(); }
-    };
-
-
-    template <>
-    inline void BSONReader::read<Binary::Ptr>(Binary::Ptr & to)
-    {
-        Poco::Int32 size;
-        _reader >> size;
-
-        to->buffer().resize(size);
-
-        unsigned char subtype;
-        _reader >> subtype;
-        to->subtype(subtype);
-
-        _reader.readRaw((char *)to->buffer().begin(), size);
-    }
-
-
-    template <>
-    inline void BSONWriter::write<Binary::Ptr>(Binary::Ptr & from)
-    {
-        _writer << (Poco::Int32)from->buffer().size();
-        _writer << from->subtype();
-        _writer.writeRaw((char *)from->buffer().begin(), from->buffer().size());
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Binary_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h b/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
deleted file mode 100644
index cf679d530aa..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Connection.h
+++ /dev/null
@@ -1,191 +0,0 @@
-//
-// Connection.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Connection
-//
-// Definition of the Connection class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Connection_INCLUDED
-#define MongoDB_Connection_INCLUDED
-
-
-#include "Poco/MongoDB/OpMsgMessage.h"
-#include "Poco/MongoDB/RequestMessage.h"
-#include "Poco/MongoDB/ResponseMessage.h"
-#include "Poco/Mutex.h"
-#include "Poco/Net/SocketAddress.h"
-#include "Poco/Net/StreamSocket.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Connection
-    /// Represents a connection to a MongoDB server
-    /// using the MongoDB wire protocol.
-    ///
-    /// See https://docs.mongodb.com/manual/reference/mongodb-wire-protocol/
-    /// for more information on the wire protocol.
-    {
-    public:
-        using Ptr = Poco::SharedPtr<Connection>;
-
-        class MongoDB_API SocketFactory
-        {
-        public:
-            SocketFactory();
-            /// Creates the SocketFactory.
-
-            virtual ~SocketFactory();
-            /// Destroys the SocketFactory.
-
-            virtual Poco::Net::StreamSocket createSocket(const std::string & host, int port, Poco::Timespan connectTimeout, bool secure);
-            /// Creates a Poco::Net::StreamSocket (if secure is false), or a
-            /// Poco::Net::SecureStreamSocket (if secure is true) connected to the
-            /// given host and port number.
-            ///
-            /// The default implementation will throw a Poco::NotImplementedException
-            /// if secure is true.
-        };
-
-        Connection();
-        /// Creates an unconnected Connection.
-        ///
-        /// Use this when you want to connect later on.
-
-        Connection(const std::string & hostAndPort);
-        /// Creates a Connection connected to the given MongoDB instance at host:port.
-        ///
-        /// The host and port must be separated with a colon.
-
-        Connection(const std::string & uri, SocketFactory & socketFactory);
-        /// Creates a Connection connected to the given MongoDB instance at the
-        /// given URI.
-        ///
-        /// See the corresponding connect() method for more information.
-
-        Connection(const std::string & host, int port);
-        /// Creates a Connection connected to the given MongoDB instance at host and port.
-
-        Connection(const Poco::Net::SocketAddress & addrs);
-        /// Creates a Connection connected to the given MongoDB instance at the given address.
-
-        Connection(const Poco::Net::StreamSocket & socket);
-        /// Creates a Connection connected to the given MongoDB instance using the given socket,
-        /// which must already be connected.
-
-        virtual ~Connection();
-        /// Destroys the Connection.
-
-        Poco::Net::SocketAddress address() const;
-        /// Returns the address of the MongoDB server.
-
-        const std::string & uri() const;
-        /// Returns the uri on which the connection was made.
-
-        void connect(const std::string & hostAndPort);
-        /// Connects to the given MongoDB server.
-        ///
-        /// The host and port must be separated with a colon.
-
-        void connect(const std::string & uri, SocketFactory & socketFactory);
-        /// Connects to the given MongoDB instance at the given URI.
-        ///
-        /// The URI must be in standard MongoDB connection string URI format:
-        ///
-        ///     mongodb://<user>:<password>@hostname.com:<port>/database-name?options
-        ///
-        /// The following options are supported:
-        ///
-        ///   - ssl: If ssl=true is specified, a custom SocketFactory subclass creating
-        ///     a SecureStreamSocket must be supplied.
-        ///   - connectTimeoutMS: Socket connection timeout in milliseconds.
-        ///   - socketTimeoutMS: Socket send/receive timeout in milliseconds.
-        ///   - authMechanism: Authentication mechanism. Only "SCRAM-SHA-1" (default)
-        ///     and "MONGODB-CR" are supported.
-        ///
-        /// Unknown options are silently ignored.
-        ///
-        /// Will also attempt to authenticate using the specified credentials,
-        /// using Database::authenticate().
-        ///
-        /// Throws a Poco::NoPermissionException if authentication fails.
-
-        void connect(const std::string & host, int port);
-        /// Connects to the given MongoDB server.
-
-        void connect(const Poco::Net::SocketAddress & addrs);
-        /// Connects to the given MongoDB server.
-
-        void connect(const Poco::Net::StreamSocket & socket);
-        /// Connects using an already connected socket.
-
-        void disconnect();
-        /// Disconnects from the MongoDB server.
-
-        void sendRequest(RequestMessage & request);
-        /// Sends a request to the MongoDB server.
-        ///
-        /// Used for one-way requests without a response.
-
-        void sendRequest(RequestMessage & request, ResponseMessage & response);
-        /// Sends a request to the MongoDB server and receives the response.
-        ///
-        /// Use this when a response is expected: only a "query" or "getmore"
-        /// request will return a response.
-
-        void sendRequest(OpMsgMessage & request, OpMsgMessage & response);
-        /// Sends a request to the MongoDB server and receives the response
-        /// using newer wire protocol with OP_MSG.
-
-        void sendRequest(OpMsgMessage & request);
-        /// Sends an unacknowledged request to the MongoDB server using newer
-        /// wire protocol with OP_MSG.
-        /// No response is sent by the server.
-
-        void readResponse(OpMsgMessage & response);
-        /// Reads additional response data when previous message's flag moreToCome
-        /// indicates that server will send more data.
-        /// NOTE: See comments in OpMsgCursor code.
-
-
-    protected:
-        void connect();
-
-    private:
-        Poco::Net::SocketAddress _address;
-        Poco::Net::StreamSocket _socket;
-        std::string _uri;
-    };
-
-
-    //
-    // inlines
-    //
-    inline Net::SocketAddress Connection::address() const
-    {
-        return _address;
-    }
-    inline const std::string & Connection::uri() const
-    {
-        return _uri;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Connection_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Cursor.h b/base/poco/MongoDB/include/Poco/MongoDB/Cursor.h
deleted file mode 100644
index 8849d737a62..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Cursor.h
+++ /dev/null
@@ -1,80 +0,0 @@
-//
-// Cursor.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Cursor
-//
-// Definition of the Cursor class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Cursor_INCLUDED
-#define MongoDB_Cursor_INCLUDED
-
-
-#include "Poco/MongoDB/Connection.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/MongoDB/QueryRequest.h"
-#include "Poco/MongoDB/ResponseMessage.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Cursor : public Document
-    /// Cursor is an helper class for querying multiple documents.
-    {
-    public:
-        Cursor(const std::string & dbname, const std::string & collectionName, QueryRequest::Flags flags = QueryRequest::QUERY_DEFAULT);
-        /// Creates a Cursor for the given database and collection, using the specified flags.
-
-        Cursor(const std::string & fullCollectionName, QueryRequest::Flags flags = QueryRequest::QUERY_DEFAULT);
-        /// Creates a Cursor for the given database and collection ("database.collection"), using the specified flags.
-
-        Cursor(const Document & aggregationResponse);
-        /// Creates a Cursor for the given aggregation query response.
-
-        virtual ~Cursor();
-        /// Destroys the Cursor.
-
-        ResponseMessage & next(Connection & connection);
-        /// Tries to get the next documents. As long as ResponseMessage has a
-        /// cursor ID next can be called to retrieve the next bunch of documents.
-        ///
-        /// The cursor must be killed (see kill()) when not all documents are needed.
-
-        QueryRequest & query();
-        /// Returns the associated query.
-
-        void kill(Connection & connection);
-        /// Kills the cursor and reset it so that it can be reused.
-
-    private:
-        QueryRequest _query;
-        ResponseMessage _response;
-    };
-
-
-    //
-    // inlines
-    //
-    inline QueryRequest & Cursor::query()
-    {
-        return _query;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Cursor_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Database.h b/base/poco/MongoDB/include/Poco/MongoDB/Database.h
deleted file mode 100644
index 1fa91f4ca1a..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Database.h
+++ /dev/null
@@ -1,233 +0,0 @@
-//
-// Database.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Database
-//
-// Definition of the Database class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Database_INCLUDED
-#define MongoDB_Database_INCLUDED
-
-
-#include "Poco/MongoDB/Connection.h"
-#include "Poco/MongoDB/DeleteRequest.h"
-#include "Poco/MongoDB/Document.h"
-#include "Poco/MongoDB/InsertRequest.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/MongoDB/QueryRequest.h"
-#include "Poco/MongoDB/UpdateRequest.h"
-
-#include "Poco/MongoDB/OpMsgCursor.h"
-#include "Poco/MongoDB/OpMsgMessage.h"
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Database
-    /// Database is a helper class for creating requests. MongoDB works with
-    /// collection names and uses the part before the first dot as the name of
-    /// the database.
-    {
-    public:
-        explicit Database(const std::string & name);
-        /// Creates a Database for the database with the given name.
-
-        virtual ~Database();
-        /// Destroys the Database.
-
-        const std::string & name() const;
-        /// Database name
-
-        bool authenticate(
-            Connection & connection,
-            const std::string & username,
-            const std::string & password,
-            const std::string & method = AUTH_SCRAM_SHA1);
-        /// Authenticates against the database using the given connection,
-        /// username and password, as well as authentication method.
-        ///
-        /// "MONGODB-CR" (default prior to MongoDB 3.0) and
-        /// "SCRAM-SHA-1" (default starting in 3.0) are the only supported
-        /// authentication methods.
-        ///
-        /// Returns true if authentication was successful, otherwise false.
-        ///
-        /// May throw a Poco::ProtocolException if authentication fails for a reason other than
-        /// invalid credentials.
-
-        Document::Ptr queryBuildInfo(Connection & connection) const;
-        /// Queries server build info (all wire protocols)
-
-        Document::Ptr queryServerHello(Connection & connection, bool old = false) const;
-        /// Queries hello response from server (all wire protocols)
-
-        Int64 count(Connection & connection, const std::string & collectionName) const;
-        /// Sends a count request for the given collection to MongoDB. (old wire protocol)
-        ///
-        /// If the command fails, -1 is returned.
-
-        Poco::SharedPtr<Poco::MongoDB::QueryRequest> createCommand() const;
-        /// Creates a QueryRequest for a command. (old wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::QueryRequest> createCountRequest(const std::string & collectionName) const;
-        /// Creates a QueryRequest to count the given collection.
-        /// The collectionname must not contain the database name. (old wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::DeleteRequest> createDeleteRequest(const std::string & collectionName) const;
-        /// Creates a DeleteRequest to delete documents in the given collection.
-        /// The collectionname must not contain the database name. (old wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::InsertRequest> createInsertRequest(const std::string & collectionName) const;
-        /// Creates an InsertRequest to insert new documents in the given collection.
-        /// The collectionname must not contain the database name. (old wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::QueryRequest> createQueryRequest(const std::string & collectionName) const;
-        /// Creates a QueryRequest. (old wire protocol)
-        /// The collectionname must not contain the database name.
-
-        Poco::SharedPtr<Poco::MongoDB::UpdateRequest> createUpdateRequest(const std::string & collectionName) const;
-        /// Creates an UpdateRequest. (old wire protocol)
-        /// The collectionname must not contain the database name.
-
-        Poco::SharedPtr<Poco::MongoDB::OpMsgMessage> createOpMsgMessage(const std::string & collectionName) const;
-        /// Creates OpMsgMessage. (new wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::OpMsgMessage> createOpMsgMessage() const;
-        /// Creates OpMsgMessage for database commands that do not require collection as an argument. (new wire protocol)
-
-        Poco::SharedPtr<Poco::MongoDB::OpMsgCursor> createOpMsgCursor(const std::string & collectionName) const;
-        /// Creates OpMsgCursor. (new wire protocol)
-
-        Poco::MongoDB::Document::Ptr ensureIndex(
-            Connection & connection,
-            const std::string & collection,
-            const std::string & indexName,
-            Poco::MongoDB::Document::Ptr keys,
-            bool unique = false,
-            bool background = false,
-            int version = 0,
-            int ttl = 0);
-        /// Creates an index. The document returned is the result of a getLastError call.
-        /// For more info look at the ensureIndex information on the MongoDB website. (old wire protocol)
-
-        Document::Ptr getLastErrorDoc(Connection & connection) const;
-        /// Sends the getLastError command to the database and returns the error document.
-        /// (old wire protocol)
-
-        std::string getLastError(Connection & connection) const;
-        /// Sends the getLastError command to the database and returns the err element
-        /// from the error document. When err is null, an empty string is returned.
-        /// (old wire protocol)
-
-        static const std::string AUTH_MONGODB_CR;
-        /// Default authentication mechanism prior to MongoDB 3.0.
-
-        static const std::string AUTH_SCRAM_SHA1;
-        /// Default authentication mechanism for MongoDB 3.0.
-
-        enum WireVersion
-        /// Wire version as reported by the command hello.
-        /// See details in MongoDB github, repository specifications.
-        /// @see queryServerHello
-        {
-            VER_26 = 1,
-            VER_26_2 = 2,
-            VER_30 = 3,
-            VER_32 = 4,
-            VER_34 = 5,
-            VER_36 = 6, ///< First wire version that supports OP_MSG
-            VER_40 = 7,
-            VER_42 = 8,
-            VER_44 = 9,
-            VER_50 = 13,
-            VER_51 = 14, ///< First wire version that supports only OP_MSG
-            VER_52 = 15,
-            VER_53 = 16,
-            VER_60 = 17
-        };
-
-    protected:
-        bool authCR(Connection & connection, const std::string & username, const std::string & password);
-        bool authSCRAM(Connection & connection, const std::string & username, const std::string & password);
-
-    private:
-        std::string _dbname;
-    };
-
-
-    //
-    // inlines
-    //
-    inline const std::string & Database::name() const
-    {
-        return _dbname;
-    }
-
-
-    inline Poco::SharedPtr<Poco::MongoDB::QueryRequest> Database::createCommand() const
-    {
-        Poco::SharedPtr<Poco::MongoDB::QueryRequest> cmd = createQueryRequest("$cmd");
-        cmd->setNumberToReturn(1);
-        return cmd;
-    }
-
-
-    inline Poco::SharedPtr<Poco::MongoDB::DeleteRequest> Database::createDeleteRequest(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::DeleteRequest(_dbname + '.' + collectionName);
-    }
-
-
-    inline Poco::SharedPtr<Poco::MongoDB::InsertRequest> Database::createInsertRequest(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::InsertRequest(_dbname + '.' + collectionName);
-    }
-
-
-    inline Poco::SharedPtr<Poco::MongoDB::QueryRequest> Database::createQueryRequest(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::QueryRequest(_dbname + '.' + collectionName);
-    }
-
-
-    inline Poco::SharedPtr<Poco::MongoDB::UpdateRequest> Database::createUpdateRequest(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::UpdateRequest(_dbname + '.' + collectionName);
-    }
-
-    // -- New wire protocol commands
-
-    inline Poco::SharedPtr<Poco::MongoDB::OpMsgMessage> Database::createOpMsgMessage(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::OpMsgMessage(_dbname, collectionName);
-    }
-
-    inline Poco::SharedPtr<Poco::MongoDB::OpMsgMessage> Database::createOpMsgMessage() const
-    {
-        // Collection name for database commands is not needed.
-        return createOpMsgMessage("");
-    }
-
-    inline Poco::SharedPtr<Poco::MongoDB::OpMsgCursor> Database::createOpMsgCursor(const std::string & collectionName) const
-    {
-        return new Poco::MongoDB::OpMsgCursor(_dbname, collectionName);
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Database_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/DeleteRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/DeleteRequest.h
deleted file mode 100644
index a54f2a23121..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/DeleteRequest.h
+++ /dev/null
@@ -1,116 +0,0 @@
-//
-// DeleteRequest.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: DeleteRequest
-//
-// Definition of the DeleteRequest class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_DeleteRequest_INCLUDED
-#define MongoDB_DeleteRequest_INCLUDED
-
-
-#include "Poco/MongoDB/Document.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/MongoDB/RequestMessage.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API DeleteRequest : public RequestMessage
-    /// A DeleteRequest is used to delete one or more documents from a database.
-    ///
-    /// Specific flags for this request
-    ///   - DELETE_DEFAULT: default delete operation
-    ///   - DELETE_SINGLE_REMOVE: delete only the first document
-    {
-    public:
-        enum Flags
-        {
-            DELETE_DEFAULT = 0,
-            /// Default
-
-            DELETE_SINGLE_REMOVE = 1
-            /// Delete only the first document.
-        };
-
-        DeleteRequest(const std::string & collectionName, Flags flags = DELETE_DEFAULT);
-        /// Creates a DeleteRequest for the given collection using the given flags.
-        ///
-        /// The full collection name is the concatenation of the database
-        /// name with the collection name, using a "." for the concatenation. For example,
-        /// for the database "foo" and the collection "bar", the full collection name is
-        /// "foo.bar".
-
-        DeleteRequest(const std::string & collectionName, bool justOne);
-        /// Creates a DeleteRequest for the given collection.
-        ///
-        /// The full collection name is the concatenation of the database
-        /// name with the collection name, using a "." for the concatenation. For example,
-        /// for the database "foo" and the collection "bar", the full collection name is
-        /// "foo.bar".
-        ///
-        /// If justOne is true, only the first matching document will
-        /// be removed (the same as using flag DELETE_SINGLE_REMOVE).
-
-        virtual ~DeleteRequest();
-        /// Destructor
-
-        Flags flags() const;
-        /// Returns the flags.
-
-        void flags(Flags flag);
-        /// Sets the flags.
-
-        Document & selector();
-        /// Returns the selector document.
-
-    protected:
-        void buildRequest(BinaryWriter & writer);
-        /// Writes the OP_DELETE request to the writer.
-
-    private:
-        Flags _flags;
-        std::string _fullCollectionName;
-        Document _selector;
-    };
-
-
-    ///
-    /// inlines
-    ///
-    inline DeleteRequest::Flags DeleteRequest::flags() const
-    {
-        return _flags;
-    }
-
-
-    inline void DeleteRequest::flags(DeleteRequest::Flags flags)
-    {
-        _flags = flags;
-    }
-
-
-    inline Document & DeleteRequest::selector()
-    {
-        return _selector;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_DeleteRequest_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Document.h b/base/poco/MongoDB/include/Poco/MongoDB/Document.h
deleted file mode 100644
index 9e1df349e20..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Document.h
+++ /dev/null
@@ -1,296 +0,0 @@
-//
-// Document.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Document
-//
-// Definition of the Document class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Document_INCLUDED
-#define MongoDB_Document_INCLUDED
-
-
-#include <algorithm>
-#include <cstdlib>
-#include "Poco/BinaryReader.h"
-#include "Poco/BinaryWriter.h"
-#include "Poco/MongoDB/Element.h"
-#include "Poco/MongoDB/MongoDB.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-    class Array;
-
-    class ElementFindByName
-    {
-    public:
-        ElementFindByName(const std::string & name) : _name(name) { }
-
-        bool operator()(const Element::Ptr & element) { return !element.isNull() && element->name() == _name; }
-
-    private:
-        std::string _name;
-    };
-
-
-    class MongoDB_API Document
-    /// Represents a MongoDB (BSON) document.
-    {
-    public:
-        using Ptr = SharedPtr<Document>;
-        using Vector = std::vector<Document::Ptr>;
-
-        Document();
-        /// Creates an empty Document.
-
-        virtual ~Document();
-        /// Destroys the Document.
-
-        Document & addElement(Element::Ptr element);
-        /// Add an element to the document.
-        ///
-        /// The active document is returned to allow chaining of the add methods.
-
-        template <typename T>
-        Document & add(const std::string & name, T value)
-        /// Creates an element with the given name and value and
-        /// adds it to the document.
-        ///
-        /// The active document is returned to allow chaining of the add methods.
-        {
-            return addElement(new ConcreteElement<T>(name, value));
-        }
-
-        Document & add(const std::string & name, const char * value)
-        /// Creates an element with the given name and value and
-        /// adds it to the document.
-        ///
-        /// The active document is returned to allow chaining of the add methods.
-        {
-            return addElement(new ConcreteElement<std::string>(name, std::string(value)));
-        }
-
-        Document & addNewDocument(const std::string & name);
-        /// Create a new document and add it to this document.
-        /// Unlike the other add methods, this method returns
-        /// a reference to the new document.
-
-        Array & addNewArray(const std::string & name);
-        /// Create a new array and add it to this document.
-        /// Method returns a reference to the new array.
-
-        void clear();
-        /// Removes all elements from the document.
-
-        void elementNames(std::vector<std::string> & keys) const;
-        /// Puts all element names into std::vector.
-
-        bool empty() const;
-        /// Returns true if the document doesn't contain any documents.
-
-        bool exists(const std::string & name) const;
-        /// Returns true if the document has an element with the given name.
-
-        template <typename T>
-        T get(const std::string & name) const
-        /// Returns the element with the given name and tries to convert
-        /// it to the template type. When the element is not found, a
-        /// NotFoundException will be thrown. When the element can't be
-        /// converted a BadCastException will be thrown.
-        {
-            Element::Ptr element = get(name);
-            if (element.isNull())
-            {
-                throw NotFoundException(name);
-            }
-            else
-            {
-                if (ElementTraits<T>::TypeId == element->type())
-                {
-                    ConcreteElement<T> * concrete = dynamic_cast<ConcreteElement<T> *>(element.get());
-                    if (concrete != 0)
-                    {
-                        return concrete->value();
-                    }
-                }
-                throw BadCastException("Invalid type mismatch!");
-            }
-        }
-
-        template <typename T>
-        T get(const std::string & name, const T & def) const
-        /// Returns the element with the given name and tries to convert
-        /// it to the template type. When the element is not found, or
-        /// has the wrong type, the def argument will be returned.
-        {
-            Element::Ptr element = get(name);
-            if (element.isNull())
-            {
-                return def;
-            }
-
-            if (ElementTraits<T>::TypeId == element->type())
-            {
-                ConcreteElement<T> * concrete = dynamic_cast<ConcreteElement<T> *>(element.get());
-                if (concrete != 0)
-                {
-                    return concrete->value();
-                }
-            }
-
-            return def;
-        }
-
-        Element::Ptr get(const std::string & name) const;
-        /// Returns the element with the given name.
-        /// An empty element will be returned when the element is not found.
-
-        Int64 getInteger(const std::string & name) const;
-        /// Returns an integer. Useful when MongoDB returns Int32, Int64
-        /// or double for a number (count for example). This method will always
-        /// return an Int64. When the element is not found, a
-        /// Poco::NotFoundException will be thrown.
-
-        bool remove(const std::string & name);
-        /// Removes an element from the document.
-
-        template <typename T>
-        bool isType(const std::string & name) const
-        /// Returns true when the type of the element equals the TypeId of ElementTrait.
-        {
-            Element::Ptr element = get(name);
-            if (element.isNull())
-            {
-                return false;
-            }
-
-            return ElementTraits<T>::TypeId == element->type();
-        }
-
-        void read(BinaryReader & reader);
-        /// Reads a document from the reader
-
-        std::size_t size() const;
-        /// Returns the number of elements in the document.
-
-        virtual std::string toString(int indent = 0) const;
-        /// Returns a String representation of the document.
-
-        void write(BinaryWriter & writer);
-        /// Writes a document to the reader
-
-    protected:
-        ElementSet _elements;
-    };
-
-
-    //
-    // inlines
-    //
-    inline Document & Document::addElement(Element::Ptr element)
-    {
-        _elements.push_back(element);
-        return *this;
-    }
-
-
-    inline Document & Document::addNewDocument(const std::string & name)
-    {
-        Document::Ptr newDoc = new Document();
-        add(name, newDoc);
-        return *newDoc;
-    }
-
-
-    inline void Document::clear()
-    {
-        _elements.clear();
-    }
-
-
-    inline bool Document::empty() const
-    {
-        return _elements.empty();
-    }
-
-
-    inline void Document::elementNames(std::vector<std::string> & keys) const
-    {
-        for (ElementSet::const_iterator it = _elements.begin(); it != _elements.end(); ++it)
-        {
-            keys.push_back((*it)->name());
-        }
-    }
-
-
-    inline bool Document::exists(const std::string & name) const
-    {
-        return std::find_if(_elements.begin(), _elements.end(), ElementFindByName(name)) != _elements.end();
-    }
-
-
-    inline bool Document::remove(const std::string & name)
-    {
-        auto it = std::find_if(_elements.begin(), _elements.end(), ElementFindByName(name));
-        if (it == _elements.end())
-            return false;
-
-        _elements.erase(it);
-        return true;
-    }
-
-
-    inline std::size_t Document::size() const
-    {
-        return _elements.size();
-    }
-
-
-    // BSON Embedded Document
-    // spec: document
-    template <>
-    struct ElementTraits<Document::Ptr>
-    {
-        enum
-        {
-            TypeId = 0x03
-        };
-
-        static std::string toString(const Document::Ptr & value, int indent = 0)
-        {
-            return value.isNull() ? "null" : value->toString(indent);
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<Document::Ptr>(Document::Ptr & to)
-    {
-        to->read(_reader);
-    }
-
-
-    template <>
-    inline void BSONWriter::write<Document::Ptr>(Document::Ptr & from)
-    {
-        from->write(_writer);
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Document_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Element.h b/base/poco/MongoDB/include/Poco/MongoDB/Element.h
deleted file mode 100644
index 26525d7d02b..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/Element.h
+++ /dev/null
@@ -1,393 +0,0 @@
-//
-// Element.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: Element
-//
-// Definition of the Element class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_Element_INCLUDED
-#define MongoDB_Element_INCLUDED
-
-
-#include <iomanip>
-#include <list>
-#include <sstream>
-#include <string>
-#include "Poco/BinaryReader.h"
-#include "Poco/BinaryWriter.h"
-#include "Poco/DateTimeFormatter.h"
-#include "Poco/MongoDB/BSONReader.h"
-#include "Poco/MongoDB/BSONWriter.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/Nullable.h"
-#include "Poco/NumberFormatter.h"
-#include "Poco/SharedPtr.h"
-#include "Poco/Timestamp.h"
-#include "Poco/UTF8String.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API Element
-    /// Represents an Element of a Document or an Array.
-    {
-    public:
-        using Ptr = Poco::SharedPtr<Element>;
-
-        explicit Element(const std::string & name);
-        /// Creates the Element with the given name.
-
-        virtual ~Element();
-        /// Destructor
-
-        const std::string & name() const;
-        /// Returns the name of the element.
-
-        virtual std::string toString(int indent = 0) const = 0;
-        /// Returns a string representation of the element.
-
-        virtual int type() const = 0;
-        /// Returns the MongoDB type of the element.
-
-    private:
-        virtual void read(BinaryReader & reader) = 0;
-        virtual void write(BinaryWriter & writer) = 0;
-
-        friend class Document;
-        std::string _name;
-    };
-
-
-    //
-    // inlines
-    //
-    inline const std::string & Element::name() const
-    {
-        return _name;
-    }
-
-
-    using ElementSet = std::list<Element::Ptr>;
-
-
-    template <typename T>
-    struct ElementTraits
-    {
-    };
-
-
-    // BSON Floating point
-    // spec: double
-    template <>
-    struct ElementTraits<double>
-    {
-        enum
-        {
-            TypeId = 0x01
-        };
-
-        static std::string toString(const double & value, int indent = 0) { return Poco::NumberFormatter::format(value); }
-    };
-
-
-    // BSON UTF-8 string
-    // spec: int32 (byte*) "\x00"
-    // int32 is the number bytes in byte* + 1 (for trailing "\x00")
-    template <>
-    struct ElementTraits<std::string>
-    {
-        enum
-        {
-            TypeId = 0x02
-        };
-
-        static std::string toString(const std::string & value, int indent = 0)
-        {
-            std::ostringstream oss;
-
-            oss << '"';
-
-            for (std::string::const_iterator it = value.begin(); it != value.end(); ++it)
-            {
-                switch (*it)
-                {
-                    case '"':
-                        oss << "\\\"";
-                        break;
-                    case '\\':
-                        oss << "\\\\";
-                        break;
-                    case '\b':
-                        oss << "\\b";
-                        break;
-                    case '\f':
-                        oss << "\\f";
-                        break;
-                    case '\n':
-                        oss << "\\n";
-                        break;
-                    case '\r':
-                        oss << "\\r";
-                        break;
-                    case '\t':
-                        oss << "\\t";
-                        break;
-                    default: {
-                        if (*it > 0 && *it <= 0x1F)
-                        {
-                            oss << "\\u" << std::hex << std::uppercase << std::setfill('0') << std::setw(4) << static_cast<int>(*it);
-                        }
-                        else
-                        {
-                            oss << *it;
-                        }
-                        break;
-                    }
-                }
-            }
-            oss << '"';
-            return oss.str();
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<std::string>(std::string & to)
-    {
-        Poco::Int32 size;
-        _reader >> size;
-        _reader.readRaw(size, to);
-        to.erase(to.end() - 1); // remove terminating 0
-    }
-
-
-    template <>
-    inline void BSONWriter::write<std::string>(std::string & from)
-    {
-        _writer << (Poco::Int32)(from.length() + 1);
-        writeCString(from);
-    }
-
-
-    // BSON bool
-    // spec: "\x00" "\x01"
-    template <>
-    struct ElementTraits<bool>
-    {
-        enum
-        {
-            TypeId = 0x08
-        };
-
-        static std::string toString(const bool & value, int indent = 0) { return value ? "true" : "false"; }
-    };
-
-
-    template <>
-    inline void BSONReader::read<bool>(bool & to)
-    {
-        unsigned char b;
-        _reader >> b;
-        to = b != 0;
-    }
-
-
-    template <>
-    inline void BSONWriter::write<bool>(bool & from)
-    {
-        unsigned char b = from ? 0x01 : 0x00;
-        _writer << b;
-    }
-
-
-    // BSON 32-bit integer
-    // spec: int32
-    template <>
-    struct ElementTraits<Int32>
-    {
-        enum
-        {
-            TypeId = 0x10
-        };
-
-
-        static std::string toString(const Int32 & value, int indent = 0) { return Poco::NumberFormatter::format(value); }
-    };
-
-
-    // BSON UTC datetime
-    // spec: int64
-    template <>
-    struct ElementTraits<Timestamp>
-    {
-        enum
-        {
-            TypeId = 0x09
-        };
-
-        static std::string toString(const Timestamp & value, int indent = 0)
-        {
-            std::string result;
-            result.append(1, '"');
-            result.append(DateTimeFormatter::format(value, "%Y-%m-%dT%H:%M:%s%z"));
-            result.append(1, '"');
-            return result;
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<Timestamp>(Timestamp & to)
-    {
-        Poco::Int64 value;
-        _reader >> value;
-        to = Timestamp::fromEpochTime(static_cast<std::time_t>(value / 1000));
-        to += (value % 1000 * 1000);
-    }
-
-
-    template <>
-    inline void BSONWriter::write<Timestamp>(Timestamp & from)
-    {
-        _writer << (from.epochMicroseconds() / 1000);
-    }
-
-
-    using NullValue = Nullable<unsigned char>;
-
-
-    // BSON Null Value
-    // spec:
-    template <>
-    struct ElementTraits<NullValue>
-    {
-        enum
-        {
-            TypeId = 0x0A
-        };
-
-        static std::string toString(const NullValue & value, int indent = 0) { return "null"; }
-    };
-
-
-    template <>
-    inline void BSONReader::read<NullValue>(NullValue & to)
-    {
-    }
-
-
-    template <>
-    inline void BSONWriter::write<NullValue>(NullValue & from)
-    {
-    }
-
-
-    struct BSONTimestamp
-    {
-        Poco::Timestamp ts;
-        Poco::Int32 inc;
-    };
-
-
-    // BSON Timestamp
-    // spec: int64
-    template <>
-    struct ElementTraits<BSONTimestamp>
-    {
-        enum
-        {
-            TypeId = 0x11
-        };
-
-        static std::string toString(const BSONTimestamp & value, int indent = 0)
-        {
-            std::string result;
-            result.append(1, '"');
-            result.append(DateTimeFormatter::format(value.ts, "%Y-%m-%dT%H:%M:%s%z"));
-            result.append(1, ' ');
-            result.append(NumberFormatter::format(value.inc));
-            result.append(1, '"');
-            return result;
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<BSONTimestamp>(BSONTimestamp & to)
-    {
-        Poco::Int64 value;
-        _reader >> value;
-        to.inc = value & 0xffffffff;
-        value >>= 32;
-        to.ts = Timestamp::fromEpochTime(static_cast<std::time_t>(value));
-    }
-
-
-    template <>
-    inline void BSONWriter::write<BSONTimestamp>(BSONTimestamp & from)
-    {
-        Poco::Int64 value = from.ts.epochMicroseconds() / 1000;
-        value <<= 32;
-        value += from.inc;
-        _writer << value;
-    }
-
-
-    // BSON 64-bit integer
-    // spec: int64
-    template <>
-    struct ElementTraits<Int64>
-    {
-        enum
-        {
-            TypeId = 0x12
-        };
-
-        static std::string toString(const Int64 & value, int indent = 0) { return NumberFormatter::format(value); }
-    };
-
-
-    template <typename T>
-    class ConcreteElement : public Element
-    {
-    public:
-        ConcreteElement(const std::string & name, const T & init) : Element(name), _value(init) { }
-
-        virtual ~ConcreteElement() { }
-
-
-        T value() const { return _value; }
-
-
-        std::string toString(int indent = 0) const { return ElementTraits<T>::toString(_value, indent); }
-
-
-        int type() const { return ElementTraits<T>::TypeId; }
-
-        void read(BinaryReader & reader) { BSONReader(reader).read(_value); }
-
-        void write(BinaryWriter & writer) { BSONWriter(writer).write(_value); }
-
-    private:
-        T _value;
-    };
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_Element_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/GetMoreRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/GetMoreRequest.h
deleted file mode 100644
index a95ff4a63db..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/GetMoreRequest.h
+++ /dev/null
@@ -1,92 +0,0 @@
-//
-// GetMoreRequest.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: GetMoreRequest
-//
-// Definition of the GetMoreRequest class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_GetMoreRequest_INCLUDED
-#define MongoDB_GetMoreRequest_INCLUDED
-
-
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/MongoDB/RequestMessage.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API GetMoreRequest : public RequestMessage
-    /// A GetMoreRequest is used to query the database for more documents in a collection
-    /// after a query request is send (OP_GETMORE).
-    {
-    public:
-        GetMoreRequest(const std::string & collectionName, Int64 cursorID);
-        /// Creates a GetMoreRequest for the give collection and cursor.
-        ///
-        /// The full collection name is the concatenation of the database
-        /// name with the collection name, using a "." for the concatenation. For example,
-        /// for the database "foo" and the collection "bar", the full collection name is
-        /// "foo.bar". The cursorID has been returned by the response on the query request.
-        /// By default the numberToReturn is set to 100.
-
-        virtual ~GetMoreRequest();
-        /// Destroys the GetMoreRequest.
-
-        Int32 getNumberToReturn() const;
-        /// Returns the limit of returned documents.
-
-        void setNumberToReturn(Int32 n);
-        /// Sets the limit of returned documents.
-
-        Int64 cursorID() const;
-        /// Returns the cursor ID.
-
-    protected:
-        void buildRequest(BinaryWriter & writer);
-
-    private:
-        std::string _fullCollectionName;
-        Int32 _numberToReturn;
-        Int64 _cursorID;
-    };
-
-
-    //
-    // inlines
-    //
-    inline Int32 GetMoreRequest::getNumberToReturn() const
-    {
-        return _numberToReturn;
-    }
-
-
-    inline void GetMoreRequest::setNumberToReturn(Int32 n)
-    {
-        _numberToReturn = n;
-    }
-
-
-    inline Int64 GetMoreRequest::cursorID() const
-    {
-        return _cursorID;
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_GetMoreRequest_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/InsertRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/InsertRequest.h
deleted file mode 100644
index f9bdcc624aa..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/InsertRequest.h
+++ /dev/null
@@ -1,100 +0,0 @@
-//
-// InsertRequest.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: InsertRequest
-//
-// Definition of the InsertRequest class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_InsertRequest_INCLUDED
-#define MongoDB_InsertRequest_INCLUDED
-
-
-#include "Poco/MongoDB/Document.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/MongoDB/RequestMessage.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API InsertRequest : public RequestMessage
-    /// A request for inserting one or more documents to the database
-    /// (OP_INSERT).
-    {
-    public:
-        enum Flags
-        {
-            INSERT_DEFAULT = 0,
-            /// If specified, perform a normal insert operation.
-
-            INSERT_CONTINUE_ON_ERROR = 1
-            /// If set, the database will not stop processing a bulk insert if one
-            /// fails (e.g. due to duplicate IDs). This makes bulk insert behave similarly
-            /// to a series of single inserts, except lastError will be set if any insert
-            /// fails, not just the last one. If multiple errors occur, only the most
-            /// recent will be reported.
-        };
-
-        InsertRequest(const std::string & collectionName, Flags flags = INSERT_DEFAULT);
-        /// Creates an InsertRequest.
- /// - /// The full collection name is the concatenation of the database - /// name with the collection name, using a "." for the concatenation. For example, - /// for the database "foo" and the collection "bar", the full collection name is - /// "foo.bar". - - virtual ~InsertRequest(); - /// Destroys the InsertRequest. - - Document & addNewDocument(); - /// Adds a new document for insertion. A reference to the empty document is - /// returned. InsertRequest is the owner of the Document and will free it - /// on destruction. - - Document::Vector & documents(); - /// Returns the documents to insert into the database. - - protected: - void buildRequest(BinaryWriter & writer); - - private: - Int32 _flags; - std::string _fullCollectionName; - Document::Vector _documents; - }; - - - // - // inlines - // - inline Document & InsertRequest::addNewDocument() - { - Document::Ptr doc = new Document(); - _documents.push_back(doc); - return *doc; - } - - - inline Document::Vector & InsertRequest::documents() - { - return _documents; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_InsertRequest_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/JavaScriptCode.h b/base/poco/MongoDB/include/Poco/MongoDB/JavaScriptCode.h deleted file mode 100644 index c0f584b7c19..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/JavaScriptCode.h +++ /dev/null @@ -1,108 +0,0 @@ -// -// JavaScriptCode.h -// -// Library: MongoDB -// Package: MongoDB -// Module: JavaScriptCode -// -// Definition of the JavaScriptCode class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_JavaScriptCode_INCLUDED -#define MongoDB_JavaScriptCode_INCLUDED - - -#include "Poco/MongoDB/BSONReader.h" -#include "Poco/MongoDB/BSONWriter.h" -#include "Poco/MongoDB/Element.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/SharedPtr.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API JavaScriptCode - /// Represents JavaScript type in BSON. - { - public: - using Ptr = SharedPtr; - - JavaScriptCode(); - /// Creates an empty JavaScriptCode object. - - virtual ~JavaScriptCode(); - /// Destroys the JavaScriptCode. - - void setCode(const std::string & code); - /// Sets the JavaScript code. - - std::string getCode() const; - /// Returns the JavaScript code. - - private: - std::string _code; - }; - - - // - // inlines - // - inline void JavaScriptCode::setCode(const std::string & code) - { - _code = code; - } - - - inline std::string JavaScriptCode::getCode() const - { - return _code; - } - - - // BSON JavaScript code - // spec: string - template <> - struct ElementTraits - { - enum - { - TypeId = 0x0D - }; - - static std::string toString(const JavaScriptCode::Ptr & value, int indent = 0) { return value.isNull() ? 
"" : value->getCode(); } - }; - - - template <> - inline void BSONReader::read(JavaScriptCode::Ptr & to) - { - std::string code; - BSONReader(_reader).read(code); - to = new JavaScriptCode(); - to->setCode(code); - } - - - template <> - inline void BSONWriter::write(JavaScriptCode::Ptr & from) - { - std::string code = from->getCode(); - BSONWriter(_writer).write(code); - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_JavaScriptCode_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/KillCursorsRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/KillCursorsRequest.h deleted file mode 100644 index e6cdd3df34c..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/KillCursorsRequest.h +++ /dev/null @@ -1,65 +0,0 @@ -// -// KillCursorsRequest.h -// -// Library: MongoDB -// Package: MongoDB -// Module: KillCursorsRequest -// -// Definition of the KillCursorsRequest class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_KillCursorsRequest_INCLUDED -#define MongoDB_KillCursorsRequest_INCLUDED - - -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/MongoDB/RequestMessage.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API KillCursorsRequest : public RequestMessage - /// Class for creating an OP_KILL_CURSORS client request. This - /// request is used to kill cursors, which are still open, - /// returned by query requests. - { - public: - KillCursorsRequest(); - /// Creates a KillCursorsRequest. - - virtual ~KillCursorsRequest(); - /// Destroys the KillCursorsRequest. - - std::vector & cursors(); - /// The internal list of cursors. - - protected: - void buildRequest(BinaryWriter & writer); - std::vector _cursors; - }; - - - // - // inlines - // - inline std::vector & KillCursorsRequest::cursors() - { - return _cursors; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_KillCursorsRequest_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Message.h b/base/poco/MongoDB/include/Poco/MongoDB/Message.h deleted file mode 100644 index f2585db5f49..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/Message.h +++ /dev/null @@ -1,76 +0,0 @@ -// -// Message.h -// -// Library: MongoDB -// Package: MongoDB -// Module: Message -// -// Definition of the Message class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_Message_INCLUDED -#define MongoDB_Message_INCLUDED - - -#include -#include "Poco/BinaryReader.h" -#include "Poco/BinaryWriter.h" -#include "Poco/MongoDB/MessageHeader.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/Net/Socket.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API Message - /// Base class for all messages send or retrieved from MongoDB server. - { - public: - explicit Message(MessageHeader::OpCode opcode); - /// Creates a Message using the given OpCode. 
- - virtual ~Message(); - /// Destructor - - MessageHeader & header(); - /// Returns the message header - - protected: - MessageHeader _header; - - void messageLength(Poco::Int32 length); - /// Sets the message length in the message header - }; - - - // - // inlines - // - inline MessageHeader & Message::header() - { - return _header; - } - - - inline void Message::messageLength(Poco::Int32 length) - { - poco_assert(length > 0); - _header.setMessageLength(length); - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_Message_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/MessageHeader.h b/base/poco/MongoDB/include/Poco/MongoDB/MessageHeader.h deleted file mode 100644 index 98f45e876c1..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/MessageHeader.h +++ /dev/null @@ -1,140 +0,0 @@ -// -// MessageHeader.h -// -// Library: MongoDB -// Package: MongoDB -// Module: MessageHeader -// -// Definition of the MessageHeader class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_MessageHeader_INCLUDED -#define MongoDB_MessageHeader_INCLUDED - - -#include "Poco/MongoDB/MessageHeader.h" -#include "Poco/MongoDB/MongoDB.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class Message; // Required to disambiguate friend declaration in MessageHeader. - - - class MongoDB_API MessageHeader - /// Represents the message header which is always prepended to a - /// MongoDB request or response message. - { - public: - static const unsigned int MSG_HEADER_SIZE = 16; - - enum OpCode - { - // Opcodes deprecated in MongoDB 5.0 - OP_REPLY = 1, - OP_UPDATE = 2001, - OP_INSERT = 2002, - OP_QUERY = 2004, - OP_GET_MORE = 2005, - OP_DELETE = 2006, - OP_KILL_CURSORS = 2007, - - /// Opcodes supported in MongoDB 5.1 and later - OP_COMPRESSED = 2012, - OP_MSG = 2013 - }; - - explicit MessageHeader(OpCode); - /// Creates the MessageHeader using the given OpCode. - - virtual ~MessageHeader(); - /// Destroys the MessageHeader. - - void read(BinaryReader & reader); - /// Reads the header using the given BinaryReader. - - void write(BinaryWriter & writer); - /// Writes the header using the given BinaryWriter. - - Int32 getMessageLength() const; - /// Returns the message length. - - OpCode opCode() const; - /// Returns the OpCode. - - Int32 getRequestID() const; - /// Returns the request ID of the current message. - - void setRequestID(Int32 id); - /// Sets the request ID of the current message. - - Int32 responseTo() const; - /// Returns the request id from the original request. - - private: - void setMessageLength(Int32 length); - /// Sets the message length. 
- - Int32 _messageLength; - Int32 _requestID; - Int32 _responseTo; - OpCode _opCode; - - friend class Message; - }; - - - // - // inlines - // - inline MessageHeader::OpCode MessageHeader::opCode() const - { - return _opCode; - } - - - inline Int32 MessageHeader::getMessageLength() const - { - return _messageLength; - } - - - inline void MessageHeader::setMessageLength(Int32 length) - { - poco_assert(_messageLength >= 0); - _messageLength = MSG_HEADER_SIZE + length; - } - - - inline void MessageHeader::setRequestID(Int32 id) - { - _requestID = id; - } - - - inline Int32 MessageHeader::getRequestID() const - { - return _requestID; - } - - inline Int32 MessageHeader::responseTo() const - { - return _responseTo; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_MessageHeader_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/MongoDB.h b/base/poco/MongoDB/include/Poco/MongoDB/MongoDB.h deleted file mode 100644 index de246ddc9dd..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/MongoDB.h +++ /dev/null @@ -1,64 +0,0 @@ -// -// MongoDB.h -// -// Library: MongoDB -// Package: MongoDB -// Module: MongoDB -// -// Basic definitions for the Poco MongoDB library. -// This file must be the first file included by every other MongoDB -// header file. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDBMongoDB_INCLUDED -#define MongoDBMongoDB_INCLUDED - - -#include "Poco/Foundation.h" - - -// -// The following block is the standard way of creating macros which make exporting -// from a DLL simpler. All files within this DLL are compiled with the MongoDB_EXPORTS -// symbol defined on the command line. this symbol should not be defined on any project -// that uses this DLL. This way any other project whose source files include this file see -// MongoDB_API functions as being imported from a DLL, whereas this DLL sees symbols -// defined with this macro as being exported. -// - - -#if defined(_WIN32) && defined(POCO_DLL) -# if defined(MongoDB_EXPORTS) -# define MongoDB_API __declspec(dllexport) -# else -# define MongoDB_API __declspec(dllimport) -# endif -#endif - - -#if !defined(MongoDB_API) -# if !defined(POCO_NO_GCC_API_ATTRIBUTE) && defined(__GNUC__) && (__GNUC__ >= 4) -# define MongoDB_API __attribute__((visibility("default"))) -# else -# define MongoDB_API -# endif -#endif - - -// -// Automatically link MongoDB library. -// -#if defined(_MSC_VER) -# if !defined(POCO_NO_AUTOMATIC_LIBS) && !defined(MongoDB_EXPORTS) -# pragma comment(lib, "PocoMongoDB" POCO_LIB_SUFFIX) -# endif -#endif - - -#endif // MongoDBMongoDB_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/ObjectId.h b/base/poco/MongoDB/include/Poco/MongoDB/ObjectId.h deleted file mode 100644 index 8a335320ea0..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/ObjectId.h +++ /dev/null @@ -1,151 +0,0 @@ -// -// Array.h -// -// Library: MongoDB -// Package: MongoDB -// Module: ObjectId -// -// Definition of the ObjectId class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_ObjectId_INCLUDED
-#define MongoDB_ObjectId_INCLUDED
-
-
-#include "Poco/MongoDB/Element.h"
-#include "Poco/MongoDB/MongoDB.h"
-#include "Poco/Timestamp.h"
-
-
-namespace Poco
-{
-namespace MongoDB
-{
-
-
-    class MongoDB_API ObjectId
-    /// ObjectId is a 12-byte BSON type, constructed using:
-    ///
-    /// - a 4-byte timestamp,
-    /// - a 3-byte machine identifier,
-    /// - a 2-byte process id, and
-    /// - a 3-byte counter, starting with a random value.
-    ///
-    /// In MongoDB, documents stored in a collection require a unique _id field that acts
-    /// as a primary key. Because ObjectIds are small, most likely unique, and fast to generate,
-    /// MongoDB uses ObjectIds as the default value for the _id field if the _id field is not
-    /// specified; i.e., the mongod adds the _id field and generates a unique ObjectId to assign
-    /// as its value.
-    {
-    public:
-        using Ptr = SharedPtr<ObjectId>;
-
-        explicit ObjectId(const std::string & id);
-        /// Creates an ObjectId from a string.
-        ///
-        /// The string must contain a hexadecimal representation
-        /// of an object ID. This means a string of 24 characters.
-
-        ObjectId(const ObjectId & copy);
-        /// Creates an ObjectId by copying another one.
-
-        virtual ~ObjectId();
-        /// Destroys the ObjectId.
-
-        Timestamp timestamp() const;
-        /// Returns the timestamp which is stored in the first four bytes of the id
-
-        std::string toString(const std::string & fmt = "%02x") const;
-        /// Returns the id in string format. The fmt parameter
-        /// specifies the formatting used for individual members
-        /// of the ID char array.
-
-    private:
-        ObjectId();
-
-        static int fromHex(char c);
-        static char fromHex(const char * c);
-
-        unsigned char _id[12];
-
-        friend class BSONWriter;
-        friend class BSONReader;
-        friend class Document;
-    };
-
-
-    //
-    // inlines
-    //
-    inline Timestamp ObjectId::timestamp() const
-    {
-        int time;
-        char * T = (char *)&time;
-        T[0] = _id[3];
-        T[1] = _id[2];
-        T[2] = _id[1];
-        T[3] = _id[0];
-        return Timestamp::fromEpochTime((time_t)time);
-    }
-
-
-    inline int ObjectId::fromHex(char c)
-    {
-        if ('0' <= c && c <= '9')
-            return c - '0';
-        if ('a' <= c && c <= 'f')
-            return c - 'a' + 10;
-        if ('A' <= c && c <= 'F')
-            return c - 'A' + 10;
-        return 0xff;
-    }
-
-
-    inline char ObjectId::fromHex(const char * c)
-    {
-        return (char)((fromHex(c[0]) << 4) | fromHex(c[1]));
-    }
-
-
-    // BSON Embedded Document
-    // spec: ObjectId
-    template <>
-    struct ElementTraits<ObjectId::Ptr>
-    {
-        enum
-        {
-            TypeId = 0x07
-        };
-
-        static std::string toString(const ObjectId::Ptr & id, int indent = 0, const std::string & fmt = "%02x")
-        {
-            return id->toString(fmt);
-        }
-    };
-
-
-    template <>
-    inline void BSONReader::read<ObjectId::Ptr>(ObjectId::Ptr & to)
-    {
-        _reader.readRaw((char *)to->_id, 12);
-    }
-
-
-    template <>
-    inline void BSONWriter::write<ObjectId::Ptr>(ObjectId::Ptr & from)
-    {
-        _writer.writeRaw((char *)from->_id, 12);
-    }
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_ObjectId_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/OpMsgCursor.h b/base/poco/MongoDB/include/Poco/MongoDB/OpMsgCursor.h
deleted file mode 100644
index a465a71bb1c..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/OpMsgCursor.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// OpMsgCursor.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: OpMsgCursor
-//
-// Definition of the OpMsgCursor class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors. 
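Given the 12-byte layout documented in the removed ObjectId header (a 4-byte big-endian timestamp, then machine id, process id, and counter), the embedded creation time can be recovered exactly as ObjectId::timestamp() above does. A hedged standalone equivalent (helper name invented for illustration):

#include <cstdint>
#include <ctime>

// The first four bytes of a BSON ObjectId are a big-endian Unix timestamp.
inline std::time_t objectIdEpochSeconds(const unsigned char (&id)[12])
{
    const std::uint32_t secs = (std::uint32_t(id[0]) << 24) | (std::uint32_t(id[1]) << 16)
        | (std::uint32_t(id[2]) << 8) | std::uint32_t(id[3]);
    return static_cast<std::time_t>(secs);
}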
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_OpMsgCursor_INCLUDED -#define MongoDB_OpMsgCursor_INCLUDED - - -#include "Poco/MongoDB/Connection.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/MongoDB/OpMsgMessage.h" - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API OpMsgCursor : public Document - /// OpMsgCursor is an helper class for querying multiple documents using OpMsgMessage. - { - public: - OpMsgCursor(const std::string & dbname, const std::string & collectionName); - /// Creates a OpMsgCursor for the given database and collection. - - virtual ~OpMsgCursor(); - /// Destroys the OpMsgCursor. - - void setEmptyFirstBatch(bool empty); - /// Empty first batch is used to get error response faster with little server processing - - bool emptyFirstBatch() const; - - void setBatchSize(Int32 batchSize); - /// Set non-default batch size - - Int32 batchSize() const; - /// Current batch size (zero or negative number indicates default batch size) - - Int64 cursorID() const; - - OpMsgMessage & next(Connection & connection); - /// Tries to get the next documents. As long as response message has a - /// cursor ID next can be called to retrieve the next bunch of documents. - /// - /// The cursor must be killed (see kill()) when not all documents are needed. - - OpMsgMessage & query(); - /// Returns the associated query. - - void kill(Connection & connection); - /// Kills the cursor and reset it so that it can be reused. - - private: - OpMsgMessage _query; - OpMsgMessage _response; - - bool _emptyFirstBatch{false}; - Int32 _batchSize{-1}; - /// Batch size used in the cursor. Zero or negative value means that default shall be used. - - Int64 _cursorID{0}; - }; - - - // - // inlines - // - inline OpMsgMessage & OpMsgCursor::query() - { - return _query; - } - - inline Int64 OpMsgCursor::cursorID() const - { - return _cursorID; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_OpMsgCursor_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/OpMsgMessage.h b/base/poco/MongoDB/include/Poco/MongoDB/OpMsgMessage.h deleted file mode 100644 index 699c7fc4e12..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/OpMsgMessage.h +++ /dev/null @@ -1,163 +0,0 @@ -// -// OpMsgMessage.h -// -// Library: MongoDB -// Package: MongoDB -// Module: OpMsgMessage -// -// Definition of the OpMsgMessage class. -// -// Copyright (c) 2022, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_OpMsgMessage_INCLUDED -#define MongoDB_OpMsgMessage_INCLUDED - - -#include "Poco/MongoDB/Document.h" -#include "Poco/MongoDB/Message.h" -#include "Poco/MongoDB/MongoDB.h" - -#include - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API OpMsgMessage : public Message - /// This class represents a request/response (OP_MSG) to send requests and receive responses to/from MongoDB. 
- { - public: - // Constants for most often used MongoDB commands that can be sent using OP_MSG - // For complete list see: https://www.mongodb.com/docs/manual/reference/command/ - - // Query and write - static const std::string CMD_INSERT; - static const std::string CMD_DELETE; - static const std::string CMD_UPDATE; - static const std::string CMD_FIND; - static const std::string CMD_FIND_AND_MODIFY; - static const std::string CMD_GET_MORE; - - // Aggregation - static const std::string CMD_AGGREGATE; - static const std::string CMD_COUNT; - static const std::string CMD_DISTINCT; - static const std::string CMD_MAP_REDUCE; - - // Replication and administration - static const std::string CMD_HELLO; - static const std::string CMD_REPL_SET_GET_STATUS; - static const std::string CMD_REPL_SET_GET_CONFIG; - - static const std::string CMD_CREATE; - static const std::string CMD_CREATE_INDEXES; - static const std::string CMD_DROP; - static const std::string CMD_DROP_DATABASE; - static const std::string CMD_KILL_CURSORS; - static const std::string CMD_LIST_DATABASES; - static const std::string CMD_LIST_INDEXES; - - // Diagnostic - static const std::string CMD_BUILD_INFO; - static const std::string CMD_COLL_STATS; - static const std::string CMD_DB_STATS; - static const std::string CMD_HOST_INFO; - - - enum Flags : UInt32 - { - MSG_FLAGS_DEFAULT = 0, - - MSG_CHECKSUM_PRESENT = (1 << 0), - - MSG_MORE_TO_COME = (1 << 1), - /// Sender will send another message and is not prepared for overlapping messages - - MSG_EXHAUST_ALLOWED = (1 << 16) - /// Client is prepared for multiple replies (using the moreToCome bit) to this request - }; - - OpMsgMessage(); - /// Creates an OpMsgMessage for response. - - OpMsgMessage(const std::string & databaseName, const std::string & collectionName, UInt32 flags = MSG_FLAGS_DEFAULT); - /// Creates an OpMsgMessage for requests. - - virtual ~OpMsgMessage(); - - const std::string & databaseName() const; - - const std::string & collectionName() const; - - void setCommandName(const std::string & command); - /// Sets the command name and clears the command document - - void setCursor(Poco::Int64 cursorID, Poco::Int32 batchSize = -1); - /// Sets the command "getMore" for the cursor id with batch size (if it is not negative). - - const std::string & commandName() const; - /// Current command name. - - void setAcknowledgedRequest(bool ack); - /// Set false to create request that does not return response. - /// It has effect only for commands that write or delete documents. - /// Default is true (request returns acknowledge response). - - bool acknowledgedRequest() const; - - UInt32 flags() const; - - Document & body(); - /// Access to body document. - /// Additional query arguments shall be added after setting the command name. - - const Document & body() const; - - Document::Vector & documents(); - /// Documents prepared for request or retrieved in response. - - const Document::Vector & documents() const; - /// Documents prepared for request or retrieved in response. - - bool responseOk() const; - /// Reads "ok" status from the response message. - - void clear(); - /// Clears the message. - - void send(std::ostream & ostr); - /// Writes the request to stream. - - void read(std::istream & istr); - /// Reads the response from the stream. 
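The interface above is used by naming a command, filling body(), and exchanging messages over a Connection. A hedged usage sketch against the removed API ("db", "coll", and localhost:27017 are placeholders; this compiles only against the headers deleted in this patch):

#include <iostream>
#include "Poco/MongoDB/Connection.h"
#include "Poco/MongoDB/OpMsgMessage.h"

int main()
{
    Poco::MongoDB::Connection connection("localhost", 27017);

    Poco::MongoDB::OpMsgMessage request("db", "coll");
    request.setCommandName(Poco::MongoDB::OpMsgMessage::CMD_FIND);
    request.body().add("limit", 10); // query arguments go after the command name

    Poco::MongoDB::OpMsgMessage response;
    connection.sendRequest(request, response);

    if (response.responseOk())
        for (const auto & doc : response.documents())
            std::cout << doc->toString(2) << std::endl;
}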
-
-    private:
-        enum PayloadType : UInt8
-        {
-            PAYLOAD_TYPE_0 = 0,
-            PAYLOAD_TYPE_1 = 1
-        };
-
-        std::string _databaseName;
-        std::string _collectionName;
-        UInt32 _flags{MSG_FLAGS_DEFAULT};
-        std::string _commandName;
-        bool _acknowledged{true};
-
-        Document _body;
-        Document::Vector _documents;
-    };
-
-
-}
-} // namespace Poco::MongoDB
-
-
-#endif // MongoDB_OpMsgMessage_INCLUDED
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/PoolableConnectionFactory.h b/base/poco/MongoDB/include/Poco/MongoDB/PoolableConnectionFactory.h
deleted file mode 100644
index 53f4a5127ef..00000000000
--- a/base/poco/MongoDB/include/Poco/MongoDB/PoolableConnectionFactory.h
+++ /dev/null
@@ -1,123 +0,0 @@
-//
-// PoolableConnectionFactory.h
-//
-// Library: MongoDB
-// Package: MongoDB
-// Module: PoolableConnectionFactory
-//
-// Definition of the PoolableConnectionFactory class.
-//
-// Copyright (c) 2012, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef MongoDB_PoolableConnectionFactory_INCLUDED
-#define MongoDB_PoolableConnectionFactory_INCLUDED
-
-
-#include "Poco/MongoDB/Connection.h"
-#include "Poco/ObjectPool.h"
-
-
-namespace Poco
-{
-
-
-template <>
-class PoolableObjectFactory<MongoDB::Connection, MongoDB::Connection::Ptr>
-/// PoolableObjectFactory specialisation for Connection. New connections
-/// are created with the given address or URI.
-///
-/// If a Connection::SocketFactory is given, it must live for the entire
-/// lifetime of the PoolableObjectFactory.
-{
-public:
-    PoolableObjectFactory(Net::SocketAddress & address) : _address(address), _pSocketFactory(0) { }
-
-    PoolableObjectFactory(const std::string & address) : _address(address), _pSocketFactory(0) { }
-
-    PoolableObjectFactory(const std::string & uri, MongoDB::Connection::SocketFactory & socketFactory)
-        : _uri(uri), _pSocketFactory(&socketFactory)
-    {
-    }
-
-    MongoDB::Connection::Ptr createObject()
-    {
-        if (_pSocketFactory)
-            return new MongoDB::Connection(_uri, *_pSocketFactory);
-        else
-            return new MongoDB::Connection(_address);
-    }
-
-    bool validateObject(MongoDB::Connection::Ptr pObject) { return true; }
-
-    void activateObject(MongoDB::Connection::Ptr pObject) { }
-
-    void deactivateObject(MongoDB::Connection::Ptr pObject) { }
-
-    void destroyObject(MongoDB::Connection::Ptr pObject) { }
-
-private:
-    Net::SocketAddress _address;
-    std::string _uri;
-    MongoDB::Connection::SocketFactory * _pSocketFactory;
-};
-
-
-namespace MongoDB
-{
-
-
-    class PooledConnection
-    /// Helper class for borrowing and returning a connection automatically from a pool.
-    {
-    public:
-        PooledConnection(Poco::ObjectPool<Connection, Connection::Ptr> & pool) : _pool(pool) { _connection = _pool.borrowObject(); }
-
-        virtual ~PooledConnection()
-        {
-            try
-            {
-                if (_connection)
-                {
-                    _pool.returnObject(_connection);
-                }
-            }
-            catch (...) 
- { - poco_unexpected(); - } - } - - operator Connection::Ptr() { return _connection; } - -#if defined(POCO_ENABLE_CPP11) - // Disable copy to prevent unwanted release of resources: C++11 way - PooledConnection(const PooledConnection &) = delete; - PooledConnection & operator=(const PooledConnection &) = delete; - - // Enable move semantics - PooledConnection(PooledConnection && other) = default; - PooledConnection & operator=(PooledConnection &&) = default; -#endif - - private: -#if !defined(POCO_ENABLE_CPP11) - // Disable copy to prevent unwanted release of resources: pre C++11 way - PooledConnection(const PooledConnection &); - PooledConnection & operator=(const PooledConnection &); -#endif - - Poco::ObjectPool & _pool; - Connection::Ptr _connection; - }; - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_PoolableConnectionFactory_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/QueryRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/QueryRequest.h deleted file mode 100644 index 6a9e80f6d3c..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/QueryRequest.h +++ /dev/null @@ -1,190 +0,0 @@ -// -// QueryRequest.h -// -// Library: MongoDB -// Package: MongoDB -// Module: QueryRequest -// -// Definition of the QueryRequest class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_QueryRequest_INCLUDED -#define MongoDB_QueryRequest_INCLUDED - - -#include "Poco/MongoDB/Document.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/MongoDB/RequestMessage.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API QueryRequest : public RequestMessage - /// A request to query documents in a MongoDB database - /// using an OP_QUERY request. - { - public: - enum Flags - { - QUERY_DEFAULT = 0, - /// Do not set any flags. - - QUERY_TAILABLE_CURSOR = 2, - /// Tailable means cursor is not closed when the last data is retrieved. - /// Rather, the cursor marks the final object’s position. - /// You can resume using the cursor later, from where it was located, - /// if more data were received. Like any "latent cursor", the cursor may - /// become invalid at some point (CursorNotFound) – for example if the final - /// object it references were deleted. - - QUERY_SLAVE_OK = 4, - /// Allow query of replica slave. Normally these return an error except - /// for namespace "local". - - // QUERY_OPLOG_REPLAY = 8 (internal replication use only - drivers should not implement) - - QUERY_NO_CURSOR_TIMEOUT = 16, - /// The server normally times out idle cursors after an inactivity period - /// (10 minutes) to prevent excess memory use. Set this option to prevent that. - - QUERY_AWAIT_DATA = 32, - /// Use with QUERY_TAILABLECURSOR. If we are at the end of the data, block for - /// a while rather than returning no data. After a timeout period, we do - /// return as normal. - - QUERY_EXHAUST = 64, - /// Stream the data down full blast in multiple "more" packages, on the - /// assumption that the client will fully read all data queried. - /// Faster when you are pulling a lot of data and know you want to pull - /// it all down. - /// Note: the client is not allowed to not read all the data unless it - /// closes the connection. - - QUERY_PARTIAL = 128 - /// Get partial results from a mongos if some shards are down - /// (instead of throwing an error). - }; - - QueryRequest(const std::string & collectionName, Flags flags = QUERY_DEFAULT); - /// Creates a QueryRequest. 
- /// - /// The full collection name is the concatenation of the database - /// name with the collection name, using a "." for the concatenation. For example, - /// for the database "foo" and the collection "bar", the full collection name is - /// "foo.bar". - - virtual ~QueryRequest(); - /// Destroys the QueryRequest. - - Flags getFlags() const; - /// Returns the flags. - - void setFlags(Flags flag); - /// Set the flags. - - std::string fullCollectionName() const; - /// Returns the . used for this query. - - Int32 getNumberToSkip() const; - /// Returns the number of documents to skip. - - void setNumberToSkip(Int32 n); - /// Sets the number of documents to skip. - - Int32 getNumberToReturn() const; - /// Returns the number of documents to return. - - void setNumberToReturn(Int32 n); - /// Sets the number of documents to return (limit). - - Document & selector(); - /// Returns the selector document. - - Document & returnFieldSelector(); - /// Returns the field selector document. - - protected: - void buildRequest(BinaryWriter & writer); - - private: - Flags _flags; - std::string _fullCollectionName; - Int32 _numberToSkip; - Int32 _numberToReturn; - Document _selector; - Document _returnFieldSelector; - }; - - - // - // inlines - // - inline QueryRequest::Flags QueryRequest::getFlags() const - { - return _flags; - } - - - inline void QueryRequest::setFlags(QueryRequest::Flags flags) - { - _flags = flags; - } - - - inline std::string QueryRequest::fullCollectionName() const - { - return _fullCollectionName; - } - - - inline Document & QueryRequest::selector() - { - return _selector; - } - - - inline Document & QueryRequest::returnFieldSelector() - { - return _returnFieldSelector; - } - - - inline Int32 QueryRequest::getNumberToSkip() const - { - return _numberToSkip; - } - - - inline void QueryRequest::setNumberToSkip(Int32 n) - { - _numberToSkip = n; - } - - - inline Int32 QueryRequest::getNumberToReturn() const - { - return _numberToReturn; - } - - - inline void QueryRequest::setNumberToReturn(Int32 n) - { - _numberToReturn = n; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_QueryRequest_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/RegularExpression.h b/base/poco/MongoDB/include/Poco/MongoDB/RegularExpression.h deleted file mode 100644 index 244b8c14163..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/RegularExpression.h +++ /dev/null @@ -1,135 +0,0 @@ -// -// RegularExpression.h -// -// Library: MongoDB -// Package: MongoDB -// Module: RegularExpression -// -// Definition of the RegularExpression class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_RegularExpression_INCLUDED -#define MongoDB_RegularExpression_INCLUDED - - -#include "Poco/MongoDB/Element.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/RegularExpression.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API RegularExpression - /// Represents a regular expression in BSON format. - { - public: - using Ptr = SharedPtr; - - RegularExpression(); - /// Creates an empty RegularExpression. - - RegularExpression(const std::string & pattern, const std::string & options); - /// Creates a RegularExpression using the given pattern and options. - - virtual ~RegularExpression(); - /// Destroys the RegularExpression. - - SharedPtr createRE() const; - /// Tries to create a Poco::RegularExpression from the MongoDB regular expression. 
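BSON stores a regular expression as two C strings: the pattern and an option string of single-letter flags ('i', 'm', 's', 'x'). createRE() above translates those options into Poco::RegularExpression flags; a hedged sketch of such a mapping (an assumption about the shape of the logic, not the removed implementation verbatim):

#include <string>
#include "Poco/RegularExpression.h"

// Map BSON regex option letters onto Poco::RegularExpression option bits.
inline int bsonRegexOptions(const std::string & options)
{
    int flags = 0;
    for (const char c : options)
    {
        if (c == 'i') flags |= Poco::RegularExpression::RE_CASELESS;       // case-insensitive
        else if (c == 'm') flags |= Poco::RegularExpression::RE_MULTILINE; // ^ and $ match per line
        else if (c == 's') flags |= Poco::RegularExpression::RE_DOTALL;    // '.' matches newline
        else if (c == 'x') flags |= Poco::RegularExpression::RE_EXTENDED;  // ignore pattern whitespace
    }
    return flags;
}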
- - std::string getOptions() const; - /// Returns the options string. - - void setOptions(const std::string & options); - /// Sets the options string. - - std::string getPattern() const; - /// Returns the pattern. - - void setPattern(const std::string & pattern); - /// Sets the pattern. - - private: - std::string _pattern; - std::string _options; - }; - - - /// - /// inlines - /// - inline std::string RegularExpression::getPattern() const - { - return _pattern; - } - - - inline void RegularExpression::setPattern(const std::string & pattern) - { - _pattern = pattern; - } - - - inline std::string RegularExpression::getOptions() const - { - return _options; - } - - - inline void RegularExpression::setOptions(const std::string & options) - { - _options = options; - } - - - // BSON Regex - // spec: cstring cstring - template <> - struct ElementTraits - { - enum - { - TypeId = 0x0B - }; - - static std::string toString(const RegularExpression::Ptr & value, int indent = 0) - { - //TODO - return "RE: not implemented yet"; - } - }; - - - template <> - inline void BSONReader::read(RegularExpression::Ptr & to) - { - std::string pattern = readCString(); - std::string options = readCString(); - - to = new RegularExpression(pattern, options); - } - - - template <> - inline void BSONWriter::write(RegularExpression::Ptr & from) - { - writeCString(from->getPattern()); - writeCString(from->getOptions()); - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_RegularExpression_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/ReplicaSet.h b/base/poco/MongoDB/include/Poco/MongoDB/ReplicaSet.h deleted file mode 100644 index 61c96fd8d00..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/ReplicaSet.h +++ /dev/null @@ -1,61 +0,0 @@ -// -// ReplicaSet.h -// -// Library: MongoDB -// Package: MongoDB -// Module: ReplicaSet -// -// Definition of the ReplicaSet class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_ReplicaSet_INCLUDED -#define MongoDB_ReplicaSet_INCLUDED - - -#include -#include "Poco/MongoDB/Connection.h" -#include "Poco/Net/SocketAddress.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API ReplicaSet - /// Class for working with a MongoDB replica set. - { - public: - explicit ReplicaSet(const std::vector & addresses); - /// Creates the ReplicaSet using the given server addresses. - - virtual ~ReplicaSet(); - /// Destroys the ReplicaSet. - - Connection::Ptr findMaster(); - /// Tries to find the master MongoDB instance from the addresses - /// passed to the constructor. - /// - /// Returns the Connection to the master, or null if no master - /// instance was found. - - protected: - Connection::Ptr isMaster(const Net::SocketAddress & host); - - private: - std::vector _addresses; - }; - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_ReplicaSet_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/RequestMessage.h b/base/poco/MongoDB/include/Poco/MongoDB/RequestMessage.h deleted file mode 100644 index 5ac750a7fe0..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/RequestMessage.h +++ /dev/null @@ -1,54 +0,0 @@ -// -// RequestMessage.h -// -// Library: MongoDB -// Package: MongoDB -// Module: RequestMessage -// -// Definition of the RequestMessage class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_RequestMessage_INCLUDED -#define MongoDB_RequestMessage_INCLUDED - - -#include -#include "Poco/MongoDB/Message.h" -#include "Poco/MongoDB/MongoDB.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API RequestMessage : public Message - /// Base class for a request sent to the MongoDB server. - { - public: - explicit RequestMessage(MessageHeader::OpCode opcode); - /// Creates a RequestMessage using the given opcode. - - virtual ~RequestMessage(); - /// Destroys the RequestMessage. - - void send(std::ostream & ostr); - /// Writes the request to stream. - - protected: - virtual void buildRequest(BinaryWriter & ss) = 0; - }; - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_RequestMessage_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/ResponseMessage.h b/base/poco/MongoDB/include/Poco/MongoDB/ResponseMessage.h deleted file mode 100644 index 9cb92cb16c4..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/ResponseMessage.h +++ /dev/null @@ -1,114 +0,0 @@ -// -// ResponseMessage.h -// -// Library: MongoDB -// Package: MongoDB -// Module: ResponseMessage -// -// Definition of the ResponseMessage class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_ResponseMessage_INCLUDED -#define MongoDB_ResponseMessage_INCLUDED - - -#include -#include -#include "Poco/MongoDB/Document.h" -#include "Poco/MongoDB/Message.h" -#include "Poco/MongoDB/MongoDB.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class MongoDB_API ResponseMessage : public Message - /// This class represents a response (OP_REPLY) from MongoDB. - { - public: - ResponseMessage(); - /// Creates an empty ResponseMessage. - - ResponseMessage(const Int64 & cursorID); - /// Creates an ResponseMessage for existing cursor ID. - - virtual ~ResponseMessage(); - /// Destroys the ResponseMessage. - - Int64 cursorID() const; - /// Returns the cursor ID. - - void clear(); - /// Clears the response. - - std::size_t count() const; - /// Returns the number of documents in the response. - - Document::Vector & documents(); - /// Returns a vector containing the received documents. - - bool empty() const; - /// Returns true if the response does not contain any documents. - - bool hasDocuments() const; - /// Returns true if there is at least one document in the response. - - void read(std::istream & istr); - /// Reads the response from the stream. 
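The private members that follow mirror the OP_REPLY payload that ResponseMessage::read() parses. For orientation, a hedged comment-only sketch of that layout (per the public wire-protocol specification, not the removed parser verbatim):

// OP_REPLY body, following the common 16-byte header (little-endian):
//   int32     responseFlags
//   int64     cursorID        // non-zero while more batches can be fetched
//   int32     startingFrom
//   int32     numberReturned
//   document* documents       // numberReturned consecutive BSON documents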
- - private: - Int32 _responseFlags; - Int64 _cursorID; - Int32 _startingFrom; - Int32 _numberReturned; - Document::Vector _documents; - }; - - - // - // inlines - // - inline std::size_t ResponseMessage::count() const - { - return _documents.size(); - } - - - inline bool ResponseMessage::empty() const - { - return _documents.size() == 0; - } - - - inline Int64 ResponseMessage::cursorID() const - { - return _cursorID; - } - - - inline Document::Vector & ResponseMessage::documents() - { - return _documents; - } - - - inline bool ResponseMessage::hasDocuments() const - { - return _documents.size() > 0; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_ResponseMessage_INCLUDED diff --git a/base/poco/MongoDB/include/Poco/MongoDB/UpdateRequest.h b/base/poco/MongoDB/include/Poco/MongoDB/UpdateRequest.h deleted file mode 100644 index 0f61c4a3e29..00000000000 --- a/base/poco/MongoDB/include/Poco/MongoDB/UpdateRequest.h +++ /dev/null @@ -1,117 +0,0 @@ -// -// UpdateRequest.h -// -// Library: MongoDB -// Package: MongoDB -// Module: UpdateRequest -// -// Definition of the UpdateRequest class. -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef MongoDB_UpdateRequest_INCLUDED -#define MongoDB_UpdateRequest_INCLUDED - - -#include "Poco/MongoDB/Document.h" -#include "Poco/MongoDB/MongoDB.h" -#include "Poco/MongoDB/RequestMessage.h" - - -namespace Poco -{ -namespace MongoDB -{ - - - class UpdateRequest : public RequestMessage - /// This request is used to update a document in a database - /// using the OP_UPDATE client request. - { - public: - enum Flags - { - UPDATE_DEFAULT = 0, - /// If set, the database will insert the supplied object into the - /// collection if no matching document is found. - - UPDATE_UPSERT = 1, - /// If set, the database will update all matching objects in the collection. - /// Otherwise only updates first matching doc. - - UPDATE_MULTIUPDATE = 2 - /// If set to, updates multiple documents that meet the query criteria. - /// Otherwise only updates one document. - }; - - UpdateRequest(const std::string & collectionName, Flags flags = UPDATE_DEFAULT); - /// Creates the UpdateRequest. - /// - /// The full collection name is the concatenation of the database - /// name with the collection name, using a "." for the concatenation. For example, - /// for the database "foo" and the collection "bar", the full collection name is - /// "foo.bar". - - virtual ~UpdateRequest(); - /// Destroys the UpdateRequest. - - Document & selector(); - /// Returns the selector document. - - Document & update(); - /// Returns the document to update. 
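A hedged usage sketch of the class above ("db.coll" and the field values are placeholders; OP_UPDATE itself sends no reply, which is why callers historically paired it with getLastError):

#include "Poco/MongoDB/Connection.h"
#include "Poco/MongoDB/UpdateRequest.h"

void updateAge(Poco::MongoDB::Connection & connection)
{
    Poco::MongoDB::UpdateRequest request("db.coll");
    request.selector().add("name", std::string("obiwan"));  // match criteria
    request.update().addNewDocument("$set").add("age", 38); // fields to change
    connection.sendRequest(request); // fire-and-forget over OP_UPDATE
}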
- - Flags flags() const; - /// Returns the flags - - void flags(Flags flags); - /// Sets the flags - - protected: - void buildRequest(BinaryWriter & writer); - - private: - Flags _flags; - std::string _fullCollectionName; - Document _selector; - Document _update; - }; - - - // - // inlines - // - inline UpdateRequest::Flags UpdateRequest::flags() const - { - return _flags; - } - - - inline void UpdateRequest::flags(UpdateRequest::Flags flags) - { - _flags = flags; - } - - - inline Document & UpdateRequest::selector() - { - return _selector; - } - - - inline Document & UpdateRequest::update() - { - return _update; - } - - -} -} // namespace Poco::MongoDB - - -#endif // MongoDB_UpdateRequest_INCLUDED diff --git a/base/poco/MongoDB/src/Array.cpp b/base/poco/MongoDB/src/Array.cpp deleted file mode 100644 index 6fff0994d82..00000000000 --- a/base/poco/MongoDB/src/Array.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// -// Array.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Array -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Array.h" -#include - - -namespace Poco { -namespace MongoDB { - - -Array::Array(): - Document() -{ -} - - -Array::~Array() -{ -} - - -Element::Ptr Array::get(std::size_t pos) const -{ - std::string name = Poco::NumberFormatter::format(pos); - return Document::get(name); -} - - -std::string Array::toString(int indent) const -{ - std::ostringstream oss; - - oss << "["; - - if (indent > 0) oss << std::endl; - - for (ElementSet::const_iterator it = _elements.begin(); it != _elements.end(); ++it) - { - if (it != _elements.begin()) - { - oss << ","; - if (indent > 0) oss << std::endl; - } - - for (int i = 0; i < indent; ++i) oss << ' '; - - oss << (*it)->toString(indent > 0 ? indent + 2 : 0); - } - - if (indent > 0) - { - oss << std::endl; - if (indent >= 2) indent -= 2; - for (int i = 0; i < indent; ++i) oss << ' '; - } - - oss << "]"; - - return oss.str(); -} - - -} } // Namespace Poco::Mongo diff --git a/base/poco/MongoDB/src/Binary.cpp b/base/poco/MongoDB/src/Binary.cpp deleted file mode 100644 index 8b0e6baeccb..00000000000 --- a/base/poco/MongoDB/src/Binary.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// -// Binary.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Binary -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Binary.h" - - -namespace Poco { -namespace MongoDB { - - -Binary::Binary(): - _buffer(0), - _subtype(0) -{ -} - - -Binary::Binary(Poco::Int32 size, unsigned char subtype): - _buffer(size), - _subtype(subtype) -{ -} - - -Binary::Binary(const UUID& uuid): - _buffer(128 / 8), - _subtype(0x04) -{ - unsigned char szUUID[16]; - uuid.copyTo((char*) szUUID); - _buffer.assign(szUUID, 16); -} - - - -Binary::Binary(const std::string& data, unsigned char subtype): - _buffer(reinterpret_cast(data.data()), data.size()), - _subtype(subtype) -{ -} - - -Binary::Binary(const void* data, Poco::Int32 size, unsigned char subtype): - _buffer(reinterpret_cast(data), size), - _subtype(subtype) -{ -} - - -Binary::~Binary() -{ -} - - -std::string Binary::toString(int indent) const -{ - std::ostringstream oss; - Base64Encoder encoder(oss); - MemoryInputStream mis((const char*) _buffer.begin(), _buffer.size()); - StreamCopier::copyStream(mis, encoder); - encoder.close(); - return oss.str(); -} - - -UUID Binary::uuid() const -{ - if ((_subtype == 0x04 || _subtype == 0x03) && _buffer.size() == 16) - { - UUID uuid; - uuid.copyFrom((const char*) _buffer.begin()); - return uuid; - } - throw BadCastException("Invalid subtype: " + std::to_string(_subtype) + ", size: " + std::to_string(_buffer.size())); -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/Connection.cpp b/base/poco/MongoDB/src/Connection.cpp deleted file mode 100644 index fa20887054b..00000000000 --- a/base/poco/MongoDB/src/Connection.cpp +++ /dev/null @@ -1,348 +0,0 @@ -// -// Connection.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Connection -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Net/SocketStream.h" -#include "Poco/MongoDB/Connection.h" -#include "Poco/MongoDB/Database.h" -#include "Poco/URI.h" -#include "Poco/Format.h" -#include "Poco/NumberParser.h" -#include "Poco/Exception.h" - - -namespace Poco { -namespace MongoDB { - - -Connection::SocketFactory::SocketFactory() -{ -} - - -Connection::SocketFactory::~SocketFactory() -{ -} - - -Poco::Net::StreamSocket Connection::SocketFactory::createSocket(const std::string& host, int port, Poco::Timespan connectTimeout, bool secure) -{ - if (!secure) - { - Poco::Net::SocketAddress addr(host, port); - Poco::Net::StreamSocket socket; - if (connectTimeout > 0) - socket.connect(addr, connectTimeout); - else - socket.connect(addr); - return socket; - } - else throw Poco::NotImplementedException("Default SocketFactory implementation does not support SecureStreamSocket"); -} - - -Connection::Connection(): - _address(), - _socket() -{ -} - - -Connection::Connection(const std::string& hostAndPort): - _address(hostAndPort), - _socket() -{ - connect(); -} - - -Connection::Connection(const std::string& uri, SocketFactory& socketFactory): - _address(), - _socket() -{ - connect(uri, socketFactory); -} - - -Connection::Connection(const std::string& host, int port): - _address(host, port), - _socket() -{ - connect(); -} - - -Connection::Connection(const Poco::Net::SocketAddress& addrs): - _address(addrs), - _socket() -{ - connect(); -} - - -Connection::Connection(const Poco::Net::StreamSocket& socket): - _address(socket.peerAddress()), - _socket(socket) -{ -} - - -Connection::~Connection() -{ - try - { - disconnect(); - } - catch (...) 
- { - } -} - - -void Connection::connect() -{ - _socket.connect(_address); -} - - -void Connection::connect(const std::string& hostAndPort) -{ - _address = Poco::Net::SocketAddress(hostAndPort); - connect(); -} - - -void Connection::connect(const std::string& host, int port) -{ - _address = Poco::Net::SocketAddress(host, port); - connect(); -} - - -void Connection::connect(const Poco::Net::SocketAddress& addrs) -{ - _address = addrs; - connect(); -} - - -void Connection::connect(const Poco::Net::StreamSocket& socket) -{ - _address = socket.peerAddress(); - _socket = socket; -} - - -void Connection::connect(const std::string& uri, SocketFactory& socketFactory) -{ - std::vector strAddresses; - std::string newURI; - - if (uri.find(',') != std::string::npos) - { - size_t pos; - size_t head = 0; - if ((pos = uri.find("@")) != std::string::npos) - { - head = pos + 1; - } - else if ((pos = uri.find("://")) != std::string::npos) - { - head = pos + 3; - } - - std::string tempstr; - std::string::const_iterator it = uri.begin(); - it += head; - size_t tail = head; - for (;it != uri.end() && *it != '?' && *it != '/'; ++it) - { - tempstr += *it; - tail++; - } - - it = tempstr.begin(); - std::string token; - for (;it != tempstr.end(); ++it) - { - if (*it == ',') - { - newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); - strAddresses.push_back(newURI); - token = ""; - } - else - { - token += *it; - } - } - newURI = uri.substr(0, head) + token + uri.substr(tail, uri.length()); - strAddresses.push_back(newURI); - } - else - { - strAddresses.push_back(uri); - } - - newURI = strAddresses.front(); - Poco::URI theURI(newURI); - if (theURI.getScheme() != "mongodb") throw Poco::UnknownURISchemeException(uri); - - std::string userInfo = theURI.getUserInfo(); - std::string databaseName = theURI.getPath(); - if (!databaseName.empty() && databaseName[0] == '/') databaseName.erase(0, 1); - if (databaseName.empty()) databaseName = "admin"; - - bool ssl = false; - Poco::Timespan connectTimeout; - Poco::Timespan socketTimeout; - std::string authMechanism = Database::AUTH_SCRAM_SHA1; - std::string readPreference="primary"; - - Poco::URI::QueryParameters params = theURI.getQueryParameters(); - for (Poco::URI::QueryParameters::const_iterator it = params.begin(); it != params.end(); ++it) - { - if (it->first == "ssl") - { - ssl = (it->second == "true"); - } - else if (it->first == "connectTimeoutMS") - { - connectTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "socketTimeoutMS") - { - socketTimeout = static_cast(1000)*Poco::NumberParser::parse(it->second); - } - else if (it->first == "authMechanism") - { - authMechanism = it->second; - } - else if (it->first == "readPreference") - { - readPreference= it->second; - } - } - - for (std::vector::const_iterator it = strAddresses.cbegin();it != strAddresses.cend(); ++it) - { - newURI = *it; - theURI = Poco::URI(newURI); - - std::string host = theURI.getHost(); - Poco::UInt16 port = theURI.getPort(); - if (port == 0) port = 27017; - - connect(socketFactory.createSocket(host, port, connectTimeout, ssl)); - _uri = newURI; - if (socketTimeout > 0) - { - _socket.setSendTimeout(socketTimeout); - _socket.setReceiveTimeout(socketTimeout); - } - if (strAddresses.size() > 1) - { - Poco::MongoDB::QueryRequest request("admin.$cmd"); - request.setNumberToReturn(1); - request.selector().add("isMaster", 1); - Poco::MongoDB::ResponseMessage response; - - sendRequest(request, response); - _uri = newURI; - if 
(!response.documents().empty()) - { - Poco::MongoDB::Document::Ptr doc = response.documents()[0]; - if (doc->get("ismaster") && readPreference == "primary") - { - break; - } - else if (!doc->get("ismaster") && readPreference == "secondary") - { - break; - } - else if (it + 1 == strAddresses.cend()) - { - throw Poco::URISyntaxException(uri); - } - } - } - } - if (!userInfo.empty()) - { - std::string username; - std::string password; - std::string::size_type pos = userInfo.find(':'); - if (pos != std::string::npos) - { - username.assign(userInfo, 0, pos++); - password.assign(userInfo, pos, userInfo.size() - pos); - } - else username = userInfo; - - Database database(databaseName); - - if (!database.authenticate(*this, username, password, authMechanism)) - throw Poco::NoPermissionException(Poco::format("Access to MongoDB database %s denied for user %s", databaseName, username)); - } -} - - -void Connection::disconnect() -{ - _socket.close(); -} - - -void Connection::sendRequest(RequestMessage& request) -{ - Poco::Net::SocketOutputStream sos(_socket); - request.send(sos); -} - - -void Connection::sendRequest(RequestMessage& request, ResponseMessage& response) -{ - sendRequest(request); - - Poco::Net::SocketInputStream sis(_socket); - response.read(sis); -} - - -void Connection::sendRequest(OpMsgMessage& request, OpMsgMessage& response) -{ - Poco::Net::SocketOutputStream sos(_socket); - request.send(sos); - - response.clear(); - readResponse(response); -} - - -void Connection::sendRequest(OpMsgMessage& request) -{ - request.setAcknowledgedRequest(false); - Poco::Net::SocketOutputStream sos(_socket); - request.send(sos); -} - - -void Connection::readResponse(OpMsgMessage& response) -{ - Poco::Net::SocketInputStream sis(_socket); - response.read(sis); -} - - - -} } // Poco::MongoDB diff --git a/base/poco/MongoDB/src/Cursor.cpp b/base/poco/MongoDB/src/Cursor.cpp deleted file mode 100644 index ef7a4ca961d..00000000000 --- a/base/poco/MongoDB/src/Cursor.cpp +++ /dev/null @@ -1,83 +0,0 @@ -// -// Cursor.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Cursor -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Cursor.h" -#include "Poco/MongoDB/GetMoreRequest.h" -#include "Poco/MongoDB/KillCursorsRequest.h" - - -namespace Poco { -namespace MongoDB { - - -Cursor::Cursor(const std::string& db, const std::string& collection, QueryRequest::Flags flags): - _query(db + '.' + collection, flags) -{ -} - - -Cursor::Cursor(const std::string& fullCollectionName, QueryRequest::Flags flags): - _query(fullCollectionName, flags) -{ -} - - -Cursor::Cursor(const Document& aggregationResponse) : - _query(aggregationResponse.get("cursor")->get("ns")), - _response(aggregationResponse.get("cursor")->get("id")) -{ -} - -Cursor::~Cursor() -{ - try - { - poco_assert_dbg(!_response.cursorID()); - } - catch (...) 
- { - } -} - - -ResponseMessage& Cursor::next(Connection& connection) -{ - if (_response.cursorID() == 0) - { - connection.sendRequest(_query, _response); - } - else - { - Poco::MongoDB::GetMoreRequest getMore(_query.fullCollectionName(), _response.cursorID()); - getMore.setNumberToReturn(_query.getNumberToReturn()); - _response.clear(); - connection.sendRequest(getMore, _response); - } - return _response; -} - - -void Cursor::kill(Connection& connection) -{ - if (_response.cursorID() != 0) - { - KillCursorsRequest killRequest; - killRequest.cursors().push_back(_response.cursorID()); - connection.sendRequest(killRequest); - } - _response.clear(); -} - - -} } // Namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/Database.cpp b/base/poco/MongoDB/src/Database.cpp deleted file mode 100644 index 15c46b17251..00000000000 --- a/base/poco/MongoDB/src/Database.cpp +++ /dev/null @@ -1,482 +0,0 @@ -// -// Database.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Database -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Database.h" -#include "Poco/MongoDB/Binary.h" -#include "Poco/MD5Engine.h" -#include "Poco/SHA1Engine.h" -#include "Poco/PBKDF2Engine.h" -#include "Poco/HMACEngine.h" -#include "Poco/Base64Decoder.h" -#include "Poco/MemoryStream.h" -#include "Poco/StreamCopier.h" -#include "Poco/Exception.h" -#include "Poco/RandomStream.h" -#include "Poco/Random.h" -#include "Poco/Format.h" -#include "Poco/NumberParser.h" -#include -#include - - -namespace Poco { -namespace MongoDB { - - -const std::string Database::AUTH_MONGODB_CR("MONGODB-CR"); -const std::string Database::AUTH_SCRAM_SHA1("SCRAM-SHA-1"); - - -namespace -{ - std::map parseKeyValueList(const std::string& str) - { - std::map kvm; - std::string::const_iterator it = str.begin(); - std::string::const_iterator end = str.end(); - while (it != end) - { - std::string k; - std::string v; - while (it != end && *it != '=') k += *it++; - if (it != end) ++it; - while (it != end && *it != ',') v += *it++; - if (it != end) ++it; - kvm[k] = v; - } - return kvm; - } - - std::string decodeBase64(const std::string& base64) - { - Poco::MemoryInputStream istr(base64.data(), base64.size()); - Poco::Base64Decoder decoder(istr); - std::string result; - Poco::StreamCopier::copyToString(decoder, result); - return result; - } - - std::string encodeBase64(const std::string& data) - { - std::ostringstream ostr; - Poco::Base64Encoder encoder(ostr); - encoder.rdbuf()->setLineLength(0); - encoder << data; - encoder.close(); - return ostr.str(); - } - - std::string digestToBinaryString(Poco::DigestEngine& engine) - { - Poco::DigestEngine::Digest d = engine.digest(); - return std::string(reinterpret_cast(&d[0]), d.size()); - } - - std::string digestToHexString(Poco::DigestEngine& engine) - { - Poco::DigestEngine::Digest d = engine.digest(); - return Poco::DigestEngine::digestToHex(d); - } - - std::string digestToBase64(Poco::DigestEngine& engine) - { - return encodeBase64(digestToBinaryString(engine)); - } - - std::string hashCredentials(const std::string& username, const std::string& password) - { - Poco::MD5Engine md5; - md5.update(username); - md5.update(std::string(":mongo:")); - md5.update(password); - return digestToHexString(md5); - } - - std::string createNonce() - { - Poco::MD5Engine md5; - Poco::RandomInputStream randomStream; - Poco::Random random; - for (int i = 0; i < 4; i++) - { - md5.update(randomStream.get()); - 
md5.update(random.nextChar()); - } - return digestToHexString(md5); - } -} - - -Database::Database(const std::string& db): - _dbname(db) -{ -} - - -Database::~Database() -{ -} - - -bool Database::authenticate(Connection& connection, const std::string& username, const std::string& password, const std::string& method) -{ - if (username.empty()) throw Poco::InvalidArgumentException("empty username"); - if (password.empty()) throw Poco::InvalidArgumentException("empty password"); - - if (method == AUTH_MONGODB_CR) - return authCR(connection, username, password); - else if (method == AUTH_SCRAM_SHA1) - return authSCRAM(connection, username, password); - else - throw Poco::InvalidArgumentException("authentication method", method); -} - - -bool Database::authCR(Connection& connection, const std::string& username, const std::string& password) -{ - std::string nonce; - Poco::SharedPtr pCommand = createCommand(); - pCommand->selector().add("getnonce", 1); - - ResponseMessage response; - connection.sendRequest(*pCommand, response); - if (response.documents().size() > 0) - { - Document::Ptr pDoc = response.documents()[0]; - if (pDoc->getInteger("ok") != 1) return false; - nonce = pDoc->get("nonce", ""); - if (nonce.empty()) throw Poco::ProtocolException("no nonce received"); - } - else throw Poco::ProtocolException("empty response for getnonce"); - - std::string credsDigest = hashCredentials(username, password); - - Poco::MD5Engine md5; - md5.update(nonce); - md5.update(username); - md5.update(credsDigest); - std::string key = digestToHexString(md5); - - pCommand = createCommand(); - pCommand->selector() - .add("authenticate", 1) - .add("user", username) - .add("nonce", nonce) - .add("key", key); - - connection.sendRequest(*pCommand, response); - if (response.documents().size() > 0) - { - Document::Ptr pDoc = response.documents()[0]; - return pDoc->getInteger("ok") == 1; - } - else throw Poco::ProtocolException("empty response for authenticate"); -} - - -bool Database::authSCRAM(Connection& connection, const std::string& username, const std::string& password) -{ - std::string clientNonce(createNonce()); - std::string clientFirstMsg = Poco::format("n=%s,r=%s", username, clientNonce); - - Poco::SharedPtr pCommand = createCommand(); - pCommand->selector() - .add("saslStart", 1) - .add("mechanism", AUTH_SCRAM_SHA1) - .add("payload", new Binary(Poco::format("n,,%s", clientFirstMsg))); - - ResponseMessage response; - connection.sendRequest(*pCommand, response); - - Int32 conversationId = 0; - std::string serverFirstMsg; - - if (response.documents().size() > 0) - { - Document::Ptr pDoc = response.documents()[0]; - if (pDoc->getInteger("ok") == 1) - { - Binary::Ptr pPayload = pDoc->get("payload"); - serverFirstMsg = pPayload->toRawString(); - conversationId = pDoc->get("conversationId"); - } - else - { - if (pDoc->exists("errmsg")) - { - const Poco::MongoDB::Element::Ptr value = pDoc->get("errmsg"); - auto message = static_cast &>(*value).value(); - throw Poco::RuntimeException(message); - } - else - return false; - } - } - else throw Poco::ProtocolException("empty response for saslStart"); - - std::map kvm = parseKeyValueList(serverFirstMsg); - const std::string serverNonce = kvm["r"]; - const std::string salt = decodeBase64(kvm["s"]); - const unsigned iterations = Poco::NumberParser::parseUnsigned(kvm["i"]); - const Poco::UInt32 dkLen = 20; - - std::string hashedPassword = hashCredentials(username, password); - - Poco::PBKDF2Engine > pbkdf2(salt, iterations, dkLen); - pbkdf2.update(hashedPassword); - 
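// A note on the computation around this point (hedged summary of the RFC 5802
// SCRAM-SHA-1 key schedule that the surrounding lines implement):
//   SaltedPassword  = PBKDF2-HMAC-SHA-1(HashedPassword, salt, i, dkLen = 20)
//   ClientKey       = HMAC(SaltedPassword, "Client Key")
//   StoredKey       = SHA-1(ClientKey)
//   ClientSignature = HMAC(StoredKey, AuthMessage)
//   ClientProof     = ClientKey XOR ClientSignature
// MongoDB's variant feeds PBKDF2 not the raw password but
// hex(MD5(username + ":mongo:" + password)), computed by hashCredentials() above.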
std::string saltedPassword = digestToBinaryString(pbkdf2); - - std::string clientFinalNoProof = Poco::format("c=biws,r=%s", serverNonce); - std::string authMessage = Poco::format("%s,%s,%s", clientFirstMsg, serverFirstMsg, clientFinalNoProof); - - Poco::HMACEngine hmacKey(saltedPassword); - hmacKey.update(std::string("Client Key")); - std::string clientKey = digestToBinaryString(hmacKey); - - Poco::SHA1Engine sha1; - sha1.update(clientKey); - std::string storedKey = digestToBinaryString(sha1); - - Poco::HMACEngine hmacSig(storedKey); - hmacSig.update(authMessage); - std::string clientSignature = digestToBinaryString(hmacSig); - - std::string clientProof(clientKey); - for (std::size_t i = 0; i < clientProof.size(); i++) - { - clientProof[i] ^= clientSignature[i]; - } - - std::string clientFinal = Poco::format("%s,p=%s", clientFinalNoProof, encodeBase64(clientProof)); - - pCommand = createCommand(); - pCommand->selector() - .add("saslContinue", 1) - .add("conversationId", conversationId) - .add("payload", new Binary(clientFinal)); - - std::string serverSecondMsg; - connection.sendRequest(*pCommand, response); - if (response.documents().size() > 0) - { - Document::Ptr pDoc = response.documents()[0]; - if (pDoc->getInteger("ok") == 1) - { - Binary::Ptr pPayload = pDoc->get("payload"); - serverSecondMsg = pPayload->toRawString(); - } - else - { - if (pDoc->exists("errmsg")) - { - const Poco::MongoDB::Element::Ptr value = pDoc->get("errmsg"); - auto message = static_cast &>(*value).value(); - throw Poco::RuntimeException(message); - } - else - return false; - } - } - else throw Poco::ProtocolException("empty response for saslContinue"); - - Poco::HMACEngine hmacSKey(saltedPassword); - hmacSKey.update(std::string("Server Key")); - std::string serverKey = digestToBinaryString(hmacSKey); - - Poco::HMACEngine hmacSSig(serverKey); - hmacSSig.update(authMessage); - std::string serverSignature = digestToBase64(hmacSSig); - - kvm = parseKeyValueList(serverSecondMsg); - std::string serverSignatureReceived = kvm["v"]; - - if (serverSignature != serverSignatureReceived) - throw Poco::ProtocolException("server signature verification failed"); - - pCommand = createCommand(); - pCommand->selector() - .add("saslContinue", 1) - .add("conversationId", conversationId) - .add("payload", new Binary); - - connection.sendRequest(*pCommand, response); - if (response.documents().size() > 0) - { - Document::Ptr pDoc = response.documents()[0]; - if (pDoc->getInteger("ok") == 1) - { - return true; - } - else - { - if (pDoc->exists("errmsg")) - { - const Poco::MongoDB::Element::Ptr value = pDoc->get("errmsg"); - auto message = static_cast &>(*value).value(); - throw Poco::RuntimeException(message); - } - else - return false; - } - } - else throw Poco::ProtocolException("empty response for saslContinue"); -} - - -Document::Ptr Database::queryBuildInfo(Connection& connection) const -{ - // build info can be issued on "config" system database - Poco::SharedPtr request = createCommand(); - request->selector().add("buildInfo", 1); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*request, response); - - Document::Ptr buildInfo; - if ( response.documents().size() > 0 ) - { - buildInfo = response.documents()[0]; - } - else - { - throw Poco::ProtocolException("Didn't get a response from the buildinfo command"); - } - return buildInfo; -} - - -Document::Ptr Database::queryServerHello(Connection& connection, bool old) const -{ - // hello can be issued on "config" system database - Poco::SharedPtr request = 
createCommand(); - - // 'hello' command was previously called 'isMaster' - std::string command_name; - if (old) - command_name = "isMaster"; - else - command_name = "hello"; - - request->selector().add(command_name, 1); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*request, response); - - Document::Ptr hello; - if ( response.documents().size() > 0 ) - { - hello = response.documents()[0]; - } - else - { - throw Poco::ProtocolException("Didn't get a response from the hello command"); - } - return hello; -} - - -Int64 Database::count(Connection& connection, const std::string& collectionName) const -{ - Poco::SharedPtr countRequest = createCountRequest(collectionName); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*countRequest, response); - - if (response.documents().size() > 0) - { - Poco::MongoDB::Document::Ptr doc = response.documents()[0]; - return doc->getInteger("n"); - } - - return -1; -} - - -Poco::MongoDB::Document::Ptr Database::ensureIndex(Connection& connection, const std::string& collection, const std::string& indexName, Poco::MongoDB::Document::Ptr keys, bool unique, bool background, int version, int ttl) -{ - Poco::MongoDB::Document::Ptr index = new Poco::MongoDB::Document(); - index->add("ns", _dbname + "." + collection); - index->add("name", indexName); - index->add("key", keys); - - if (version > 0) - { - index->add("version", version); - } - - if (unique) - { - index->add("unique", true); - } - - if (background) - { - index->add("background", true); - } - - if (ttl > 0) - { - index->add("expireAfterSeconds", ttl); - } - - Poco::SharedPtr insertRequest = createInsertRequest("system.indexes"); - insertRequest->documents().push_back(index); - connection.sendRequest(*insertRequest); - - return getLastErrorDoc(connection); -} - - -Document::Ptr Database::getLastErrorDoc(Connection& connection) const -{ - Document::Ptr errorDoc; - - Poco::SharedPtr request = createCommand(); - request->setNumberToReturn(1); - request->selector().add("getLastError", 1); - - Poco::MongoDB::ResponseMessage response; - connection.sendRequest(*request, response); - - if (response.documents().size() > 0) - { - errorDoc = response.documents()[0]; - } - - return errorDoc; -} - - -std::string Database::getLastError(Connection& connection) const -{ - Document::Ptr errorDoc = getLastErrorDoc(connection); - if (!errorDoc.isNull() && errorDoc->isType("err")) - { - return errorDoc->get("err"); - } - - return ""; -} - - -Poco::SharedPtr Database::createCountRequest(const std::string& collectionName) const -{ - Poco::SharedPtr request = createCommand(); - request->setNumberToReturn(1); - request->selector().add("count", collectionName); - return request; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/DeleteRequest.cpp b/base/poco/MongoDB/src/DeleteRequest.cpp deleted file mode 100644 index ba75beb55fb..00000000000 --- a/base/poco/MongoDB/src/DeleteRequest.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// DeleteRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: DeleteRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/DeleteRequest.h" - - -namespace Poco { -namespace MongoDB { - - -DeleteRequest::DeleteRequest(const std::string& collectionName, DeleteRequest::Flags flags): - RequestMessage(MessageHeader::OP_DELETE), - _flags(flags), - _fullCollectionName(collectionName), - _selector() -{ -} - - -DeleteRequest::DeleteRequest(const std::string& collectionName, bool justOne): - RequestMessage(MessageHeader::OP_DELETE), - _flags(justOne ? DELETE_SINGLE_REMOVE : DELETE_DEFAULT), - _fullCollectionName(collectionName), - _selector() -{ -} - - -DeleteRequest::~DeleteRequest() -{ -} - - -void DeleteRequest::buildRequest(BinaryWriter& writer) -{ - writer << 0; // 0 - reserved for future use - BSONWriter(writer).writeCString(_fullCollectionName); - writer << _flags; - _selector.write(writer); -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/Document.cpp b/base/poco/MongoDB/src/Document.cpp deleted file mode 100644 index f7c5c9c5dc6..00000000000 --- a/base/poco/MongoDB/src/Document.cpp +++ /dev/null @@ -1,227 +0,0 @@ -// -// Document.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Document -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Document.h" -#include "Poco/MongoDB/Binary.h" -#include "Poco/MongoDB/ObjectId.h" -#include "Poco/MongoDB/Array.h" -#include "Poco/MongoDB/RegularExpression.h" -#include "Poco/MongoDB/JavaScriptCode.h" -#include <sstream> - - -namespace Poco { -namespace MongoDB { - - -Document::Document() -{ -} - - -Document::~Document() -{ -} - - -Array& Document::addNewArray(const std::string& name) -{ - Array::Ptr newArray = new Array(); - add(name, newArray); - return *newArray; -} - - -Element::Ptr Document::get(const std::string& name) const -{ - Element::Ptr element; - - ElementSet::const_iterator it = std::find_if(_elements.begin(), _elements.end(), ElementFindByName(name)); - if (it != _elements.end()) - { - return *it; - } - - return element; -} - - -Int64 Document::getInteger(const std::string& name) const -{ - Element::Ptr element = get(name); - if (element.isNull()) throw Poco::NotFoundException(name); - - if (ElementTraits<double>::TypeId == element->type()) - { - ConcreteElement<double>* concrete = dynamic_cast<ConcreteElement<double>*>(element.get()); - if (concrete) return static_cast<Int64>(concrete->value()); - } - else if (ElementTraits<Int32>::TypeId == element->type()) - { - ConcreteElement<Int32>* concrete = dynamic_cast<ConcreteElement<Int32>*>(element.get()); - if (concrete) return concrete->value(); - } - else if (ElementTraits<Int64>::TypeId == element->type()) - { - ConcreteElement<Int64>* concrete = dynamic_cast<ConcreteElement<Int64>*>(element.get()); - if (concrete) return concrete->value(); - } - throw Poco::BadCastException("Invalid type mismatch!"); -} - - -void Document::read(BinaryReader& reader) -{ - int size; - reader >> size; - - unsigned char type; - reader >> type; - - while (type != '\0') - { - Element::Ptr element; - - std::string name = BSONReader(reader).readCString(); - - switch (type) - { - case ElementTraits<double>::TypeId: - element = new ConcreteElement<double>(name, 0); - break; - case ElementTraits<Int32>::TypeId: - element = new ConcreteElement<Int32>(name, 0); - break; - case ElementTraits<std::string>::TypeId: - element = new ConcreteElement<std::string>(name, ""); - break; - case ElementTraits<Document::Ptr>::TypeId: - element = new ConcreteElement<Document::Ptr>(name, new Document); - break; - case ElementTraits<Array::Ptr>::TypeId: - element = new ConcreteElement<Array::Ptr>(name, new Array); - break; - case ElementTraits<Binary::Ptr>::TypeId: - element = new ConcreteElement<Binary::Ptr>(name, new Binary); - break; - case ElementTraits<ObjectId::Ptr>::TypeId: - element = new ConcreteElement<ObjectId::Ptr>(name, new ObjectId); - break; - case ElementTraits<bool>::TypeId: - element = new ConcreteElement<bool>(name, false); - break; - case ElementTraits<Poco::Timestamp>::TypeId: - element = new ConcreteElement<Poco::Timestamp>(name, Poco::Timestamp()); - break; - case ElementTraits<BSONTimestamp>::TypeId: - element = new ConcreteElement<BSONTimestamp>(name, BSONTimestamp()); - break; - case ElementTraits<NullValue>::TypeId: - element = new ConcreteElement<NullValue>(name, NullValue(0)); - break; - case ElementTraits<RegularExpression::Ptr>::TypeId: - element = new ConcreteElement<RegularExpression::Ptr>(name, new RegularExpression()); - break; - case ElementTraits<JavaScriptCode::Ptr>::TypeId: - element = new ConcreteElement<JavaScriptCode::Ptr>(name, new JavaScriptCode()); - break; - case ElementTraits<Int64>::TypeId: - element = new ConcreteElement<Int64>(name, 0); - break; - default: - { - std::stringstream ss; - ss << "Element " << name << " contains an unsupported type 0x" << std::hex << (int) type; - throw Poco::NotImplementedException(ss.str()); - } - //TODO: x0F -> JavaScript code with scope - // xFF -> Min Key - // x7F -> Max Key - } - - element->read(reader); - _elements.push_back(element); - - reader >> type; - } -} - - -std::string Document::toString(int indent) const -{ - std::ostringstream oss; - - oss << '{'; - - if (indent > 0) oss << std::endl; - - - for (ElementSet::const_iterator it = _elements.begin(); it != _elements.end(); ++it) - { - if (it != _elements.begin()) - { - oss << ','; - if (indent > 0) oss << std::endl; - } - - for (int i = 0; i < indent; ++i) oss << ' '; - - oss << '"' << (*it)->name() << '"'; - oss << (indent > 0 ? " : " : ":"); - - oss << (*it)->toString(indent > 0 ? indent + 2 : 0); - } - - if (indent > 0) - { - oss << std::endl; - if (indent >= 2) indent -= 2; - - for (int i = 0; i < indent; ++i) oss << ' '; - } - - oss << '}'; - - return oss.str(); -} - - -void Document::write(BinaryWriter& writer) -{ - if (_elements.empty()) - { - writer << 5; - } - else - { - std::stringstream sstream; - Poco::BinaryWriter tempWriter(sstream, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - for (ElementSet::iterator it = _elements.begin(); it != _elements.end(); ++it) - { - tempWriter << static_cast<unsigned char>((*it)->type()); - BSONWriter(tempWriter).writeCString((*it)->name()); - Element::Ptr element = *it; - element->write(tempWriter); - } - tempWriter.flush(); - - Poco::Int32 len = static_cast<Poco::Int32>(5 + sstream.tellp()); /* 5 = sizeof(len) + 0-byte */ - writer << len; - writer.writeRaw(sstream.str()); - } - writer << '\0'; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/Element.cpp b/base/poco/MongoDB/src/Element.cpp deleted file mode 100644 index f91ce264493..00000000000 --- a/base/poco/MongoDB/src/Element.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Element.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Element -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Element.h" - - -namespace Poco { -namespace MongoDB { - - -Element::Element(const std::string& name) : _name(name) -{ -} - - -Element::~Element() -{ -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/GetMoreRequest.cpp b/base/poco/MongoDB/src/GetMoreRequest.cpp deleted file mode 100644 index 2c1f6909eb7..00000000000 --- a/base/poco/MongoDB/src/GetMoreRequest.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// -// GetMoreRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: GetMoreRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/GetMoreRequest.h" -#include "Poco/MongoDB/Element.h" - - -namespace Poco { -namespace MongoDB { - - -GetMoreRequest::GetMoreRequest(const std::string& collectionName, Int64 cursorID): - RequestMessage(MessageHeader::OP_GET_MORE), - _fullCollectionName(collectionName), - _numberToReturn(100), - _cursorID(cursorID) -{ -} - - -GetMoreRequest::~GetMoreRequest() -{ -} - - -void GetMoreRequest::buildRequest(BinaryWriter& writer) -{ - writer << 0; // 0 - reserved for future use - BSONWriter(writer).writeCString(_fullCollectionName); - writer << _numberToReturn; - writer << _cursorID; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/InsertRequest.cpp b/base/poco/MongoDB/src/InsertRequest.cpp deleted file mode 100644 index 65be5654b3e..00000000000 --- a/base/poco/MongoDB/src/InsertRequest.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// -// InsertRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: InsertRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/InsertRequest.h" - - -namespace Poco { -namespace MongoDB { - - -InsertRequest::InsertRequest(const std::string& collectionName, Flags flags): - RequestMessage(MessageHeader::OP_INSERT), - _flags(flags), - _fullCollectionName(collectionName) -{ -} - - -InsertRequest::~InsertRequest() -{ -} - - -void InsertRequest::buildRequest(BinaryWriter& writer) -{ - poco_assert (!_documents.empty()); - - writer << _flags; - BSONWriter bsonWriter(writer); - bsonWriter.writeCString(_fullCollectionName); - for (Document::Vector::iterator it = _documents.begin(); it != _documents.end(); ++it) - { - bsonWriter.write(*it); - } -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/JavaScriptCode.cpp b/base/poco/MongoDB/src/JavaScriptCode.cpp deleted file mode 100644 index 41f5fcabe6b..00000000000 --- a/base/poco/MongoDB/src/JavaScriptCode.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// JavaScriptCode.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: JavaScriptCode -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/JavaScriptCode.h" - - -namespace Poco { -namespace MongoDB { - - -JavaScriptCode::JavaScriptCode() -{ - -} - - -JavaScriptCode::~JavaScriptCode() -{ -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/KillCursorsRequest.cpp b/base/poco/MongoDB/src/KillCursorsRequest.cpp deleted file mode 100644 index 448002aa16a..00000000000 --- a/base/poco/MongoDB/src/KillCursorsRequest.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// -// KillCursorsRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: KillCursorsRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/KillCursorsRequest.h" - - -namespace Poco { -namespace MongoDB { - - -KillCursorsRequest::KillCursorsRequest(): - RequestMessage(MessageHeader::OP_KILL_CURSORS) -{ -} - - -KillCursorsRequest::~KillCursorsRequest() -{ -} - - -void KillCursorsRequest::buildRequest(BinaryWriter& writer) -{ - writer << 0; // 0 - reserved for future use - writer << static_cast(_cursors.size()); - for (std::vector::iterator it = _cursors.begin(); it != _cursors.end(); ++it) - { - writer << *it; - } -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/Message.cpp b/base/poco/MongoDB/src/Message.cpp deleted file mode 100644 index 7b1cb23bab6..00000000000 --- a/base/poco/MongoDB/src/Message.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// -// Message.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: Message -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Message.h" - - -namespace Poco { -namespace MongoDB { - - -Message::Message(MessageHeader::OpCode opcode): - _header(opcode) -{ -} - - -Message::~Message() -{ -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/MessageHeader.cpp b/base/poco/MongoDB/src/MessageHeader.cpp deleted file mode 100644 index b472bcec465..00000000000 --- a/base/poco/MongoDB/src/MessageHeader.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// -// MessageHeader.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: MessageHeader -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/Message.h" -#include "Poco/Exception.h" - - -namespace Poco { -namespace MongoDB { - - -MessageHeader::MessageHeader(OpCode opCode): - _messageLength(0), - _requestID(0), - _responseTo(0), - _opCode(opCode) -{ -} - - -MessageHeader::~MessageHeader() -{ -} - - -void MessageHeader::read(BinaryReader& reader) -{ - reader >> _messageLength; - reader >> _requestID; - reader >> _responseTo; - - Int32 opCode; - reader >> opCode; - _opCode = static_cast(opCode); - - if (!reader.good()) - { - throw IOException("Failed to read from socket"); - } -} - - -void MessageHeader::write(BinaryWriter& writer) -{ - writer << _messageLength; - writer << _requestID; - writer << _responseTo; - writer << static_cast(_opCode); -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/ObjectId.cpp b/base/poco/MongoDB/src/ObjectId.cpp deleted file mode 100644 index e360d129843..00000000000 --- a/base/poco/MongoDB/src/ObjectId.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// -// ObjectId.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: ObjectId -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/ObjectId.h" -#include "Poco/Format.h" -#include - - -namespace Poco { -namespace MongoDB { - - -ObjectId::ObjectId() -{ - std::memset(_id, 0, sizeof(_id)); -} - - -ObjectId::ObjectId(const std::string& id) -{ - poco_assert_dbg(id.size() == 24); - - const char* p = id.c_str(); - for (std::size_t i = 0; i < 12; ++i) - { - _id[i] = fromHex(p); - p += 2; - } -} - - -ObjectId::ObjectId(const ObjectId& copy) -{ - std::memcpy(_id, copy._id, sizeof(_id)); -} - - -ObjectId::~ObjectId() -{ -} - - -std::string ObjectId::toString(const std::string& fmt) const -{ - std::string s; - - for (int i = 0; i < 12; ++i) - { - s += Poco::format(fmt, (unsigned int) _id[i]); - } - return s; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/OpMsgCursor.cpp b/base/poco/MongoDB/src/OpMsgCursor.cpp deleted file mode 100644 index 6abd45ecf76..00000000000 --- a/base/poco/MongoDB/src/OpMsgCursor.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// -// OpMsgCursor.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: OpMsgCursor -// -// Copyright (c) 2022, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/OpMsgCursor.h" -#include "Poco/MongoDB/Array.h" - -// -// NOTE: -// -// MongoDB specification indicates that the flag MSG_EXHAUST_ALLOWED shall be -// used in the request when the receiver is ready to receive multiple messages -// without sending additional requests in between. Sender (MongoDB) indicates -// that more messages follow with flag MSG_MORE_TO_COME. -// -// It seems that this does not work properly. MSG_MORE_TO_COME is set and reading -// next messages sometimes works, however often the data is missing in response -// or the message header contains wrong message length and reading blocks. -// Opcode in the header is correct. -// -// Using MSG_EXHAUST_ALLOWED is therefore currently disabled. 
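// (For reference: the OP_MSG flagBits this note refers to are defined by the
//  MongoDB wire protocol specification as
//
//      checksumPresent = 1 << 0     a CRC-32C checksum trails the message
//      moreToCome      = 1 << 1     another message follows; do not reply
//      exhaustAllowed  = 1 << 16    receiver accepts multiple moreToCome replies
//
//  and are mirrored here by the OpMsgMessage::MSG_MORE_TO_COME and
//  OpMsgMessage::MSG_EXHAUST_ALLOWED constants used below.)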
-// -// It seems that related JIRA ticket is: -// -// https://jira.mongodb.org/browse/SERVER-57297 -// -// https://github.com/mongodb/specifications/blob/master/source/message/OP_MSG.rst -// - -#define MONGODB_EXHAUST_ALLOWED_WORKS false - -namespace Poco { -namespace MongoDB { - - -[[ maybe_unused ]] static const std::string keyCursor {"cursor"}; -[[ maybe_unused ]] static const std::string keyFirstBatch {"firstBatch"}; -[[ maybe_unused ]] static const std::string keyNextBatch {"nextBatch"}; - -static Poco::Int64 cursorIdFromResponse(const MongoDB::Document& doc); - - -OpMsgCursor::OpMsgCursor(const std::string& db, const std::string& collection): -#if MONGODB_EXHAUST_ALLOWED_WORKS - _query(db, collection, OpMsgMessage::MSG_EXHAUST_ALLOWED) -#else - _query(db, collection) -#endif -{ -} - -OpMsgCursor::~OpMsgCursor() -{ - try - { - poco_assert_dbg(_cursorID == 0); - } - catch (...) - { - } -} - - -void OpMsgCursor::setEmptyFirstBatch(bool empty) -{ - _emptyFirstBatch = empty; -} - - -bool OpMsgCursor::emptyFirstBatch() const -{ - return _emptyFirstBatch; -} - - -void OpMsgCursor::setBatchSize(Int32 batchSize) -{ - _batchSize = batchSize; -} - - -Int32 OpMsgCursor::batchSize() const -{ - return _batchSize; -} - - -OpMsgMessage& OpMsgCursor::next(Connection& connection) -{ - if (_cursorID == 0) - { - _response.clear(); - - if (_emptyFirstBatch || _batchSize > 0) - { - Int32 bsize = _emptyFirstBatch ? 0 : _batchSize; - if (_query.commandName() == OpMsgMessage::CMD_FIND) - { - _query.body().add("batchSize", bsize); - } - else if (_query.commandName() == OpMsgMessage::CMD_AGGREGATE) - { - auto& cursorDoc = _query.body().addNewDocument("cursor"); - cursorDoc.add("batchSize", bsize); - } - } - - connection.sendRequest(_query, _response); - - const auto& rdoc = _response.body(); - _cursorID = cursorIdFromResponse(rdoc); - } - else - { -#if MONGODB_EXHAUST_ALLOWED_WORKS - std::cout << "Response flags: " << _response.flags() << std::endl; - if (_response.flags() & OpMsgMessage::MSG_MORE_TO_COME) - { - std::cout << "More to come. 
Reading more response: " << std::endl; - _response.clear(); - connection.readResponse(_response); - } - else -#endif - { - _response.clear(); - _query.setCursor(_cursorID, _batchSize); - connection.sendRequest(_query, _response); - } - } - - const auto& rdoc = _response.body(); - _cursorID = cursorIdFromResponse(rdoc); - - return _response; -} - - -void OpMsgCursor::kill(Connection& connection) -{ - _response.clear(); - if (_cursorID != 0) - { - _query.setCommandName(OpMsgMessage::CMD_KILL_CURSORS); - - MongoDB::Array::Ptr cursors = new MongoDB::Array(); - cursors->add(_cursorID); - _query.body().add("cursors", cursors); - - connection.sendRequest(_query, _response); - - const auto killed = _response.body().get("cursorsKilled", nullptr); - if (!killed || killed->size() != 1 || killed->get(0, -1) != _cursorID) - { - throw Poco::ProtocolException("Cursor not killed as expected: " + std::to_string(_cursorID)); - } - - _cursorID = 0; - _query.clear(); - _response.clear(); - } -} - - -Poco::Int64 cursorIdFromResponse(const MongoDB::Document& doc) -{ - Poco::Int64 id {0}; - auto cursorDoc = doc.get(keyCursor, nullptr); - if(cursorDoc) - { - id = cursorDoc->get("id", 0); - } - return id; -} - - -} } // Namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/OpMsgMessage.cpp b/base/poco/MongoDB/src/OpMsgMessage.cpp deleted file mode 100644 index 2b55772ca59..00000000000 --- a/base/poco/MongoDB/src/OpMsgMessage.cpp +++ /dev/null @@ -1,412 +0,0 @@ -// -// OpMsgMessage.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: OpMsgMessage -// -// Copyright (c) 2022, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - -#include "Poco/MongoDB/OpMsgMessage.h" -#include "Poco/MongoDB/MessageHeader.h" -#include "Poco/MongoDB/Array.h" -#include "Poco/StreamCopier.h" -#include "Poco/Logger.h" - -#define POCO_MONGODB_DUMP false - -namespace Poco { -namespace MongoDB { - -// Query and write -const std::string OpMsgMessage::CMD_INSERT { "insert" }; -const std::string OpMsgMessage::CMD_DELETE { "delete" }; -const std::string OpMsgMessage::CMD_UPDATE { "update" }; -const std::string OpMsgMessage::CMD_FIND { "find" }; -const std::string OpMsgMessage::CMD_FIND_AND_MODIFY { "findAndModify" }; -const std::string OpMsgMessage::CMD_GET_MORE { "getMore" }; - -// Aggregation -const std::string OpMsgMessage::CMD_AGGREGATE { "aggregate" }; -const std::string OpMsgMessage::CMD_COUNT { "count" }; -const std::string OpMsgMessage::CMD_DISTINCT { "distinct" }; -const std::string OpMsgMessage::CMD_MAP_REDUCE { "mapReduce" }; - -// Replication and administration -const std::string OpMsgMessage::CMD_HELLO { "hello" }; -const std::string OpMsgMessage::CMD_REPL_SET_GET_STATUS { "replSetGetStatus" }; -const std::string OpMsgMessage::CMD_REPL_SET_GET_CONFIG { "replSetGetConfig" }; - -const std::string OpMsgMessage::CMD_CREATE { "create" }; -const std::string OpMsgMessage::CMD_CREATE_INDEXES { "createIndexes" }; -const std::string OpMsgMessage::CMD_DROP { "drop" }; -const std::string OpMsgMessage::CMD_DROP_DATABASE { "dropDatabase" }; -const std::string OpMsgMessage::CMD_KILL_CURSORS { "killCursors" }; -const std::string OpMsgMessage::CMD_LIST_DATABASES { "listDatabases" }; -const std::string OpMsgMessage::CMD_LIST_INDEXES { "listIndexes" }; - -// Diagnostic -const std::string OpMsgMessage::CMD_BUILD_INFO { "buildInfo" }; -const std::string OpMsgMessage::CMD_COLL_STATS { "collStats" }; -const std::string OpMsgMessage::CMD_DB_STATS { "dbStats" }; -const std::string 
OpMsgMessage::CMD_HOST_INFO { "hostInfo" }; - - -static const std::string& commandIdentifier(const std::string& command); - /// Commands have different names for the payload that is sent in a separate section - - -static const std::string keyCursor {"cursor"}; -static const std::string keyFirstBatch {"firstBatch"}; -static const std::string keyNextBatch {"nextBatch"}; - - -OpMsgMessage::OpMsgMessage() : - Message(MessageHeader::OP_MSG) -{ -} - - -OpMsgMessage::OpMsgMessage(const std::string& databaseName, const std::string& collectionName, UInt32 flags) : - Message(MessageHeader::OP_MSG), - _databaseName(databaseName), - _collectionName(collectionName), - _flags(flags) -{ -} - - -OpMsgMessage::~OpMsgMessage() -{ -} - -const std::string& OpMsgMessage::databaseName() const -{ - return _databaseName; -} - - -const std::string& OpMsgMessage::collectionName() const -{ - return _collectionName; -} - - -void OpMsgMessage::setCommandName(const std::string& command) -{ - _commandName = command; - _body.clear(); - - // IMPORTANT: Command name must be first - if (_collectionName.empty()) - { - // Collection is not specified. It is assumed that this particular command does - // not need it. - _body.add(_commandName, Int32(1)); - } - else - { - _body.add(_commandName, _collectionName); - } - _body.add("$db", _databaseName); -} - - -void OpMsgMessage::setCursor(Poco::Int64 cursorID, Poco::Int32 batchSize) -{ - _commandName = OpMsgMessage::CMD_GET_MORE; - _body.clear(); - - // IMPORTANT: Command name must be first - _body.add(_commandName, cursorID); - _body.add("$db", _databaseName); - _body.add("collection", _collectionName); - if (batchSize > 0) - { - _body.add("batchSize", batchSize); - } -} - - -const std::string& OpMsgMessage::commandName() const -{ - return _commandName; -} - - -void OpMsgMessage::setAcknowledgedRequest(bool ack) -{ - const auto& id = commandIdentifier(_commandName); - if (id.empty()) - return; - - _acknowledged = ack; - - auto writeConcern = _body.get("writeConcern", nullptr); - if (writeConcern) - writeConcern->remove("w"); - - if (ack) - { - _flags = _flags & (~MSG_MORE_TO_COME); - } - else - { - _flags = _flags | MSG_MORE_TO_COME; - if (!writeConcern) - _body.addNewDocument("writeConcern").add("w", 0); - else - writeConcern->add("w", 0); - } - -} - - -bool OpMsgMessage::acknowledgedRequest() const -{ - return _acknowledged; -} - - -UInt32 OpMsgMessage::flags() const -{ - return _flags; -} - - -Document& OpMsgMessage::body() -{ - return _body; -} - - -const Document& OpMsgMessage::body() const -{ - return _body; -} - - -Document::Vector& OpMsgMessage::documents() -{ - return _documents; -} - - -const Document::Vector& OpMsgMessage::documents() const -{ - return _documents; -} - - -bool OpMsgMessage::responseOk() const -{ - Poco::Int64 ok {false}; - if (_body.exists("ok")) - { - ok = _body.getInteger("ok"); - } - return (ok != 0); -} - - -void OpMsgMessage::clear() -{ - _flags = MSG_FLAGS_DEFAULT; - _commandName.clear(); - _body.clear(); - _documents.clear(); -} - - -void OpMsgMessage::send(std::ostream& ostr) -{ - BinaryWriter socketWriter(ostr, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - - // Serialise the body - std::stringstream ss; - BinaryWriter writer(ss, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - writer << _flags; - - writer << PAYLOAD_TYPE_0; - _body.write(writer); - - if (!_documents.empty()) - { - // Serialise attached documents - - std::stringstream ssdoc; - BinaryWriter wdoc(ssdoc, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - for (auto& doc: _documents) - { - 
doc->write(wdoc); - } - wdoc.flush(); - - const std::string& identifier = commandIdentifier(_commandName); - const Poco::Int32 size = static_cast(sizeof(size) + identifier.size() + 1 + ssdoc.tellp()); - writer << PAYLOAD_TYPE_1; - writer << size; - writer.writeCString(identifier.c_str()); - StreamCopier::copyStream(ssdoc, ss); - } - writer.flush(); - -#if POCO_MONGODB_DUMP - const std::string section = ss.str(); - std::string dump; - Logger::formatDump(dump, section.data(), section.length()); - std::cout << dump << std::endl; -#endif - - messageLength(static_cast(ss.tellp())); - - _header.write(socketWriter); - StreamCopier::copyStream(ss, ostr); - - ostr.flush(); -} - - -void OpMsgMessage::read(std::istream& istr) -{ - std::string message; - { - BinaryReader reader(istr, BinaryReader::LITTLE_ENDIAN_BYTE_ORDER); - _header.read(reader); - - poco_assert_dbg(_header.opCode() == _header.OP_MSG); - - const std::streamsize remainingSize {_header.getMessageLength() - _header.MSG_HEADER_SIZE }; - message.reserve(remainingSize); - -#if POCO_MONGODB_DUMP - std::cout - << "Message hdr: " << _header.getMessageLength() << " " << remainingSize << " " - << _header.opCode() << " " << _header.getRequestID() << " " << _header.responseTo() - << std::endl; -#endif - - reader.readRaw(remainingSize, message); - -#if POCO_MONGODB_DUMP - std::string dump; - Logger::formatDump(dump, message.data(), message.length()); - std::cout << dump << std::endl; -#endif - } - // Read complete message and then interpret it. - - std::istringstream msgss(message); - BinaryReader reader(msgss, BinaryReader::LITTLE_ENDIAN_BYTE_ORDER); - - Poco::UInt8 payloadType {0xFF}; - - reader >> _flags; - reader >> payloadType; - poco_assert_dbg(payloadType == PAYLOAD_TYPE_0); - - _body.read(reader); - - // Read next sections from the buffer - while (msgss.good()) - { - // NOTE: Not tested yet with database, because it returns everything in the body. - // Does MongoDB ever return documents as Payload type 1? - reader >> payloadType; - if (!msgss.good()) - { - break; - } - poco_assert_dbg(payloadType == PAYLOAD_TYPE_1); -#if POCO_MONGODB_DUMP - std::cout << "section payload: " << payloadType << std::endl; -#endif - - Poco::Int32 sectionSize {0}; - reader >> sectionSize; - poco_assert_dbg(sectionSize > 0); - -#if POCO_MONGODB_DUMP - std::cout << "section size: " << sectionSize << std::endl; -#endif - std::streamoff offset = sectionSize - sizeof(sectionSize); - std::streampos endOfSection = msgss.tellg() + offset; - - std::string identifier; - reader.readCString(identifier); -#if POCO_MONGODB_DUMP - std::cout << "section identifier: " << identifier << std::endl; -#endif - - // Loop to read documents from this section. - while (msgss.tellg() < endOfSection) - { -#if POCO_MONGODB_DUMP - std::cout << "section doc: " << msgss.tellg() << " " << endOfSection << std::endl; -#endif - Document::Ptr doc = new Document(); - doc->read(reader); - _documents.push_back(doc); - if (msgss.tellg() < 0) - { - break; - } - } - } - - // Extract documents from the cursor batch if they are there. 
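// For reference, a cursor-bearing reply (find / aggregate / getMore) carries
// the documented shape below (abridged); this is exactly what the following
// code walks via keyCursor, keyFirstBatch and keyNextBatch:
//
//     {
//         "cursor" : {
//             "id"         : <Int64>,             // 0 once the cursor is exhausted
//             "ns"         : "<db>.<collection>",
//             "firstBatch" : [ <documents...> ]   // "nextBatch" on getMore replies
//         },
//         "ok" : 1.0
//     }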
- MongoDB::Array::Ptr batch; - auto curDoc = _body.get(keyCursor, nullptr); - if (curDoc) - { - batch = curDoc->get(keyFirstBatch, nullptr); - if (!batch) - { - batch = curDoc->get(keyNextBatch, nullptr); - } - } - if (batch) - { - for(std::size_t i = 0; i < batch->size(); i++) - { - const auto& d = batch->get(i, nullptr); - if (d) - { - _documents.push_back(d); - } - } - } - -} - -const std::string& commandIdentifier(const std::string& command) -{ - // Names of identifiers for commands that send bulk documents in the request - // The identifier is set in the section type 1. - static std::map identifiers { - { OpMsgMessage::CMD_INSERT, "documents" }, - { OpMsgMessage::CMD_DELETE, "deletes" }, - { OpMsgMessage::CMD_UPDATE, "updates" }, - - // Not sure if create index can send document section - { OpMsgMessage::CMD_CREATE_INDEXES, "indexes" } - }; - - const auto i = identifiers.find(command); - if (i != identifiers.end()) - { - return i->second; - } - - // This likely means that documents are incorrectly set for a command - // that does not send list of documents in section type 1. - static const std::string emptyIdentifier; - return emptyIdentifier; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/QueryRequest.cpp b/base/poco/MongoDB/src/QueryRequest.cpp deleted file mode 100644 index 6d7d23a8456..00000000000 --- a/base/poco/MongoDB/src/QueryRequest.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// -// QueryRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: QueryRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/QueryRequest.h" - - -namespace Poco { -namespace MongoDB { - - -QueryRequest::QueryRequest(const std::string& collectionName, QueryRequest::Flags flags): - RequestMessage(MessageHeader::OP_QUERY), - _flags(flags), - _fullCollectionName(collectionName), - _numberToSkip(0), - _numberToReturn(100), - _selector(), - _returnFieldSelector() -{ -} - - -QueryRequest::~QueryRequest() -{ -} - - -void QueryRequest::buildRequest(BinaryWriter& writer) -{ - writer << _flags; - BSONWriter(writer).writeCString(_fullCollectionName); - writer << _numberToSkip; - writer << _numberToReturn; - _selector.write(writer); - - if (!_returnFieldSelector.empty()) - { - _returnFieldSelector.write(writer); - } -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/RegularExpression.cpp b/base/poco/MongoDB/src/RegularExpression.cpp deleted file mode 100644 index 5f7eb6bb51b..00000000000 --- a/base/poco/MongoDB/src/RegularExpression.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// -// RegularExpression.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: RegularExpression -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/RegularExpression.h" -#include <sstream> - - -namespace Poco { -namespace MongoDB { - - -RegularExpression::RegularExpression() -{ -} - - -RegularExpression::RegularExpression(const std::string& pattern, const std::string& options): - _pattern(pattern), - _options(options) -{ -} - - -RegularExpression::~RegularExpression() -{ -} - - -SharedPtr<Poco::RegularExpression> RegularExpression::createRE() const -{ - int options = 0; - for (std::string::const_iterator optIt = _options.begin(); optIt != _options.end(); ++optIt) - { - switch (*optIt) - { - case 'i': // Case Insensitive - options |= Poco::RegularExpression::RE_CASELESS; - break; - case 'm': // Multiline matching - options |= Poco::RegularExpression::RE_MULTILINE; - break; - case 'x': // Verbose mode - //No equivalent in Poco - break; - case 'l': // \w \W Locale dependent - //No equivalent in Poco - break; - case 's': // Dotall mode - options |= Poco::RegularExpression::RE_DOTALL; - break; - case 'u': // \w \W Unicode - //No equivalent in Poco - break; - } - } - return new Poco::RegularExpression(_pattern, options); -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/ReplicaSet.cpp b/base/poco/MongoDB/src/ReplicaSet.cpp deleted file mode 100644 index fce2f2bdada..00000000000 --- a/base/poco/MongoDB/src/ReplicaSet.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// -// ReplicaSet.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: ReplicaSet -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/ReplicaSet.h" -#include "Poco/MongoDB/QueryRequest.h" -#include "Poco/MongoDB/ResponseMessage.h" - - -namespace Poco { -namespace MongoDB { - - -ReplicaSet::ReplicaSet(const std::vector<Net::SocketAddress> &addresses): - _addresses(addresses) -{ -} - - -ReplicaSet::~ReplicaSet() -{ -} - - -Connection::Ptr ReplicaSet::findMaster() -{ - Connection::Ptr master; - - for (std::vector<Net::SocketAddress>::iterator it = _addresses.begin(); it != _addresses.end(); ++it) - { - master = isMaster(*it); - if (!master.isNull()) - { - break; - } - } - - return master; -} - - -Connection::Ptr ReplicaSet::isMaster(const Net::SocketAddress& address) -{ - Connection::Ptr conn = new Connection(); - - try - { - conn->connect(address); - - QueryRequest request("admin.$cmd"); - request.setNumberToReturn(1); - request.selector().add("isMaster", 1); - - ResponseMessage response; - conn->sendRequest(request, response); - - if (response.documents().size() > 0) - { - Document::Ptr doc = response.documents()[0]; - if (doc->get<bool>("ismaster")) - { - return conn; - } - else if (doc->exists("primary")) - { - return isMaster(Net::SocketAddress(doc->get<std::string>("primary"))); - } - } - } - catch (...) - { - conn = 0; - } - - return 0; -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/RequestMessage.cpp b/base/poco/MongoDB/src/RequestMessage.cpp deleted file mode 100644 index 999ed8a6ba1..00000000000 --- a/base/poco/MongoDB/src/RequestMessage.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// -// RequestMessage.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: RequestMessage -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/RequestMessage.h" -#include "Poco/Net/SocketStream.h" -#include "Poco/StreamCopier.h" - - -namespace Poco { -namespace MongoDB { - - -RequestMessage::RequestMessage(MessageHeader::OpCode opcode): - Message(opcode) -{ -} - - -RequestMessage::~RequestMessage() -{ -} - - -void RequestMessage::send(std::ostream& ostr) -{ - std::stringstream ss; - BinaryWriter requestWriter(ss, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - buildRequest(requestWriter); - requestWriter.flush(); - - messageLength(static_cast(ss.tellp())); - - BinaryWriter socketWriter(ostr, BinaryWriter::LITTLE_ENDIAN_BYTE_ORDER); - _header.write(socketWriter); - StreamCopier::copyStream(ss, ostr); - ostr.flush(); -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/ResponseMessage.cpp b/base/poco/MongoDB/src/ResponseMessage.cpp deleted file mode 100644 index e8216767494..00000000000 --- a/base/poco/MongoDB/src/ResponseMessage.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// -// ResponseMessage.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: ResponseMessage -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/ResponseMessage.h" -#include "Poco/Net/SocketStream.h" - - -namespace Poco { -namespace MongoDB { - - -ResponseMessage::ResponseMessage(): - Message(MessageHeader::OP_REPLY), - _responseFlags(0), - _cursorID(0), - _startingFrom(0), - _numberReturned(0) -{ -} - - -ResponseMessage::ResponseMessage(const Int64& cursorID): - Message(MessageHeader::OP_REPLY), - _responseFlags(0), - _cursorID(cursorID), - _startingFrom(0), - _numberReturned(0) -{ -} - - -ResponseMessage::~ResponseMessage() -{ -} - - -void ResponseMessage::clear() -{ - _responseFlags = 0; - _startingFrom = 0; - _cursorID = 0; - _numberReturned = 0; - _documents.clear(); -} - - -void ResponseMessage::read(std::istream& istr) -{ - clear(); - - BinaryReader reader(istr, BinaryReader::LITTLE_ENDIAN_BYTE_ORDER); - - _header.read(reader); - - reader >> _responseFlags; - reader >> _cursorID; - reader >> _startingFrom; - reader >> _numberReturned; - - for (int i = 0; i < _numberReturned; ++i) - { - Document::Ptr doc = new Document(); - doc->read(reader); - _documents.push_back(doc); - } -} - - -} } // namespace Poco::MongoDB diff --git a/base/poco/MongoDB/src/UpdateRequest.cpp b/base/poco/MongoDB/src/UpdateRequest.cpp deleted file mode 100644 index 7477fc752d5..00000000000 --- a/base/poco/MongoDB/src/UpdateRequest.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// UpdateRequest.cpp -// -// Library: MongoDB -// Package: MongoDB -// Module: UpdateRequest -// -// Copyright (c) 2012, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/MongoDB/UpdateRequest.h" - - -namespace Poco { -namespace MongoDB { - - -UpdateRequest::UpdateRequest(const std::string& collectionName, UpdateRequest::Flags flags): - RequestMessage(MessageHeader::OP_UPDATE), - _flags(flags), - _fullCollectionName(collectionName), - _selector(), - _update() -{ -} - - -UpdateRequest::~UpdateRequest() -{ -} - - -void UpdateRequest::buildRequest(BinaryWriter& writer) -{ - writer << 0; // 0 - reserved for future use - BSONWriter(writer).writeCString(_fullCollectionName); - writer << _flags; - _selector.write(writer); - _update.write(writer); -} - - -} } // namespace Poco::MongoDB diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index 834b57757b9..151edeb0a00 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -11,11 +11,6 @@ MongoDB engine is read-only table engine which allows to read data from remote [ Only MongoDB v3.6+ servers are supported. [Seed list(`mongodb+srv`)](https://www.mongodb.com/docs/manual/reference/glossary/#std-term-seed-list) is not yet supported. -:::note -If you're facing troubles, please report the issue, and try to use [the legacy implementation](../../../operations/server-configuration-parameters/settings.md#use_legacy_mongodb_integration). -Keep in mind that it is deprecated, and will be removed in next releases. -::: - ## Creating a Table {#creating-a-table} ``` sql diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index ca4938b1a47..c2abc721882 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -3277,11 +3277,3 @@ Type: UInt64 Default value: 100 Zero means unlimited - -## use_legacy_mongodb_integration - -Use the legacy MongoDB integration implementation. Deprecated. - -Type: Bool - -Default value: `true`. 
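With the legacy implementation deleted, the use_legacy_mongodb_integration switch has nothing left to select, so the hunks that follow also drop the boolean that was threaded through the registration entry points. Judging from the call sites and headers changed in this patch, the simplified declarations come out as:

    void registerTableFunctions();  // was: registerTableFunctions(bool use_legacy_mongodb_integration)
    void registerStorages();        // was: registerStorages(bool use_legacy_mongodb_integration)
    void registerDictionaries();    // was: registerDictionaries(bool use_legacy_mongodb_integration)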
diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 900d9cb8e01..522b9a74cff 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -188,9 +188,9 @@ int mainEntryClickHouseFormat(int argc, char ** argv) registerInterpreters(); registerFunctions(); registerAggregateFunctions(); - registerTableFunctions(false); + registerTableFunctions(); registerDatabases(); - registerStorages(false); + registerStorages(); registerFormats(); std::unordered_set additional_names; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index e6f8ecef097..51c41248dfa 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -110,7 +110,6 @@ namespace ServerSetting extern const ServerSettingsString uncompressed_cache_policy; extern const ServerSettingsUInt64 uncompressed_cache_size; extern const ServerSettingsDouble uncompressed_cache_size_ratio; - extern const ServerSettingsBool use_legacy_mongodb_integration; } namespace ErrorCodes @@ -549,10 +548,10 @@ try /// Don't initialize DateLUT registerFunctions(); registerAggregateFunctions(); - registerTableFunctions(server_settings[ServerSetting::use_legacy_mongodb_integration]); + registerTableFunctions(); registerDatabases(); - registerStorages(server_settings[ServerSetting::use_legacy_mongodb_integration]); - registerDictionaries(server_settings[ServerSetting::use_legacy_mongodb_integration]); + registerStorages(); + registerDictionaries(); registerDisks(/* global_skip_access_check= */ true); registerFormats(); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 68f262079ff..b1b81d8da4c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -279,7 +279,6 @@ namespace ServerSetting extern const ServerSettingsString uncompressed_cache_policy; extern const ServerSettingsUInt64 uncompressed_cache_size; extern const ServerSettingsDouble uncompressed_cache_size_ratio; - extern const ServerSettingsBool use_legacy_mongodb_integration; } } @@ -912,10 +911,10 @@ try registerInterpreters(); registerFunctions(); registerAggregateFunctions(); - registerTableFunctions(server_settings[ServerSetting::use_legacy_mongodb_integration]); + registerTableFunctions(); registerDatabases(); - registerStorages(server_settings[ServerSetting::use_legacy_mongodb_integration]); - registerDictionaries(server_settings[ServerSetting::use_legacy_mongodb_integration]); + registerStorages(); + registerDictionaries(); registerDisks(/* global_skip_access_check= */ false); registerFormats(); registerRemoteFileMetadatas(); diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3627d760d4c..8ecdf0995a5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -433,10 +433,6 @@ dbms_target_link_libraries ( Poco::Redis ) -if (USE_MONGODB) - dbms_target_link_libraries (PUBLIC Poco::MongoDB) -endif() - if (TARGET ch_contrib::mongocxx) dbms_target_link_libraries( PUBLIC diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 2f8e7b6843a..163db07f7b7 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -194,7 +194,6 @@ namespace DB DECLARE(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \ DECLARE(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. 
Only available in ClickHouse Cloud", 0) \ DECLARE(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. Available only in ClickHouse Cloud.", 0) \ - DECLARE(Bool, use_legacy_mongodb_integration, true, "Use the legacy MongoDB integration implementation. Note: it's highly recommended to set this option to false, since legacy implementation will be removed in the future. Please submit any issues you encounter with the new implementation.", 0) \ \ DECLARE(UInt64, prefetch_threadpool_pool_size, 100, "Size of background pool for prefetches for remote object storages", 0) \ DECLARE(UInt64, prefetch_threadpool_queue_size, 1000000, "Number of tasks which is possible to push into prefetches pool", 0) \ diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 0e2e1f8a3f0..874728aeb31 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -5715,7 +5715,7 @@ Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting If enabled, only allow identifiers containing alphanumeric characters and underscores. )", 0) \ DECLARE(Bool, mongodb_throw_on_unsupported_query, true, R"( -If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option does not apply to the legacy implementation or when 'allow_experimental_analyzer=0'. +If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option is not applied when 'allow_experimental_analyzer=0'. )", 0) \ DECLARE(Bool, implicit_select, false, R"( Allow writing simple SELECT queries without the leading SELECT keyword, which makes it simple for calculator-style usage, e.g. `1 + 2` becomes a valid query. 
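The mongodb_throw_on_unsupported_query setting reworded above governs what happens when a WHERE clause cannot be translated into a native MongoDB filter: either fail fast or fall back to a full collection read. A rough sketch of that decision, with illustrative names only (this is not code from the patch):

    // Hypothetical sketch - identifiers are illustrative, not the actual StorageMongoDB code.
    if (!tryBuildMongoDBFilter(where_clause, mongo_filter))
    {
        if (settings[Setting::mongodb_throw_on_unsupported_query])
            throw Exception(ErrorCodes::NOT_IMPLEMENTED,
                            "WHERE clause cannot be converted to a MongoDB query");
        readWholeCollection();  // fall back: fetch everything, filter on the ClickHouse side
    }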
diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index a5bb0a7543c..d1d8a824651 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -40,10 +40,6 @@ target_link_libraries(clickhouse_dictionaries Poco::Redis ) -if (USE_MONGODB) - target_link_libraries(clickhouse_dictionaries PRIVATE Poco::MongoDB) -endif() - target_link_libraries(clickhouse_dictionaries PUBLIC ch_contrib::abseil_swiss_tables) if (TARGET ch_contrib::cassandra) diff --git a/src/Dictionaries/MongoDBPocoLegacyDictionarySource.cpp b/src/Dictionaries/MongoDBPocoLegacyDictionarySource.cpp deleted file mode 100644 index 4495215d826..00000000000 --- a/src/Dictionaries/MongoDBPocoLegacyDictionarySource.cpp +++ /dev/null @@ -1,305 +0,0 @@ -#include "config.h" - -#include "DictionarySourceFactory.h" -#if USE_MONGODB -#include -#include "MongoDBPocoLegacyDictionarySource.h" -#include "DictionaryStructure.h" -#include "registerDictionaries.h" -#include -#include -#endif - -namespace DB -{ - -namespace ErrorCodes -{ -#if USE_MONGODB -extern const int NOT_IMPLEMENTED; -extern const int UNSUPPORTED_METHOD; -extern const int MONGODB_CANNOT_AUTHENTICATE; -#else -extern const int SUPPORT_IS_DISABLED; -#endif -} - -void registerDictionarySourceMongoDBPocoLegacy(DictionarySourceFactory & factory) -{ - #if USE_MONGODB - auto create_mongo_db_dictionary = []( - const DictionaryStructure & dict_struct, - const Poco::Util::AbstractConfiguration & config, - const std::string & root_config_prefix, - Block & sample_block, - ContextPtr context, - const std::string & /* default_database */, - bool created_from_ddl) - { - const auto config_prefix = root_config_prefix + ".mongodb"; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; - - String host, username, password, database, method, options, collection; - UInt16 port; - if (named_collection) - { - validateNamedCollection( - *named_collection, - /* required_keys */{"collection"}, - /* optional_keys */ValidateKeysMultiset{ - "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); - - host = named_collection->getOrDefault("host", ""); - port = static_cast(named_collection->getOrDefault("port", 0)); - username = named_collection->getOrDefault("user", ""); - password = named_collection->getOrDefault("password", ""); - database = named_collection->getAnyOrDefault({"db", "database"}, ""); - method = named_collection->getOrDefault("method", ""); - collection = named_collection->getOrDefault("collection", ""); - options = named_collection->getOrDefault("options", ""); - } - else - { - host = config.getString(config_prefix + ".host", ""); - port = config.getUInt(config_prefix + ".port", 0); - username = config.getString(config_prefix + ".user", ""); - password = config.getString(config_prefix + ".password", ""); - database = config.getString(config_prefix + ".db", ""); - method = config.getString(config_prefix + ".method", ""); - collection = config.getString(config_prefix + ".collection"); - options = config.getString(config_prefix + ".options", ""); - } - - if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - - return std::make_unique(dict_struct, - config.getString(config_prefix + ".uri", ""), - host, - port, - username, - password, - method, - database, - collection, - options, - sample_block); - }; - #else - auto create_mongo_db_dictionary = []( - const DictionaryStructure & /* dict_struct */, - 
const Poco::Util::AbstractConfiguration & /* config */, - const std::string & /* root_config_prefix */, - Block & /* sample_block */, - ContextPtr /* context */, - const std::string & /* default_database */, - bool /* created_from_ddl */) -> DictionarySourcePtr - { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Dictionary source of type `mongodb` is disabled because ClickHouse was built without mongodb support."); - }; - #endif - - factory.registerSource("mongodb", create_mongo_db_dictionary); -} - -} - -#if USE_MONGODB -#include -#include -#include -#include -#include -#include -#include -#include - -// only after poco -// naming conflict: -// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); -// src/IO/WriteHelpers.h:146 #define writeCString(s, buf) -#include - - -namespace DB -{ -static const UInt64 max_block_size = 8192; - - -MongoDBPocoLegacyDictionarySource::MongoDBPocoLegacyDictionarySource( - const DictionaryStructure & dict_struct_, - const std::string & uri_, - const std::string & host_, - UInt16 port_, - const std::string & user_, - const std::string & password_, - const std::string & method_, - const std::string & db_, - const std::string & collection_, - const std::string & options_, - const Block & sample_block_) - : dict_struct{dict_struct_} - , uri{uri_} - , host{host_} - , port{port_} - , user{user_} - , password{password_} - , method{method_} - , db{db_} - , collection{collection_} - , options(options_) - , sample_block{sample_block_} - , connection{std::make_shared()} -{ - - StorageMongoDBPocoLegacySocketFactory socket_factory; - if (!uri.empty()) - { - // Connect with URI. - connection->connect(uri, socket_factory); - - Poco::URI poco_uri(connection->uri()); - - // Parse database from URI. This is required for correctness -- the - // cursor is created using database name and collection name, so we have - // to specify them properly. - db = poco_uri.getPath(); - // getPath() may return a leading slash, remove it. - if (!db.empty() && db[0] == '/') - { - db.erase(0, 1); - } - - // Parse some other parts from URI, for logging and display purposes. - host = poco_uri.getHost(); - port = poco_uri.getPort(); - user = poco_uri.getUserInfo(); - if (size_t separator = user.find(':'); separator != std::string::npos) - { - user.resize(separator); - } - } - else - { - // Connect with host/port/user/etc through constructing the uri - std::string uri_constructed("mongodb://" + host + ":" + std::to_string(port) + "/" + db + (options.empty() ? "" : "?" + options)); - connection->connect(uri_constructed, socket_factory); - - if (!user.empty()) - { - Poco::MongoDB::Database poco_db(db); - if (!poco_db.authenticate(*connection, user, password, method.empty() ? 
Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method)) - throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); - } - } -} - - -MongoDBPocoLegacyDictionarySource::MongoDBPocoLegacyDictionarySource(const MongoDBPocoLegacyDictionarySource & other) - : MongoDBPocoLegacyDictionarySource{ - other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, - other.collection, other.options, other.sample_block - } -{ -} - -MongoDBPocoLegacyDictionarySource::~MongoDBPocoLegacyDictionarySource() = default; - -QueryPipeline MongoDBPocoLegacyDictionarySource::loadAll() -{ - return QueryPipeline(std::make_shared(connection, db, collection, Poco::MongoDB::Document{}, sample_block, max_block_size)); -} - -QueryPipeline MongoDBPocoLegacyDictionarySource::loadIds(const std::vector & ids) -{ - if (!dict_struct.id) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is required for selective loading"); - - Poco::MongoDB::Document query; - - /** NOTE: While building array, Poco::MongoDB requires passing of different unused element names, along with values. - * In general, Poco::MongoDB is quite inefficient and bulky. - */ - - Poco::MongoDB::Array::Ptr ids_array(new Poco::MongoDB::Array); - for (const UInt64 id : ids) - ids_array->add(DB::toString(id), static_cast(id)); - - query.addNewDocument(dict_struct.id->name).add("$in", ids_array); - - return QueryPipeline(std::make_shared(connection, db, collection, query, sample_block, max_block_size)); -} - - -QueryPipeline MongoDBPocoLegacyDictionarySource::loadKeys(const Columns & key_columns, const std::vector & requested_rows) -{ - if (!dict_struct.key) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is required for selective loading"); - - Poco::MongoDB::Document query; - Poco::MongoDB::Array::Ptr keys_array(new Poco::MongoDB::Array); - - for (const auto row_idx : requested_rows) - { - auto & key = keys_array->addNewDocument(DB::toString(row_idx)); - - const auto & key_attributes = *dict_struct.key; - for (size_t attribute_index = 0; attribute_index < key_attributes.size(); ++attribute_index) - { - const auto & key_attribute = key_attributes[attribute_index]; - - switch (key_attribute.underlying_type) - { - case AttributeUnderlyingType::UInt8: - case AttributeUnderlyingType::UInt16: - case AttributeUnderlyingType::UInt32: - case AttributeUnderlyingType::UInt64: - case AttributeUnderlyingType::Int8: - case AttributeUnderlyingType::Int16: - case AttributeUnderlyingType::Int32: - case AttributeUnderlyingType::Int64: - { - key.add(key_attribute.name, static_cast(key_columns[attribute_index]->get64(row_idx))); - break; - } - case AttributeUnderlyingType::Float32: - case AttributeUnderlyingType::Float64: - { - key.add(key_attribute.name, key_columns[attribute_index]->getFloat64(row_idx)); - break; - } - case AttributeUnderlyingType::String: - { - String loaded_str((*key_columns[attribute_index])[row_idx].safeGet()); - /// Convert string to ObjectID - if (key_attribute.is_object_id) - { - Poco::MongoDB::ObjectId::Ptr loaded_id(new Poco::MongoDB::ObjectId(loaded_str)); - key.add(key_attribute.name, loaded_id); - } - else - { - key.add(key_attribute.name, loaded_str); - } - break; - } - default: - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary attribute type for MongoDB dictionary source"); - } - } - } - - /// If more than one key we should use $or - query.add("$or", keys_array); - - return QueryPipeline(std::make_shared(connection, 
db, collection, query, sample_block, max_block_size)); -} - -std::string MongoDBPocoLegacyDictionarySource::toString() const -{ - return fmt::format("MongoDB: {}.{},{}{}:{}", db, collection, (user.empty() ? " " : " " + user + '@'), host, port); -} - -} -#endif diff --git a/src/Dictionaries/MongoDBPocoLegacyDictionarySource.h b/src/Dictionaries/MongoDBPocoLegacyDictionarySource.h deleted file mode 100644 index 95dc1194981..00000000000 --- a/src/Dictionaries/MongoDBPocoLegacyDictionarySource.h +++ /dev/null @@ -1,93 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_MONGODB -#include -#include - -#include "DictionaryStructure.h" -#include "IDictionarySource.h" - -namespace Poco -{ -namespace Util -{ - class AbstractConfiguration; -} - -namespace MongoDB -{ - class Connection; -} -} - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/// Allows loading dictionaries from a MongoDB collection. Deprecated, will be removed soon. -class MongoDBPocoLegacyDictionarySource final : public IDictionarySource -{ -public: - MongoDBPocoLegacyDictionarySource( - const DictionaryStructure & dict_struct_, - const std::string & uri_, - const std::string & host_, - UInt16 port_, - const std::string & user_, - const std::string & password_, - const std::string & method_, - const std::string & db_, - const std::string & collection_, - const std::string & options, - const Block & sample_block_); - - MongoDBPocoLegacyDictionarySource(const MongoDBPocoLegacyDictionarySource & other); - - ~MongoDBPocoLegacyDictionarySource() override; - - QueryPipeline loadAll() override; - - QueryPipeline loadUpdatedAll() override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadUpdatedAll is unsupported for MongoDBDictionarySource"); - } - - bool supportsSelectiveLoad() const override { return true; } - - QueryPipeline loadIds(const std::vector & ids) override; - - QueryPipeline loadKeys(const Columns & key_columns, const std::vector & requested_rows) override; - - /// @todo: for MongoDB, modification date can somehow be determined from the `_id` object field - bool isModified() const override { return true; } - - ///Not yet supported - bool hasUpdateField() const override { return false; } - - DictionarySourcePtr clone() const override { return std::make_shared(*this); } - - std::string toString() const override; - -private: - const DictionaryStructure dict_struct; - const std::string uri; - std::string host; - UInt16 port; - std::string user; - const std::string password; - const std::string method; - std::string db; - const std::string collection; - const std::string options; - Block sample_block; - - std::shared_ptr connection; -}; - -} -#endif diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index fdecd540f13..67da6ca0c05 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -36,7 +36,7 @@ void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaryDirect(DictionaryFactory & factory); -void registerDictionaries(bool use_legacy_mongodb_integration) +void registerDictionaries() { { auto & source_factory = DictionarySourceFactory::instance(); @@ -45,10 +45,7 @@ void registerDictionaries(bool use_legacy_mongodb_integration) registerDictionarySourceMysql(source_factory); registerDictionarySourceClickHouse(source_factory); - if (use_legacy_mongodb_integration) - registerDictionarySourceMongoDBPocoLegacy(source_factory); - else - 
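The loadIds and loadKeys implementations above reduce to plain MongoDB selectors: {<id>: {$in: [...]}} for flat keys and {$or: [<key document>, ...]} for composite keys. A minimal sketch of the $in case, using only the Poco::MongoDB calls that appear in the deleted code (the builder insists on per-element names for array entries, even though BSON readers ignore them):

#include <Poco/MongoDB/Array.h>
#include <Poco/MongoDB/Document.h>

// Build the selector {id: {$in: [1, 5]}}, the way loadIds does for dictionary keys.
Poco::MongoDB::Document query;
Poco::MongoDB::Array::Ptr ids(new Poco::MongoDB::Array);
ids->add("0", Poco::Int64(1)); /// the element name "0" is required by the API but otherwise unused
ids->add("1", Poco::Int64(5));
query.addNewDocument("id").add("$in", ids);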
registerDictionarySourceMongoDB(source_factory); + registerDictionarySourceMongoDB(source_factory); registerDictionarySourceRedis(source_factory); registerDictionarySourceCassandra(source_factory); diff --git a/src/Dictionaries/registerDictionaries.h b/src/Dictionaries/registerDictionaries.h index 4f82f7b5d29..e8480277c2c 100644 --- a/src/Dictionaries/registerDictionaries.h +++ b/src/Dictionaries/registerDictionaries.h @@ -2,5 +2,5 @@ namespace DB { -void registerDictionaries(bool use_legacy_mongodb_integration); +void registerDictionaries(); } diff --git a/src/Processors/Sources/MongoDBPocoLegacySource.cpp b/src/Processors/Sources/MongoDBPocoLegacySource.cpp deleted file mode 100644 index d39b857ee28..00000000000 --- a/src/Processors/Sources/MongoDBPocoLegacySource.cpp +++ /dev/null @@ -1,578 +0,0 @@ -#include "config.h" - -#if USE_MONGODB -#include "MongoDBPocoLegacySource.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include "base/types.h" -#include -#include - -#include -#include - -// only after poco -// naming conflict: -// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); -// src/IO/WriteHelpers.h:146 #define writeCString(s, buf) -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int TYPE_MISMATCH; - extern const int UNKNOWN_TYPE; - extern const int MONGODB_ERROR; - extern const int BAD_ARGUMENTS; -} - -namespace -{ - using ValueType = ExternalResultDescription::ValueType; - using ObjectId = Poco::MongoDB::ObjectId; - using MongoArray = Poco::MongoDB::Array; - using MongoUUID = Poco::MongoDB::Binary::Ptr; - - - UUID parsePocoUUID(const Poco::UUID & src) - { - UUID uuid; - - std::array src_node = src.getNode(); - UInt64 node = 0; - node |= UInt64(src_node[0]) << 40; - node |= UInt64(src_node[1]) << 32; - node |= UInt64(src_node[2]) << 24; - node |= UInt64(src_node[3]) << 16; - node |= UInt64(src_node[4]) << 8; - node |= src_node[5]; - - UUIDHelpers::getHighBytes(uuid) = UInt64(src.getTimeLow()) << 32 | UInt32(src.getTimeMid() << 16 | src.getTimeHiAndVersion()); - UUIDHelpers::getLowBytes(uuid) = UInt64(src.getClockSeq()) << 48 | node; - - return uuid; - } - - template - Field getNumber(const Poco::MongoDB::Element & value, const std::string & name) - { - switch (value.type()) - { - case Poco::MongoDB::ElementTraits::TypeId: - return static_cast(static_cast &>(value).value()); - case Poco::MongoDB::ElementTraits::TypeId: - return static_cast(static_cast &>(value).value()); - case Poco::MongoDB::ElementTraits::TypeId: - return static_cast(static_cast &>(value).value()); - case Poco::MongoDB::ElementTraits::TypeId: - return static_cast(static_cast &>(value).value()); - case Poco::MongoDB::ElementTraits::TypeId: - return Field(); - case Poco::MongoDB::ElementTraits::TypeId: - return parse(static_cast &>(value).value()); - default: - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected a number, got type id = {} for column {}", - toString(value.type()), name); - } - } - - void prepareMongoDBArrayInfo( - std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type) - { - const auto * array_type = assert_cast(data_type.get()); - auto nested = array_type->getNestedType(); - - size_t count_dimensions = 1; - while (isArray(nested)) - { - ++count_dimensions; - nested = assert_cast(nested.get())->getNestedType(); - } - - Field default_value = nested->getDefault(); - if (nested->isNullable()) - 
nested = assert_cast(nested.get())->getNestedType(); - - WhichDataType which(nested); - std::function parser; - - if (which.isUInt8()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isUInt16()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isUInt32()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isUInt64()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isInt8()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isInt16()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isInt32()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isInt64()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isFloat32()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isFloat64()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field { return getNumber(value, name); }; - else if (which.isString() || which.isFixedString()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field - { - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - String string_id = value.toString(); - return Field(string_id.data(), string_id.size()); - } - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - String string = static_cast &>(value).value(); - return Field(string.data(), string.size()); - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String, got type id = {} for column {}", - toString(value.type()), name); - }; - else if (which.isDate()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field - { - if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", - toString(value.type()), name); - - return static_cast(DateLUT::instance().toDayNum( - static_cast &>(value).value().epochTime())); - }; - else if (which.isDateTime()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field - { - if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", - toString(value.type()), name); - - return static_cast(static_cast &>(value).value().epochTime()); - }; - else if (which.isUUID()) - parser = [](const Poco::MongoDB::Element & value, const std::string & name) -> Field - { - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - String string = static_cast &>(value).value(); - return parse(string); - } - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - const Poco::UUID & poco_uuid = static_cast &>(value).value()->uuid(); - return parsePocoUUID(poco_uuid); - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, 
"Type mismatch, expected String/UUID, got type id = {} for column {}", - toString(value.type()), name); - }; - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName()); - - array_info[column_idx] = {count_dimensions, default_value, parser}; - } - - template - void insertNumber(IColumn & column, const Poco::MongoDB::Element & value, const std::string & name) - { - switch (value.type()) - { - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().push_back( - static_cast &>(value).value()); - break; - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().push_back( - static_cast(static_cast &>(value).value())); - break; - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().push_back(static_cast( - static_cast &>(value).value())); - break; - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().push_back( - static_cast &>(value).value()); - break; - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().emplace_back(); - break; - case Poco::MongoDB::ElementTraits::TypeId: - assert_cast &>(column).getData().push_back( - parse(static_cast &>(value).value())); - break; - default: - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected a number, got type id = {} for column {}", - toString(value.type()), name); - } - } - - void insertValue( - IColumn & column, - const ValueType type, - const Poco::MongoDB::Element & value, - const std::string & name, - std::unordered_map & array_info, - size_t idx) - { - switch (type) - { - case ValueType::vtUInt8: - insertNumber(column, value, name); - break; - case ValueType::vtUInt16: - insertNumber(column, value, name); - break; - case ValueType::vtUInt32: - insertNumber(column, value, name); - break; - case ValueType::vtUInt64: - insertNumber(column, value, name); - break; - case ValueType::vtInt8: - insertNumber(column, value, name); - break; - case ValueType::vtInt16: - insertNumber(column, value, name); - break; - case ValueType::vtInt32: - insertNumber(column, value, name); - break; - case ValueType::vtInt64: - insertNumber(column, value, name); - break; - case ValueType::vtFloat32: - insertNumber(column, value, name); - break; - case ValueType::vtFloat64: - insertNumber(column, value, name); - break; - - case ValueType::vtEnum8: - case ValueType::vtEnum16: - case ValueType::vtString: - { - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - std::string string_id = value.toString(); - assert_cast(column).insertData(string_id.data(), string_id.size()); - break; - } - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - String string = static_cast &>(value).value(); - assert_cast(column).insertData(string.data(), string.size()); - break; - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String, got type id = {} for column {}", - toString(value.type()), name); - } - - case ValueType::vtDate: - { - if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", - toString(value.type()), name); - - assert_cast(column).getData().push_back(static_cast(DateLUT::instance().toDayNum( - static_cast &>(value).value().epochTime()))); - break; - } - - case ValueType::vtDateTime: - { - if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type 
id = {} for column {}", - toString(value.type()), name); - - assert_cast(column).getData().push_back( - static_cast(static_cast &>(value).value().epochTime())); - break; - } - case ValueType::vtUUID: - { - if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - String string = static_cast &>(value).value(); - assert_cast(column).getData().push_back(parse(string)); - } - else if (value.type() == Poco::MongoDB::ElementTraits::TypeId) - { - const Poco::UUID & poco_uuid = static_cast &>(value).value()->uuid(); - UUID uuid = parsePocoUUID(poco_uuid); - assert_cast(column).getData().push_back(uuid); - } - else - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String/UUID, got type id = {} for column {}", - toString(value.type()), name); - break; - } - case ValueType::vtArray: - { - if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Array, got type id = {} for column {}", - toString(value.type()), name); - - size_t expected_dimensions = array_info[idx].num_dimensions; - const auto parse_value = array_info[idx].parser; - std::vector dimensions(expected_dimensions + 1); - - auto array = static_cast &>(value).value(); - - std::vector> arrays; - arrays.emplace_back(&value, 0); - - while (!arrays.empty()) - { - size_t dimension_idx = arrays.size() - 1; - - if (dimension_idx + 1 > expected_dimensions) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got more dimensions than expected"); - - auto [parent_ptr, child_idx] = arrays.back(); - auto parent = static_cast &>(*parent_ptr).value(); - - if (child_idx >= parent->size()) - { - arrays.pop_back(); - - if (dimension_idx == 0) - break; - - dimensions[dimension_idx].emplace_back(Array(dimensions[dimension_idx + 1].begin(), dimensions[dimension_idx + 1].end())); - dimensions[dimension_idx + 1].clear(); - - continue; - } - - Poco::MongoDB::Element::Ptr child = parent->get(static_cast(child_idx)); - arrays.back().second += 1; - - if (child->type() == Poco::MongoDB::ElementTraits::TypeId) - { - arrays.emplace_back(child.get(), 0); - } - else if (child->type() == Poco::MongoDB::ElementTraits::TypeId) - { - if (dimension_idx + 1 == expected_dimensions) - dimensions[dimension_idx + 1].emplace_back(array_info[idx].default_value); - else - dimensions[dimension_idx + 1].emplace_back(Array()); - } - else if (dimension_idx + 1 == expected_dimensions) - { - dimensions[dimension_idx + 1].emplace_back(parse_value(*child, name)); - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Got less dimensions than expected. 
({} instead of {})", dimension_idx + 1, expected_dimensions); - } - } - - assert_cast(column).insert(Array(dimensions[1].begin(), dimensions[1].end())); - break; - - } - default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Value of unsupported type: {}", column.getName()); - } - } - - void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); } -} - - -bool isMongoDBWireProtocolOld(Poco::MongoDB::Connection & connection_, const std::string & database_name_) -{ - Poco::MongoDB::Database db(database_name_); - Poco::MongoDB::Document::Ptr doc = db.queryServerHello(connection_, false); - - if (doc->exists("maxWireVersion")) - { - auto wire_version = doc->getInteger("maxWireVersion"); - return wire_version < Poco::MongoDB::Database::WireVersion::VER_36; - } - - doc = db.queryServerHello(connection_, true); - if (doc->exists("maxWireVersion")) - { - auto wire_version = doc->getInteger("maxWireVersion"); - return wire_version < Poco::MongoDB::Database::WireVersion::VER_36; - } - - return true; -} - - -MongoDBPocoLegacyCursor::MongoDBPocoLegacyCursor( - const std::string & database, - const std::string & collection, - const Block & sample_block_to_select, - const Poco::MongoDB::Document & query, - Poco::MongoDB::Connection & connection) - : is_wire_protocol_old(isMongoDBWireProtocolOld(connection, database)) -{ - Poco::MongoDB::Document projection; - - /// Looks like selecting _id column is implicit by default. - if (!sample_block_to_select.has("_id")) - projection.add("_id", 0); - - for (const auto & column : sample_block_to_select) - projection.add(column.name, 1); - - if (is_wire_protocol_old) - { - old_cursor = std::make_unique(database, collection); - old_cursor->query().selector() = query; - old_cursor->query().returnFieldSelector() = projection; - } - else - { - new_cursor = std::make_unique(database, collection); - new_cursor->query().setCommandName(Poco::MongoDB::OpMsgMessage::CMD_FIND); - new_cursor->query().body().addNewDocument("filter") = query; - new_cursor->query().body().addNewDocument("projection") = projection; - } -} - -Poco::MongoDB::Document::Vector MongoDBPocoLegacyCursor::nextDocuments(Poco::MongoDB::Connection & connection) -{ - if (is_wire_protocol_old) - { - auto response = old_cursor->next(connection); - cursor_id = response.cursorID(); - return std::move(response.documents()); - } - - auto response = new_cursor->next(connection); - cursor_id = new_cursor->cursorID(); - return std::move(response.documents()); -} - -Int64 MongoDBPocoLegacyCursor::cursorID() const -{ - return cursor_id; -} - - -MongoDBPocoLegacySource::MongoDBPocoLegacySource( - std::shared_ptr & connection_, - const String & database_name_, - const String & collection_name_, - const Poco::MongoDB::Document & query_, - const Block & sample_block, - UInt64 max_block_size_) - : ISource(sample_block.cloneEmpty()) - , connection(connection_) - , cursor(database_name_, collection_name_, sample_block, query_, *connection_) - , max_block_size{max_block_size_} -{ - description.init(sample_block); - - for (const auto idx : collections::range(0, description.sample_block.columns())) - if (description.types[idx].first == ExternalResultDescription::ValueType::vtArray) - prepareMongoDBArrayInfo(array_info, idx, description.sample_block.getByPosition(idx).type); -} - - -MongoDBPocoLegacySource::~MongoDBPocoLegacySource() = default; - -Chunk MongoDBPocoLegacySource::generate() -{ - if (all_read) - return {}; - - MutableColumns 
columns(description.sample_block.columns()); - const size_t size = columns.size(); - - for (const auto i : collections::range(0, size)) - columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty(); - - size_t num_rows = 0; - while (num_rows < max_block_size) - { - auto documents = cursor.nextDocuments(*connection); - - for (auto & document : documents) - { - if (document->exists("ok") && document->exists("$err") - && document->exists("code") && document->getInteger("ok") == 0) - { - auto code = document->getInteger("code"); - const Poco::MongoDB::Element::Ptr value = document->get("$err"); - auto message = static_cast &>(*value).value(); - throw Exception(ErrorCodes::MONGODB_ERROR, "Got error from MongoDB: {}, code: {}", message, code); - } - ++num_rows; - - for (const auto idx : collections::range(0, size)) - { - const auto & name = description.sample_block.getByPosition(idx).name; - - bool exists_in_current_document = document->exists(name); - if (!exists_in_current_document) - { - insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column); - continue; - } - - const Poco::MongoDB::Element::Ptr value = document->get(name); - - if (value.isNull() || value->type() == Poco::MongoDB::ElementTraits::TypeId) - { - insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column); - } - else - { - bool is_nullable = description.types[idx].second; - if (is_nullable) - { - ColumnNullable & column_nullable = assert_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, *value, name, array_info, idx); - column_nullable.getNullMapData().emplace_back(0); - } - else - insertValue(*columns[idx], description.types[idx].first, *value, name, array_info, idx); - } - } - } - - if (cursor.cursorID() == 0) - { - all_read = true; - break; - } - } - - if (num_rows == 0) - return {}; - - return Chunk(std::move(columns), num_rows); -} - -} -#endif diff --git a/src/Processors/Sources/MongoDBPocoLegacySource.h b/src/Processors/Sources/MongoDBPocoLegacySource.h deleted file mode 100644 index 0c9f2c7cc9f..00000000000 --- a/src/Processors/Sources/MongoDBPocoLegacySource.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_MONGODB -#include -#include - -#include -#include -#include - -#include - - -namespace Poco -{ -namespace MongoDB -{ - class Connection; - class Document; - class Cursor; - class OpMsgCursor; -} -} - -namespace DB -{ - -struct MongoDBPocoLegacyArrayInfo -{ - size_t num_dimensions; - Field default_value; - std::function parser; -}; - -void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password); - -bool isMongoDBWireProtocolOld(Poco::MongoDB::Connection & connection_, const std::string & database_name_); - -/// Deprecated, will be removed soon. -class MongoDBPocoLegacyCursor -{ -public: - MongoDBPocoLegacyCursor( - const std::string & database, - const std::string & collection, - const Block & sample_block_to_select, - const Poco::MongoDB::Document & query, - Poco::MongoDB::Connection & connection); - - Poco::MongoDB::Document::Vector nextDocuments(Poco::MongoDB::Connection & connection); - - Int64 cursorID() const; - -private: - const bool is_wire_protocol_old; - std::unique_ptr old_cursor; - std::unique_ptr new_cursor; - Int64 cursor_id = 0; -}; - -/// Converts MongoDB Cursor to a stream of Blocks. Deprecated, will be removed soon. 
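The cursor machinery above exists in two variants because servers older than MongoDB 3.6 do not speak OP_MSG; isMongoDBWireProtocolOld makes that call by inspecting the handshake. A condensed sketch of the same check, against the vendored Poco API that this patch deletes:

// Fetch the server's hello document, falling back to the legacy command form;
// a missing maxWireVersion is treated as an old (pre-3.6) server.
Poco::MongoDB::Database db(database_name);
Poco::MongoDB::Document::Ptr hello = db.queryServerHello(connection, false);
if (!hello->exists("maxWireVersion"))
    hello = db.queryServerHello(connection, true);
const bool old_wire_protocol = !hello->exists("maxWireVersion")
    || hello->getInteger("maxWireVersion") < Poco::MongoDB::Database::WireVersion::VER_36;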
-class MongoDBPocoLegacySource final : public ISource -{ -public: - MongoDBPocoLegacySource( - std::shared_ptr & connection_, - const String & database_name_, - const String & collection_name_, - const Poco::MongoDB::Document & query_, - const Block & sample_block, - UInt64 max_block_size_); - - ~MongoDBPocoLegacySource() override; - - String getName() const override { return "MongoDB"; } - -private: - Chunk generate() override; - - std::shared_ptr connection; - MongoDBPocoLegacyCursor cursor; - const UInt64 max_block_size; - ExternalResultDescription description; - bool all_read = false; - - std::unordered_map array_info; -}; - -} -#endif diff --git a/src/Storages/StorageMongoDBPocoLegacy.cpp b/src/Storages/StorageMongoDBPocoLegacy.cpp deleted file mode 100644 index 04f73cb0510..00000000000 --- a/src/Storages/StorageMongoDBPocoLegacy.cpp +++ /dev/null @@ -1,327 +0,0 @@ -#include "config.h" - -#if USE_MONGODB -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int MONGODB_CANNOT_AUTHENTICATE; -} - -StorageMongoDBPocoLegacy::StorageMongoDBPocoLegacy( - const StorageID & table_id_, - const std::string & host_, - uint16_t port_, - const std::string & database_name_, - const std::string & collection_name_, - const std::string & username_, - const std::string & password_, - const std::string & options_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment) - : IStorage(table_id_) - , database_name(database_name_) - , collection_name(collection_name_) - , username(username_) - , password(password_) - , uri("mongodb://" + host_ + ":" + std::to_string(port_) + "/" + database_name_ + "?" 
+ options_) -{ - LOG_WARNING(getLogger("StorageMongoDB (" + table_id_.table_name + ")"), "The deprecated MongoDB integration implementation is used; it will be removed in future releases."); - - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); - storage_metadata.setConstraints(constraints_); - storage_metadata.setComment(comment); - setInMemoryMetadata(storage_metadata); -} - - -void StorageMongoDBPocoLegacy::connectIfNotConnected() -{ - std::lock_guard lock{connection_mutex}; - if (!connection) - { - StorageMongoDBPocoLegacySocketFactory factory; - connection = std::make_shared(uri, factory); - } - - if (!authenticated) - { - Poco::URI poco_uri(uri); - auto query_params = poco_uri.getQueryParameters(); - auto auth_source = std::find_if(query_params.begin(), query_params.end(), - [&](const std::pair & param) { return param.first == "authSource"; }); - auto auth_db = database_name; - if (auth_source != query_params.end()) - auth_db = auth_source->second; - - if (!username.empty() && !password.empty()) - { - Poco::MongoDB::Database poco_db(auth_db); - if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) - throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); - } - - authenticated = true; - } -} - -class StorageMongoDBLegacySink : public SinkToStorage -{ -public: - explicit StorageMongoDBLegacySink( - const std::string & collection_name_, - const std::string & db_name_, - const StorageMetadataPtr & metadata_snapshot_, - std::shared_ptr connection_) - : SinkToStorage(metadata_snapshot_->getSampleBlock()) - , collection_name(collection_name_) - , db_name(db_name_) - , metadata_snapshot{metadata_snapshot_} - , connection(connection_) - , is_wire_protocol_old(isMongoDBWireProtocolOld(*connection_, db_name)) - { - } - - String getName() const override { return "StorageMongoDBLegacySink"; } - - void consume(Chunk & chunk) override - { - Poco::MongoDB::Database db(db_name); - Poco::MongoDB::Document::Vector documents; - - auto block = getHeader().cloneWithColumns(chunk.getColumns()); - - size_t num_rows = block.rows(); - size_t num_cols = block.columns(); - - const auto columns = block.getColumns(); - const auto data_types = block.getDataTypes(); - const auto data_names = block.getNames(); - - documents.reserve(num_rows); - - for (const auto i : collections::range(0, num_rows)) - { - Poco::MongoDB::Document::Ptr document = new Poco::MongoDB::Document(); - - for (const auto j : collections::range(0, num_cols)) - { - insertValueIntoMongoDB(*document, data_names[j], *data_types[j], *columns[j], i); - } - - documents.push_back(std::move(document)); - } - - if (is_wire_protocol_old) - { - Poco::SharedPtr insert_request = db.createInsertRequest(collection_name); - insert_request->documents() = std::move(documents); - connection->sendRequest(*insert_request); - } - else - { - Poco::SharedPtr insert_request = db.createOpMsgMessage(collection_name); - insert_request->setCommandName(Poco::MongoDB::OpMsgMessage::CMD_INSERT); - insert_request->documents() = std::move(documents); - connection->sendRequest(*insert_request); - } - } - -private: - - void insertValueIntoMongoDB( - Poco::MongoDB::Document & document, - const std::string & name, - const IDataType & data_type, - const IColumn & column, - size_t idx) - { - WhichDataType which(data_type); - - if (which.isArray()) - { - const ColumnArray & column_array = assert_cast(column); - const ColumnArray::Offsets &
offsets = column_array.getOffsets(); - - size_t offset = offsets[idx - 1]; - size_t next_offset = offsets[idx]; - - const IColumn & nested_column = column_array.getData(); - - const auto * array_type = assert_cast(&data_type); - const DataTypePtr & nested_type = array_type->getNestedType(); - - Poco::MongoDB::Array::Ptr array = new Poco::MongoDB::Array(); - for (size_t i = 0; i + offset < next_offset; ++i) - { - insertValueIntoMongoDB(*array, Poco::NumberFormatter::format(i), *nested_type, nested_column, i + offset); - } - - document.add(name, array); - return; - } - - /// MongoDB does not support UInt64 type, so just cast it to Int64 - if (which.isNativeUInt()) - document.add(name, static_cast(column.getUInt(idx))); - else if (which.isNativeInt()) - document.add(name, static_cast(column.getInt(idx))); - else if (which.isFloat32()) - document.add(name, static_cast(column.getFloat32(idx))); - else if (which.isFloat64()) - document.add(name, column.getFloat64(idx)); - else if (which.isDate()) - document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000)); - else if (which.isDateTime()) - document.add(name, Poco::Timestamp(column.getUInt(idx) * 1000000)); - else - { - WriteBufferFromOwnString ostr; - data_type.getDefaultSerialization()->serializeText(column, idx, ostr, FormatSettings{}); - document.add(name, ostr.str()); - } - } - - String collection_name; - String db_name; - StorageMetadataPtr metadata_snapshot; - std::shared_ptr connection; - - const bool is_wire_protocol_old; -}; - -Pipe StorageMongoDBPocoLegacy::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t /*num_streams*/) -{ - connectIfNotConnected(); - - storage_snapshot->check(column_names); - - Block sample_block; - for (const String & column_name : column_names) - { - auto column_data = storage_snapshot->metadata->getColumns().getPhysical(column_name); - sample_block.insert({ column_data.type, column_data.name }); - } - - return Pipe(std::make_shared(connection, database_name, collection_name, Poco::MongoDB::Document{}, sample_block, max_block_size)); -} - - -SinkToStoragePtr StorageMongoDBPocoLegacy::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr /* context */, bool /*async_insert*/) -{ - connectIfNotConnected(); - return std::make_shared(collection_name, database_name, metadata_snapshot, connection); -} - -StorageMongoDBPocoLegacy::Configuration StorageMongoDBPocoLegacy::getConfiguration(ASTs engine_args, ContextPtr context) -{ - Configuration configuration; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context)) - { - validateNamedCollection( - *named_collection, - ValidateKeysMultiset{"host", "port", "user", "username", "password", "database", "db", "collection", "table"}, - {"options"}); - - configuration.host = named_collection->getAny({"host", "hostname"}); - configuration.port = static_cast(named_collection->get("port")); - configuration.username = named_collection->getAny({"user", "username"}); - configuration.password = named_collection->get("password"); - configuration.database = named_collection->getAny({"database", "db"}); - configuration.table = named_collection->getAny({"collection", "table"}); - configuration.options = named_collection->getOrDefault("options", ""); - } - else - { - if (engine_args.size() < 5 || 
engine_args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Storage MongoDB requires from 5 to 6 parameters: " - "MongoDB('host:port', database, collection, 'user', 'password' [, 'options'])."); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); - - /// 27017 is the default MongoDB port. - auto parsed_host_port = parseAddress(checkAndGetLiteralArgument(engine_args[0], "host:port"), 27017); - - configuration.host = parsed_host_port.first; - configuration.port = parsed_host_port.second; - configuration.database = checkAndGetLiteralArgument(engine_args[1], "database"); - configuration.table = checkAndGetLiteralArgument(engine_args[2], "table"); - configuration.username = checkAndGetLiteralArgument(engine_args[3], "username"); - configuration.password = checkAndGetLiteralArgument(engine_args[4], "password"); - - if (engine_args.size() >= 6) - configuration.options = checkAndGetLiteralArgument(engine_args[5], "database"); - } - - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); - - return configuration; -} - - -void registerStorageMongoDBPocoLegacy(StorageFactory & factory) -{ - factory.registerStorage("MongoDB", [](const StorageFactory::Arguments & args) - { - auto configuration = StorageMongoDBPocoLegacy::getConfiguration(args.engine_args, args.getLocalContext()); - - return std::make_shared( - args.table_id, - configuration.host, - configuration.port, - configuration.database, - configuration.table, - configuration.username, - configuration.password, - configuration.options, - args.columns, - args.constraints, - args.comment); - }, - { - .source_access_type = AccessType::MONGO, - }); -} - -} -#endif diff --git a/src/Storages/StorageMongoDBPocoLegacy.h b/src/Storages/StorageMongoDBPocoLegacy.h deleted file mode 100644 index a5814ccd5dd..00000000000 --- a/src/Storages/StorageMongoDBPocoLegacy.h +++ /dev/null @@ -1,79 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_MONGODB -#include - -#include - -namespace DB -{ -/* Implements storage in the MongoDB database. - * Use ENGINE = MongoDB(host:port, database, collection, user, password [, options]); - * Read only. - */ - -/// Deprecated, will be removed soon. 
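The sink's insertValueIntoMongoDB above settles on a few lossy but pragmatic conversions: BSON has no unsigned 64-bit integer, so native UInt columns are narrowed to Int64; Date and DateTime, stored as day numbers and epoch seconds in ClickHouse, are rescaled to Poco::Timestamp's microsecond epoch; everything else falls back to its text serialization. A minimal sketch of the numeric and time cases (the field names and values are illustrative):

Poco::MongoDB::Document doc;
// BSON lacks UInt64, so large unsigned values are reinterpreted as Int64.
doc.add("count", static_cast<Poco::Int64>(UInt64{42}));
// DateTime holds epoch seconds; Poco::Timestamp expects microseconds.
doc.add("when", Poco::Timestamp(Poco::Timestamp::TimeVal(1700000000) * 1000000));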
-class StorageMongoDBPocoLegacy final : public IStorage -{ -public: - StorageMongoDBPocoLegacy( - const StorageID & table_id_, - const std::string & host_, - uint16_t port_, - const std::string & database_name_, - const std::string & collection_name_, - const std::string & username_, - const std::string & password_, - const std::string & options_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - const String & comment); - - std::string getName() const override { return "MongoDB"; } - - Pipe read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - SinkToStoragePtr write( - const ASTPtr & query, - const StorageMetadataPtr & /*metadata_snapshot*/, - ContextPtr context, - bool async_insert) override; - - struct Configuration - { - std::string host; - UInt16 port; - std::string username; - std::string password; - std::string database; - std::string table; - std::string options; - }; - - static Configuration getConfiguration(ASTs engine_args, ContextPtr context); - -private: - void connectIfNotConnected(); - - const std::string database_name; - const std::string collection_name; - const std::string username; - const std::string password; - const std::string uri; - - std::shared_ptr connection; - bool authenticated = false; - std::mutex connection_mutex; /// Protects the variables `connection` and `authenticated`. -}; - -} -#endif diff --git a/src/Storages/StorageMongoDBPocoLegacySocketFactory.cpp b/src/Storages/StorageMongoDBPocoLegacySocketFactory.cpp deleted file mode 100644 index bcfe995dcc6..00000000000 --- a/src/Storages/StorageMongoDBPocoLegacySocketFactory.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "config.h" - -#if USE_MONGODB -#include "StorageMongoDBPocoLegacySocketFactory.h" - -#include - -#include -#include - -#if USE_SSL -# include -#endif - - -namespace DB -{ - -namespace ErrorCodes -{ -extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; -} - -Poco::Net::StreamSocket StorageMongoDBPocoLegacySocketFactory::createSocket(const std::string & host, int port, Poco::Timespan connectTimeout, bool secure) -{ - return secure ? 
createSecureSocket(host, port, connectTimeout) : createPlainSocket(host, port, connectTimeout); -} - -Poco::Net::StreamSocket StorageMongoDBPocoLegacySocketFactory::createPlainSocket(const std::string & host, int port, Poco::Timespan connectTimeout) -{ - Poco::Net::SocketAddress address(host, port); - Poco::Net::StreamSocket socket; - - socket.connect(address, connectTimeout); - - return socket; -} - - -Poco::Net::StreamSocket StorageMongoDBPocoLegacySocketFactory::createSecureSocket(const std::string & host [[maybe_unused]], int port [[maybe_unused]], Poco::Timespan connectTimeout [[maybe_unused]]) -{ -#if USE_SSL - Poco::Net::SocketAddress address(host, port); - Poco::Net::SecureStreamSocket socket; - - socket.setPeerHostName(host); - - socket.connect(address, connectTimeout); - - return socket; -#else - throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "SSL is not enabled at build time."); -#endif -} - -} -#endif diff --git a/src/Storages/StorageMongoDBPocoLegacySocketFactory.h b/src/Storages/StorageMongoDBPocoLegacySocketFactory.h deleted file mode 100644 index ee6ee8faa29..00000000000 --- a/src/Storages/StorageMongoDBPocoLegacySocketFactory.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once - -#include "config.h" - -#if USE_MONGODB -#include - - -namespace DB -{ - -/// Deprecated, will be removed soon. -class StorageMongoDBPocoLegacySocketFactory : public Poco::MongoDB::Connection::SocketFactory -{ -public: - Poco::Net::StreamSocket createSocket(const std::string & host, int port, Poco::Timespan connectTimeout, bool secure) override; - -private: - static Poco::Net::StreamSocket createPlainSocket(const std::string & host, int port, Poco::Timespan connectTimeout); - static Poco::Net::StreamSocket createSecureSocket(const std::string & host, int port, Poco::Timespan connectTimeout); -}; - -} -#endif diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index 458b151a400..d95464c206f 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -106,7 +106,7 @@ void registerStorageKeeperMap(StorageFactory & factory); void registerStorageObjectStorage(StorageFactory & factory); -void registerStorages(bool use_legacy_mongodb_integration [[maybe_unused]]) +void registerStorages() { auto & factory = StorageFactory::instance(); @@ -169,10 +169,7 @@ void registerStorages(bool use_legacy_mongodb_integration [[maybe_unused]]) #endif #if USE_MONGODB - if (use_legacy_mongodb_integration) - registerStorageMongoDBPocoLegacy(factory); - else - registerStorageMongoDB(factory); + registerStorageMongoDB(factory); #endif registerStorageRedis(factory); diff --git a/src/Storages/registerStorages.h b/src/Storages/registerStorages.h index 330855a49d0..d44b934ff9f 100644 --- a/src/Storages/registerStorages.h +++ b/src/Storages/registerStorages.h @@ -2,5 +2,5 @@ namespace DB { -void registerStorages(bool use_legacy_mongodb_integration); +void registerStorages(); } diff --git a/src/TableFunctions/TableFunctionMongoDBPocoLegacy.cpp b/src/TableFunctions/TableFunctionMongoDBPocoLegacy.cpp deleted file mode 100644 index 4e27fd35e12..00000000000 --- a/src/TableFunctions/TableFunctionMongoDBPocoLegacy.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include "config.h" - -#if USE_MONGODB -#include - -#include - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -namespace -{ - -/// 
Deprecated, will be removed soon. -class TableFunctionMongoDBPocoLegacy : public ITableFunction -{ -public: - static constexpr auto name = "mongodb"; - - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl( - const ASTPtr & ast_function, ContextPtr context, - const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "MongoDB"; } - - ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - std::optional configuration; - String structure; -}; - -StoragePtr TableFunctionMongoDBPocoLegacy::executeImpl(const ASTPtr & /*ast_function*/, - ContextPtr context, const String & table_name, ColumnsDescription /*cached_columns*/, bool is_insert_query) const -{ - auto columns = getActualTableStructure(context, is_insert_query); - auto storage = std::make_shared( - StorageID(configuration->database, table_name), - configuration->host, - configuration->port, - configuration->database, - configuration->table, - configuration->username, - configuration->password, - configuration->options, - columns, - ConstraintsDescription(), - String{}); - storage->startup(); - return storage; -} - -ColumnsDescription TableFunctionMongoDBPocoLegacy::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const -{ - return parseColumnsListFromString(structure, context); -} - -void TableFunctionMongoDBPocoLegacy::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - const auto & func_args = ast_function->as(); - if (!func_args.arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'mongodb' must have arguments."); - - ASTs & args = func_args.arguments->children; - - if (args.size() < 6 || args.size() > 7) - { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function 'mongodb' requires from 6 to 7 parameters: " - "mongodb('host:port', database, collection, 'user', 'password', structure [, 'options'])"); - } - - ASTs main_arguments(args.begin(), args.begin() + 5); - - for (size_t i = 5; i < args.size(); ++i) - { - if (const auto * ast_func = typeid_cast(args[i].get())) - { - const auto & [arg_name, arg_value] = getKeyValueMongoDBArgument(ast_func); - if (arg_name == "structure") - structure = checkAndGetLiteralArgument(arg_value, "structure"); - else if (arg_name == "options") - main_arguments.push_back(arg_value); - } - else if (i == 5) - { - structure = checkAndGetLiteralArgument(args[i], "structure"); - } - else if (i == 6) - { - main_arguments.push_back(args[i]); - } - } - - configuration = StorageMongoDBPocoLegacy::getConfiguration(main_arguments, context); -} - -} - -void registerTableFunctionMongoDBPocoLegacy(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} -#endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index fbe2c7c59ed..156a0fd2c92 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -3,7 +3,7 @@ namespace DB { -void registerTableFunctions(bool use_legacy_mongodb_integration [[maybe_unused]]) +void registerTableFunctions() { auto & factory = TableFunctionFactory::instance(); @@ -23,10 +23,7 @@ void registerTableFunctions(bool use_legacy_mongodb_integration [[maybe_unused]] registerTableFunctionInput(factory);
registerTableFunctionGenerate(factory); #if USE_MONGODB - if (use_legacy_mongodb_integration) - registerTableFunctionMongoDBPocoLegacy(factory); - else - registerTableFunctionMongoDB(factory); + registerTableFunctionMongoDB(factory); #endif registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index e22ba7346fa..64d1584c5db 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -73,6 +73,6 @@ void registerDataLakeTableFunctions(TableFunctionFactory & factory); void registerTableFunctionTimeSeries(TableFunctionFactory & factory); -void registerTableFunctions(bool use_legacy_mongodb_integration [[maybe_unused]]); +void registerTableFunctions(); } diff --git a/tests/integration/helpers/external_sources.py b/tests/integration/helpers/external_sources.py index 1b2c73142f3..6c1fb4d0d13 100644 --- a/tests/integration/helpers/external_sources.py +++ b/tests/integration/helpers/external_sources.py @@ -170,7 +170,6 @@ class SourceMongo(ExternalSource): user, password, secure=False, - legacy=False, ): ExternalSource.__init__( self, @@ -183,13 +182,10 @@ class SourceMongo(ExternalSource): password, ) self.secure = secure - self.legacy = legacy def get_source_str(self, table_name): options = "" - if self.secure and self.legacy: - options = "ssl=true" - if self.secure and not self.legacy: + if self.secure: options = "tls=true&tlsAllowInvalidCertificates=true" return """ @@ -267,9 +263,7 @@ class SourceMongoURI(SourceMongo): def get_source_str(self, table_name): options = "" - if self.secure and self.legacy: - options = "ssl=true" - if self.secure and not self.legacy: + if self.secure: options = "tls=true&tlsAllowInvalidCertificates=true" return """ diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/legacy.xml b/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/legacy.xml deleted file mode 100644 index 4ee05db9d1e..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/legacy.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 1 - diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/new.xml b/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/new.xml deleted file mode 100644 index eb2d328060a..00000000000 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/configs/mongo/new.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 0 - diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py index d188705cf6c..2cf6250b106 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py @@ -19,20 +19,14 @@ def secure_connection(request): @pytest.fixture(scope="module") -def legacy(request): - return request.param - - -@pytest.fixture(scope="module") -def cluster(secure_connection, legacy): +def cluster(secure_connection): cluster_name = __file__.removeprefix("test_").removesuffix(".py") - cluster_name += "_legacy" if legacy else "_new" cluster_name += "_secure" if secure_connection else "_insecure" return ClickHouseCluster(cluster_name) @pytest.fixture(scope="module") -def source(secure_connection, legacy, cluster): +def 
source(secure_connection, cluster): return SourceMongo( "MongoDB", "localhost", @@ -42,7 +36,6 @@ def source(secure_connection, legacy, cluster): "root", "clickhouse", secure=secure_connection, - legacy=legacy, ) @@ -69,18 +62,10 @@ def ranged_tester(source): @pytest.fixture(scope="module") -def main_config(secure_connection, legacy): - if legacy: - main_config = [os.path.join("configs", "mongo", "legacy.xml")] - else: - main_config = [os.path.join("configs", "mongo", "new.xml")] - +def main_config(secure_connection): if secure_connection: - main_config.append(os.path.join("configs", "disable_ssl_verification.xml")) - else: - main_config.append(os.path.join("configs", "ssl_verification.xml")) - - return main_config + return [os.path.join("configs", "disable_ssl_verification.xml")] + return [os.path.join("configs", "ssl_verification.xml")] @pytest.fixture(scope="module") @@ -113,32 +98,28 @@ def started_cluster( @pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple(secure_connection, legacy, started_cluster, layout_name, simple_tester): +def test_simple(secure_connection, started_cluster, layout_name, simple_tester): simple_tester.execute(layout_name, started_cluster.instances["node1"]) @pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) def test_complex( - secure_connection, legacy, started_cluster, layout_name, complex_tester + secure_connection, started_cluster, layout_name, complex_tester ): complex_tester.execute(layout_name, started_cluster.instances["node1"]) @pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_RANGED)) -def test_ranged(secure_connection, legacy, started_cluster, layout_name, ranged_tester): +def test_ranged(secure_connection, started_cluster, layout_name, ranged_tester): ranged_tester.execute(layout_name, started_cluster.instances["node1"]) @pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) def test_simple_ssl( - secure_connection, legacy, started_cluster, layout_name, simple_tester + secure_connection, started_cluster, layout_name, simple_tester ): simple_tester.execute(layout_name, started_cluster.instances["node1"]) diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py index bdaebab1149..407031ed7d3 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py @@ -17,18 +17,13 @@ def secure_connection(request): return request.param -@pytest.fixture(scope="module") -def legacy(request): - return request.param - - @pytest.fixture(scope="module") def cluster(secure_connection): return ClickHouseCluster(__file__) @pytest.fixture(scope="module") -def source(secure_connection, legacy, cluster): +def source(secure_connection, cluster): return 
SourceMongoURI( "MongoDB", "localhost", @@ -38,7 +33,6 @@ def source(secure_connection, legacy, cluster): "root", "clickhouse", secure=secure_connection, - legacy=legacy, ) @@ -51,22 +45,14 @@ def simple_tester(source): @pytest.fixture(scope="module") -def main_config(secure_connection, legacy): - if legacy: - main_config = [os.path.join("configs", "mongo", "legacy.xml")] - else: - main_config = [os.path.join("configs", "mongo", "new.xml")] - +def main_config(secure_connection): if secure_connection: - main_config.append(os.path.join("configs", "disable_ssl_verification.xml")) - else: - main_config.append(os.path.join("configs", "ssl_verification.xml")) - - return main_config + return [os.path.join("configs", "disable_ssl_verification.xml")] + return [os.path.join("configs", "ssl_verification.xml")] @pytest.fixture(scope="module") -def started_cluster(secure_connection, legacy, cluster, main_config, simple_tester): +def started_cluster(secure_connection, cluster, main_config, simple_tester): dictionaries = simple_tester.list_dictionaries() node = cluster.add_instance( @@ -85,16 +71,14 @@ def started_cluster(secure_connection, legacy, cluster, main_config, simple_test # See comment in SourceMongoURI @pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", ["flat"]) -def test_simple(secure_connection, legacy, started_cluster, simple_tester, layout_name): +def test_simple(secure_connection, started_cluster, simple_tester, layout_name): simple_tester.execute(layout_name, started_cluster.instances["uri_node"]) @pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) -@pytest.mark.parametrize("legacy", [False, True], indirect=["legacy"]) @pytest.mark.parametrize("layout_name", ["flat"]) def test_simple_ssl( - secure_connection, legacy, started_cluster, simple_tester, layout_name + secure_connection, started_cluster, simple_tester, layout_name ): simple_tester.execute(layout_name, started_cluster.instances["uri_node"]) diff --git a/tests/integration/test_storage_mongodb/configs/feature_flag.xml b/tests/integration/test_storage_mongodb/configs/feature_flag.xml deleted file mode 100644 index eb2d328060a..00000000000 --- a/tests/integration/test_storage_mongodb/configs/feature_flag.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 0 - diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index e810b613290..d69b9763b13 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -17,7 +17,6 @@ def started_cluster(request): "node", main_configs=[ "configs/named_collections.xml", - "configs/feature_flag.xml", ], user_configs=["configs/users.xml"], with_mongo=True, diff --git a/tests/integration/test_storage_mongodb_legacy/__init__.py b/tests/integration/test_storage_mongodb_legacy/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_storage_mongodb_legacy/configs/feature_flag.xml b/tests/integration/test_storage_mongodb_legacy/configs/feature_flag.xml deleted file mode 100644 index 4ee05db9d1e..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/configs/feature_flag.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 1 - diff --git a/tests/integration/test_storage_mongodb_legacy/configs/named_collections.xml b/tests/integration/test_storage_mongodb_legacy/configs/named_collections.xml 
deleted file mode 100644 index 5f7db390982..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/configs/named_collections.xml +++ /dev/null @@ -1,12 +0,0 @@ - - - - root - clickhouse - mongo1 - 27017 - test - simple_table - - - diff --git a/tests/integration/test_storage_mongodb_legacy/configs/users.xml b/tests/integration/test_storage_mongodb_legacy/configs/users.xml deleted file mode 100644 index 4b6ba057ecb..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/configs/users.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - default - 1 - - - diff --git a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/cert.crt b/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/cert.crt deleted file mode 100644 index 94249109d41..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/cert.crt +++ /dev/null @@ -1,24 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIEGzCCAwOgAwIBAgIUaoGlyuJAyvs6yowFXymfu7seEiUwDQYJKoZIhvcNAQEL -BQAwgZwxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDbGlja0hvdXNlMRMwEQYDVQQH -DApDbGlja0hvdXNlMREwDwYDVQQKDAhQZXJzb25hbDETMBEGA1UECwwKQ2xpY2tI -b3VzZTEkMCIGCSqGSIb3DQEJARYVY2xpY2tob3VzZUBjbGlja2hvdXNlMRUwEwYD -VQQDDAxtb25nb19zZWN1cmUwHhcNMjQwNTI2MTYwMDMxWhcNMzQwNTI0MTYwMDMx -WjCBnDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNsaWNrSG91c2UxEzARBgNVBAcM -CkNsaWNrSG91c2UxETAPBgNVBAoMCFBlcnNvbmFsMRMwEQYDVQQLDApDbGlja0hv -dXNlMSQwIgYJKoZIhvcNAQkBFhVjbGlja2hvdXNlQGNsaWNraG91c2UxFTATBgNV -BAMMDG1vbmdvX3NlY3VyZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AJSeQfMG7xd0+kPehYlEsEw0Sm1DB05SXVCEzIX3DFD6XJrd8eeWwlzYaBatkcwj -+8yvqske02X/3YwpzJyFizAqJIoKql5c5Yii2xH1S9PFP0y+LoJre+eQziHyO33t -eeedeGNJ05Sm2ZAzjfMQ7Rdh6S+gdIO4Y102iQR5yr2aTrh7tu7XkNCjwKTqMMvz -SikP1Rft2J6ECim+MjYCCtH/4yXGeEJ5epU4t3y6Q23B2ZEhY+sqUdwgK9pu8oe4 -mkZ1Qvwakc9Qg12owRSDjBBYrPvghXVpkJ2JkgKTrIAIz9tZ53eDVHNXbWMAotov -jEmRSoGIS1yzwmQ9PdxUwYcCAwEAAaNTMFEwHQYDVR0OBBYEFJyz3Kt5XBDg5cvI -0v1ioqejqX+CMB8GA1UdIwQYMBaAFJyz3Kt5XBDg5cvI0v1ioqejqX+CMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAHAQFA5VMYvaQFnKtKfHg9TF -qfJ4uM3YsGdgsgmGWgflD1S4Z290H6Q2QvyZAEceTrlJxqArlWlVp5DAU6EeXjEh -QMAgdkJHF1Hg2jsZKPtdkb88UtuzwAME357T8NtEJSHzNE5QqYwlVM71JkWpdqvA -UUdOJbWhhJfowIf4tMmL1DUuIy2qYpoP/tEBXEw9uwpmZqb7KELwT3lRyOMaGFN7 -RHVwbvJWlHiu83QDNaWz6ijQkWl3tCN6TWcFD1qc1x8GpMzjbsAAYbCx7fbHM2LD -9kGSCiyv5K0MLNK5u67RtUFfPHtyD8RA0TtxIZ4PEN/eFANKS2/5NEi1ZuZ5/Pk= ------END CERTIFICATE----- diff --git a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml b/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml deleted file mode 100644 index 3efe98e7045..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - none - - - diff --git a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/key.pem b/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/key.pem deleted file mode 100644 index 9444d19a3d2..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/key.pem +++ /dev/null @@ -1,52 +0,0 @@ ------BEGIN PRIVATE KEY----- -MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCUnkHzBu8XdPpD -3oWJRLBMNEptQwdOUl1QhMyF9wxQ+lya3fHnlsJc2GgWrZHMI/vMr6rJHtNl/92M -KcychYswKiSKCqpeXOWIotsR9UvTxT9Mvi6Ca3vnkM4h8jt97XnnnXhjSdOUptmQ -M43zEO0XYekvoHSDuGNdNokEecq9mk64e7bu15DQo8Ck6jDL80opD9UX7diehAop -vjI2AgrR/+MlxnhCeXqVOLd8ukNtwdmRIWPrKlHcICvabvKHuJpGdUL8GpHPUINd -qMEUg4wQWKz74IV1aZCdiZICk6yACM/bWed3g1RzV21jAKLaL4xJkUqBiEtcs8Jk 
-PT3cVMGHAgMBAAECggEAAul6qiHchB+uQMCWyC5xTeRqAXR3tAv4Tj4fGJjkXY4Z -OrAjr9Kp38EvX1amgvUWV3FT3NMevDf5xd9OdzAA0g0uJIF+mAhYFW48i1FnQcHQ -mOf0zmiZR7l8o7ROb3JvooXHxW+ba/qjGPVwC801gJvruehgbOCRxh9DTRp7sH5K -BmcddhULhKBEQjWUmYNEM3A2axpdi3g1aYKERRLn8J0DXcItTwbxuxbNcs3erl8W -3yyv/JKmqnWF5sNyX3wEWuQcDEZZy+W7Hn4KPMxyU+WA5el5nJ8kFlxhpInmajwu -8Ytn6IEyThyXutVomosVBuP16QORl2Nad0hnQO9toQKBgQDDgiehXr3k2wfVaVOD -PocW4leXausIU2XcCn6FxTG9vLUDMPANw0MxgenC2nrjaUU9J9UjdRYgMcFGWrl4 -E27wEn5e0nZ/Y7F2cfhuOc9vNmZ+eHm2KQRyfAjIVL5Hpldqk2jXyCnLBNeWGHSw -kPQMU+FLqmrOFUvXlD2my+OSHwKBgQDCmgS9r+xFh4BCB9dY6eyQJF/jYmAQHs26 -80WJ6gAhbUw1O71uDtS9/3PZVXwwNCOHrcc49BPrpJdxGPHGvd2Q5y+j5LDDbQSZ -aLTiCZ2B0RM5Bd2dXD8gEHN4WCX7pJ/o4kDi4zONBmp5mg/tFfer5z5IU/1P7Wak -1Mu0JIHzmQKBgDNaNoqeVgaMuYwGtFbez6DlJtiwzrdLIJAheYYte5k4vdruub8D -sNyKIRp7RJgDCJq9obBEiuE98GRIZDrz78nDMco6QcHIL87KtNRO/vtZMKa7gkyk -jXR8u9nS2H/9YyytN3amLsQSq4XTOqM+D7xFNAIp6w/ibB9d4quzFj1FAoGBAKTE -x/LcO897NWuzO/D6z+QUCGR87R15F3SNenmVedrTskz4ciH3yMW+v5ZrPSWLX/IH -f8GHWD6TM+780eoW5L1GIh5BCjHN4rEJ6O3iekxqfD4x6zzL2F8Lztk8uZxh/Uuw -FoSFHybvIcQoYAe8K+KPfzq6cqb0OY6i5n920dkxAoGAJkw6ADqsJfH3NR+bQfgF -oEA1KqriMxyEJm44Y7E80C+iF4iNALF+Er9TSnr4mDxX5e/dW9d1YeS9o0nOfkpF -MaBmJfxqo4QQJLPRaxYQ2Jhfn7irir4BroxeNXQgNNhgSuKIvkfRyGYwl7P0AT4v -8H8rkZGneMD3gLB5MfnRhGk= ------END PRIVATE KEY----- ------BEGIN CERTIFICATE----- -MIIEGzCCAwOgAwIBAgIUaoGlyuJAyvs6yowFXymfu7seEiUwDQYJKoZIhvcNAQEL -BQAwgZwxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDbGlja0hvdXNlMRMwEQYDVQQH -DApDbGlja0hvdXNlMREwDwYDVQQKDAhQZXJzb25hbDETMBEGA1UECwwKQ2xpY2tI -b3VzZTEkMCIGCSqGSIb3DQEJARYVY2xpY2tob3VzZUBjbGlja2hvdXNlMRUwEwYD -VQQDDAxtb25nb19zZWN1cmUwHhcNMjQwNTI2MTYwMDMxWhcNMzQwNTI0MTYwMDMx -WjCBnDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNsaWNrSG91c2UxEzARBgNVBAcM -CkNsaWNrSG91c2UxETAPBgNVBAoMCFBlcnNvbmFsMRMwEQYDVQQLDApDbGlja0hv -dXNlMSQwIgYJKoZIhvcNAQkBFhVjbGlja2hvdXNlQGNsaWNraG91c2UxFTATBgNV -BAMMDG1vbmdvX3NlY3VyZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AJSeQfMG7xd0+kPehYlEsEw0Sm1DB05SXVCEzIX3DFD6XJrd8eeWwlzYaBatkcwj -+8yvqske02X/3YwpzJyFizAqJIoKql5c5Yii2xH1S9PFP0y+LoJre+eQziHyO33t -eeedeGNJ05Sm2ZAzjfMQ7Rdh6S+gdIO4Y102iQR5yr2aTrh7tu7XkNCjwKTqMMvz -SikP1Rft2J6ECim+MjYCCtH/4yXGeEJ5epU4t3y6Q23B2ZEhY+sqUdwgK9pu8oe4 -mkZ1Qvwakc9Qg12owRSDjBBYrPvghXVpkJ2JkgKTrIAIz9tZ53eDVHNXbWMAotov -jEmRSoGIS1yzwmQ9PdxUwYcCAwEAAaNTMFEwHQYDVR0OBBYEFJyz3Kt5XBDg5cvI -0v1ioqejqX+CMB8GA1UdIwQYMBaAFJyz3Kt5XBDg5cvI0v1ioqejqX+CMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAHAQFA5VMYvaQFnKtKfHg9TF -qfJ4uM3YsGdgsgmGWgflD1S4Z290H6Q2QvyZAEceTrlJxqArlWlVp5DAU6EeXjEh -QMAgdkJHF1Hg2jsZKPtdkb88UtuzwAME357T8NtEJSHzNE5QqYwlVM71JkWpdqvA -UUdOJbWhhJfowIf4tMmL1DUuIy2qYpoP/tEBXEw9uwpmZqb7KELwT3lRyOMaGFN7 -RHVwbvJWlHiu83QDNaWz6ijQkWl3tCN6TWcFD1qc1x8GpMzjbsAAYbCx7fbHM2LD -9kGSCiyv5K0MLNK5u67RtUFfPHtyD8RA0TtxIZ4PEN/eFANKS2/5NEi1ZuZ5/Pk= ------END CERTIFICATE----- diff --git a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_cert.pem b/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_cert.pem deleted file mode 100644 index 9444d19a3d2..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_cert.pem +++ /dev/null @@ -1,52 +0,0 @@ ------BEGIN PRIVATE KEY----- -MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCUnkHzBu8XdPpD -3oWJRLBMNEptQwdOUl1QhMyF9wxQ+lya3fHnlsJc2GgWrZHMI/vMr6rJHtNl/92M -KcychYswKiSKCqpeXOWIotsR9UvTxT9Mvi6Ca3vnkM4h8jt97XnnnXhjSdOUptmQ -M43zEO0XYekvoHSDuGNdNokEecq9mk64e7bu15DQo8Ck6jDL80opD9UX7diehAop -vjI2AgrR/+MlxnhCeXqVOLd8ukNtwdmRIWPrKlHcICvabvKHuJpGdUL8GpHPUINd 
-qMEUg4wQWKz74IV1aZCdiZICk6yACM/bWed3g1RzV21jAKLaL4xJkUqBiEtcs8Jk -PT3cVMGHAgMBAAECggEAAul6qiHchB+uQMCWyC5xTeRqAXR3tAv4Tj4fGJjkXY4Z -OrAjr9Kp38EvX1amgvUWV3FT3NMevDf5xd9OdzAA0g0uJIF+mAhYFW48i1FnQcHQ -mOf0zmiZR7l8o7ROb3JvooXHxW+ba/qjGPVwC801gJvruehgbOCRxh9DTRp7sH5K -BmcddhULhKBEQjWUmYNEM3A2axpdi3g1aYKERRLn8J0DXcItTwbxuxbNcs3erl8W -3yyv/JKmqnWF5sNyX3wEWuQcDEZZy+W7Hn4KPMxyU+WA5el5nJ8kFlxhpInmajwu -8Ytn6IEyThyXutVomosVBuP16QORl2Nad0hnQO9toQKBgQDDgiehXr3k2wfVaVOD -PocW4leXausIU2XcCn6FxTG9vLUDMPANw0MxgenC2nrjaUU9J9UjdRYgMcFGWrl4 -E27wEn5e0nZ/Y7F2cfhuOc9vNmZ+eHm2KQRyfAjIVL5Hpldqk2jXyCnLBNeWGHSw -kPQMU+FLqmrOFUvXlD2my+OSHwKBgQDCmgS9r+xFh4BCB9dY6eyQJF/jYmAQHs26 -80WJ6gAhbUw1O71uDtS9/3PZVXwwNCOHrcc49BPrpJdxGPHGvd2Q5y+j5LDDbQSZ -aLTiCZ2B0RM5Bd2dXD8gEHN4WCX7pJ/o4kDi4zONBmp5mg/tFfer5z5IU/1P7Wak -1Mu0JIHzmQKBgDNaNoqeVgaMuYwGtFbez6DlJtiwzrdLIJAheYYte5k4vdruub8D -sNyKIRp7RJgDCJq9obBEiuE98GRIZDrz78nDMco6QcHIL87KtNRO/vtZMKa7gkyk -jXR8u9nS2H/9YyytN3amLsQSq4XTOqM+D7xFNAIp6w/ibB9d4quzFj1FAoGBAKTE -x/LcO897NWuzO/D6z+QUCGR87R15F3SNenmVedrTskz4ciH3yMW+v5ZrPSWLX/IH -f8GHWD6TM+780eoW5L1GIh5BCjHN4rEJ6O3iekxqfD4x6zzL2F8Lztk8uZxh/Uuw -FoSFHybvIcQoYAe8K+KPfzq6cqb0OY6i5n920dkxAoGAJkw6ADqsJfH3NR+bQfgF -oEA1KqriMxyEJm44Y7E80C+iF4iNALF+Er9TSnr4mDxX5e/dW9d1YeS9o0nOfkpF -MaBmJfxqo4QQJLPRaxYQ2Jhfn7irir4BroxeNXQgNNhgSuKIvkfRyGYwl7P0AT4v -8H8rkZGneMD3gLB5MfnRhGk= ------END PRIVATE KEY----- ------BEGIN CERTIFICATE----- -MIIEGzCCAwOgAwIBAgIUaoGlyuJAyvs6yowFXymfu7seEiUwDQYJKoZIhvcNAQEL -BQAwgZwxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDbGlja0hvdXNlMRMwEQYDVQQH -DApDbGlja0hvdXNlMREwDwYDVQQKDAhQZXJzb25hbDETMBEGA1UECwwKQ2xpY2tI -b3VzZTEkMCIGCSqGSIb3DQEJARYVY2xpY2tob3VzZUBjbGlja2hvdXNlMRUwEwYD -VQQDDAxtb25nb19zZWN1cmUwHhcNMjQwNTI2MTYwMDMxWhcNMzQwNTI0MTYwMDMx -WjCBnDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNsaWNrSG91c2UxEzARBgNVBAcM -CkNsaWNrSG91c2UxETAPBgNVBAoMCFBlcnNvbmFsMRMwEQYDVQQLDApDbGlja0hv -dXNlMSQwIgYJKoZIhvcNAQkBFhVjbGlja2hvdXNlQGNsaWNraG91c2UxFTATBgNV -BAMMDG1vbmdvX3NlY3VyZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AJSeQfMG7xd0+kPehYlEsEw0Sm1DB05SXVCEzIX3DFD6XJrd8eeWwlzYaBatkcwj -+8yvqske02X/3YwpzJyFizAqJIoKql5c5Yii2xH1S9PFP0y+LoJre+eQziHyO33t -eeedeGNJ05Sm2ZAzjfMQ7Rdh6S+gdIO4Y102iQR5yr2aTrh7tu7XkNCjwKTqMMvz -SikP1Rft2J6ECim+MjYCCtH/4yXGeEJ5epU4t3y6Q23B2ZEhY+sqUdwgK9pu8oe4 -mkZ1Qvwakc9Qg12owRSDjBBYrPvghXVpkJ2JkgKTrIAIz9tZ53eDVHNXbWMAotov -jEmRSoGIS1yzwmQ9PdxUwYcCAwEAAaNTMFEwHQYDVR0OBBYEFJyz3Kt5XBDg5cvI -0v1ioqejqX+CMB8GA1UdIwQYMBaAFJyz3Kt5XBDg5cvI0v1ioqejqX+CMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAHAQFA5VMYvaQFnKtKfHg9TF -qfJ4uM3YsGdgsgmGWgflD1S4Z290H6Q2QvyZAEceTrlJxqArlWlVp5DAU6EeXjEh -QMAgdkJHF1Hg2jsZKPtdkb88UtuzwAME357T8NtEJSHzNE5QqYwlVM71JkWpdqvA -UUdOJbWhhJfowIf4tMmL1DUuIy2qYpoP/tEBXEw9uwpmZqb7KELwT3lRyOMaGFN7 -RHVwbvJWlHiu83QDNaWz6ijQkWl3tCN6TWcFD1qc1x8GpMzjbsAAYbCx7fbHM2LD -9kGSCiyv5K0MLNK5u67RtUFfPHtyD8RA0TtxIZ4PEN/eFANKS2/5NEi1ZuZ5/Pk= ------END CERTIFICATE----- diff --git a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_secure.conf b/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_secure.conf deleted file mode 100644 index 42d9853c6eb..00000000000 --- a/tests/integration/test_storage_mongodb_legacy/mongo_secure_config/mongo_secure.conf +++ /dev/null @@ -1,6 +0,0 @@ -net: - ssl: - mode: requireSSL - PEMKeyFile: /mongo/key.pem - CAFile: /mongo/cert.crt - allowConnectionsWithoutCertificates: true diff --git a/tests/integration/test_storage_mongodb_legacy/test.py b/tests/integration/test_storage_mongodb_legacy/test.py deleted file mode 100644 index ddfd21279c9..00000000000 --- 
a/tests/integration/test_storage_mongodb_legacy/test.py +++ /dev/null @@ -1,509 +0,0 @@ -import datetime -from uuid import UUID - -import pymongo -import pytest - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster - - -@pytest.fixture(scope="module") -def started_cluster(request): - try: - cluster = ClickHouseCluster(__file__) - node = cluster.add_instance( - "node", - main_configs=[ - "mongo_secure_config/config.d/ssl_conf.xml", - "configs/named_collections.xml", - "configs/feature_flag.xml", - ], - user_configs=["configs/users.xml"], - with_mongo=True, - ) - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def get_mongo_connection(started_cluster, secure=False, with_credentials=True): - connection_str = "" - if with_credentials: - connection_str = "mongodb://root:clickhouse@localhost:{}".format( - started_cluster.mongo_secure_port if secure else started_cluster.mongo_port - ) - else: - connection_str = "mongodb://localhost:{}".format( - started_cluster.mongo_no_cred_port - ) - if secure: - connection_str += "/?tls=true&tlsAllowInvalidCertificates=true" - return pymongo.MongoClient(connection_str) - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_uuid(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - mongo_table = db["uuid_table"] - mongo_table.insert({"key": 0, "data": UUID("f0e77736-91d1-48ce-8f01-15123ca1c7ed")}) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE uuid_mongo_table(key UInt64, data UUID) ENGINE = MongoDB('mongo1:27017', 'test', 'uuid_table', 'root', 'clickhouse')" - ) - - assert node.query("SELECT COUNT() FROM uuid_mongo_table") == "1\n" - assert ( - node.query("SELECT data from uuid_mongo_table where key = 0") - == "f0e77736-91d1-48ce-8f01-15123ca1c7ed\n" - ) - node.query("DROP TABLE uuid_mongo_table") - mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_simple_select(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE simple_mongo_table(key UInt64, data String) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse')" - ) - - assert node.query("SELECT COUNT() FROM simple_mongo_table") == "100\n" - assert ( - node.query("SELECT sum(key) FROM simple_mongo_table") - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query("SELECT data from simple_mongo_table where key = 42") - == hex(42 * 42) + "\n" - ) - node.query("DROP TABLE simple_mongo_table") - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_simple_select_from_view(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - simple_mongo_table_view = db.create_collection( - "simple_table_view", viewOn="simple_table" - ) - - node = 
started_cluster.instances["node"] - node.query( - "CREATE TABLE simple_mongo_table(key UInt64, data String) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table_view', 'root', 'clickhouse')" - ) - - assert node.query("SELECT COUNT() FROM simple_mongo_table") == "100\n" - assert ( - node.query("SELECT sum(key) FROM simple_mongo_table") - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query("SELECT data from simple_mongo_table where key = 42") - == hex(42 * 42) + "\n" - ) - node.query("DROP TABLE simple_mongo_table") - simple_mongo_table_view.drop() - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_arrays(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - arrays_mongo_table = db["arrays_table"] - data = [] - for i in range(0, 100): - data.append( - { - "key": i, - "arr_int64": [-(i + 1), -(i + 2), -(i + 3)], - "arr_int32": [-(i + 1), -(i + 2), -(i + 3)], - "arr_int16": [-(i + 1), -(i + 2), -(i + 3)], - "arr_int8": [-(i + 1), -(i + 2), -(i + 3)], - "arr_uint64": [i + 1, i + 2, i + 3], - "arr_uint32": [i + 1, i + 2, i + 3], - "arr_uint16": [i + 1, i + 2, i + 3], - "arr_uint8": [i + 1, i + 2, i + 3], - "arr_float32": [i + 1.125, i + 2.5, i + 3.750], - "arr_float64": [i + 1.125, i + 2.5, i + 3.750], - "arr_date": [ - datetime.datetime(2002, 10, 27), - datetime.datetime(2024, 1, 8), - ], - "arr_datetime": [ - datetime.datetime(2023, 3, 31, 6, 3, 12), - datetime.datetime(1999, 2, 28, 12, 46, 34), - ], - "arr_string": [str(i + 1), str(i + 2), str(i + 3)], - "arr_uuid": [ - "f0e77736-91d1-48ce-8f01-15123ca1c7ed", - "93376a07-c044-4281-a76e-ad27cf6973c5", - ], - "arr_mongo_uuid": [ - UUID("f0e77736-91d1-48ce-8f01-15123ca1c7ed"), - UUID("93376a07-c044-4281-a76e-ad27cf6973c5"), - ], - "arr_arr_bool": [ - [True, False, True], - [True], - [], - None, - [False], - [None], - ], - "arr_empty": [], - "arr_null": None, - "arr_nullable": None, - } - ) - - arrays_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE arrays_mongo_table(" - "key UInt64," - "arr_int64 Array(Int64)," - "arr_int32 Array(Int32)," - "arr_int16 Array(Int16)," - "arr_int8 Array(Int8)," - "arr_uint64 Array(UInt64)," - "arr_uint32 Array(UInt32)," - "arr_uint16 Array(UInt16)," - "arr_uint8 Array(UInt8)," - "arr_float32 Array(Float32)," - "arr_float64 Array(Float64)," - "arr_date Array(Date)," - "arr_datetime Array(DateTime)," - "arr_string Array(String)," - "arr_uuid Array(UUID)," - "arr_mongo_uuid Array(UUID)," - "arr_arr_bool Array(Array(Bool))," - "arr_empty Array(UInt64)," - "arr_null Array(UInt64)," - "arr_arr_null Array(Array(UInt64))," - "arr_nullable Array(Nullable(UInt64))" - ") ENGINE = MongoDB('mongo1:27017', 'test', 'arrays_table', 'root', 'clickhouse')" - ) - - assert node.query("SELECT COUNT() FROM arrays_mongo_table") == "100\n" - - for column_name in ["arr_int64", "arr_int32", "arr_int16", "arr_int8"]: - assert ( - node.query(f"SELECT {column_name} FROM arrays_mongo_table WHERE key = 42") - == "[-43,-44,-45]\n" - ) - - for column_name in ["arr_uint64", "arr_uint32", "arr_uint16", "arr_uint8"]: - assert ( - node.query(f"SELECT {column_name} FROM arrays_mongo_table WHERE key = 42") - == "[43,44,45]\n" - ) - - for column_name in ["arr_float32", "arr_float64"]: - assert ( - node.query(f"SELECT {column_name} FROM arrays_mongo_table WHERE key = 42") - == "[43.125,44.5,45.75]\n" - ) - - assert ( - 
node.query(f"SELECT arr_date FROM arrays_mongo_table WHERE key = 42") - == "['2002-10-27','2024-01-08']\n" - ) - - assert ( - node.query(f"SELECT arr_datetime FROM arrays_mongo_table WHERE key = 42") - == "['2023-03-31 06:03:12','1999-02-28 12:46:34']\n" - ) - - assert ( - node.query(f"SELECT arr_string FROM arrays_mongo_table WHERE key = 42") - == "['43','44','45']\n" - ) - - assert ( - node.query(f"SELECT arr_uuid FROM arrays_mongo_table WHERE key = 42") - == "['f0e77736-91d1-48ce-8f01-15123ca1c7ed','93376a07-c044-4281-a76e-ad27cf6973c5']\n" - ) - - assert ( - node.query(f"SELECT arr_mongo_uuid FROM arrays_mongo_table WHERE key = 42") - == "['f0e77736-91d1-48ce-8f01-15123ca1c7ed','93376a07-c044-4281-a76e-ad27cf6973c5']\n" - ) - - assert ( - node.query(f"SELECT arr_arr_bool FROM arrays_mongo_table WHERE key = 42") - == "[[true,false,true],[true],[],[],[false],[false]]\n" - ) - - assert ( - node.query(f"SELECT arr_empty FROM arrays_mongo_table WHERE key = 42") == "[]\n" - ) - - assert ( - node.query(f"SELECT arr_null FROM arrays_mongo_table WHERE key = 42") == "[]\n" - ) - - assert ( - node.query(f"SELECT arr_arr_null FROM arrays_mongo_table WHERE key = 42") - == "[]\n" - ) - - assert ( - node.query(f"SELECT arr_nullable FROM arrays_mongo_table WHERE key = 42") - == "[]\n" - ) - - # Test INSERT SELECT - node.query("INSERT INTO arrays_mongo_table SELECT * FROM arrays_mongo_table") - - assert node.query("SELECT COUNT() FROM arrays_mongo_table") == "200\n" - assert node.query("SELECT COUNT(DISTINCT *) FROM arrays_mongo_table") == "100\n" - - node.query("DROP TABLE arrays_mongo_table") - arrays_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_complex_data_type(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - incomplete_mongo_table = db["complex_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i), "dict": {"a": i, "b": str(i)}}) - incomplete_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE incomplete_mongo_table(key UInt64, data String) ENGINE = MongoDB('mongo1:27017', 'test', 'complex_table', 'root', 'clickhouse')" - ) - - assert node.query("SELECT COUNT() FROM incomplete_mongo_table") == "100\n" - assert ( - node.query("SELECT sum(key) FROM incomplete_mongo_table") - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query("SELECT data from incomplete_mongo_table where key = 42") - == hex(42 * 42) + "\n" - ) - node.query("DROP TABLE incomplete_mongo_table") - incomplete_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_incorrect_data_type(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - strange_mongo_table = db["strange_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i), "aaaa": "Hello"}) - strange_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE strange_mongo_table(key String, data String) ENGINE = MongoDB('mongo1:27017', 'test', 'strange_table', 'root', 'clickhouse')" - ) - - with pytest.raises(QueryRuntimeException): - node.query("SELECT COUNT() FROM strange_mongo_table") - - with pytest.raises(QueryRuntimeException): - node.query("SELECT uniq(key) FROM 
strange_mongo_table") - - node.query( - "CREATE TABLE strange_mongo_table2(key UInt64, data String, bbbb String) ENGINE = MongoDB('mongo1:27017', 'test', 'strange_table', 'root', 'clickhouse')" - ) - - node.query("DROP TABLE strange_mongo_table") - node.query("DROP TABLE strange_mongo_table2") - strange_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [True], indirect=["started_cluster"]) -def test_secure_connection(started_cluster): - mongo_connection = get_mongo_connection(started_cluster, secure=True) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "CREATE TABLE simple_mongo_table(key UInt64, data String) ENGINE = MongoDB('mongo_secure:27017', 'test', 'simple_table', 'root', 'clickhouse', 'ssl=true')" - ) - - assert node.query("SELECT COUNT() FROM simple_mongo_table") == "100\n" - assert ( - node.query("SELECT sum(key) FROM simple_mongo_table") - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query("SELECT data from simple_mongo_table where key = 42") - == hex(42 * 42) + "\n" - ) - node.query("DROP TABLE simple_mongo_table") - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_predefined_connection_configuration(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query("drop table if exists simple_mongo_table") - node.query( - "create table simple_mongo_table(key UInt64, data String) engine = MongoDB(mongo1)" - ) - assert node.query("SELECT count() FROM simple_mongo_table") == "100\n" - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_no_credentials(started_cluster): - mongo_connection = get_mongo_connection(started_cluster, with_credentials=False) - db = mongo_connection["test"] - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - f"create table simple_mongo_table_2(key UInt64, data String) engine = MongoDB('mongo_no_cred:27017', 'test', 'simple_table', '', '')" - ) - assert node.query("SELECT count() FROM simple_mongo_table_2") == "100\n" - simple_mongo_table.drop() - node.query("DROP TABLE IF EXISTS simple_mongo_table_2") - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_auth_source(started_cluster): - mongo_connection = get_mongo_connection(started_cluster, with_credentials=False) - admin_db = mongo_connection["admin"] - admin_db.add_user( - "root", - "clickhouse", - roles=[{"role": "userAdminAnyDatabase", "db": "admin"}, "readWriteAnyDatabase"], - ) - simple_mongo_table_admin = admin_db["simple_table"] - data = [] - for i in range(0, 50): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table_admin.insert_many(data) - - db = mongo_connection["test"] - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - 
data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query( - "create table simple_mongo_table_fail(key UInt64, data String) engine = MongoDB('mongo_no_cred:27017', 'test', 'simple_table', 'root', 'clickhouse')" - ) - node.query_and_get_error("SELECT count() FROM simple_mongo_table_fail") - node.query( - "create table simple_mongo_table_ok(key UInt64, data String) engine = MongoDB('mongo_no_cred:27017', 'test', 'simple_table', 'root', 'clickhouse', 'authSource=admin')" - ) - assert node.query("SELECT count() FROM simple_mongo_table_ok") == "100\n" - simple_mongo_table.drop() - simple_mongo_table_admin.drop() - node.query("DROP TABLE IF EXISTS simple_mongo_table_ok") - node.query("DROP TABLE IF EXISTS simple_mongo_table_fail") - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_missing_columns(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 10): - data.append({"key": i, "data": hex(i * i)}) - for i in range(0, 10): - data.append({"key": i}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - node.query("drop table if exists simple_mongo_table") - node.query( - "create table simple_mongo_table(key UInt64, data Nullable(String)) engine = MongoDB(mongo1)" - ) - result = node.query("SELECT count() FROM simple_mongo_table WHERE isNull(data)") - assert result == "10\n" - simple_mongo_table.drop() - node.query("DROP TABLE IF EXISTS simple_mongo_table") - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_simple_insert_select(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - - node = started_cluster.instances["node"] - node.query("DROP TABLE IF EXISTS simple_mongo_table") - node.query( - "CREATE TABLE simple_mongo_table(key UInt64, data String) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse')" - ) - node.query( - "INSERT INTO simple_mongo_table SELECT number, 'kek' || toString(number) FROM numbers(10)" - ) - - assert ( - node.query("SELECT data from simple_mongo_table where key = 7").strip() - == "kek7" - ) - node.query("INSERT INTO simple_mongo_table(key) SELECT 12") - assert int(node.query("SELECT count() from simple_mongo_table")) == 11 - assert ( - node.query("SELECT data from simple_mongo_table where key = 12").strip() == "" - ) - - node.query("DROP TABLE simple_mongo_table") - simple_mongo_table.drop() diff --git a/tests/integration/test_table_function_mongodb/configs/feature_flag.xml b/tests/integration/test_table_function_mongodb/configs/feature_flag.xml deleted file mode 100644 index eb2d328060a..00000000000 --- a/tests/integration/test_table_function_mongodb/configs/feature_flag.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 0 - diff --git a/tests/integration/test_table_function_mongodb/test.py b/tests/integration/test_table_function_mongodb/test.py index b3e990cf73b..c4c27b079f2 100644 --- a/tests/integration/test_table_function_mongodb/test.py +++ b/tests/integration/test_table_function_mongodb/test.py @@ -14,7 +14,6 @@ def started_cluster(request): with_mongo=True, main_configs=[ "configs/named_collections.xml", - "configs/feature_flag.xml", ], 
user_configs=["configs/users.xml"], ) diff --git a/tests/integration/test_table_function_mongodb_legacy/__init__.py b/tests/integration/test_table_function_mongodb_legacy/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_table_function_mongodb_legacy/configs/feature_flag.xml b/tests/integration/test_table_function_mongodb_legacy/configs/feature_flag.xml deleted file mode 100644 index 4ee05db9d1e..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/configs/feature_flag.xml +++ /dev/null @@ -1,3 +0,0 @@ - - 1 - diff --git a/tests/integration/test_table_function_mongodb_legacy/configs/users.xml b/tests/integration/test_table_function_mongodb_legacy/configs/users.xml deleted file mode 100644 index 4b6ba057ecb..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/configs/users.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - default - 1 - - - diff --git a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/cert.crt b/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/cert.crt deleted file mode 100644 index 94249109d41..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/cert.crt +++ /dev/null @@ -1,24 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIEGzCCAwOgAwIBAgIUaoGlyuJAyvs6yowFXymfu7seEiUwDQYJKoZIhvcNAQEL -BQAwgZwxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDbGlja0hvdXNlMRMwEQYDVQQH -DApDbGlja0hvdXNlMREwDwYDVQQKDAhQZXJzb25hbDETMBEGA1UECwwKQ2xpY2tI -b3VzZTEkMCIGCSqGSIb3DQEJARYVY2xpY2tob3VzZUBjbGlja2hvdXNlMRUwEwYD -VQQDDAxtb25nb19zZWN1cmUwHhcNMjQwNTI2MTYwMDMxWhcNMzQwNTI0MTYwMDMx -WjCBnDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNsaWNrSG91c2UxEzARBgNVBAcM -CkNsaWNrSG91c2UxETAPBgNVBAoMCFBlcnNvbmFsMRMwEQYDVQQLDApDbGlja0hv -dXNlMSQwIgYJKoZIhvcNAQkBFhVjbGlja2hvdXNlQGNsaWNraG91c2UxFTATBgNV -BAMMDG1vbmdvX3NlY3VyZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AJSeQfMG7xd0+kPehYlEsEw0Sm1DB05SXVCEzIX3DFD6XJrd8eeWwlzYaBatkcwj -+8yvqske02X/3YwpzJyFizAqJIoKql5c5Yii2xH1S9PFP0y+LoJre+eQziHyO33t -eeedeGNJ05Sm2ZAzjfMQ7Rdh6S+gdIO4Y102iQR5yr2aTrh7tu7XkNCjwKTqMMvz -SikP1Rft2J6ECim+MjYCCtH/4yXGeEJ5epU4t3y6Q23B2ZEhY+sqUdwgK9pu8oe4 -mkZ1Qvwakc9Qg12owRSDjBBYrPvghXVpkJ2JkgKTrIAIz9tZ53eDVHNXbWMAotov -jEmRSoGIS1yzwmQ9PdxUwYcCAwEAAaNTMFEwHQYDVR0OBBYEFJyz3Kt5XBDg5cvI -0v1ioqejqX+CMB8GA1UdIwQYMBaAFJyz3Kt5XBDg5cvI0v1ioqejqX+CMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAHAQFA5VMYvaQFnKtKfHg9TF -qfJ4uM3YsGdgsgmGWgflD1S4Z290H6Q2QvyZAEceTrlJxqArlWlVp5DAU6EeXjEh -QMAgdkJHF1Hg2jsZKPtdkb88UtuzwAME357T8NtEJSHzNE5QqYwlVM71JkWpdqvA -UUdOJbWhhJfowIf4tMmL1DUuIy2qYpoP/tEBXEw9uwpmZqb7KELwT3lRyOMaGFN7 -RHVwbvJWlHiu83QDNaWz6ijQkWl3tCN6TWcFD1qc1x8GpMzjbsAAYbCx7fbHM2LD -9kGSCiyv5K0MLNK5u67RtUFfPHtyD8RA0TtxIZ4PEN/eFANKS2/5NEi1ZuZ5/Pk= ------END CERTIFICATE----- diff --git a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml b/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml deleted file mode 100644 index 3efe98e7045..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/config.d/ssl_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - none - - - diff --git a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/key.pem b/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/key.pem deleted file mode 100644 index 9444d19a3d2..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/key.pem +++ /dev/null @@ -1,52 
+0,0 @@ ------BEGIN PRIVATE KEY----- -MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCUnkHzBu8XdPpD -3oWJRLBMNEptQwdOUl1QhMyF9wxQ+lya3fHnlsJc2GgWrZHMI/vMr6rJHtNl/92M -KcychYswKiSKCqpeXOWIotsR9UvTxT9Mvi6Ca3vnkM4h8jt97XnnnXhjSdOUptmQ -M43zEO0XYekvoHSDuGNdNokEecq9mk64e7bu15DQo8Ck6jDL80opD9UX7diehAop -vjI2AgrR/+MlxnhCeXqVOLd8ukNtwdmRIWPrKlHcICvabvKHuJpGdUL8GpHPUINd -qMEUg4wQWKz74IV1aZCdiZICk6yACM/bWed3g1RzV21jAKLaL4xJkUqBiEtcs8Jk -PT3cVMGHAgMBAAECggEAAul6qiHchB+uQMCWyC5xTeRqAXR3tAv4Tj4fGJjkXY4Z -OrAjr9Kp38EvX1amgvUWV3FT3NMevDf5xd9OdzAA0g0uJIF+mAhYFW48i1FnQcHQ -mOf0zmiZR7l8o7ROb3JvooXHxW+ba/qjGPVwC801gJvruehgbOCRxh9DTRp7sH5K -BmcddhULhKBEQjWUmYNEM3A2axpdi3g1aYKERRLn8J0DXcItTwbxuxbNcs3erl8W -3yyv/JKmqnWF5sNyX3wEWuQcDEZZy+W7Hn4KPMxyU+WA5el5nJ8kFlxhpInmajwu -8Ytn6IEyThyXutVomosVBuP16QORl2Nad0hnQO9toQKBgQDDgiehXr3k2wfVaVOD -PocW4leXausIU2XcCn6FxTG9vLUDMPANw0MxgenC2nrjaUU9J9UjdRYgMcFGWrl4 -E27wEn5e0nZ/Y7F2cfhuOc9vNmZ+eHm2KQRyfAjIVL5Hpldqk2jXyCnLBNeWGHSw -kPQMU+FLqmrOFUvXlD2my+OSHwKBgQDCmgS9r+xFh4BCB9dY6eyQJF/jYmAQHs26 -80WJ6gAhbUw1O71uDtS9/3PZVXwwNCOHrcc49BPrpJdxGPHGvd2Q5y+j5LDDbQSZ -aLTiCZ2B0RM5Bd2dXD8gEHN4WCX7pJ/o4kDi4zONBmp5mg/tFfer5z5IU/1P7Wak -1Mu0JIHzmQKBgDNaNoqeVgaMuYwGtFbez6DlJtiwzrdLIJAheYYte5k4vdruub8D -sNyKIRp7RJgDCJq9obBEiuE98GRIZDrz78nDMco6QcHIL87KtNRO/vtZMKa7gkyk -jXR8u9nS2H/9YyytN3amLsQSq4XTOqM+D7xFNAIp6w/ibB9d4quzFj1FAoGBAKTE -x/LcO897NWuzO/D6z+QUCGR87R15F3SNenmVedrTskz4ciH3yMW+v5ZrPSWLX/IH -f8GHWD6TM+780eoW5L1GIh5BCjHN4rEJ6O3iekxqfD4x6zzL2F8Lztk8uZxh/Uuw -FoSFHybvIcQoYAe8K+KPfzq6cqb0OY6i5n920dkxAoGAJkw6ADqsJfH3NR+bQfgF -oEA1KqriMxyEJm44Y7E80C+iF4iNALF+Er9TSnr4mDxX5e/dW9d1YeS9o0nOfkpF -MaBmJfxqo4QQJLPRaxYQ2Jhfn7irir4BroxeNXQgNNhgSuKIvkfRyGYwl7P0AT4v -8H8rkZGneMD3gLB5MfnRhGk= ------END PRIVATE KEY----- ------BEGIN CERTIFICATE----- -MIIEGzCCAwOgAwIBAgIUaoGlyuJAyvs6yowFXymfu7seEiUwDQYJKoZIhvcNAQEL -BQAwgZwxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDbGlja0hvdXNlMRMwEQYDVQQH -DApDbGlja0hvdXNlMREwDwYDVQQKDAhQZXJzb25hbDETMBEGA1UECwwKQ2xpY2tI -b3VzZTEkMCIGCSqGSIb3DQEJARYVY2xpY2tob3VzZUBjbGlja2hvdXNlMRUwEwYD -VQQDDAxtb25nb19zZWN1cmUwHhcNMjQwNTI2MTYwMDMxWhcNMzQwNTI0MTYwMDMx -WjCBnDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNsaWNrSG91c2UxEzARBgNVBAcM -CkNsaWNrSG91c2UxETAPBgNVBAoMCFBlcnNvbmFsMRMwEQYDVQQLDApDbGlja0hv -dXNlMSQwIgYJKoZIhvcNAQkBFhVjbGlja2hvdXNlQGNsaWNraG91c2UxFTATBgNV -BAMMDG1vbmdvX3NlY3VyZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AJSeQfMG7xd0+kPehYlEsEw0Sm1DB05SXVCEzIX3DFD6XJrd8eeWwlzYaBatkcwj -+8yvqske02X/3YwpzJyFizAqJIoKql5c5Yii2xH1S9PFP0y+LoJre+eQziHyO33t -eeedeGNJ05Sm2ZAzjfMQ7Rdh6S+gdIO4Y102iQR5yr2aTrh7tu7XkNCjwKTqMMvz -SikP1Rft2J6ECim+MjYCCtH/4yXGeEJ5epU4t3y6Q23B2ZEhY+sqUdwgK9pu8oe4 -mkZ1Qvwakc9Qg12owRSDjBBYrPvghXVpkJ2JkgKTrIAIz9tZ53eDVHNXbWMAotov -jEmRSoGIS1yzwmQ9PdxUwYcCAwEAAaNTMFEwHQYDVR0OBBYEFJyz3Kt5XBDg5cvI -0v1ioqejqX+CMB8GA1UdIwQYMBaAFJyz3Kt5XBDg5cvI0v1ioqejqX+CMA8GA1Ud -EwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggEBAHAQFA5VMYvaQFnKtKfHg9TF -qfJ4uM3YsGdgsgmGWgflD1S4Z290H6Q2QvyZAEceTrlJxqArlWlVp5DAU6EeXjEh -QMAgdkJHF1Hg2jsZKPtdkb88UtuzwAME357T8NtEJSHzNE5QqYwlVM71JkWpdqvA -UUdOJbWhhJfowIf4tMmL1DUuIy2qYpoP/tEBXEw9uwpmZqb7KELwT3lRyOMaGFN7 -RHVwbvJWlHiu83QDNaWz6ijQkWl3tCN6TWcFD1qc1x8GpMzjbsAAYbCx7fbHM2LD -9kGSCiyv5K0MLNK5u67RtUFfPHtyD8RA0TtxIZ4PEN/eFANKS2/5NEi1ZuZ5/Pk= ------END CERTIFICATE----- diff --git a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/mongo_secure.conf b/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/mongo_secure.conf deleted file mode 100644 index 42d9853c6eb..00000000000 --- 
a/tests/integration/test_table_function_mongodb_legacy/mongo_secure_config/mongo_secure.conf +++ /dev/null @@ -1,6 +0,0 @@ -net: - ssl: - mode: requireSSL - PEMKeyFile: /mongo/key.pem - CAFile: /mongo/cert.crt - allowConnectionsWithoutCertificates: true diff --git a/tests/integration/test_table_function_mongodb_legacy/test.py b/tests/integration/test_table_function_mongodb_legacy/test.py deleted file mode 100644 index 353ef4d1dff..00000000000 --- a/tests/integration/test_table_function_mongodb_legacy/test.py +++ /dev/null @@ -1,276 +0,0 @@ -import pymongo -import pytest - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster - - -@pytest.fixture(scope="module") -def started_cluster(request): - try: - cluster = ClickHouseCluster(__file__) - node = cluster.add_instance( - "node", - with_mongo=True, - main_configs=[ - "mongo_secure_config/config.d/ssl_conf.xml", - "configs/feature_flag.xml", - ], - user_configs=["configs/users.xml"], - ) - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def get_mongo_connection(started_cluster, secure=False, with_credentials=True): - connection_str = "" - if with_credentials: - connection_str = "mongodb://root:clickhouse@localhost:{}".format( - started_cluster.mongo_secure_port if secure else started_cluster.mongo_port - ) - else: - connection_str = "mongodb://localhost:{}".format( - started_cluster.mongo_no_cred_port - ) - if secure: - connection_str += "/?tls=true&tlsAllowInvalidCertificates=true" - return pymongo.MongoClient(connection_str) - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_simple_select(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - - node = started_cluster.instances["node"] - for i in range(0, 100): - node.query( - "INSERT INTO FUNCTION mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String') (key, data) VALUES ({}, '{}')".format( - i, hex(i * i) - ) - ) - assert ( - node.query( - "SELECT COUNT() FROM mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String')" - ) - == "100\n" - ) - assert ( - node.query( - "SELECT sum(key) FROM mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String')" - ) - == str(sum(range(0, 100))) + "\n" - ) - assert ( - node.query( - "SELECT sum(key) FROM mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', 'key UInt64, data String')" - ) - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query( - "SELECT data from mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String') where key = 42" - ) - == hex(42 * 42) + "\n" - ) - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_complex_data_type(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - incomplete_mongo_table = db["complex_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i), "dict": {"a": i, "b": str(i)}}) - incomplete_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - - assert ( - node.query( - "SELECT COUNT() FROM mongodb('mongo1:27017', 'test', 'complex_table', 
'root', 'clickhouse', structure='key UInt64, data String, dict Map(UInt64, String)')" - ) - == "100\n" - ) - assert ( - node.query( - "SELECT sum(key) FROM mongodb('mongo1:27017', 'test', 'complex_table', 'root', 'clickhouse', structure='key UInt64, data String, dict Map(UInt64, String)')" - ) - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query( - "SELECT data from mongodb('mongo1:27017', 'test', 'complex_table', 'root', 'clickhouse', structure='key UInt64, data String, dict Map(UInt64, String)') where key = 42" - ) - == hex(42 * 42) + "\n" - ) - incomplete_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_incorrect_data_type(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - strange_mongo_table = db["strange_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i), "aaaa": "Hello"}) - strange_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - - with pytest.raises(QueryRuntimeException): - node.query( - "SELECT aaaa FROM mongodb('mongo1:27017', 'test', 'strange_table', 'root', 'clickhouse', structure='key UInt64, data String')" - ) - - strange_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [True], indirect=["started_cluster"]) -def test_secure_connection(started_cluster): - mongo_connection = get_mongo_connection(started_cluster, secure=True) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - - assert ( - node.query( - "SELECT COUNT() FROM mongodb('mongo_secure:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String', options='ssl=true')" - ) - == "100\n" - ) - assert ( - node.query( - "SELECT sum(key) FROM mongodb('mongo_secure:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String', options='ssl=true')" - ) - == str(sum(range(0, 100))) + "\n" - ) - assert ( - node.query( - "SELECT sum(key) FROM mongodb('mongo_secure:27017', 'test', 'simple_table', 'root', 'clickhouse', 'key UInt64, data String', 'ssl=true')" - ) - == str(sum(range(0, 100))) + "\n" - ) - - assert ( - node.query( - "SELECT data from mongodb('mongo_secure:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String', options='ssl=true') where key = 42" - ) - == hex(42 * 42) + "\n" - ) - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_predefined_connection_configuration(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - assert ( - node.query( - "SELECT count() FROM mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String')" - ) - == "100\n" - ) - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_no_credentials(started_cluster): - mongo_connection = 
get_mongo_connection(started_cluster, with_credentials=False) - db = mongo_connection["test"] - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - assert ( - node.query( - "SELECT count() FROM mongodb('mongo_no_cred:27017', 'test', 'simple_table', '', '', structure='key UInt64, data String')" - ) - == "100\n" - ) - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_auth_source(started_cluster): - mongo_connection = get_mongo_connection(started_cluster, with_credentials=False) - admin_db = mongo_connection["admin"] - admin_db.add_user( - "root", - "clickhouse", - roles=[{"role": "userAdminAnyDatabase", "db": "admin"}, "readWriteAnyDatabase"], - ) - simple_mongo_table = admin_db["simple_table"] - data = [] - for i in range(0, 50): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - db = mongo_connection["test"] - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 100): - data.append({"key": i, "data": hex(i * i)}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - - node.query_and_get_error( - "SELECT count() FROM mongodb('mongo_no_cred:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String')" - ) - - assert ( - node.query( - "SELECT count() FROM mongodb('mongo_no_cred:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data String', options='authSource=admin')" - ) - == "100\n" - ) - simple_mongo_table.drop() - - -@pytest.mark.parametrize("started_cluster", [False], indirect=["started_cluster"]) -def test_missing_columns(started_cluster): - mongo_connection = get_mongo_connection(started_cluster) - db = mongo_connection["test"] - db.add_user("root", "clickhouse") - simple_mongo_table = db["simple_table"] - data = [] - for i in range(0, 10): - data.append({"key": i, "data": hex(i * i)}) - for i in range(0, 10): - data.append({"key": i}) - simple_mongo_table.insert_many(data) - - node = started_cluster.instances["node"] - result = node.query( - "SELECT count() FROM mongodb('mongo1:27017', 'test', 'simple_table', 'root', 'clickhouse', structure='key UInt64, data Nullable(String)') WHERE isNull(data)" - ) - assert result == "10\n" - simple_mongo_table.drop() diff --git a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql index 7e4adbb7b5b..0e01bd7afbb 100644 --- a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql @@ -3,5 +3,4 @@ SELECT name FROM system.table_functions WHERE length(description) < 10 AND name NOT IN ( 'cosn', 'oss', 'hdfs', 'hdfsCluster', 'hive', 'mysql', 'postgresql', 's3', 's3Cluster', 'sqlite', 'urlCluster', 'mergeTreeParts' -- these functions are not enabled in fast test - , 'mongodb' -- will be removed when `use_legacy_mongodb_integration` setting will be purged will with the old implementation ) ORDER BY name; From 1c5b362f170e16a934f80f26f93238a070de22e1 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Sat, 16 Nov 2024 16:12:12 +0400 Subject: [PATCH 02/56] fix style --- .../test_mongo.py | 8 ++------ .../test_mongo_uri.py | 4 +--- 2 files changed, 3 insertions(+), 9 
deletions(-) diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py index 2cf6250b106..fa271ae5cc0 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo.py @@ -105,9 +105,7 @@ def test_simple(secure_connection, started_cluster, layout_name, simple_tester): @pytest.mark.parametrize("secure_connection", [False], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_COMPLEX)) -def test_complex( - secure_connection, started_cluster, layout_name, complex_tester -): +def test_complex(secure_connection, started_cluster, layout_name, complex_tester): complex_tester.execute(layout_name, started_cluster.instances["node1"]) @@ -119,7 +117,5 @@ def test_ranged(secure_connection, started_cluster, layout_name, ranged_tester): @pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", sorted(LAYOUTS_SIMPLE)) -def test_simple_ssl( - secure_connection, started_cluster, layout_name, simple_tester -): +def test_simple_ssl(secure_connection, started_cluster, layout_name, simple_tester): simple_tester.execute(layout_name, started_cluster.instances["node1"]) diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py index 407031ed7d3..5d632ba6996 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mongo_uri.py @@ -78,7 +78,5 @@ def test_simple(secure_connection, started_cluster, simple_tester, layout_name): @pytest.mark.parametrize("secure_connection", [True], indirect=["secure_connection"]) @pytest.mark.parametrize("layout_name", ["flat"]) -def test_simple_ssl( - secure_connection, started_cluster, simple_tester, layout_name -): +def test_simple_ssl(secure_connection, started_cluster, simple_tester, layout_name): simple_tester.execute(layout_name, started_cluster.instances["uri_node"]) From 2906524f0dbd1d182d848c143f951b5cc4b85b85 Mon Sep 17 00:00:00 2001 From: Kirill Nikiforov Date: Sat, 16 Nov 2024 16:56:05 +0400 Subject: [PATCH 03/56] fix --- src/Dictionaries/tests/gtest_dictionary_configuration.cpp | 8 ++++---- src/Interpreters/fuzzers/execute_query_fuzzer.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 1f803567cf8..08aad663a8c 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -30,7 +30,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) { if (!registered) { - registerDictionaries(false); + registerDictionaries(); registered = true; } @@ -103,7 +103,7 @@ TEST(ConvertDictionaryAST, TrickyAttributes) { if (!registered) { - registerDictionaries(false); + registerDictionaries(); registered = true; } @@ -147,7 +147,7 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) { if (!registered) { - registerDictionaries(false); + registerDictionaries(); registered = true; } @@ -198,7 +198,7 @@ TEST(ConvertDictionaryAST, ComplexSource) { if (!registered) { - registerDictionaries(false); + 
registerDictionaries(); registered = true; } diff --git a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index 908ac6322d0..c29efae1e7d 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -29,10 +29,10 @@ extern "C" int LLVMFuzzerInitialize(int *, char ***) registerInterpreters(); registerFunctions(); registerAggregateFunctions(); - registerTableFunctions(false); + registerTableFunctions(); registerDatabases(); - registerStorages(false); - registerDictionaries(false); + registerStorages(); + registerDictionaries(); registerDisks(/* global_skip_access_check= */ true); registerFormats(); From a74d615aafde161caa2ec2cb9e88932d6803d34a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 19 Nov 2024 16:15:12 +0000 Subject: [PATCH 04/56] added primary index cache --- programs/local/LocalServer.cpp | 13 ++ programs/server/Server.cpp | 14 ++ src/Access/Common/AccessType.h | 2 + src/Common/ProfileEvents.cpp | 5 + src/Core/ServerSettings.cpp | 4 + src/Interpreters/Context.cpp | 41 ++++ src/Interpreters/Context.h | 6 + src/Interpreters/InterpreterSystemQuery.cpp | 36 ++- src/Interpreters/InterpreterSystemQuery.h | 4 +- .../ServerAsynchronousMetrics.cpp | 6 + src/Parsers/ASTSystemQuery.cpp | 2 + src/Parsers/ASTSystemQuery.h | 2 + src/Parsers/ParserSystemQuery.cpp | 1 + src/Processors/QueryPlan/PartsSplitter.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 210 +++++++++++------- src/Storages/MergeTree/IMergeTreeDataPart.h | 21 +- .../MergeTree/IMergeTreeDataPartWriter.cpp | 6 +- .../MergeTree/IMergeTreeDataPartWriter.h | 2 +- .../MergeTree/IMergedBlockOutputStream.h | 1 + .../MergeTree/MergeFromLogEntryTask.cpp | 9 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 11 +- src/Storages/MergeTree/MergeTask.cpp | 7 - src/Storages/MergeTree/MergeTask.h | 4 + src/Storages/MergeTree/MergeTreeData.cpp | 62 ++++-- src/Storages/MergeTree/MergeTreeData.h | 12 +- .../MergeTreeDataPartWriterOnDisk.cpp | 15 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 5 - .../MergeTree/MergeTreeIOSettings.cpp | 2 + src/Storages/MergeTree/MergeTreeIOSettings.h | 2 + src/Storages/MergeTree/MergeTreeSettings.cpp | 2 + src/Storages/MergeTree/MergeTreeSink.cpp | 11 +- .../MergeTree/MergedBlockOutputStream.cpp | 15 +- .../MergeTree/MergedBlockOutputStream.h | 1 - .../MergedColumnOnlyOutputStream.cpp | 14 +- .../MergeTree/MergedColumnOnlyOutputStream.h | 1 - src/Storages/MergeTree/MutateTask.cpp | 2 - src/Storages/MergeTree/PrimaryIndexCache.cpp | 8 + src/Storages/MergeTree/PrimaryIndexCache.h | 73 ++++++ .../MergeTree/ReplicatedMergeTreeSink.cpp | 42 ++-- .../MergeTree/ReplicatedMergeTreeSink.h | 2 + src/Storages/MergeTree/new_index | 0 src/Storages/StorageMergeTree.cpp | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 11 +- .../03273_primary_index_cache.reference | 16 ++ .../0_stateless/03273_primary_index_cache.sql | 45 ++++ ...3274_prewarm_primary_index_cache.reference | 16 ++ .../03274_prewarm_primary_index_cache.sql | 46 ++++ 48 files changed, 646 insertions(+), 174 deletions(-) create mode 100644 src/Storages/MergeTree/PrimaryIndexCache.cpp create mode 100644 src/Storages/MergeTree/PrimaryIndexCache.h create mode 100644 src/Storages/MergeTree/new_index create mode 100644 tests/queries/0_stateless/03273_primary_index_cache.reference create mode 100644 tests/queries/0_stateless/03273_primary_index_cache.sql create mode 100644 
tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference
 create mode 100644 tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index e6f8ecef097..adfd3adcb2a 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -110,6 +110,9 @@ namespace ServerSetting
     extern const ServerSettingsString uncompressed_cache_policy;
     extern const ServerSettingsUInt64 uncompressed_cache_size;
     extern const ServerSettingsDouble uncompressed_cache_size_ratio;
+    extern const ServerSettingsString primary_index_cache_policy;
+    extern const ServerSettingsUInt64 primary_index_cache_size;
+    extern const ServerSettingsDouble primary_index_cache_size_ratio;
     extern const ServerSettingsBool use_legacy_mongodb_integration;
 }

@@ -778,6 +781,16 @@ void LocalServer::processConfig()
     }
     global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);

+    String primary_index_cache_policy = server_settings[ServerSetting::primary_index_cache_policy];
+    size_t primary_index_cache_size = server_settings[ServerSetting::primary_index_cache_size];
+    double primary_index_cache_size_ratio = server_settings[ServerSetting::primary_index_cache_size_ratio];
+    if (primary_index_cache_size > max_cache_size)
+    {
+        primary_index_cache_size = max_cache_size;
+        LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(primary_index_cache_size));
+    }
+    global_context->setPrimaryIndexCache(primary_index_cache_policy, primary_index_cache_size, primary_index_cache_size_ratio);
+
     size_t mmap_cache_size = server_settings[ServerSetting::mmap_cache_size];
     if (mmap_cache_size > max_cache_size)
     {
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 68f262079ff..0b76adebb41 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -279,6 +279,9 @@ namespace ServerSetting
     extern const ServerSettingsString uncompressed_cache_policy;
     extern const ServerSettingsUInt64 uncompressed_cache_size;
     extern const ServerSettingsDouble uncompressed_cache_size_ratio;
+    extern const ServerSettingsString primary_index_cache_policy;
+    extern const ServerSettingsUInt64 primary_index_cache_size;
+    extern const ServerSettingsDouble primary_index_cache_size_ratio;
     extern const ServerSettingsBool use_legacy_mongodb_integration;
 }

@@ -1562,6 +1565,16 @@ try
     }
     global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);

+    String primary_index_cache_policy = server_settings[ServerSetting::primary_index_cache_policy];
+    size_t primary_index_cache_size = server_settings[ServerSetting::primary_index_cache_size];
+    double primary_index_cache_size_ratio = server_settings[ServerSetting::primary_index_cache_size_ratio];
+    if (primary_index_cache_size > max_cache_size)
+    {
+        primary_index_cache_size = max_cache_size;
+        LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(primary_index_cache_size));
+    }
+    global_context->setPrimaryIndexCache(primary_index_cache_policy, primary_index_cache_size, primary_index_cache_size_ratio);
+
     size_t page_cache_size = server_settings[ServerSetting::page_cache_size];
     if (page_cache_size != 0)
         global_context->setPageCache(
@@ -1895,6 +1908,7 @@ try

             global_context->updateUncompressedCacheConfiguration(*config);
             global_context->updateMarkCacheConfiguration(*config);
+            global_context->updatePrimaryIndexCacheConfiguration(*config);
             global_context->updateIndexUncompressedCacheConfiguration(*config);
             global_context->updateIndexMarkCacheConfiguration(*config);
             global_context->updateMMappedFileCacheConfiguration(*config);
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index ec543104167..1027567a753 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -165,6 +165,8 @@ enum class AccessType : uint8_t
     M(SYSTEM_DROP_CONNECTIONS_CACHE, "SYSTEM DROP CONNECTIONS CACHE, DROP CONNECTIONS CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_PREWARM_MARK_CACHE, "SYSTEM PREWARM MARK, PREWARM MARK CACHE, PREWARM MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_MARK_CACHE, "SYSTEM DROP MARK, DROP MARK CACHE, DROP MARKS", GLOBAL, SYSTEM_DROP_CACHE) \
+    M(SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, "SYSTEM PREWARM PRIMARY INDEX, PREWARM PRIMARY INDEX CACHE, PREWARM PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
+    M(SYSTEM_DROP_PRIMARY_INDEX_CACHE, "SYSTEM DROP PRIMARY INDEX, DROP PRIMARY INDEX CACHE, DROP PRIMARY INDEX", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_UNCOMPRESSED_CACHE, "SYSTEM DROP UNCOMPRESSED, DROP UNCOMPRESSED CACHE, DROP UNCOMPRESSED", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_MMAP_CACHE, "SYSTEM DROP MMAP, DROP MMAP CACHE, DROP MMAP", GLOBAL, SYSTEM_DROP_CACHE) \
     M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp
index 7b9f670d340..2eeb7bb6e3b 100644
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@@ -63,6 +63,8 @@
     M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \
     M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \
     M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
+    M(PrimaryIndexCacheHits, "Number of times an entry has been found in the primary index cache, so we didn't have to load an index file.", ValueType::Number) \
+    M(PrimaryIndexCacheMisses, "Number of times an entry has not been found in the primary index cache, so we had to load an index file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
     M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
     M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
    /* Each page cache chunk access increments exactly one of the following 5 PageCacheChunk* counters. */ \
@@ -229,6 +231,9 @@
     M(BackgroundLoadingMarksTasks, "Number of background tasks for loading marks", ValueType::Number) \
     M(LoadedMarksCount, "Number of marks loaded (total across columns).", ValueType::Number) \
     M(LoadedMarksMemoryBytes, "Size of in-memory representations of loaded marks.", ValueType::Bytes) \
+    M(LoadedPrimaryIndexFiles, "Number of primary index files loaded.", ValueType::Number) \
+    M(LoadedPrimaryIndexRows, "Number of rows of primary key loaded.", ValueType::Number) \
+    M(LoadedPrimaryIndexBytes, "Number of bytes of primary key loaded.", ValueType::Bytes) \
     \
     M(Merge, "Number of launched background merges.", ValueType::Number) \
     M(MergeSourceParts, "Number of source parts scheduled for merges.", ValueType::Number) \
diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp
index 2f8e7b6843a..78a3ad3c419 100644
--- a/src/Core/ServerSettings.cpp
+++ b/src/Core/ServerSettings.cpp
@@ -101,6 +101,10 @@ namespace DB
     DECLARE(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
     DECLARE(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \
     DECLARE(Double, mark_cache_prewarm_ratio, 0.95, "The ratio of total size of mark cache to fill during prewarm.", 0) \
+    DECLARE(String, primary_index_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Primary index cache policy name.", 0) \
+    DECLARE(UInt64, primary_index_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for primary index (index of MergeTree family of tables).", 0) \
+    DECLARE(Double, primary_index_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the primary index cache relative to the cache's total size.", 0) \
+    DECLARE(Double, primary_index_cache_prewarm_ratio, 0.95, "The ratio of total size of primary index cache to fill during prewarm.", 0) \
     DECLARE(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \
     DECLARE(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \
     DECLARE(Double, index_uncompressed_cache_size_ratio, DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO, "The size of the protected queue in the secondary index uncompressed cache relative to the cache's total size.", 0) \
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index d2aad0a52d8..b923c1aaf82 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -406,6 +407,7 @@ struct ContextSharedPart : boost::noncopyable
     mutable ResourceManagerPtr resource_manager;
     mutable UncompressedCachePtr uncompressed_cache TSA_GUARDED_BY(mutex); /// The cache of decompressed blocks.
     mutable MarkCachePtr mark_cache TSA_GUARDED_BY(mutex); /// Cache of marks in compressed files.
+    mutable PrimaryIndexCachePtr primary_index_cache TSA_GUARDED_BY(mutex);
     mutable OnceFlag load_marks_threadpool_initialized;
     mutable std::unique_ptr<ThreadPool> load_marks_threadpool; /// Threadpool for loading marks cache.
mutable OnceFlag prefetch_threadpool_initialized; @@ -3234,6 +3236,41 @@ ThreadPool & Context::getLoadMarksThreadpool() const return *shared->load_marks_threadpool; } +void Context::setPrimaryIndexCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio) +{ + std::lock_guard lock(shared->mutex); + + if (shared->primary_index_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Primary index cache has been already created."); + + shared->primary_index_cache = std::make_shared<PrimaryIndexCache>(cache_policy, max_cache_size_in_bytes, size_ratio); +} + +void Context::updatePrimaryIndexCacheConfiguration(const Poco::Util::AbstractConfiguration & config) +{ + std::lock_guard lock(shared->mutex); + + if (!shared->primary_index_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Primary index cache was not created yet."); + + size_t max_size_in_bytes = config.getUInt64("primary_index_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + shared->primary_index_cache->setMaxSizeInBytes(max_size_in_bytes); +} + +PrimaryIndexCachePtr Context::getPrimaryIndexCache() const +{ + SharedLockGuard lock(shared->mutex); + return shared->primary_index_cache; +} + +void Context::clearPrimaryIndexCache() const +{ + std::lock_guard lock(shared->mutex); + + if (shared->primary_index_cache) + shared->primary_index_cache->clear(); +} + void Context::setIndexUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) { std::lock_guard lock(shared->mutex); @@ -3389,6 +3426,10 @@ void Context::clearCaches() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); shared->mark_cache->clear(); + if (!shared->primary_index_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Primary index cache was not created yet."); + shared->primary_index_cache->clear(); + if (!shared->index_uncompressed_cache) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache was not created yet."); shared->index_uncompressed_cache->clear(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index e8ccc31f597..e4db880da6a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -22,6 +22,7 @@ #include #include +#include "Storages/MergeTree/PrimaryIndexCache.h" #include "config.h" #include @@ -1075,6 +1076,11 @@ public: void clearMarkCache() const; ThreadPool & getLoadMarksThreadpool() const; + void setPrimaryIndexCache(const String & cache_policy, size_t max_cache_size_in_bytes, double size_ratio); + void updatePrimaryIndexCacheConfiguration(const Poco::Util::AbstractConfiguration & config); + std::shared_ptr<PrimaryIndexCache> getPrimaryIndexCache() const; + void clearPrimaryIndexCache() const; + void setIndexUncompressedCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio); void updateIndexUncompressedCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getIndexUncompressedCache() const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index b651bfb245e..41b677efe45 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -365,13 +365,22 @@ BlockIO InterpreterSystemQuery::execute() } case Type::PREWARM_MARK_CACHE: { - prewarmMarkCache(); + prewarmCaches(getContext()->getMarkCache(), nullptr); + break; + } + case Type::PREWARM_PRIMARY_INDEX_CACHE: + { + prewarmCaches(nullptr, getContext()->getPrimaryIndexCache()); break; } case Type::DROP_MARK_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE); system_context->clearMarkCache(); break; + case Type::DROP_PRIMARY_INDEX_CACHE: + getContext()->checkAccess(AccessType::SYSTEM_DROP_PRIMARY_INDEX_CACHE); + system_context->clearPrimaryIndexCache(); + break; case Type::DROP_UNCOMPRESSED_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE); system_context->clearUncompressedCache(); @@ -1307,18 +1316,25 @@ RefreshTaskList InterpreterSystemQuery::getRefreshTasks() return tasks; } -void InterpreterSystemQuery::prewarmMarkCache() +void InterpreterSystemQuery::prewarmCaches(MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache) { - if (table_id.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table is not specified for PREWARM MARK CACHE command"); + if (!mark_cache && !index_cache) + return; - getContext()->checkAccess(AccessType::SYSTEM_PREWARM_MARK_CACHE, table_id); + if (table_id.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table is not specified for PREWARM CACHE command"); + + if (mark_cache) + getContext()->checkAccess(AccessType::SYSTEM_PREWARM_MARK_CACHE, table_id); + + if (index_cache) + getContext()->checkAccess(AccessType::SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, table_id); auto table_ptr = DatabaseCatalog::instance().getTable(table_id, getContext()); auto * merge_tree = dynamic_cast(table_ptr.get()); if (!merge_tree) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Command PREWARM MARK CACHE is supported only for MergeTree table, but got: {}", table_ptr->getName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Command PREWARM CACHE is supported only for MergeTree table, but got: {}", table_ptr->getName()); ThreadPool pool( CurrentMetrics::MergeTreePartsLoaderThreads, @@ -1326,7 +1342,7 @@ void InterpreterSystemQuery::prewarmMarkCache() CurrentMetrics::MergeTreePartsLoaderThreadsScheduled, getContext()->getSettingsRef()[Setting::max_threads]); - merge_tree->prewarmMarkCache(pool); + merge_tree->prewarmCaches(pool, std::move(mark_cache), std::move(index_cache)); } @@ -1348,6 +1364,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_DNS_CACHE: case Type::DROP_CONNECTIONS_CACHE: case Type::DROP_MARK_CACHE: + case Type::DROP_PRIMARY_INDEX_CACHE: case Type::DROP_MMAP_CACHE: case Type::DROP_QUERY_CACHE: case Type::DROP_COMPILED_EXPRESSION_CACHE: @@ -1535,6 +1552,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_PREWARM_MARK_CACHE, query.getDatabase(), query.getTable()); break; } + case Type::PREWARM_PRIMARY_INDEX_CACHE: + { + required_access.emplace_back(AccessType::SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, query.getDatabase(), query.getTable()); + break; + } case Type::SYNC_DATABASE_REPLICA: { required_access.emplace_back(AccessType::SYSTEM_SYNC_DATABASE_REPLICA, query.getDatabase()); diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index e31c6cd739b..6ae84fed672 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -6,6 +6,8 @@ #include #include #include +#include "Storages/MarkCache.h" +#include "Storages/MergeTree/PrimaryIndexCache.h" #include @@ -82,7 +84,7 @@ private: AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); - void prewarmMarkCache(); + void prewarmCaches(MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache); void
stopReplicatedDDLQueries(); void startReplicatedDDLQueries(); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 46a811822c2..dfaebec03a8 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -83,6 +83,12 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr new_values["MarkCacheFiles"] = { mark_cache->count(), "Total number of mark files cached in the mark cache" }; } + if (auto primary_index_cache = getContext()->getPrimaryIndexCache()) + { + new_values["PrimaryIndexCacheBytes"] = { primary_index_cache->sizeInBytes(), "Total size of primary index cache in bytes" }; + new_values["PrimaryIndexCacheFiles"] = { primary_index_cache->count(), "Total number of index files cached in the primary index cache" }; + } + if (auto page_cache = getContext()->getPageCache()) { auto rss = page_cache->getResidentSetSize(); diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index d76d33ce708..ec908b02e7b 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -192,6 +192,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::WAIT_LOADING_PARTS: case Type::FLUSH_DISTRIBUTED: case Type::PREWARM_MARK_CACHE: + case Type::PREWARM_PRIMARY_INDEX_CACHE: { if (table) { @@ -408,6 +409,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s case Type::DROP_MMAP_CACHE: case Type::DROP_QUERY_CACHE: case Type::DROP_MARK_CACHE: + case Type::DROP_PRIMARY_INDEX_CACHE: case Type::DROP_INDEX_MARK_CACHE: case Type::DROP_UNCOMPRESSED_CACHE: case Type::DROP_INDEX_UNCOMPRESSED_CACHE: diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index d9ee4d8aa22..78852cd9fac 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -24,7 +24,9 @@ public: DROP_DNS_CACHE, DROP_CONNECTIONS_CACHE, PREWARM_MARK_CACHE, + PREWARM_PRIMARY_INDEX_CACHE, DROP_MARK_CACHE, + DROP_PRIMARY_INDEX_CACHE, DROP_UNCOMPRESSED_CACHE, DROP_INDEX_MARK_CACHE, DROP_INDEX_UNCOMPRESSED_CACHE, diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 453ae0b5032..c1f33dc74a3 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -277,6 +277,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::SYNC_REPLICA: case Type::WAIT_LOADING_PARTS: case Type::PREWARM_MARK_CACHE: + case Type::PREWARM_PRIMARY_INDEX_CACHE: { if (!parseQueryWithOnCluster(res, pos, expected)) return false; diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 57fd41e2a32..92b2a0b05a9 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -205,7 +205,7 @@ public: } private: const RangesInDataParts & parts; - std::vector indices; + std::vector indices; size_t loaded_columns = std::numeric_limits::max(); }; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index ea01a0ed0f9..f955d649f6d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include "Common/Logger.h" #include #include #include @@ -57,6 +59,13 @@ namespace CurrentMetrics extern 
const Metric PartsCompact; } +namespace ProfileEvents +{ + extern const Event LoadedPrimaryIndexFiles; + extern const Event LoadedPrimaryIndexRows; + extern const Event LoadedPrimaryIndexBytes; +} + namespace DB { @@ -351,7 +360,6 @@ IMergeTreeDataPart::IMergeTreeDataPart( incrementStateMetric(state); incrementTypeMetric(part_type); - index = std::make_shared(); minmax_idx = std::make_shared(); initializeIndexGranularityInfo(); @@ -364,46 +372,62 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } - -IMergeTreeDataPart::Index IMergeTreeDataPart::getIndex() const +IMergeTreeDataPart::IndexPtr IMergeTreeDataPart::getIndex() const { std::scoped_lock lock(index_mutex); - if (!index_loaded) - loadIndex(); - index_loaded = true; + + if (index) + return index; + + if (auto index_cache = storage.getPrimaryIndexCache()) + return loadIndexToCache(*index_cache); + + index = loadIndex(); return index; } - -void IMergeTreeDataPart::setIndex(const Columns & cols_) +IMergeTreeDataPart::IndexPtr IMergeTreeDataPart::loadIndexToCache(PrimaryIndexCache & index_cache) const { - std::scoped_lock lock(index_mutex); - if (!index->empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); - index = std::make_shared(cols_); - index_loaded = true; + auto key = PrimaryIndexCache::hash(getDataPartStorage().getFullPath()); + auto callback = [this] { return loadIndex(); }; + return index_cache.getOrSet(key, callback); } -void IMergeTreeDataPart::setIndex(Columns && cols_) +void IMergeTreeDataPart::moveIndexToCache(PrimaryIndexCache & index_cache) { std::scoped_lock lock(index_mutex); - if (!index->empty()) + if (!index) + return; + + auto key = PrimaryIndexCache::hash(getDataPartStorage().getFullPath()); + index_cache.set(key, std::const_pointer_cast<Index>(index)); + index.reset(); + + for (const auto & [_, projection] : projection_parts) + projection->moveIndexToCache(index_cache); +} + +void IMergeTreeDataPart::setIndex(Columns index_columns) +{ + std::scoped_lock lock(index_mutex); + if (index) throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); - index = std::make_shared(std::move(cols_)); - index_loaded = true; + + optimizeIndexColumns(index_granularity.getMarksCount(), index_columns); + index = std::make_shared<Index>(std::move(index_columns)); } void IMergeTreeDataPart::unloadIndex() { std::scoped_lock lock(index_mutex); - index = std::make_shared(); - index_loaded = false; + index.reset(); } bool IMergeTreeDataPart::isIndexLoaded() const { std::scoped_lock lock(index_mutex); - return index_loaded; + return index != nullptr; } void IMergeTreeDataPart::setName(const String & new_name) @@ -609,8 +633,11 @@ void IMergeTreeDataPart::removeIfNeeded() UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const { std::scoped_lock lock(index_mutex); + if (!index) + return 0; + UInt64 res = 0; - for (const ColumnPtr & column : *index) + for (const auto & column : *index) res += column->byteSize(); return res; } @@ -618,8 +645,11 @@ UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const { std::scoped_lock lock(index_mutex); + if (!index) + return 0; + UInt64 res = 0; - for (const ColumnPtr & column : *index) + for (const auto & column : *index) res += column->allocatedBytes(); return res; } @@ -923,7 +953,36 @@ void
IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) co { } -void IMergeTreeDataPart::loadIndex() const +template +void IMergeTreeDataPart::optimizeIndexColumns(size_t marks_count, Columns & index_columns) const +{ + size_t key_size = index_columns.size(); + Float64 ratio_to_drop_suffix_columns = (*storage.getSettings())[MergeTreeSetting::primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns]; + + /// Cut useless suffix columns, if necessary. + if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1) + { + chassert(marks_count > 0); + for (size_t j = 0; j < key_size - 1; ++j) + { + size_t num_changes = 0; + for (size_t i = 1; i < marks_count; ++i) + { + if (0 != index_columns[j]->compareAt(i, i - 1, *index_columns[j], 0)) + ++num_changes; + } + + if (static_cast(num_changes) / marks_count >= ratio_to_drop_suffix_columns) + { + key_size = j + 1; + index_columns.resize(key_size); + break; + } + } + } +} + +std::shared_ptr IMergeTreeDataPart::loadIndex() const { /// Memory for index must not be accounted as memory usage for query, because it belongs to a table. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; @@ -931,68 +990,59 @@ void IMergeTreeDataPart::loadIndex() const auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) metadata_snapshot = metadata_snapshot->projections.get(name).metadata; + const auto & primary_key = metadata_snapshot->getPrimaryKey(); size_t key_size = primary_key.column_names.size(); - if (key_size) + if (!key_size) + return std::make_shared(); + + MutableColumns loaded_index; + loaded_index.resize(key_size); + + for (size_t i = 0; i < key_size; ++i) { - MutableColumns loaded_index; - loaded_index.resize(key_size); - - for (size_t i = 0; i < key_size; ++i) - { - loaded_index[i] = primary_key.data_types[i]->createColumn(); - loaded_index[i]->reserve(index_granularity.getMarksCount()); - } - - String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()); - String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name; - auto index_file = metadata_manager->read(index_name); - size_t marks_count = index_granularity.getMarksCount(); - - Serializations key_serializations(key_size); - for (size_t j = 0; j < key_size; ++j) - key_serializations[j] = primary_key.data_types[j]->getDefaultSerialization(); - - for (size_t i = 0; i < marks_count; ++i) - for (size_t j = 0; j < key_size; ++j) - key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {}); - - /// Cut useless suffix columns, if necessary. 
- Float64 ratio_to_drop_suffix_columns = (*storage.getSettings())[MergeTreeSetting::primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns]; - if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1) - { - chassert(marks_count > 0); - for (size_t j = 0; j < key_size - 1; ++j) - { - size_t num_changes = 0; - for (size_t i = 1; i < marks_count; ++i) - if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0)) - ++num_changes; - - if (static_cast(num_changes) / marks_count >= ratio_to_drop_suffix_columns) - { - key_size = j + 1; - loaded_index.resize(key_size); - break; - } - } - } - - for (size_t i = 0; i < key_size; ++i) - { - loaded_index[i]->shrinkToFit(); - loaded_index[i]->protect(); - if (loaded_index[i]->size() != marks_count) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data from index file {}(expected size: " - "{}, read: {})", index_path, marks_count, loaded_index[i]->size()); - } - - if (!index_file->eof()) - throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path); - - index = std::make_shared(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); + loaded_index[i] = primary_key.data_types[i]->createColumn(); + loaded_index[i]->reserve(index_granularity.getMarksCount()); } + + String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()); + String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name; + auto index_file = metadata_manager->read(index_name); + size_t marks_count = index_granularity.getMarksCount(); + + Serializations key_serializations(key_size); + for (size_t j = 0; j < key_size; ++j) + key_serializations[j] = primary_key.data_types[j]->getDefaultSerialization(); + + for (size_t i = 0; i < marks_count; ++i) + { + for (size_t j = 0; j < key_size; ++j) + key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {}); + } + + optimizeIndexColumns(marks_count, loaded_index); + size_t total_bytes = 0; + + for (size_t i = 0; i < key_size; ++i) + { + loaded_index[i]->shrinkToFit(); + loaded_index[i]->protect(); + total_bytes += loaded_index[i]->byteSize(); + + if (loaded_index[i]->size() != marks_count) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data from index file {}(expected size: " + "{}, read: {})", index_path, marks_count, loaded_index[i]->size()); + } + + if (!index_file->eof()) + throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path); + + ProfileEvents::increment(ProfileEvents::LoadedPrimaryIndexFiles); + ProfileEvents::increment(ProfileEvents::LoadedPrimaryIndexRows, marks_count); + ProfileEvents::increment(ProfileEvents::LoadedPrimaryIndexBytes, total_bytes); + + return std::make_shared(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); } void IMergeTreeDataPart::appendFilesOfIndex(Strings & files) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 24625edf154..3e70f9940f8 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -25,6 +25,7 @@ #include #include #include +#include "Storages/MergeTree/PrimaryIndexCache.h" namespace zkutil @@ -77,7 +78,8 @@ public: using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; - using Index = std::shared_ptr; + using Index = Columns; + using IndexPtr = 
std::shared_ptr; using IndexSizeByName = std::unordered_map; using Type = MergeTreeDataPartType; @@ -371,9 +373,11 @@ public: /// Version of part metadata (columns, pk and so on). Managed properly only for replicated merge tree. int32_t metadata_version; - Index getIndex() const; - void setIndex(const Columns & cols_); - void setIndex(Columns && cols_); + IndexPtr getIndex() const; + IndexPtr loadIndexToCache(PrimaryIndexCache & index_cache) const; + void moveIndexToCache(PrimaryIndexCache & index_cache); + + void setIndex(Columns index_columns); void unloadIndex(); bool isIndexLoaded() const; @@ -598,8 +602,7 @@ protected: /// Lazily loaded in RAM. Contains each index_granularity-th value of primary key tuple. /// Note that marks (also correspond to primary key) are not always in RAM, but cached. See MarkCache.h. mutable std::mutex index_mutex; - mutable Index index TSA_GUARDED_BY(index_mutex); - mutable bool index_loaded TSA_GUARDED_BY(index_mutex) = false; + mutable IndexPtr index; /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk ColumnSize total_columns_size; @@ -694,7 +697,11 @@ private: virtual void appendFilesOfIndexGranularity(Strings & files) const; /// Loads the index file. - void loadIndex() const TSA_REQUIRES(index_mutex); + std::shared_ptr loadIndex() const; + + /// Optimize index. Drop useless columns from suffix of primary key. + template + void optimizeIndexColumns(size_t marks_count, Columns & index_columns) const; void appendFilesOfIndex(Strings & files) const; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index dbfdbbdea88..e66b44aa2d7 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -72,8 +73,11 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( { } -Columns IMergeTreeDataPartWriter::releaseIndexColumns() +std::optional IMergeTreeDataPartWriter::releaseIndexColumns() { + if (!settings.save_primary_index_in_memory) + return {}; + /// The memory for index was allocated without thread memory tracker. /// We need to deallocate it in shrinkToFit without memory tracker as well. 
MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 8923f6a59ca..3bf488f2c04 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -48,7 +48,7 @@ public: virtual size_t getNumberOfOpenStreams() const = 0; - Columns releaseIndexColumns(); + std::optional releaseIndexColumns(); PlainMarksByName releaseCachedMarks(); diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 7dd6d720170..84aec1ea30a 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index d7e807c689f..fa03b3f63fb 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -445,8 +445,13 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite finish_callback = [storage_ptr = &storage]() { storage_ptr->merge_selecting_task->schedule(); }; ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); - if (auto * mark_cache = storage.getContext()->getMarkCache().get()) - addMarksToCache(*part, cached_marks, mark_cache); + if (auto mark_cache = storage.getMarkCacheToPrewarm()) + addMarksToCache(*part, cached_marks, mark_cache.get()); + + /// Move index to cache and reset it here because we need + /// a correct part name after rename for a key of cache entry. + if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) + part->moveIndexToCache(*index_cache); write_part_log({}); StorageReplicatedMergeTree::incrementMergedPartsProfileEvent(part->getType()); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 6aca58faf47..d8b84fbf2e5 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -152,10 +152,17 @@ void MergePlainMergeTreeTask::finish() ThreadFuzzer::maybeInjectSleep(); ThreadFuzzer::maybeInjectMemoryLimitException(); - if (auto * mark_cache = storage.getContext()->getMarkCache().get()) + if (auto mark_cache = storage.getMarkCacheToPrewarm()) { auto marks = merge_task->releaseCachedMarks(); - addMarksToCache(*new_part, marks, mark_cache); + addMarksToCache(*new_part, marks, mark_cache.get()); + } + + if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) + { + /// Move index to cache and reset it here because we need + /// a correct part name after rename for a key of cache entry. 
+ new_part->moveIndexToCache(*index_cache); } write_part_log({}); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 08066113375..59169589730 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -93,7 +93,6 @@ namespace MergeTreeSetting extern const MergeTreeSettingsUInt64 vertical_merge_algorithm_min_columns_to_activate; extern const MergeTreeSettingsUInt64 vertical_merge_algorithm_min_rows_to_activate; extern const MergeTreeSettingsBool vertical_merge_remote_filesystem_prefetch; - extern const MergeTreeSettingsBool prewarm_mark_cache; } namespace ErrorCodes @@ -547,8 +546,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const } } - bool save_marks_in_cache = (*global_ctx->data->getSettings())[MergeTreeSetting::prewarm_mark_cache] && global_ctx->context->getMarkCache(); - global_ctx->to = std::make_shared( global_ctx->new_data_part, global_ctx->metadata_snapshot, @@ -558,7 +555,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() const ctx->compression_codec, global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, - save_marks_in_cache, ctx->blocks_are_granules_size, global_ctx->context->getWriteSettings()); @@ -1089,8 +1085,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const ctx->executor = std::make_unique(ctx->column_parts_pipeline); NamesAndTypesList columns_list = {*ctx->it_name_and_type}; - bool save_marks_in_cache = (*global_ctx->data->getSettings())[MergeTreeSetting::prewarm_mark_cache] && global_ctx->context->getMarkCache(); - ctx->column_to = std::make_unique( global_ctx->new_data_part, global_ctx->metadata_snapshot, @@ -1099,7 +1093,6 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const column_pipepline.indexes_to_recalc, getStatisticsForColumns(columns_list, global_ctx->metadata_snapshot), &global_ctx->written_offset_columns, - save_marks_in_cache, global_ctx->to->getIndexGranularity()); ctx->column_elems_written = 0; diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 53792165987..b863c5c65cc 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -5,6 +5,8 @@ #include #include +#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include "Storages/MergeTree/PrimaryIndexCache.h" #include #include @@ -217,7 +219,9 @@ private: std::promise promise{}; IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns{}; + PlainMarksByName cached_marks; + std::unique_ptr cached_index; MergeTreeTransactionPtr txn; bool need_prefix; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b2f35d0a309..8738eea1100 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -231,12 +231,15 @@ namespace MergeTreeSetting extern const MergeTreeSettingsString storage_policy; extern const MergeTreeSettingsFloat zero_copy_concurrent_part_removal_max_postpone_ratio; extern const MergeTreeSettingsUInt64 zero_copy_concurrent_part_removal_max_split_times; + extern const MergeTreeSettingsBool use_primary_index_cache; + extern const MergeTreeSettingsBool prewarm_primary_index_cache; extern const MergeTreeSettingsBool prewarm_mark_cache; } namespace ServerSetting { extern const ServerSettingsDouble mark_cache_prewarm_ratio; + extern const ServerSettingsDouble primary_index_cache_prewarm_ratio; } namespace ErrorCodes @@ -2343,32 
+2346,49 @@ void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() } } -void MergeTreeData::prewarmMarkCacheIfNeeded(ThreadPool & pool) +PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCache() const { - if (!(*getSettings())[MergeTreeSetting::prewarm_mark_cache]) - return; + if (!(*getSettings())[MergeTreeSetting::use_primary_index_cache]) + return nullptr; - prewarmMarkCache(pool); + return getContext()->getPrimaryIndexCache(); } -void MergeTreeData::prewarmMarkCache(ThreadPool & pool) +PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCacheToPrewarm() const { - auto * mark_cache = getContext()->getMarkCache().get(); - if (!mark_cache) + if (!(*getSettings())[MergeTreeSetting::prewarm_primary_index_cache]) + return nullptr; + + return getPrimaryIndexCache(); +} + +MarkCachePtr MergeTreeData::getMarkCacheToPrewarm() const +{ + if (!(*getSettings())[MergeTreeSetting::prewarm_mark_cache]) + return nullptr; + + return getContext()->getMarkCache(); +} + +void MergeTreeData::prewarmCaches(ThreadPool & pool, MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache) +{ + if (!mark_cache && !index_cache) return; - auto metadata_snaphost = getInMemoryMetadataPtr(); - auto column_names = getColumnsToPrewarmMarks(*getSettings(), metadata_snaphost->getColumns().getAllPhysical()); + Names columns_to_prewarm_marks; - if (column_names.empty()) - return; + if (mark_cache) + { + auto metadata_snapshot = getInMemoryMetadataPtr(); + columns_to_prewarm_marks = getColumnsToPrewarmMarks(*getSettings(), metadata_snapshot->getColumns().getAllPhysical()); + } Stopwatch watch; - LOG_TRACE(log, "Prewarming mark cache"); + LOG_TRACE(log, "Prewarming mark and/or primary index caches"); auto data_parts = getDataPartsVectorForInternalUsage(); - /// Prewarm mark cache firstly for the most fresh parts according + /// Prewarm caches firstly for the most fresh parts according /// to time columns in partition key (if exists) and by modification time. auto to_tuple = [](const auto & part) @@ -2381,20 +2401,22 @@ void MergeTreeData::prewarmMarkCache(ThreadPool & pool) return to_tuple(lhs) > to_tuple(rhs); }); - ThreadPoolCallbackRunnerLocal runner(pool, "PrewarmMarks"); - double ratio_to_prewarm = getContext()->getServerSettings()[ServerSetting::mark_cache_prewarm_ratio]; + ThreadPoolCallbackRunnerLocal runner(pool, "PrewarmCaches"); + + double marks_ratio_to_prewarm = getContext()->getServerSettings()[ServerSetting::mark_cache_prewarm_ratio]; + double index_ratio_to_prewarm = getContext()->getServerSettings()[ServerSetting::primary_index_cache_prewarm_ratio]; for (const auto & part : data_parts) { - if (mark_cache->sizeInBytes() >= mark_cache->maxSizeInBytes() * ratio_to_prewarm) - break; + if (index_cache && index_cache->sizeInBytes() < index_cache->maxSizeInBytes() * index_ratio_to_prewarm) + runner([&] { part->loadIndexToCache(*index_cache); }); - runner([&] { part->loadMarksToCache(column_names, mark_cache); }); + if (mark_cache && mark_cache->sizeInBytes() < mark_cache->maxSizeInBytes() * marks_ratio_to_prewarm) + runner([&] { part->loadMarksToCache(columns_to_prewarm_marks, mark_cache.get()); }); } runner.waitForAllToFinishAndRethrowFirstError(); - watch.stop(); - LOG_TRACE(log, "Prewarmed mark cache in {} seconds", watch.elapsedSeconds()); + LOG_TRACE(log, "Prewarmed mark and/or primary index caches in {} seconds", watch.elapsedSeconds()); } /// Is the part directory old.
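For reviewers, a minimal usage sketch of how the pieces above fit together. This block is illustrative only and not part of the diff; the table name is arbitrary, while the settings and SYSTEM statements are exactly the ones introduced in this series.

CREATE TABLE t_demo (a UInt64, b UInt64)
ENGINE = MergeTree ORDER BY a
SETTINGS use_primary_index_cache = 1, prewarm_primary_index_cache = 1;

-- Prewarm the cache for one table, or drop it server-wide.
SYSTEM PREWARM PRIMARY INDEX CACHE t_demo;
SYSTEM DROP PRIMARY INDEX CACHE;

With use_primary_index_cache = 1 a part's primary index is loaded into the shared PrimaryIndexCache on first use instead of staying resident in the part; prewarm_primary_index_cache = 1 additionally moves freshly written indexes into the cache on inserts, merges, fetches and on server startup.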
diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index fe360907875..58a909e6a2d 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include #include @@ -506,9 +508,15 @@ public: /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks, std::optional> expected_parts); + /// Returns a pointer to primary index cache if it is enabled. + PrimaryIndexCachePtr getPrimaryIndexCache() const; + /// Returns a pointer to primary index cache if it is enabled and required to be prewarmed. + PrimaryIndexCachePtr getPrimaryIndexCacheToPrewarm() const; + /// Returns a pointer to mark cache if it is required to be prewarmed. + MarkCachePtr getMarkCacheToPrewarm() const; + /// Prewarm mark cache for the most recent data parts. - void prewarmMarkCache(ThreadPool & pool); - void prewarmMarkCacheIfNeeded(ThreadPool & pool); + void prewarmCaches(ThreadPool & pool, MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache); String getLogName() const { return log.loadName(); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index c483d47fed7..893afcc7b0e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -327,9 +327,10 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const B for (size_t i = 0; i < index_block.columns(); ++i) { const auto & column = index_block.getByPosition(i).column; - - index_columns[i]->insertFrom(*column, row); index_serializations[i]->serializeBinary(*column, row, index_stream, {}); + + if (settings.save_primary_index_in_memory) + index_columns[i]->insertFrom(*column, row); } } @@ -347,8 +348,14 @@ */ MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - if (index_columns.empty()) - index_columns = primary_index_block.cloneEmptyColumns(); + if (settings.save_primary_index_in_memory) + { + if (index_columns.empty()) + index_columns = primary_index_block.cloneEmptyColumns(); + + for (const auto & column : index_columns) + column->reserve(column->size() + granules_to_write.size()); + } /// Write index. The index contains Primary Key value for each `index_granularity` row.
for (const auto & granule : granules_to_write) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c4ca545ca90..ebc73a2e90f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1074,7 +1074,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( DataTypes key_types; if (!key_indices.empty()) { - const auto & index = part->getIndex(); + const auto index = part->getIndex(); for (size_t i : key_indices) { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 6d19f45e2c4..f9a6c7de034 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -73,7 +73,6 @@ namespace MergeTreeSetting extern const MergeTreeSettingsFloat min_free_disk_ratio_to_perform_insert; extern const MergeTreeSettingsBool optimize_row_order; extern const MergeTreeSettingsFloat ratio_of_defaults_for_sparse_serialization; - extern const MergeTreeSettingsBool prewarm_mark_cache; } namespace ErrorCodes @@ -685,7 +684,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0); - bool save_marks_in_cache = (*data_settings)[MergeTreeSetting::prewarm_mark_cache] && data.getContext()->getMarkCache(); auto out = std::make_unique( new_data_part, @@ -696,7 +694,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( compression_codec, context->getCurrentTransaction() ? context->getCurrentTransaction()->tid : Tx::PrehistoricTID, /*reset_columns=*/ false, - save_marks_in_cache, /*blocks_are_granules_size=*/ false, context->getWriteSettings()); @@ -832,7 +829,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( /// This effectively chooses minimal compression method: /// either default lz4 or compression method with zero thresholds on absolute and relative part size. auto compression_codec = data.getContext()->chooseCompressionCodec(0, 0); - bool save_marks_in_cache = (*data.getSettings())[MergeTreeSetting::prewarm_mark_cache] && data.getContext()->getMarkCache(); auto out = std::make_unique( new_data_part, @@ -844,7 +840,6 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( compression_codec, Tx::PrehistoricTID, /*reset_columns=*/ false, - save_marks_in_cache, /*blocks_are_granules_size=*/ false, data.getContext()->getWriteSettings()); diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.cpp b/src/Storages/MergeTree/MergeTreeIOSettings.cpp index bacfbbd5720..67f2ca31be7 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeIOSettings.cpp @@ -35,6 +35,7 @@ MergeTreeWriterSettings::MergeTreeWriterSettings( bool can_use_adaptive_granularity_, bool rewrite_primary_key_, bool save_marks_in_cache_, + bool save_primary_index_in_memory_, bool blocks_are_granules_size_) : min_compress_block_size( (*storage_settings)[MergeTreeSetting::min_compress_block_size] ? 
(*storage_settings)[MergeTreeSetting::min_compress_block_size] : global_settings[Setting::min_compress_block_size]) @@ -48,6 +49,7 @@ MergeTreeWriterSettings::MergeTreeWriterSettings( , can_use_adaptive_granularity(can_use_adaptive_granularity_) , rewrite_primary_key(rewrite_primary_key_) , save_marks_in_cache(save_marks_in_cache_) + , save_primary_index_in_memory(save_primary_index_in_memory_) , blocks_are_granules_size(blocks_are_granules_size_) , query_write_settings(query_write_settings_) , low_cardinality_max_dictionary_size(global_settings[Setting::low_cardinality_max_dictionary_size]) diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 7506c726bc4..fdcab59724d 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -64,6 +64,7 @@ struct MergeTreeWriterSettings bool can_use_adaptive_granularity_, bool rewrite_primary_key_, bool save_marks_in_cache_, + bool save_primary_index_in_memory_, bool blocks_are_granules_size_); size_t min_compress_block_size; @@ -79,6 +80,7 @@ bool can_use_adaptive_granularity; bool rewrite_primary_key; bool save_marks_in_cache; + bool save_primary_index_in_memory; bool blocks_are_granules_size; WriteSettings query_write_settings; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index fcd4e05cf00..eea03be20dc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -236,6 +236,8 @@ namespace ErrorCodes DECLARE(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ DECLARE(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \ DECLARE(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ + DECLARE(Bool, use_primary_index_cache, false, "Use cache for primary index instead of saving all indexes in memory. Can be useful for very large tables", 0) \ + DECLARE(Bool, prewarm_primary_index_cache, false, "If true primary index cache will be prewarmed by saving the primary index to the primary index cache on inserts, merges, fetches and on startup of server", 0) \ DECLARE(Bool, prewarm_mark_cache, false, "If true mark cache will be prewarmed by saving marks to mark cache on inserts, merges, fetches and on startup of server", 0) \ DECLARE(String, columns_to_prewarm_mark_cache, "", "List of columns to prewarm mark cache for (if enabled). Empty means all columns", 0) \ /** Projection settings.
*/ \ diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 99852309c77..6de4fa4feef 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -247,15 +247,22 @@ void MergeTreeSink::finishDelayedChunk() /// Part can be deduplicated, so increment counters and add to part log only if it's really added if (added) { - if (auto * mark_cache = storage.getContext()->getMarkCache().get()) + if (auto mark_cache = storage.getMarkCacheToPrewarm()) { for (const auto & stream : partition.temp_part.streams) { auto marks = stream.stream->releaseCachedMarks(); - addMarksToCache(*part, marks, mark_cache); + addMarksToCache(*part, marks, mark_cache.get()); } } + if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) + { + /// Move index to cache and reset it here because we need + /// a correct part name after rename for a key of cache entry. + part->moveIndexToCache(*index_cache); + } + auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot)); StorageMergeTree::incrementInsertedPartsProfileEvent(part->getType()); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 14a521ce429..52bc40e7555 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -15,6 +16,10 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace MergeTreeSetting +{ + extern const MergeTreeSettingsBool use_primary_index_cache; +} MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeMutableDataPartPtr & data_part, @@ -25,7 +30,6 @@ MergedBlockOutputStream::MergedBlockOutputStream( CompressionCodecPtr default_codec_, TransactionID tid, bool reset_columns_, - bool save_marks_in_cache, bool blocks_are_granules_size, const WriteSettings & write_settings_, const MergeTreeIndexGranularity & computed_index_granularity) @@ -34,6 +38,9 @@ MergedBlockOutputStream::MergedBlockOutputStream( , default_codec(default_codec_) , write_settings(write_settings_) { + bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; + bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); + MergeTreeWriterSettings writer_settings( data_part->storage.getContext()->getSettingsRef(), write_settings, @@ -41,6 +48,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( data_part->index_granularity_info.mark_type.adaptive, /* rewrite_primary_key = */ true, save_marks_in_cache, + save_primary_index_in_memory, blocks_are_granules_size); /// TODO: looks like isStoredOnDisk() is always true for MergeTreeDataPart @@ -202,7 +210,10 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); - new_part->setIndex(writer->releaseIndexColumns()); + + if (auto computed_index = writer->releaseIndexColumns()) + new_part->setIndex(std::move(*computed_index)); + new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h 
b/src/Storages/MergeTree/MergedBlockOutputStream.h index 060778866e0..e212fe5bb5a 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -24,7 +24,6 @@ public: CompressionCodecPtr default_codec_, TransactionID tid, bool reset_columns_ = false, - bool save_marks_in_cache = false, bool blocks_are_granules_size = false, const WriteSettings & write_settings = {}, const MergeTreeIndexGranularity & computed_index_granularity = {}); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index bed539dfe02..ef10c9c492a 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -1,16 +1,23 @@ #include #include +#include #include #include #include namespace DB { + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; } +namespace MergeTreeSetting +{ + extern const MergeTreeSettingsBool use_primary_index_cache; +} + MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, @@ -19,20 +26,21 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeIndices & indices_to_recalc, const ColumnsStatistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_, - bool save_marks_in_cache, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { - const auto & global_settings = data_part->storage.getContext()->getSettingsRef(); + bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; + bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); MergeTreeWriterSettings writer_settings( - global_settings, + data_part->storage.getContext()->getSettingsRef(), data_part->storage.getContext()->getWriteSettings(), storage_settings, index_granularity_info ? index_granularity_info->mark_type.adaptive : data_part->storage.canUseAdaptiveGranularity(), /* rewrite_primary_key = */ false, save_marks_in_cache, + save_primary_index_in_memory, /* blocks_are_granules_size = */ false); writer = createMergeTreeDataPartWriter( diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index f6bf9e37a58..e837a62743e 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -22,7 +22,6 @@ public: const MergeTreeIndices & indices_to_recalc_, const ColumnsStatistics & stats_to_recalc_, WrittenOffsetColumns * offset_columns_ = nullptr, - bool save_marks_in_cache = false, const MergeTreeIndexGranularity & index_granularity = {}, const MergeTreeIndexGranularityInfo * index_granularity_info_ = nullptr); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 7f6588fc632..aea6d3d1505 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1625,7 +1625,6 @@ private: ctx->compression_codec, ctx->txn ? 
ctx->txn->tid : Tx::PrehistoricTID, /*reset_columns=*/ true, - /*save_marks_in_cache=*/ false, /*blocks_are_granules_size=*/ false, ctx->context->getWriteSettings(), computed_granularity); @@ -1854,7 +1853,6 @@ private: std::vector(ctx->indices_to_recalc.begin(), ctx->indices_to_recalc.end()), ColumnsStatistics(ctx->stats_to_recalc.begin(), ctx->stats_to_recalc.end()), nullptr, - /*save_marks_in_cache=*/ false, ctx->source_part->index_granularity, &ctx->source_part->index_granularity_info ); diff --git a/src/Storages/MergeTree/PrimaryIndexCache.cpp b/src/Storages/MergeTree/PrimaryIndexCache.cpp new file mode 100644 index 00000000000..aeb9969f578 --- /dev/null +++ b/src/Storages/MergeTree/PrimaryIndexCache.cpp @@ -0,0 +1,8 @@ +#include <Storages/MergeTree/PrimaryIndexCache.h> + +namespace DB +{ + +template class CacheBase<UInt128, PrimaryIndex, UInt128TrivialHash, PrimaryIndexWeightFunction>; + +} diff --git a/src/Storages/MergeTree/PrimaryIndexCache.h b/src/Storages/MergeTree/PrimaryIndexCache.h new file mode 100644 index 00000000000..758f18dbed8 --- /dev/null +++ b/src/Storages/MergeTree/PrimaryIndexCache.h @@ -0,0 +1,73 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event PrimaryIndexCacheHits; + extern const Event PrimaryIndexCacheMisses; +} + +namespace DB +{ + +using PrimaryIndex = std::vector<ColumnPtr>; + +/// Estimate of number of bytes in cache for primary indexes. +struct PrimaryIndexWeightFunction +{ + /// We spent additional bytes on key in hashmap, linked lists, shared pointers, etc ... + static constexpr size_t PRIMARY_INDEX_CACHE_OVERHEAD = 128; + + size_t operator()(const PrimaryIndex & index) const + { + size_t res = 0; + for (const auto & column : index) + res += column->byteSize(); + return res + PRIMARY_INDEX_CACHE_OVERHEAD; + } +}; + +extern template class CacheBase<UInt128, PrimaryIndex, UInt128TrivialHash, PrimaryIndexWeightFunction>; + +/** Cache of primary index for StorageMergeTree. + * PrimaryIndex is the in-memory primary index of a data part: each index_granularity-th value of the primary key tuple. + */ +class PrimaryIndexCache : public CacheBase<UInt128, PrimaryIndex, UInt128TrivialHash, PrimaryIndexWeightFunction> +{ +private: + using Base = CacheBase<UInt128, PrimaryIndex, UInt128TrivialHash, PrimaryIndexWeightFunction>; + +public: + PrimaryIndexCache(const String & cache_policy, size_t max_size_in_bytes, double size_ratio) + : Base(cache_policy, max_size_in_bytes, 0, size_ratio) + { + } + + /// Calculate key from path to the part directory.
+ static UInt128 hash(const String & part_path) + { + SipHash hash; + hash.update(part_path.data(), part_path.size() + 1); + return hash.get128(); + } + + template <typename LoadFunc> + MappedPtr getOrSet(const Key & key, LoadFunc && load) + { + auto result = Base::getOrSet(key, load); + if (result.second) + ProfileEvents::increment(ProfileEvents::PrimaryIndexCacheMisses); + else + ProfileEvents::increment(ProfileEvents::PrimaryIndexCacheHits); + + return result.first; + } +}; + +using PrimaryIndexCachePtr = std::shared_ptr<PrimaryIndexCache>; + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index c0e25a54bf3..d884b3aca7e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -466,6 +466,28 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) ++num_blocks_processed; } +template <bool async_insert> +void ReplicatedMergeTreeSinkImpl<async_insert>::prewarmCaches(const MergeTreeDataWriter::TemporaryPart & temp_part) const +{ + const auto & part = temp_part.part; + + if (auto mark_cache = storage.getMarkCacheToPrewarm()) + { + for (const auto & stream : temp_part.streams) + { + auto marks = stream.stream->releaseCachedMarks(); + addMarksToCache(*part, marks, mark_cache.get()); + } + } + + if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) + { + /// Move index to cache and reset it here because we need + /// a correct part name after rename for a key of cache entry. + part->moveIndexToCache(*index_cache); + } +} + template<> void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper) { @@ -486,16 +508,9 @@ /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ?
ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; - auto * mark_cache = storage.getContext()->getMarkCache().get(); - if (!error && mark_cache) - { - for (const auto & stream : partition.temp_part.streams) - { - auto marks = stream.stream->releaseCachedMarks(); - addMarksToCache(*part, marks, mark_cache); - } - } + if (!error) + prewarmCaches(partition.temp_part); auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error)); @@ -540,14 +555,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa if (conflict_block_ids.empty()) { - if (auto * mark_cache = storage.getContext()->getMarkCache().get()) - { - for (const auto & stream : partition.temp_part.streams) - { - auto marks = stream.stream->releaseCachedMarks(); - addMarksToCache(*partition.temp_part.part, marks, mark_cache); - } - } + prewarmCaches(partition.temp_part); auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart( diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 7d025361717..b467cc167f8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -129,6 +130,7 @@ private: std::unique_ptr delayed_chunk; void finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper); + void prewarmCaches(const MergeTreeDataWriter::TemporaryPart & temp_part) const; }; using ReplicatedMergeTreeSinkWithAsyncDeduplicate = ReplicatedMergeTreeSinkImpl; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 1ba0617d8ae..81e6a22ba58 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -155,7 +155,11 @@ StorageMergeTree::StorageMergeTree( loadMutations(); loadDeduplicationLog(); - prewarmMarkCacheIfNeeded(getActivePartsLoadingThreadPool().get()); + + prewarmCaches( + getActivePartsLoadingThreadPool().get(), + getMarkCacheToPrewarm(), + getPrimaryIndexCacheToPrewarm()); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 793fd02c656..c49e7fba7e5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -208,7 +208,6 @@ namespace MergeTreeSetting extern const MergeTreeSettingsBool use_minimalistic_checksums_in_zookeeper; extern const MergeTreeSettingsBool use_minimalistic_part_header_in_zookeeper; extern const MergeTreeSettingsMilliseconds wait_for_unique_parts_send_before_shutdown_ms; - extern const MergeTreeSettingsBool prewarm_mark_cache; } namespace FailPoints @@ -509,7 +508,11 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } loadDataParts(skip_sanity_checks, expected_parts_on_this_replica); - prewarmMarkCacheIfNeeded(getActivePartsLoadingThreadPool().get()); + + prewarmCaches( + getActivePartsLoadingThreadPool().get(), + getMarkCacheToPrewarm(), + getPrimaryIndexCacheToPrewarm()); if (LoadingStrictnessLevel::ATTACH <= mode) { @@ -5082,10 +5085,10 @@ bool StorageReplicatedMergeTree::fetchPart( ProfileEvents::increment(ProfileEvents::ObsoleteReplicatedParts); } - if
((*getSettings())[MergeTreeSetting::prewarm_mark_cache] && getContext()->getMarkCache()) + if (auto mark_cache = getMarkCacheToPrewarm()) { auto column_names = getColumnsToPrewarmMarks(*getSettings(), part->getColumns()); - part->loadMarksToCache(column_names, getContext()->getMarkCache().get()); + part->loadMarksToCache(column_names, mark_cache.get()); } write_part_log({}); diff --git a/tests/queries/0_stateless/03273_primary_index_cache.reference b/tests/queries/0_stateless/03273_primary_index_cache.reference new file mode 100644 index 00000000000..611787366ee --- /dev/null +++ b/tests/queries/0_stateless/03273_primary_index_cache.reference @@ -0,0 +1,16 @@ +0 +PrimaryIndexCacheBytes 0 +PrimaryIndexCacheFiles 0 +99 +0 +PrimaryIndexCacheBytes 1280 +PrimaryIndexCacheFiles 2 +0 +PrimaryIndexCacheBytes 0 +PrimaryIndexCacheFiles 0 +49 +0 +PrimaryIndexCacheBytes 640 +PrimaryIndexCacheFiles 1 +2 160 1280 +1 80 640 diff --git a/tests/queries/0_stateless/03273_primary_index_cache.sql b/tests/queries/0_stateless/03273_primary_index_cache.sql new file mode 100644 index 00000000000..18c2703689b --- /dev/null +++ b/tests/queries/0_stateless/03273_primary_index_cache.sql @@ -0,0 +1,45 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS t_primary_index_cache; + +CREATE TABLE t_primary_index_cache (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY a PARTITION BY a % 2 +SETTINGS use_primary_index_cache = 1, prewarm_primary_index_cache = 0, index_granularity = 64, index_granularity_bytes = '10M', min_bytes_for_wide_part = 0; + +SYSTEM DROP PRIMARY INDEX CACHE; + +INSERT INTO t_primary_index_cache SELECT number, number FROM numbers(10000); + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SELECT count() FROM t_primary_index_cache WHERE a > 100 AND a < 200; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SYSTEM DROP PRIMARY INDEX CACHE; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SELECT count() FROM t_primary_index_cache WHERE a > 100 AND a < 200 AND a % 2 = 0; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SYSTEM FLUSH LOGS; + +SELECT + ProfileEvents['LoadedPrimaryIndexFiles'], + ProfileEvents['LoadedPrimaryIndexRows'], + ProfileEvents['LoadedPrimaryIndexBytes'] +FROM system.query_log +WHERE query LIKE 'SELECT count() FROM t_primary_index_cache%' AND current_database = currentDatabase() AND type = 'QueryFinish' +ORDER BY event_time_microseconds; + +DROP TABLE t_primary_index_cache; diff --git a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference new file mode 
100644 index 00000000000..ed46312d77d --- /dev/null +++ b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference @@ -0,0 +1,16 @@ +0 +PrimaryIndexCacheBytes 1280 +PrimaryIndexCacheFiles 2 +99 +0 +PrimaryIndexCacheBytes 1280 +PrimaryIndexCacheFiles 2 +0 +PrimaryIndexCacheBytes 1280 +PrimaryIndexCacheFiles 2 +49 +0 +PrimaryIndexCacheBytes 1280 +PrimaryIndexCacheFiles 2 +0 0 0 +0 0 0 diff --git a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql new file mode 100644 index 00000000000..e9e04cff6ec --- /dev/null +++ b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql @@ -0,0 +1,46 @@ +-- Tags: no-parallel + +DROP TABLE IF EXISTS t_primary_index_cache_2; + +CREATE TABLE t_primary_index_cache_2 (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY a PARTITION BY a % 2 +SETTINGS use_primary_index_cache = 1, prewarm_primary_index_cache = 1, index_granularity = 64, index_granularity_bytes = '10M', min_bytes_for_wide_part = 0; + +SYSTEM DROP PRIMARY INDEX CACHE; + +INSERT INTO t_primary_index_cache_2 SELECT number, number FROM numbers(10000); + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SELECT count() FROM t_primary_index_cache_2 WHERE a > 100 AND a < 200; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SYSTEM DROP PRIMARY INDEX CACHE; +SYSTEM PREWARM PRIMARY INDEX CACHE t_primary_index_cache_2; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SELECT count() FROM t_primary_index_cache_2 WHERE a > 100 AND a < 200 AND a % 2 = 0; + +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; +SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; + +SYSTEM FLUSH LOGS; + +SELECT + ProfileEvents['LoadedPrimaryIndexFiles'], + ProfileEvents['LoadedPrimaryIndexRows'], + ProfileEvents['LoadedPrimaryIndexBytes'] +FROM system.query_log +WHERE query LIKE 'SELECT count() FROM t_primary_index_cache_2%' AND current_database = currentDatabase() AND type = 'QueryFinish' +ORDER BY event_time_microseconds; + +DROP TABLE t_primary_index_cache_2; \ No newline at end of file From 7881ae22866b14862c09f1ae4bb9414c012d56d5 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 19 Nov 2024 17:18:39 +0000 Subject: [PATCH 05/56] better primary index cache --- src/Interpreters/InterpreterSystemQuery.cpp | 51 +++++++--- src/Interpreters/InterpreterSystemQuery.h | 4 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 +- src/Storages/MergeTree/MergeTreeData.cpp | 35 +++++-- .../MergeTree/MergeTreeDataWriter.cpp | 23 ++++- src/Storages/MergeTree/MergeTreeDataWriter.h | 1 + src/Storages/MergeTree/MergeTreeSink.cpp | 16 +--- 
.../MergeTree/MergedBlockOutputStream.cpp | 3 +- .../MergedColumnOnlyOutputStream.cpp | 7 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 26 +---- src/Storages/StorageReplicatedMergeTree.cpp | 5 + .../01271_show_privileges.reference | 2 + ...3274_prewarm_primary_index_cache.reference | 30 +++--- .../03274_prewarm_primary_index_cache.sql | 96 ++++++++++++------- 14 files changed, 185 insertions(+), 124 deletions(-) diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 41b677efe45..d1b6b9e1ace 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -365,12 +365,12 @@ BlockIO InterpreterSystemQuery::execute() } case Type::PREWARM_MARK_CACHE: { - prewarmCaches(getContext()->getMarkCache(), nullptr); + prewarmMarkCache(); break; } case Type::PREWARM_PRIMARY_INDEX_CACHE: { - prewarmCaches(nullptr, getContext()->getPrimaryIndexCache()); + prewarmPrimaryIndexCache(); break; } case Type::DROP_MARK_CACHE: @@ -1316,25 +1316,21 @@ RefreshTaskList InterpreterSystemQuery::getRefreshTasks() return tasks; } -void InterpreterSystemQuery::prewarmCaches(MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache) +void InterpreterSystemQuery::prewarmMarkCache() { - if (!mark_cache && !index_cache) - return; - if (table_id.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table is not specified for PREWARM CACHE command"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table is not specified for PREWARM MARK CACHE command"); - if (mark_cache) - getContext()->checkAccess(AccessType::SYSTEM_PREWARM_MARK_CACHE, table_id); - - if (index_cache) - getContext()->checkAccess(AccessType::SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, table_id); + getContext()->checkAccess(AccessType::SYSTEM_PREWARM_MARK_CACHE, table_id); auto table_ptr = DatabaseCatalog::instance().getTable(table_id, getContext()); auto * merge_tree = dynamic_cast(table_ptr.get()); - if (!merge_tree) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Command PREWARM CACHE is supported only for MergeTree table, but got: {}", table_ptr->getName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Command PREWARM MARK CACHE is supported only for MergeTree table, but got: {}", table_ptr->getName()); + + auto mark_cache = getContext()->getMarkCache(); + if (!mark_cache) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Mark cache is not configured"); ThreadPool pool( CurrentMetrics::MergeTreePartsLoaderThreads, @@ -1342,7 +1338,32 @@ void InterpreterSystemQuery::prewarmCaches(MarkCachePtr mark_cache, PrimaryIndex CurrentMetrics::MergeTreePartsLoaderThreadsScheduled, getContext()->getSettingsRef()[Setting::max_threads]); - merge_tree->prewarmCaches(pool, std::move(mark_cache), std::move(index_cache)); + merge_tree->prewarmCaches(pool, std::move(mark_cache), nullptr); +} + +void InterpreterSystemQuery::prewarmPrimaryIndexCache() +{ + if (table_id.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table is not specified for PREWARM PRIMARY INDEX CACHE command"); + + getContext()->checkAccess(AccessType::SYSTEM_PREWARM_PRIMARY_INDEX_CACHE, table_id); + + auto table_ptr = DatabaseCatalog::instance().getTable(table_id, getContext()); + auto * merge_tree = dynamic_cast(table_ptr.get()); + if (!merge_tree) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Command PREWARM PRIMARY INDEX CACHE is supported only for MergeTree table, but got: {}", table_ptr->getName()); + + auto index_cache = merge_tree->getPrimaryIndexCache(); + if (!index_cache) + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Primary index cache is not configured or is not enabled for table {}", table_id.getFullTableName()); + + ThreadPool pool( + CurrentMetrics::MergeTreePartsLoaderThreads, + CurrentMetrics::MergeTreePartsLoaderThreadsActive, + CurrentMetrics::MergeTreePartsLoaderThreadsScheduled, + getContext()->getSettingsRef()[Setting::max_threads]); + + merge_tree->prewarmCaches(pool, nullptr, std::move(index_cache)); } diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 6ae84fed672..47fac8330a4 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -84,7 +84,9 @@ private: AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); - void prewarmCaches(MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache); + + void prewarmMarkCache(); + void prewarmPrimaryIndexCache(); void stopReplicatedDDLQueries(); void startReplicatedDDLQueries(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a591289c68f..2aa9ac6a17c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -33,7 +33,6 @@ #include #include #include -#include "Common/Logger.h" #include #include #include @@ -389,7 +388,6 @@ IMergeTreeDataPart::IndexPtr IMergeTreeDataPart::getIndex() const IMergeTreeDataPart::IndexPtr IMergeTreeDataPart::loadIndexToCache(PrimaryIndexCache & index_cache) const { - LOG_DEBUG(getLogger("KEK"), "part name: {}, load index path: {}", name, getDataPartStorage().getFullPath()); auto key = PrimaryIndexCache::hash(getDataPartStorage().getFullPath()); auto callback = [this] { return loadIndex(); }; return index_cache.getOrSet(key, callback); @@ -415,7 +413,7 @@ void IMergeTreeDataPart::setIndex(Columns index_columns) if (index) throw Exception(ErrorCodes::LOGICAL_ERROR, "The index of data part can be set only once"); - optimizeIndexColumns(index_granularity.getMarksCount(), index_columns); + optimizeIndexColumns(index_granularity->getMarksCount(), index_columns); index = std::make_shared(std::move(index_columns)); } @@ -780,7 +778,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadIndexGranularity(); if (!(*storage.getSettings())[MergeTreeSetting::primary_key_lazy_load]) - getIndex(); + index = loadIndex(); calculateColumnsAndSecondaryIndicesSizesOnDisk(); loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
@@ -1006,13 +1004,13 @@ std::shared_ptr IMergeTreeDataPart::loadIndex() const for (size_t i = 0; i < key_size; ++i) { loaded_index[i] = primary_key.data_types[i]->createColumn(); - loaded_index[i]->reserve(index_granularity.getMarksCount()); + loaded_index[i]->reserve(index_granularity->getMarksCount()); } String index_name = "primary" + getIndexExtensionFromFilesystem(getDataPartStorage()); String index_path = fs::path(getDataPartStorage().getRelativePath()) / index_name; auto index_file = metadata_manager->read(index_name); - size_t marks_count = index_granularity.getMarksCount(); + size_t marks_count = index_granularity->getMarksCount(); Serializations key_serializations(key_size); for (size_t j = 0; j < key_size; ++j) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 81bafae209c..10b518822ac 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -235,6 +235,7 @@ namespace MergeTreeSetting extern const MergeTreeSettingsBool use_primary_index_cache; extern const MergeTreeSettingsBool prewarm_primary_index_cache; extern const MergeTreeSettingsBool prewarm_mark_cache; + extern const MergeTreeSettingsBool primary_key_lazy_load; } namespace ServerSetting @@ -2349,7 +2350,10 @@ void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCache() const { - if (!(*getSettings())[MergeTreeSetting::use_primary_index_cache]) + bool use_primary_index_cache = (*getSettings())[MergeTreeSetting::use_primary_index_cache]; + bool primary_key_lazy_load = (*getSettings())[MergeTreeSetting::primary_key_lazy_load]; + + if (!use_primary_index_cache || !primary_key_lazy_load) return nullptr; return getContext()->getPrimaryIndexCache(); @@ -2376,14 +2380,6 @@ void MergeTreeData::prewarmCaches(ThreadPool & pool, MarkCachePtr mark_cache, Pr if (!mark_cache && !index_cache) return; - Names columns_to_prewarm_marks; - - if (mark_cache) - { - auto metadata_snaphost = getInMemoryMetadataPtr(); - columns_to_prewarm_marks = getColumnsToPrewarmMarks(*getSettings(), metadata_snaphost->getColumns().getAllPhysical()); - } - Stopwatch watch; LOG_TRACE(log, "Prewarming mark and/or primary index caches"); @@ -2407,13 +2403,32 @@ void MergeTreeData::prewarmCaches(ThreadPool & pool, MarkCachePtr mark_cache, Pr double marks_ratio_to_prewarm = getContext()->getServerSettings()[ServerSetting::mark_cache_prewarm_ratio]; double index_ratio_to_prewarm = getContext()->getServerSettings()[ServerSetting::primary_index_cache_prewarm_ratio]; + Names columns_to_prewarm_marks; + + if (mark_cache) + { + auto metadata_snaphost = getInMemoryMetadataPtr(); + columns_to_prewarm_marks = getColumnsToPrewarmMarks(*getSettings(), metadata_snaphost->getColumns().getAllPhysical()); + } + for (const auto & part : data_parts) { - if (index_cache && index_cache->sizeInBytes() < index_cache->maxSizeInBytes() * index_ratio_to_prewarm) + bool added_task = false; + + if (index_cache && !part->isIndexLoaded() && index_cache->sizeInBytes() < index_cache->maxSizeInBytes() * index_ratio_to_prewarm) + { + added_task = true; runner([&] { part->loadIndexToCache(*index_cache); }); + } if (mark_cache && mark_cache->sizeInBytes() < mark_cache->maxSizeInBytes() * marks_ratio_to_prewarm) + { + added_task = true; runner([&] { part->loadMarksToCache(columns_to_prewarm_marks, mark_cache.get()); }); + } + + if (!added_task) + break; } runner.waitForAllToFinishAndRethrowFirstError(); diff --git 
a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 588390f012b..a0e1de24cf1 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -14,7 +14,7 @@ #include #include #include -#include "Common/logger_useful.h" +#include #include #include #include @@ -225,6 +225,27 @@ void MergeTreeDataWriter::TemporaryPart::finalize() projection->getDataPartStorage().precommitTransaction(); } +void MergeTreeDataWriter::TemporaryPart::prewarmCaches() +{ + /// This method must be called after rename and commit of part + /// because a correct path is required for the keys of caches. + + if (auto mark_cache = part->storage.getMarkCacheToPrewarm()) + { + for (const auto & stream : streams) + { + auto marks = stream.stream->releaseCachedMarks(); + addMarksToCache(*part, marks, mark_cache.get()); + } + } + + if (auto index_cache = part->storage.getPrimaryIndexCacheToPrewarm()) + { + /// Index was already set during writing. Now move it to cache. + part->moveIndexToCache(*index_cache); + } +} + std::vector scatterAsyncInsertInfoBySelector(AsyncInsertInfoPtr async_insert_info, const IColumn::Selector & selector, size_t partition_num) { if (nullptr == async_insert_info) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index 863c951d957..c2224a72683 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -75,6 +75,7 @@ public: void cancel(); void finalize(); + void prewarmCaches(); }; /** All rows must correspond to same partition. diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 6de4fa4feef..d65d1f3212f 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -247,21 +247,7 @@ void MergeTreeSink::finishDelayedChunk() /// Part can be deduplicated, so increment counters and add to part log only if it's really added if (added) { - if (auto mark_cache = storage.getMarkCacheToPrewarm()) - { - for (const auto & stream : partition.temp_part.streams) - { - auto marks = stream.stream->releaseCachedMarks(); - addMarksToCache(*part, marks, mark_cache.get()); - } - } - - if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) - { - /// Move index to cache and reset it here because we need - /// a correct part name after rename for a key of cache entry. - part->moveIndexToCache(*index_cache); - } + partition.temp_part.prewarmCaches(); auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot)); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 0aa0778ebe4..ab07bbd424e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -17,7 +17,6 @@ namespace ErrorCodes namespace MergeTreeSetting { - extern const MergeTreeSettingsBool use_primary_index_cache; extern const MergeTreeSettingsBool enable_index_granularity_compression; } @@ -38,7 +37,9 @@ MergedBlockOutputStream::MergedBlockOutputStream( , default_codec(default_codec_) , write_settings(write_settings_) { + /// Save marks in memory if prewarm is enabled to avoid rereading marks file. 
bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; + /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid rereading marks file. bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); MergeTreeWriterSettings writer_settings( diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index a330fe55aa0..46754d005ba 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -13,11 +13,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -namespace MergeTreeSetting -{ - extern const MergeTreeSettingsBool use_primary_index_cache; -} - MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeMutableDataPartPtr & data_part, const StorageMetadataPtr & metadata_snapshot_, @@ -29,7 +24,9 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns) : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { + /// Save marks in memory if prewarm is enabled to avoid rereading marks file. bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; + /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid rereading marks file. bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); /// Granularity is never recomputed while writing only columns. diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index d884b3aca7e..0a25a4582d5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -466,28 +466,6 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) ++num_blocks_processed; } -template -void ReplicatedMergeTreeSinkImpl::prewarmCaches(const MergeTreeDataWriter::TemporaryPart & temp_part) const -{ - const auto & part = temp_part.part; - - if (auto mark_cache = storage.getMarkCacheToPrewarm()) - { - for (const auto & stream : temp_part.streams) - { - auto marks = stream.stream->releaseCachedMarks(); - addMarksToCache(*part, marks, mark_cache.get()); - } - } - - if (auto index_cache = storage.getPrimaryIndexCacheToPrewarm()) - { - /// Move index to cache and reset it here because we need - /// a correct part name after rename for a key of cache entry. - part->moveIndexToCache(*index_cache); - } -} - template<> void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFaultInjectionPtr & zookeeper) { @@ -510,7 +488,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF int error = (deduplicate && deduplicated) ? 
ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; if (!error) - prewarmCaches(partition.temp_part); + partition.temp_part.prewarmCaches(); auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error)); @@ -555,7 +533,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa if (conflict_block_ids.empty()) { - prewarmCaches(partition.temp_part); + partition.temp_part.prewarmCaches(); auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart( diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0600beacd83..79ca6fd4c97 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5091,6 +5091,11 @@ bool StorageReplicatedMergeTree::fetchPart( part->loadMarksToCache(column_names, mark_cache.get()); } + if (auto index_cache = getPrimaryIndexCacheToPrewarm()) + { + part->loadIndexToCache(*index_cache); + } + write_part_log({}); } else diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index de6df7ac021..dddedb25f5a 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -114,6 +114,8 @@ SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYS SYSTEM DROP CONNECTIONS CACHE ['SYSTEM DROP CONNECTIONS CACHE','DROP CONNECTIONS CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM PREWARM MARK CACHE ['SYSTEM PREWARM MARK','PREWARM MARK CACHE','PREWARM MARKS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE +SYSTEM PREWARM PRIMARY INDEX CACHE ['SYSTEM PREWARM PRIMARY INDEX','PREWARM PRIMARY INDEX CACHE','PREWARM PRIMARY INDEX'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP PRIMARY INDEX CACHE ['SYSTEM DROP PRIMARY INDEX','DROP PRIMARY INDEX CACHE','DROP PRIMARY INDEX'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP QUERY CACHE ['SYSTEM DROP QUERY','DROP QUERY CACHE','DROP QUERY'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference index ed46312d77d..1a9e1167eb4 100644 --- a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference +++ b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.reference @@ -1,16 +1,22 @@ +449 0 -PrimaryIndexCacheBytes 1280 -PrimaryIndexCacheFiles 2 -99 +449 0 -PrimaryIndexCacheBytes 1280 -PrimaryIndexCacheFiles 2 +898 +898 0 -PrimaryIndexCacheBytes 1280 -PrimaryIndexCacheFiles 2 -49 +898 +898 +0 +898 +0 +898 +0 +0 +0 +0 +0 +0 +0 +1 0 -PrimaryIndexCacheBytes 1280 -PrimaryIndexCacheFiles 2 -0 0 0 -0 0 0 diff --git a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql index e9e04cff6ec..08c41d0fc72 100644 --- a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql +++ b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql @@ -1,46 +1,74 @@ 
--- Tags: no-parallel +-- Tags: no-parallel, no-shared-merge-tree -DROP TABLE IF EXISTS t_primary_index_cache_2; +DROP TABLE IF EXISTS t_prewarm_cache_rmt_1; +DROP TABLE IF EXISTS t_prewarm_cache_rmt_2; -CREATE TABLE t_primary_index_cache_2 (a UInt64, b UInt64) -ENGINE = MergeTree ORDER BY a PARTITION BY a % 2 -SETTINGS use_primary_index_cache = 1, prewarm_primary_index_cache = 1, index_granularity = 64, index_granularity_bytes = '10M', min_bytes_for_wide_part = 0; +CREATE TABLE t_prewarm_cache_rmt_1 (a UInt64, b UInt64, c UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03274_prewarm_mark_cache_smt/t_prewarm_cache', '1') +ORDER BY a PARTITION BY a % 2 +SETTINGS prewarm_primary_index_cache = 1, use_primary_index_cache = 1; + +CREATE TABLE t_prewarm_cache_rmt_2 (a UInt64, b UInt64, c UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03274_prewarm_mark_cache_smt/t_prewarm_cache', '2') +ORDER BY a PARTITION BY a % 2 +SETTINGS prewarm_primary_index_cache = 1, use_primary_index_cache = 1; + +SYSTEM DROP PRIMARY INDEX CACHE; +SYSTEM STOP FETCHES t_prewarm_cache_rmt_2; + +-- Check that prewarm works on insert. +INSERT INTO t_prewarm_cache_rmt_1 SELECT number, rand(), rand() FROM numbers(20000); + +SELECT count() FROM t_prewarm_cache_rmt_1 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); + +-- Check that prewarm works on fetch. +SYSTEM DROP PRIMARY INDEX CACHE; +SYSTEM START FETCHES t_prewarm_cache_rmt_2; +SYSTEM SYNC REPLICA t_prewarm_cache_rmt_2; + +SELECT count() FROM t_prewarm_cache_rmt_2 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); + +-- Check that prewarm works on merge. +INSERT INTO t_prewarm_cache_rmt_1 SELECT number, rand(), rand() FROM numbers(20000); +OPTIMIZE TABLE t_prewarm_cache_rmt_1 FINAL; + +SYSTEM SYNC REPLICA t_prewarm_cache_rmt_2; + +SELECT count() FROM t_prewarm_cache_rmt_1 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT count() FROM t_prewarm_cache_rmt_2 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); + +-- Check that prewarm works on restart. 
+SYSTEM DROP PRIMARY INDEX CACHE; + +DETACH TABLE t_prewarm_cache_rmt_1; +DETACH TABLE t_prewarm_cache_rmt_2; + +ATTACH TABLE t_prewarm_cache_rmt_1; +ATTACH TABLE t_prewarm_cache_rmt_2; + +SELECT count() FROM t_prewarm_cache_rmt_1 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT count() FROM t_prewarm_cache_rmt_2 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); SYSTEM DROP PRIMARY INDEX CACHE; -INSERT INTO t_primary_index_cache_2 SELECT number, number FROM numbers(10000); +SELECT count() FROM t_prewarm_cache_rmt_1 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); -SYSTEM RELOAD ASYNCHRONOUS METRICS; -SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; -SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; +--- Check that system query works. +SYSTEM PREWARM PRIMARY INDEX CACHE t_prewarm_cache_rmt_1; -SELECT count() FROM t_primary_index_cache_2 WHERE a > 100 AND a < 200; - -SYSTEM RELOAD ASYNCHRONOUS METRICS; -SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; -SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; - -SYSTEM DROP PRIMARY INDEX CACHE; -SYSTEM PREWARM PRIMARY INDEX CACHE t_primary_index_cache_2; - -SYSTEM RELOAD ASYNCHRONOUS METRICS; -SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; -SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; - -SELECT count() FROM t_primary_index_cache_2 WHERE a > 100 AND a < 200 AND a % 2 = 0; - -SYSTEM RELOAD ASYNCHRONOUS METRICS; -SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE table = 't_primary_index_cache_2' AND active; -SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('PrimaryIndexCacheFiles', 'PrimaryIndexCacheBytes') ORDER BY metric; +SELECT count() FROM t_prewarm_cache_rmt_1 WHERE a % 2 = 0 AND a > 100 AND a < 1000; +SELECT sum(primary_key_bytes_in_memory) FROM system.parts WHERE database = currentDatabase() AND table IN ('t_prewarm_cache_rmt_1', 't_prewarm_cache_rmt_2'); SYSTEM FLUSH LOGS; -SELECT - ProfileEvents['LoadedPrimaryIndexFiles'], - ProfileEvents['LoadedPrimaryIndexRows'], - ProfileEvents['LoadedPrimaryIndexBytes'] -FROM system.query_log -WHERE query LIKE 'SELECT count() FROM t_primary_index_cache_2%' AND current_database = currentDatabase() AND type = 'QueryFinish' +SELECT ProfileEvents['LoadedPrimaryIndexFiles'] FROM system.query_log +WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND query LIKE 'SELECT count() FROM t_prewarm_cache%' ORDER BY event_time_microseconds; -DROP TABLE t_primary_index_cache_2; \ No newline at end of file +DROP TABLE IF EXISTS t_prewarm_cache_rmt_1; +DROP TABLE IF EXISTS t_prewarm_cache_rmt_2; From 10cd060fb317e77e2510a5a3d5e537b14b6be0a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 21 Nov 2024 14:25:49 +0100 Subject: [PATCH 06/56] Stop wasting disk space and link time --- 
src/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 55228b2d1ec..ecd559ebab8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -107,11 +107,6 @@ list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_dele add_headers_and_sources(clickhouse_compression Compression) -add_headers_and_sources(clickhouse_compression Parsers) -add_headers_and_sources(clickhouse_compression Core) -#Included these specific files to avoid linking grpc -add_glob(clickhouse_compression_headers Server/ServerType.h) -add_glob(clickhouse_compression_sources Server/ServerType.cpp) add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources}) From 334bf3bfe02cd3b9c19aecd09f6e14f90bf4222f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 21 Nov 2024 13:37:31 +0000 Subject: [PATCH 07/56] Fix calculating dynamic columns sizes on vertical merge --- .../MergeTree/IMergeTreeDataPartWriter.h | 2 +- src/Storages/MergeTree/MergeTask.cpp | 4 ++- src/Storages/MergeTree/MergeTask.h | 1 + .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 17 ++++++++++--- .../MergeTree/MergedBlockOutputStream.h | 6 +++-- .../MergeTree/MergedColumnOnlyOutputStream.h | 1 + ...amic_column_sizes_vertical_merge.reference | 1 + ...74_dynamic_column_sizes_vertical_merge.sql | 25 +++++++++++++++++++ 9 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.reference create mode 100644 tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d6d8cbd115b..878122f6428 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -55,7 +55,7 @@ public: MergeTreeIndexGranularityPtr getIndexGranularity() const { return index_granularity; } - virtual Block getColumnsSample() const = 0; + virtual const Block & getColumnsSample() const = 0; protected: SerializationPtr getSerialization(const String & column_name) const; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index b978beae14b..abcee67b47c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1167,6 +1167,8 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const ctx->executor.reset(); auto changed_checksums = ctx->column_to->fillChecksums(global_ctx->new_data_part, global_ctx->checksums_gathered_columns); global_ctx->checksums_gathered_columns.add(std::move(changed_checksums)); + const auto & columns_sample = ctx->column_to->getColumnsSample().getColumnsWithTypeAndName(); + global_ctx->gathered_columns_samples.insert(global_ctx->gathered_columns_samples.end(), columns_sample.begin(), columns_sample.end()); auto cached_marks = ctx->column_to->releaseCachedMarks(); for (auto & [name, marks] : cached_marks) @@ -1316,7 +1318,7 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) global_ctx->to->finalizePart(global_ctx->new_data_part, ctx->need_sync); else - global_ctx->to->finalizePart(global_ctx->new_data_part, ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns); + global_ctx->to->finalizePart(global_ctx->new_data_part, 
ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns, &global_ctx->gathered_columns_samples); auto cached_marks = global_ctx->to->releaseCachedMarks(); for (auto & [name, marks] : cached_marks) diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 721a2a933e1..3dbc885735c 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -194,6 +194,7 @@ private: NamesAndTypesList merging_columns{}; NamesAndTypesList storage_columns{}; MergeTreeData::DataPart::Checksums checksums_gathered_columns{}; + ColumnsWithTypeAndName gathered_columns_samples{}; IndicesDescription merging_skip_indexes; std::unordered_map skip_indexes_by_column; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 5b869c252c3..2e289ab8324 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -133,7 +133,7 @@ public: void cancel() noexcept override; - Block getColumnsSample() const override { return block_sample; } + const Block & getColumnsSample() const override { return block_sample; } protected: /// Count index_granularity for block and store in `index_granularity` diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 979b4698738..2dcf88e3bc2 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -193,16 +193,18 @@ void MergedBlockOutputStream::finalizePart( const MergeTreeMutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + MergeTreeData::DataPart::Checksums * additional_column_checksums, + ColumnsWithTypeAndName * additional_columns_samples) { - finalizePartAsync(new_part, sync, total_columns_list, additional_column_checksums).finish(); + finalizePartAsync(new_part, sync, total_columns_list, additional_column_checksums, additional_columns_samples).finish(); } MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( const MergeTreeMutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list, - MergeTreeData::DataPart::Checksums * additional_column_checksums) + MergeTreeData::DataPart::Checksums * additional_column_checksums, + ColumnsWithTypeAndName * additional_columns_samples) { /// Finish write and get checksums. 
MergeTreeData::DataPart::Checksums checksums; @@ -248,7 +250,14 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); - new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(writer->getColumnsSample()); + + auto columns_sample = writer->getColumnsSample(); + if (additional_columns_samples) + { + for (const auto & column : *additional_columns_samples) + columns_sample.insert(column); + } + new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(columns_sample); if ((*new_part->storage.getSettings())[MergeTreeSetting::enable_index_granularity_compression]) { diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index afa2eaf18ec..0149c0f2101 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -64,13 +64,15 @@ public: const MergeTreeMutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr, + ColumnsWithTypeAndName * additional_columns_samples = nullptr); void finalizePart( const MergeTreeMutableDataPartPtr & new_part, bool sync, const NamesAndTypesList * total_columns_list = nullptr, - MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr); + MergeTreeData::DataPart::Checksums * additional_column_checksums = nullptr, + ColumnsWithTypeAndName * additional_columns_samples = nullptr); private: /** If `permutation` is given, it rearranges the values in the columns when writing. 
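The extra `additional_columns_samples` parameter threaded through `finalizePart` above is what lets a vertical merge account for the sizes of columns written by the gathering stage. A sketch of how the effect can be observed, assuming the `test` table from the regression test added below and the usual `system.parts_columns` column names:

-- Sizes of the Dynamic and JSON columns should be non-zero after
-- the vertical merge performed by OPTIMIZE ... FINAL in the test.
SELECT column, column_data_compressed_bytes, column_data_uncompressed_bytes
FROM system.parts_columns
WHERE database = currentDatabase() AND table = 'test' AND active;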
diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index f2f2f10d6ff..62159a8b7fd 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -30,6 +30,7 @@ public: MergeTreeData::DataPart::Checksums fillChecksums(MergeTreeData::MutableDataPartPtr & new_part, MergeTreeData::DataPart::Checksums & all_checksums); + const Block & getColumnsSample() const { return writer->getColumnsSample(); } void finish(bool sync); void cancel() noexcept override; }; diff --git a/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.reference b/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.reference new file mode 100644 index 00000000000..777c6e539df --- /dev/null +++ b/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.reference @@ -0,0 +1 @@ +test 2000000 70 7 7 diff --git a/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.sql b/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.sql new file mode 100644 index 00000000000..b3b1c080114 --- /dev/null +++ b/tests/queries/0_stateless/03274_dynamic_column_sizes_vertical_merge.sql @@ -0,0 +1,25 @@ +-- Tags: no-random-settings, no-fasttest + +set allow_experimental_dynamic_type = 1; +set allow_experimental_json_type = 1; + + +drop table if exists test; +create table test (d Dynamic, json JSON) engine=MergeTree order by tuple() settings min_rows_for_wide_part=0, min_bytes_for_wide_part=0, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=0; +insert into test select number, '{"a" : 42, "b" : "Hello, World"}' from numbers(1000000); +insert into test select number, '{"a" : 42, "b" : "Hello, World"}' from numbers(1000000); +optimize table test final; + +SELECT + `table`, + sum(rows) AS rows, + floor(sum(data_uncompressed_bytes) / (1024 * 1024)) AS data_size_uncompressed, + floor(sum(data_compressed_bytes) / (1024 * 1024)) AS data_size_compressed, + floor(sum(bytes_on_disk) / (1024 * 1024)) AS total_size_on_disk +FROM system.parts +WHERE active AND (database = currentDatabase()) AND (`table` = 'test') +GROUP BY `table` +ORDER BY `table` ASC; + +drop table test; + From 99916f85fc1918a624b9577421581fff7db73597 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 Nov 2024 14:29:11 +0000 Subject: [PATCH 08/56] fix setting of primary index --- src/Storages/MergeTree/MergedBlockOutputStream.cpp | 6 +++--- src/Storages/MergeTree/MutateTask.cpp | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index ab07bbd424e..01359af021e 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -249,9 +249,6 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->rows_count = rows_count; new_part->modification_time = time(nullptr); - if (auto computed_index = writer->releaseIndexColumns()) - new_part->setIndex(std::move(*computed_index)); - new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->setBytesUncompressedOnDisk(checksums.getTotalSizeUncompressedOnDisk()); @@ -264,6 +261,9 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index_granularity = std::move(new_index_granularity); } + if (auto
computed_index = writer->releaseIndexColumns()) + new_part->setIndex(std::move(*computed_index)); + /// In mutation, existing_rows_count is already calculated in PartMergerWriter /// In merge situation, lightweight deleted rows was physically deleted, existing_rows_count equals rows_count if (!new_part->existing_rows_count.has_value()) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 750fb91c557..8c8c07fa266 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -985,7 +985,6 @@ void finalizeMutatedPart( new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; - new_data_part->setIndex(*source_part->getIndex()); new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); @@ -995,6 +994,9 @@ void finalizeMutatedPart( new_data_part->index_granularity = std::move(new_index_granularity); } + if (!new_data_part->storage.getPrimaryIndexCache()) + new_data_part->setIndex(*source_part->getIndex()); + /// Load rest projections which are hardlinked bool noop; new_data_part->loadProjections(false, false, noop, true /* if_not_loaded */); From e19ac0aa1181f9215f51935aa8e4fc9a7ec607aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 21 Nov 2024 16:27:51 +0100 Subject: [PATCH 09/56] Remove duplicate building and linking of common files --- src/CMakeLists.txt | 3 +-- src/Common/Config/CMakeLists.txt | 9 --------- src/Common/ZooKeeper/CMakeLists.txt | 23 ++++++++++++++--------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ecd559ebab8..ac03f40cd93 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -109,7 +109,6 @@ list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_dele add_headers_and_sources(clickhouse_compression Compression) add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources}) - add_headers_and_sources(dbms Disks/IO) add_headers_and_sources(dbms Disks/ObjectStorages) if (TARGET ch_contrib::sqlite) @@ -217,7 +216,6 @@ add_object_library(clickhouse_access Access) add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) add_object_library(clickhouse_core_mysql Core/MySQL) -add_object_library(clickhouse_compression Compression) add_object_library(clickhouse_querypipeline QueryPipeline) add_object_library(clickhouse_datatypes DataTypes) add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations) @@ -424,6 +422,7 @@ dbms_target_link_libraries ( Poco::JSON PUBLIC boost::system + clickhouse_compression clickhouse_common_io Poco::Redis ) diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index 2bd32b98bda..e91a01568d5 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -16,15 +16,6 @@ target_link_libraries(clickhouse_common_config Poco::XML ) -add_library(clickhouse_common_config_no_zookeeper_log ${SRCS}) -target_link_libraries(clickhouse_common_config_no_zookeeper_log - PUBLIC - clickhouse_common_zookeeper_no_log - common - Poco::XML -) - if (TARGET ch_contrib::yaml_cpp) target_link_libraries(clickhouse_common_config PRIVATE ch_contrib::yaml_cpp) - target_link_libraries(clickhouse_common_config_no_zookeeper_log PRIVATE ch_contrib::yaml_cpp) endif() diff --git a/src/Common/ZooKeeper/CMakeLists.txt 
b/src/Common/ZooKeeper/CMakeLists.txt index 8b6c420e565..b70a2299ba6 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -2,25 +2,30 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_common_zookeeper .) -list(APPEND clickhouse_common_zookeeper_sources ${CMAKE_CURRENT_SOURCE_DIR}/../../../src/Coordination/KeeperFeatureFlags.cpp) +# Needs to be built differently depending on ZOOKEEPER_LOG +list(REMOVE_ITEM clickhouse_common_zookeeper_sources "ZooKeeperImpl.cpp") -# for clickhouse server -add_library(clickhouse_common_zookeeper ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) -target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG) -target_link_libraries (clickhouse_common_zookeeper +add_library(clickhouse_common_zookeeper_base ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) +target_link_libraries (clickhouse_common_zookeeper_base PUBLIC clickhouse_common_io clickhouse_compression common ) +# for clickhouse server +add_library(clickhouse_common_zookeeper ZooKeeperImpl.cpp) +target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG) +target_link_libraries (clickhouse_common_zookeeper + PUBLIC + clickhouse_common_zookeeper_base +) + # for examples -- no logging (to avoid extra dependencies) -add_library(clickhouse_common_zookeeper_no_log ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) +add_library(clickhouse_common_zookeeper_no_log ZooKeeperImpl.cpp) target_link_libraries (clickhouse_common_zookeeper_no_log PUBLIC - clickhouse_common_io - clickhouse_compression - common + clickhouse_common_zookeeper_base ) if (ENABLE_EXAMPLES) add_subdirectory(examples) From a329ea7768acab65b920dd5f41a7175ef751bab8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 21 Nov 2024 16:03:35 +0000 Subject: [PATCH 10/56] fix test for lazy index load --- .../03128_merge_tree_index_lazy_load.reference | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference index 022457178ec..5022e543cd2 100644 --- a/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference +++ b/tests/queries/0_stateless/03128_merge_tree_index_lazy_load.reference @@ -1,8 +1,8 @@ 0 0 0 -1 4 4 -2 8 8 -3 9 9 -0 0 0 +1 4 0 +2 8 0 +3 9 0 +0 0 0 1 4 0 2 8 0 3 9 0 From 069c4b198aa51ac8682dded8c1762facf72af412 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 22 Nov 2024 17:21:38 +0000 Subject: [PATCH 11/56] minor enhancements --- programs/local/LocalServer.cpp | 2 +- programs/server/Server.cpp | 2 +- src/Core/Defines.h | 3 +++ src/Core/ServerSettings.cpp | 6 +++--- src/Interpreters/Context.cpp | 4 ++-- src/Interpreters/Context.h | 2 +- src/Interpreters/InterpreterSystemQuery.h | 2 -- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 - src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp | 1 - src/Storages/MergeTree/IMergedBlockOutputStream.h | 1 - src/Storages/MergeTree/MergeTask.h | 4 ---- src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++----- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataPartWriterOnDisk.cpp | 8 ++------ src/Storages/MergeTree/MergeTreeSettings.cpp | 4 ++-- src/Storages/MergeTree/MergedBlockOutputStream.cpp | 4 ++-- 
.../MergeTree/MergedColumnOnlyOutputStream.cpp | 4 ++-- src/Storages/MergeTree/PrimaryIndexCache.h | 7 ++++--- src/Storages/MergeTree/new_index | 0 20 files changed, 30 insertions(+), 39 deletions(-) delete mode 100644 src/Storages/MergeTree/new_index diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index adfd3adcb2a..38cab468b29 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -787,7 +787,7 @@ void LocalServer::processConfig() if (primary_index_cache_size > max_cache_size) { primary_index_cache_size = max_cache_size; - LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(primary_index_cache_size)); } global_context->setPrimaryIndexCache(primary_index_cache_policy, primary_index_cache_size, primary_index_cache_size_ratio); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0b76adebb41..ebf69481f0a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1571,7 +1571,7 @@ try if (primary_index_cache_size > max_cache_size) { primary_index_cache_size = max_cache_size; - LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size)); + LOG_INFO(log, "Lowered primary index cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(primary_index_cache_size)); } global_context->setPrimaryIndexCache(primary_index_cache_policy, primary_index_cache_size, primary_index_cache_size_ratio); diff --git a/src/Core/Defines.h b/src/Core/Defines.h index c6e65f34e90..faa5dc3a3fe 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -95,6 +95,9 @@ static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5_GiB; static constexpr auto DEFAULT_MARK_CACHE_SIZE_RATIO = 0.5l; +static constexpr auto DEFAULT_PRIMARY_INDEX_CACHE_POLICY = "SLRU"; +static constexpr auto DEFAULT_PRIMARY_INDEX_CACHE_MAX_SIZE = 5_GiB; +static constexpr auto DEFAULT_PRIMARY_INDEX_CACHE_SIZE_RATIO = 0.5l; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY = "SLRU"; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0; static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO = 0.5; diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 78a3ad3c419..034b1392367 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -101,9 +101,9 @@ namespace DB DECLARE(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \ DECLARE(Double, mark_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the protected queue in the mark cache relative to the cache's total size.", 0) \ DECLARE(Double, mark_cache_prewarm_ratio, 0.95, "The ratio of total size of mark cache to fill during prewarm.", 0) \ - DECLARE(String, primary_index_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Primary index cache policy name.", 0) \ - DECLARE(UInt64, primary_index_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for primary index (index of MergeTree family of tables).", 0) \ - DECLARE(Double, primary_index_cache_size_ratio, DEFAULT_MARK_CACHE_SIZE_RATIO, "The size of the 
protected queue in the primary index cache relative to the cache's total size.", 0) \ + DECLARE(String, primary_index_cache_policy, DEFAULT_PRIMARY_INDEX_CACHE_POLICY, "Primary index cache policy name.", 0) \ + DECLARE(UInt64, primary_index_cache_size, DEFAULT_PRIMARY_INDEX_CACHE_MAX_SIZE, "Size of cache for primary index (index of MergeTree family of tables).", 0) \ + DECLARE(Double, primary_index_cache_size_ratio, DEFAULT_PRIMARY_INDEX_CACHE_SIZE_RATIO, "The size of the protected queue in the primary index cache relative to the cache's total size.", 0) \ DECLARE(Double, primary_index_cache_prewarm_ratio, 0.95, "The ratio of total size of mark cache to fill during prewarm.", 0) \ DECLARE(String, index_uncompressed_cache_policy, DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY, "Secondary index uncompressed cache policy name.", 0) \ DECLARE(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of secondary indices. Zero means disabled.", 0) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc8b68f72c2..6b571275f18 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3259,9 +3259,9 @@ void Context::updatePrimaryIndexCacheConfiguration(const Poco::Util::AbstractCon std::lock_guard lock(shared->mutex); if (!shared->primary_index_cache) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache was not created yet."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Primary index cache was not created yet."); - size_t max_size_in_bytes = config.getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE); + size_t max_size_in_bytes = config.getUInt64("primary_index_cache_size", DEFAULT_PRIMARY_INDEX_CACHE_MAX_SIZE); shared->primary_index_cache->setMaxSizeInBytes(max_size_in_bytes); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index e4db880da6a..8dd68b51ac4 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -22,7 +22,6 @@ #include #include -#include "Storages/MergeTree/PrimaryIndexCache.h" #include "config.h" #include @@ -90,6 +89,7 @@ class RefreshSet; class Cluster; class Compiler; class MarkCache; +class PrimaryIndexCache; class PageCache; class MMappedFileCache; class UncompressedCache; diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 47fac8330a4..09cdeb72093 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -6,8 +6,6 @@ #include #include #include -#include "Storages/MarkCache.h" -#include "Storages/MergeTree/PrimaryIndexCache.h" #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 2aa9ac6a17c..4322f56c4cd 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index cf8989ee4df..2dff4fff80e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -25,7 +25,7 @@ #include #include #include -#include "Storages/MergeTree/PrimaryIndexCache.h" +#include namespace zkutil diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index 41185447c7c..37a9cbffa4c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ 
b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 53727d4578a..77f22711b8d 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 29b48f8d362..721a2a933e1 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -6,8 +6,6 @@ #include #include #include -#include "Storages/MergeTree/IMergeTreeDataPart.h" -#include "Storages/MergeTree/PrimaryIndexCache.h" #include #include @@ -223,9 +221,7 @@ private: std::promise promise{}; IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns{}; - PlainMarksByName cached_marks; - std::unique_ptr cached_index; MergeTreeTransactionPtr txn; bool need_prefix; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10b518822ac..14a0835d99c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -232,8 +232,8 @@ namespace MergeTreeSetting extern const MergeTreeSettingsString storage_policy; extern const MergeTreeSettingsFloat zero_copy_concurrent_part_removal_max_postpone_ratio; extern const MergeTreeSettingsUInt64 zero_copy_concurrent_part_removal_max_split_times; - extern const MergeTreeSettingsBool use_primary_index_cache; - extern const MergeTreeSettingsBool prewarm_primary_index_cache; + extern const MergeTreeSettingsBool use_primary_key_cache; + extern const MergeTreeSettingsBool prewarm_primary_key_cache; extern const MergeTreeSettingsBool prewarm_mark_cache; extern const MergeTreeSettingsBool primary_key_lazy_load; } @@ -2350,10 +2350,10 @@ void MergeTreeData::stopOutdatedAndUnexpectedDataPartsLoadingTask() PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCache() const { - bool use_primary_index_cache = (*getSettings())[MergeTreeSetting::use_primary_index_cache]; + bool use_primary_key_cache = (*getSettings())[MergeTreeSetting::use_primary_key_cache]; bool primary_key_lazy_load = (*getSettings())[MergeTreeSetting::primary_key_lazy_load]; - if (!use_primary_index_cache || !primary_key_lazy_load) + if (!use_primary_key_cache || !primary_key_lazy_load) return nullptr; return getContext()->getPrimaryIndexCache(); @@ -2361,7 +2361,7 @@ PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCache() const PrimaryIndexCachePtr MergeTreeData::getPrimaryIndexCacheToPrewarm() const { - if (!(*getSettings())[MergeTreeSetting::prewarm_primary_index_cache]) + if (!(*getSettings())[MergeTreeSetting::prewarm_primary_key_cache]) return nullptr; return getPrimaryIndexCache(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 58a909e6a2d..240cfa71748 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -515,7 +515,7 @@ public: /// Returns a pointer to primary mark cache if it is required to be prewarmed. MarkCachePtr getMarkCacheToPrewarm() const; - /// Prewarm mark cache for the most recent data parts. + /// Prewarm mark cache and primary index cache for the most recent data parts. 
void prewarmCaches(ThreadPool & pool, MarkCachePtr mark_cache, PrimaryIndexCachePtr index_cache); String getLogName() const { return log.loadName(); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index f6e42079d41..bd6feb99329 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -338,13 +338,9 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc */ MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - if (settings.save_primary_index_in_memory) + if (settings.save_primary_index_in_memory && index_columns.empty()) { - if (index_columns.empty()) - index_columns = primary_index_block.cloneEmptyColumns(); - - for (const auto & column : index_columns) - column->reserve(column->size() + granules_to_write.size()); + index_columns = primary_index_block.cloneEmptyColumns(); } /// Write index. The index contains Primary Key value for each `index_granularity` row. diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index d37e2ec219e..fbb55d11416 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -238,8 +238,8 @@ namespace ErrorCodes DECLARE(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ DECLARE(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. This can save memory in the presence of a large number of tables.", 0) \ DECLARE(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ - DECLARE(Bool, use_primary_index_cache, false, "Use cache for primary index instead of saving all indexes in memory. Can be useful for very large tables", 0) \ - DECLARE(Bool, prewarm_primary_index_cache, false, "If true primary index cache will be prewarmed by saving marks to mark cache on inserts, merges, fetches and on startup of server", 0) \ + DECLARE(Bool, use_primary_key_cache, false, "Use cache for primary index instead of saving all indexes in memory. Can be useful for very large tables", 0) \ + DECLARE(Bool, prewarm_primary_key_cache, false, "If true primary index cache will be prewarmed by saving marks to mark cache on inserts, merges, fetches and on startup of server", 0) \ DECLARE(Bool, prewarm_mark_cache, false, "If true mark cache will be prewarmed by saving marks to mark cache on inserts, merges, fetches and on startup of server", 0) \ DECLARE(String, columns_to_prewarm_mark_cache, "", "List of columns to prewarm mark cache for (if enabled). Empty means all columns", 0) \ /** Projection settings. */ \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 01359af021e..db162bd82cb 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -37,9 +37,9 @@ MergedBlockOutputStream::MergedBlockOutputStream( , default_codec(default_codec_) , write_settings(write_settings_) { - /// Save marks in memory if prewarm is enabled to avoid rereading marks file. 
+ /// Save marks in memory if prewarm is enabled to avoid re-reading marks file. bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; - /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid rereading marks file. + /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid re-reading primary index file. bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); MergeTreeWriterSettings writer_settings( diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index 46754d005ba..9f6ab952bc3 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -24,9 +24,9 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( WrittenOffsetColumns * offset_columns) : IMergedBlockOutputStream(data_part->storage.getSettings(), data_part->getDataPartStoragePtr(), metadata_snapshot_, columns_list_, /*reset_columns=*/ true) { - /// Save marks in memory if prewarm is enabled to avoid rereading marks file. + /// Save marks in memory if prewarm is enabled to avoid re-reading marks file. bool save_marks_in_cache = data_part->storage.getMarkCacheToPrewarm() != nullptr; - /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid rereading marks file. + /// Save primary index in memory if cache is disabled or is enabled with prewarm to avoid re-reading primary index file. bool save_primary_index_in_memory = !data_part->storage.getPrimaryIndexCache() || data_part->storage.getPrimaryIndexCacheToPrewarm(); /// Granularity is never recomputed while writing only columns. diff --git a/src/Storages/MergeTree/PrimaryIndexCache.h b/src/Storages/MergeTree/PrimaryIndexCache.h index 758f18dbed8..5ec185dcf58 100644 --- a/src/Storages/MergeTree/PrimaryIndexCache.h +++ b/src/Storages/MergeTree/PrimaryIndexCache.h @@ -16,7 +16,7 @@ namespace DB using PrimaryIndex = std::vector; -/// Estimate of number of bytes in cache for primaryindexs. +/// Estimate of number of bytes in cache for primary index. struct PrimaryIndexWeightFunction { /// We spent additional bytes on key in hashmap, linked lists, shared pointers, etc ... @@ -33,8 +33,9 @@ struct PrimaryIndexWeightFunction extern template class CacheBase; -/** Cache of primary index for StorageMergeTree. - * PrimaryIndex is an index structure that addresses ranges in column file, corresponding to ranges of primary key. +/** Cache of primary index for MergeTree tables. + * Primary index is a list of columns from primary key + * that store first row for each granule of data part.
*/ class PrimaryIndexCache : public CacheBase { diff --git a/src/Storages/MergeTree/new_index b/src/Storages/MergeTree/new_index deleted file mode 100644 index e69de29bb2d..00000000000 From bd3f300220b113fc08a0a11c68296513913bb5a9 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 22 Nov 2024 17:33:37 +0000 Subject: [PATCH 12/56] Move JSON/Dynamic/Variant types from experimental to beta --- docs/en/sql-reference/data-types/dynamic.md | 4 ++-- docs/en/sql-reference/data-types/newjson.md | 4 ++-- docs/en/sql-reference/data-types/variant.md | 2 +- src/Core/Settings.cpp | 19 ++++++++++--------- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/en/sql-reference/data-types/dynamic.md b/docs/en/sql-reference/data-types/dynamic.md index aa7455c8f68..08ccc4b8827 100644 --- a/docs/en/sql-reference/data-types/dynamic.md +++ b/docs/en/sql-reference/data-types/dynamic.md @@ -17,7 +17,7 @@ To declare a column of `Dynamic` type, use the following syntax: Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`. :::note -The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`. +The Dynamic data type is a beta feature. To use it, set `enable_dynamic_type = 1`. ::: ## Creating Dynamic @@ -54,7 +54,7 @@ SELECT 'Hello, World!'::Dynamic as d, dynamicType(d); Using CAST from `Variant` column: ```sql -SET allow_experimental_variant_type = 1, use_variant_as_common_type = 1; +SET enable_variant_type = 1, use_variant_as_common_type = 1; SELECT multiIf((number % 3) = 0, number, (number % 3) = 1, range(number + 1), NULL)::Dynamic AS d, dynamicType(d) FROM numbers(3) ``` diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 05197d9e692..bd65742d06f 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -10,8 +10,8 @@ keywords: [json, data type] Stores JavaScript Object Notation (JSON) documents in a single column. :::note -This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. -If you want to use JSON type, set `allow_experimental_json_type = 1`. +This feature is beta and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead. +If you want to use JSON type, set `enable_json_type = 1`. ::: To declare a column of `JSON` type, use the following syntax: diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7cb0f4ad4ea..37a7bf1894e 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -18,7 +18,7 @@ because working with values of such types can lead to ambiguity. By default, cre ::: :::note -The Variant data type is an experimental feature. To use it, set `allow_experimental_variant_type = 1`. +The Variant data type is a beta feature. To use it, set `enable_variant_type = 1`. 
::: ## Creating Variant diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 734523a208b..f58e854d50a 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -5741,6 +5741,16 @@ In `clickhouse-local` it is enabled by default and can be explicitly disabled. DECLARE(Bool, push_external_roles_in_interserver_queries, true, R"( Enable pushing user roles from originator to other nodes while performing a query. )", 0) \ + \ + DECLARE(Bool, allow_experimental_variant_type, false, R"( +Allows creation of [Variant](../../sql-reference/data-types/variant.md) data type. +)", BETA) ALIAS(enable_variant_type) \ + DECLARE(Bool, allow_experimental_dynamic_type, false, R"( +Allows creation of [Dynamic](../../sql-reference/data-types/dynamic.md) data type. +)", BETA) ALIAS(enable_dynamic_type) \ + DECLARE(Bool, allow_experimental_json_type, false, R"( +Allows creation of [JSON](../../sql-reference/data-types/newjson.md) data type. +)", BETA) ALIAS(enable_json_type) \ \ \ /* ####################################################### */ \ @@ -5776,15 +5786,6 @@ Possible values: )", EXPERIMENTAL) \ DECLARE(Bool, allow_experimental_vector_similarity_index, false, R"( Allow experimental vector similarity index -)", EXPERIMENTAL) \ - DECLARE(Bool, allow_experimental_variant_type, false, R"( -Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). -)", EXPERIMENTAL) \ - DECLARE(Bool, allow_experimental_dynamic_type, false, R"( -Allow Dynamic data type -)", EXPERIMENTAL) \ - DECLARE(Bool, allow_experimental_json_type, false, R"( -Allow JSON data type )", EXPERIMENTAL) \ DECLARE(Bool, allow_experimental_codecs, false, R"( If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing). 
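To illustrate the setting renames in PATCH 12 above, here is a minimal sketch. It is not part of the patch series; the table name `t_beta_types` is made up, and it assumes a server built with these commits. `enable_variant_type`, `enable_dynamic_type`, and `enable_json_type` are the new `ALIAS` names declared in `Settings.cpp`, so both the new and the old `allow_experimental_*` spellings work.

```sql
-- Minimal sketch (t_beta_types is a hypothetical table name).
-- The enable_* settings are the new ALIAS names; the old
-- allow_experimental_* spellings continue to work as well.
SET enable_variant_type = 1, enable_dynamic_type = 1, enable_json_type = 1;

CREATE TABLE t_beta_types
(
    v Variant(UInt64, String),
    d Dynamic,
    j JSON
)
ENGINE = MergeTree
ORDER BY tuple();

INSERT INTO t_beta_types VALUES (42, 'hello', '{"a" : 1}');

-- variantType and dynamicType are the introspection functions used in the docs above.
SELECT variantType(v), dynamicType(d), j.a FROM t_beta_types;
```

Declaring the old names as aliases of the new ones, rather than dropping them, keeps the promotion from experimental to beta backward compatible: existing queries and server configs that set `allow_experimental_json_type` and friends continue to work unchanged.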
From eadcde4ca5e543f3d8e11417ebc735d1b6337e8f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 22 Nov 2024 22:03:53 +0000 Subject: [PATCH 13/56] fix tests --- tests/queries/0_stateless/03273_primary_index_cache.sql | 2 +- .../queries/0_stateless/03274_prewarm_primary_index_cache.sql | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03273_primary_index_cache.sql b/tests/queries/0_stateless/03273_primary_index_cache.sql index 18c2703689b..04a03797bcb 100644 --- a/tests/queries/0_stateless/03273_primary_index_cache.sql +++ b/tests/queries/0_stateless/03273_primary_index_cache.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS t_primary_index_cache; CREATE TABLE t_primary_index_cache (a UInt64, b UInt64) ENGINE = MergeTree ORDER BY a PARTITION BY a % 2 -SETTINGS use_primary_index_cache = 1, prewarm_primary_index_cache = 0, index_granularity = 64, index_granularity_bytes = '10M', min_bytes_for_wide_part = 0; +SETTINGS use_primary_key_cache = 1, prewarm_primary_key_cache = 0, index_granularity = 64, index_granularity_bytes = '10M', min_bytes_for_wide_part = 0; SYSTEM DROP PRIMARY INDEX CACHE; diff --git a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql index 08c41d0fc72..16e895a7798 100644 --- a/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql +++ b/tests/queries/0_stateless/03274_prewarm_primary_index_cache.sql @@ -6,12 +6,12 @@ DROP TABLE IF EXISTS t_prewarm_cache_rmt_2; CREATE TABLE t_prewarm_cache_rmt_1 (a UInt64, b UInt64, c UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03274_prewarm_mark_cache_smt/t_prewarm_cache', '1') ORDER BY a PARTITION BY a % 2 -SETTINGS prewarm_primary_index_cache = 1, use_primary_index_cache = 1; +SETTINGS prewarm_primary_key_cache = 1, use_primary_key_cache = 1; CREATE TABLE t_prewarm_cache_rmt_2 (a UInt64, b UInt64, c UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03274_prewarm_mark_cache_smt/t_prewarm_cache', '2') ORDER BY a PARTITION BY a % 2 -SETTINGS prewarm_primary_index_cache = 1, use_primary_index_cache = 1; +SETTINGS prewarm_primary_key_cache = 1, use_primary_key_cache = 1; SYSTEM DROP PRIMARY INDEX CACHE; SYSTEM STOP FETCHES t_prewarm_cache_rmt_2; From 989c85ef52cadf5c1ddafa55ff76c0353438af0f Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 10:12:11 +0100 Subject: [PATCH 14/56] De-virtualize compareAt and getUInt calls in comparator --- src/Columns/ColumnLowCardinality.cpp | 68 +++++++++++++++++++++++++--- src/Columns/IColumnImpl.h | 22 +++++++-- 2 files changed, 79 insertions(+), 11 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 8f94416459c..ec82f4249bd 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -426,14 +426,24 @@ void ColumnLowCardinality::getPermutation(IColumn::PermutationSortDirection dire getPermutationImpl(direction, stability, limit, nan_direction_hint, res); } -void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +namespace +{ + +template +void updatePermutationWithTypedColumns( + const ColumnLowCardinality & column, + IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, 
IColumn::Permutation & res, EqualRanges & equal_ranges) { bool ascending = direction == IColumn::PermutationSortDirection::Ascending; - auto comparator = [this, ascending, stability, nan_direction_hint](size_t lhs, size_t rhs) + /// Cast indexes and dictionary columns to their real types so that compareAt and getUInt methods can be inlined. + const IndexColumn & indexes = assert_cast(column.getIndexes()); + const DictinaryColumn & dictionary = assert_cast(*column.getDictionary().getNestedColumn()); + + auto comparator = [&indexes, &dictionary, ascending, stability, nan_direction_hint](size_t lhs, size_t rhs) { - int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint); + int ret = dictionary.compareAt(indexes.getUInt(lhs), indexes.getUInt(rhs), dictionary, nan_direction_hint); if (unlikely(stability == IColumn::PermutationSortStability::Stable && ret == 0)) return lhs < rhs; @@ -442,13 +452,57 @@ void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection d return ret > 0; }; - auto equal_comparator = [this, nan_direction_hint](size_t lhs, size_t rhs) + auto equal_comparator = [&indexes, &dictionary, nan_direction_hint](size_t lhs, size_t rhs) { - int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), getDictionary(), nan_direction_hint); + int ret = dictionary.compareAt(indexes.getUInt(lhs), indexes.getUInt(rhs), dictionary, nan_direction_hint); return ret == 0; }; - updatePermutationImpl(limit, res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort()); + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort()); +} + +template +void updatePermutationWithIndexType( + const ColumnLowCardinality & column, + IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) +{ + /// Dispatch by dictionary column type. + if (typeid_cast(column.getDictionary().getNestedColumn().get())) + { + updatePermutationWithTypedColumns(column, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + } + else + { + /// Use default implementation for other types. + updatePermutationWithTypedColumns(column, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + } +} + +} + +void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + /// Dispatch by index column type. 
+ switch (idx.getSizeOfIndexType()) + { + case sizeof(UInt8): + updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + case sizeof(UInt16): + updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + case sizeof(UInt32): + updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + case sizeof(UInt64): + updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + return; + default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of index type for low cardinality column."); + } } void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 80c08f51346..c50338cc5da 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -127,19 +127,20 @@ void IColumn::getPermutationImpl( } template -void IColumn::updatePermutationImpl( +void updateColumnPermutationImpl( size_t limit, - Permutation & res, + size_t size, + IColumn::Permutation & res, EqualRanges & equal_ranges, Compare compare, Equals equals, Sort full_sort, - PartialSort partial_sort) const + PartialSort partial_sort) { if (equal_ranges.empty()) return; - if (limit >= size() || limit > equal_ranges.back().to) + if (limit >= size || limit > equal_ranges.back().to) limit = 0; EqualRanges new_ranges; @@ -210,4 +211,17 @@ void IColumn::updatePermutationImpl( equal_ranges = std::move(new_ranges); } +template +void IColumn::updatePermutationImpl( + size_t limit, + Permutation & res, + EqualRanges & equal_ranges, + Compare compare, + Equals equals, + Sort full_sort, + PartialSort partial_sort) const +{ + updateColumnPermutationImpl(limit, size(), res, equal_ranges, compare, equals, full_sort, partial_sort); +} + } From a64ed742973e8544eb512b9c6f142330e3d65bdb Mon Sep 17 00:00:00 2001 From: Zaynulla <36727185+Zaynulla@users.noreply.github.com> Date: Sat, 23 Nov 2024 15:52:48 +0300 Subject: [PATCH 15/56] varPop doc latex formula added - There is a latex formula in the documentation of the [covarPop](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/covarpop) method, which is quite similar to varPop method. - In Russian docs there are formulas both for [varPop](https://clickhouse.com/docs/ru/sql-reference/aggregate-functions/reference/varpop), and [covarPop](https://clickhouse.com/docs/ru/sql-reference/aggregate-functions/reference/covarpop). Therefore, for consistency, it is suggested to add formula here too. --- .../sql-reference/aggregate-functions/reference/varpop.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 182e830f19f..2b275540863 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -6,7 +6,11 @@ sidebar_position: 210 ## varPop -Calculates the population variance. 
+Calculates the population variance: + +$$ +\frac{\Sigma{(x - \bar{x})^2}}{n} +$$ **Syntax** From 18a881437d5dc7b80ff86a4157faa21c53f68c69 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 15:40:06 +0100 Subject: [PATCH 16/56] Use sorted dictionary to further speed up the comparison --- src/Columns/ColumnLowCardinality.cpp | 76 +++++++++++++++++----------- 1 file changed, 46 insertions(+), 30 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index ec82f4249bd..3d767cbbf45 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -429,37 +429,34 @@ void ColumnLowCardinality::getPermutation(IColumn::PermutationSortDirection dire namespace { -template -void updatePermutationWithTypedColumns( - const ColumnLowCardinality & column, - IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) +/// Comapator for sorting LowCardinality column with the help of sorted dictionary. +template +struct LowCardinalityComparator { - bool ascending = direction == IColumn::PermutationSortDirection::Ascending; + const IndexColumn & real_indexes; /// Indexes column + const PaddedPODArray & position_by_index; /// Maps original dictionary index to position in sorted dictionary - /// Cast indexes and dictionary columns to their real types so that compareAt and getUInt methods can be inlined. - const IndexColumn & indexes = assert_cast(column.getIndexes()); - const DictinaryColumn & dictionary = assert_cast(*column.getDictionary().getNestedColumn()); - - auto comparator = [&indexes, &dictionary, ascending, stability, nan_direction_hint](size_t lhs, size_t rhs) + inline bool operator () (size_t lhs, size_t rhs) const { - int ret = dictionary.compareAt(indexes.getUInt(lhs), indexes.getUInt(rhs), dictionary, nan_direction_hint); - if (unlikely(stability == IColumn::PermutationSortStability::Stable && ret == 0)) + int ret; + + const UInt64 lhs_index = real_indexes.getUInt(lhs); + const UInt64 rhs_index = real_indexes.getUInt(rhs); + + if (lhs_index == rhs_index) + ret = 0; + else + ret = CompareHelper::compare(position_by_index[lhs_index], position_by_index[rhs_index], 0); + + if (stable && ret == 0) return lhs < rhs; if (ascending) return ret < 0; + return ret > 0; - }; - - auto equal_comparator = [&indexes, &dictionary, nan_direction_hint](size_t lhs, size_t rhs) - { - int ret = dictionary.compareAt(indexes.getUInt(lhs), indexes.getUInt(rhs), dictionary, nan_direction_hint); - return ret == 0; - }; - - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort()); -} + } +}; template void updatePermutationWithIndexType( @@ -467,17 +464,36 @@ void updatePermutationWithIndexType( IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) { - /// Dispatch by dictionary column type. - if (typeid_cast(column.getDictionary().getNestedColumn().get())) + /// Cast indexes column to the real type so that compareAt and getUInt methods can be inlined. 
+ const IndexColumn * real_indexes = assert_cast(&column.getIndexes()); + + IColumn::Permutation dict_perm; + column.getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); + + PaddedPODArray position_by_index(dict_perm.size()); + for (size_t i = 0; i < dict_perm.size(); ++i) + position_by_index[dict_perm[i]] = i; + + auto equal_comparator = [real_indexes](size_t lhs, size_t rhs) { - updatePermutationWithTypedColumns(column, direction, stability, limit, nan_direction_hint, res, equal_ranges); - return; + return real_indexes->getUInt(lhs) == real_indexes->getUInt(rhs); + }; + + const bool ascending = (direction == IColumn::PermutationSortDirection::Ascending); + const bool stable = (stability == IColumn::PermutationSortStability::Stable); + if (ascending) + { + if (stable) + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); + else + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); } else { - /// Use default implementation for other types. - updatePermutationWithTypedColumns(column, direction, stability, limit, nan_direction_hint, res, equal_ranges); - return; + if (stable) + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); + else + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); } } From 7e730bba9eacb9eb9646368427bd71adfb50d660 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 18:56:54 +0100 Subject: [PATCH 17/56] Fix: dictionary is already sorted in requested direction --- src/Columns/ColumnLowCardinality.cpp | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 3d767cbbf45..f038f682f36 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -430,7 +430,8 @@ namespace { /// Comapator for sorting LowCardinality column with the help of sorted dictionary. -template +/// NOTE: Dictionary itself must be sorted in ASC or DESC order depending on the requested direction. 
+template struct LowCardinalityComparator { const IndexColumn & real_indexes; /// Indexes column @@ -451,10 +452,7 @@ struct LowCardinalityComparator if (stable && ret == 0) return lhs < rhs; - if (ascending) - return ret < 0; - - return ret > 0; + return ret < 0; } }; @@ -479,22 +477,11 @@ void updatePermutationWithIndexType( return real_indexes->getUInt(lhs) == real_indexes->getUInt(rhs); }; - const bool ascending = (direction == IColumn::PermutationSortDirection::Ascending); const bool stable = (stability == IColumn::PermutationSortStability::Stable); - if (ascending) - { - if (stable) - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); - else - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); - } + if (stable) + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); else - { - if (stable) - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); - else - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); - } + updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); } } From f14f794e7dcf31225b079a39cd6a42edebb86125 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 20:32:37 +0100 Subject: [PATCH 18/56] Undo changes to IColumnImpl.h --- src/Columns/ColumnLowCardinality.cpp | 27 +++++++++++++-------------- src/Columns/ColumnLowCardinality.h | 5 +++++ src/Columns/IColumnImpl.h | 20 +++----------------- 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index f038f682f36..cbc02570381 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -429,7 +429,7 @@ void ColumnLowCardinality::getPermutation(IColumn::PermutationSortDirection dire namespace { -/// Comapator for sorting LowCardinality column with the help of sorted dictionary. +/// Compator for sorting LowCardinality column with the help of sorted dictionary. /// NOTE: Dictionary itself must be sorted in ASC or DESC order depending on the requested direction. template struct LowCardinalityComparator @@ -456,17 +456,18 @@ struct LowCardinalityComparator } }; +} + template -void updatePermutationWithIndexType( - const ColumnLowCardinality & column, +void ColumnLowCardinality::updatePermutationWithIndexType( IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const { /// Cast indexes column to the real type so that compareAt and getUInt methods can be inlined. 
- const IndexColumn * real_indexes = assert_cast(&column.getIndexes()); + const IndexColumn * real_indexes = assert_cast(&getIndexes()); IColumn::Permutation dict_perm; - column.getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); + getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); PaddedPODArray position_by_index(dict_perm.size()); for (size_t i = 0; i < dict_perm.size(); ++i) @@ -479,11 +480,9 @@ void updatePermutationWithIndexType( const bool stable = (stability == IColumn::PermutationSortStability::Stable); if (stable) - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); + updatePermutationImpl(limit, res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); else - updateColumnPermutationImpl(limit, column.size(), res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); -} - + updatePermutationImpl(limit, res, equal_ranges, LowCardinalityComparator{*real_indexes, position_by_index}, equal_comparator, DefaultSort(), DefaultPartialSort()); } void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, @@ -493,16 +492,16 @@ void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection d switch (idx.getSizeOfIndexType()) { case sizeof(UInt8): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(direction, stability, limit, nan_direction_hint, res, equal_ranges); return; case sizeof(UInt16): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(direction, stability, limit, nan_direction_hint, res, equal_ranges); return; case sizeof(UInt32): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(direction, stability, limit, nan_direction_hint, res, equal_ranges); return; case sizeof(UInt64): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(direction, stability, limit, nan_direction_hint, res, equal_ranges); return; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of index type for low cardinality column."); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 3cc1c8919c0..7380b5eff07 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -389,6 +389,11 @@ private: int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; void getPermutationImpl(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + template + void updatePermutationWithIndexType( + IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, + size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const; }; bool 
isColumnLowCardinalityNullable(const IColumn & column); diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index c50338cc5da..4515e7a48e8 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -127,20 +127,19 @@ void IColumn::getPermutationImpl( } template -void updateColumnPermutationImpl( +void IColumn::updatePermutationImpl( size_t limit, - size_t size, IColumn::Permutation & res, EqualRanges & equal_ranges, Compare compare, Equals equals, Sort full_sort, - PartialSort partial_sort) + PartialSort partial_sort) const { if (equal_ranges.empty()) return; - if (limit >= size || limit > equal_ranges.back().to) + if (limit >= size() || limit > equal_ranges.back().to) limit = 0; EqualRanges new_ranges; @@ -211,17 +210,4 @@ void updateColumnPermutationImpl( equal_ranges = std::move(new_ranges); } -template -void IColumn::updatePermutationImpl( - size_t limit, - Permutation & res, - EqualRanges & equal_ranges, - Compare compare, - Equals equals, - Sort full_sort, - PartialSort partial_sort) const -{ - updateColumnPermutationImpl(limit, size(), res, equal_ranges, compare, equals, full_sort, partial_sort); -} - } From 833b137390e388571c5b8847a4f04d628b05e233 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 20:33:43 +0100 Subject: [PATCH 19/56] Undo changes to IColumnImpl.h --- src/Columns/IColumnImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/IColumnImpl.h b/src/Columns/IColumnImpl.h index 4515e7a48e8..80c08f51346 100644 --- a/src/Columns/IColumnImpl.h +++ b/src/Columns/IColumnImpl.h @@ -129,7 +129,7 @@ void IColumn::getPermutationImpl( template void IColumn::updatePermutationImpl( size_t limit, - IColumn::Permutation & res, + Permutation & res, EqualRanges & equal_ranges, Compare compare, Equals equals, From 8d8d1e85030f751fdd3fe45db840413ca3ece484 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sat, 23 Nov 2024 20:37:14 +0100 Subject: [PATCH 20/56] typo --- src/Columns/ColumnLowCardinality.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index cbc02570381..2317cca01fa 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -429,7 +429,7 @@ void ColumnLowCardinality::getPermutation(IColumn::PermutationSortDirection dire namespace { -/// Compator for sorting LowCardinality column with the help of sorted dictionary. +/// Comparator for sorting LowCardinality column with the help of sorted dictionary. /// NOTE: Dictionary itself must be sorted in ASC or DESC order depending on the requested direction. 
template struct LowCardinalityComparator From 140f5987da4dbbce3ad082f5a84a501ef3d3e346 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Sun, 24 Nov 2024 09:46:38 +0100 Subject: [PATCH 21/56] Perf test for LowCardinality sorting at INSERT --- tests/performance/low_cardinality_insert.xml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/performance/low_cardinality_insert.xml diff --git a/tests/performance/low_cardinality_insert.xml b/tests/performance/low_cardinality_insert.xml new file mode 100644 index 00000000000..77e62e9f12c --- /dev/null +++ b/tests/performance/low_cardinality_insert.xml @@ -0,0 +1,11 @@ + + DROP TABLE IF EXISTS test_lc_insert + + CREATE TABLE test_lc_insert (k1 LowCardinality(String), k2 LowCardinality(String)) ENGINE = MergeTree order by (k1, k2) + + + + INSERT INTO test_lc_insert(k1, k2) SELECT number%1000 AS k1, number%1001 AS k2 FROM numbers(2000000) + + DROP TABLE IF EXISTS test_lc_insert + From bf102d2a00df6012f5c3cfa55c581a45409ae862 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Sun, 24 Nov 2024 14:23:49 +0000 Subject: [PATCH 22/56] Remove flaky test and extend stable one --- src/Common/FailPoint.cpp | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 5 ++ .../test_s3_zero_copy_replication/test.py | 69 ++++--------------- 3 files changed, 22 insertions(+), 54 deletions(-) diff --git a/src/Common/FailPoint.cpp b/src/Common/FailPoint.cpp index 027cc347386..85e8129b668 100644 --- a/src/Common/FailPoint.cpp +++ b/src/Common/FailPoint.cpp @@ -79,6 +79,8 @@ static struct InitFiu REGULAR(zero_copy_lock_zk_fail_after_op) \ REGULAR(plain_object_storage_write_fail_on_directory_create) \ REGULAR(plain_object_storage_write_fail_on_directory_move) \ + REGULAR(zero_copy_unlock_zk_fail_before_op) \ + REGULAR(zero_copy_unlock_zk_fail_after_op) \ namespace FailPoints diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 264644ffd28..d753ee2a175 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -218,6 +218,8 @@ namespace FailPoints extern const char finish_set_quorum_failed_parts[]; extern const char zero_copy_lock_zk_fail_before_op[]; extern const char zero_copy_lock_zk_fail_after_op[]; + extern const char zero_copy_unlock_zk_fail_before_op[]; + extern const char zero_copy_unlock_zk_fail_after_op[]; } namespace ErrorCodes @@ -9852,6 +9854,9 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( LOG_TRACE(logger, "Removing zookeeper lock {} for part {} (files to keep: [{}])", zookeeper_part_replica_node, part_name, fmt::join(files_not_to_remove, ", ")); + fiu_do_on(FailPoints::zero_copy_unlock_zk_fail_before_op, { zookeeper_ptr->forceFailureBeforeOperation(); }); + fiu_do_on(FailPoints::zero_copy_unlock_zk_fail_after_op, { zookeeper_ptr->forceFailureAfterOperation(); }); + if (auto ec = zookeeper_ptr->tryRemove(zookeeper_part_replica_node); ec != Coordination::Error::ZOK) { /// Very complex case. It means that lock already doesn't exist when we tried to remove it. 
diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index c7d03d4301d..e40a0c7beed 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -693,9 +693,16 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): @pytest.mark.parametrize( - "failpoint", ["zero_copy_lock_zk_fail_before_op", "zero_copy_lock_zk_fail_after_op"] + "failpoint_lock", + ["zero_copy_lock_zk_fail_before_op", "zero_copy_lock_zk_fail_after_op"], ) -def test_move_shared_lock_fail_once(started_cluster, test_table, failpoint): +@pytest.mark.parametrize( + "failpoint_unlock", + [None, "zero_copy_unlock_zk_fail_before_op", "zero_copy_unlock_zk_fail_after_op"], +) +def test_move_shared_zero_copy_lock_fail( + started_cluster, test_table, failpoint_lock, failpoint_unlock +): node1 = cluster.instances["node1"] node2 = cluster.instances["node2"] @@ -714,7 +721,9 @@ def test_move_shared_lock_fail_once(started_cluster, test_table, failpoint): node1.query(f"INSERT INTO {test_table} VALUES (1, '{date}')") # Try to move and get fail on acquring zero-copy shared lock - node1.query(f"SYSTEM ENABLE FAILPOINT {failpoint}") + node1.query(f"SYSTEM ENABLE FAILPOINT {failpoint_lock}") + if failpoint_unlock: + node1.query(f"SYSTEM ENABLE FAILPOINT {failpoint_unlock}") node1.query_and_get_error( f"ALTER TABLE {test_table} MOVE PARTITION '{date}' TO VOLUME 'external'" ) @@ -729,7 +738,9 @@ def test_move_shared_lock_fail_once(started_cluster, test_table, failpoint): # Try another attempt after zk connection is restored # It should not failed due to leftovers of previous attempt (temporary cloned files) - node1.query(f"SYSTEM DISABLE FAILPOINT {failpoint}") + node1.query(f"SYSTEM DISABLE FAILPOINT {failpoint_lock}") + if failpoint_unlock: + node1.query(f"SYSTEM DISABLE FAILPOINT {failpoint_unlock}") node1.query( f"ALTER TABLE {test_table} MOVE PARTITION '{date}' TO VOLUME 'external'" ) @@ -748,53 +759,3 @@ def test_move_shared_lock_fail_once(started_cluster, test_table, failpoint): node1.query(f"DROP TABLE IF EXISTS {test_table} SYNC") node2.query(f"DROP TABLE IF EXISTS {test_table} SYNC") - - -def test_move_shared_lock_fail_keeper_unavailable(started_cluster, test_table): - node1 = cluster.instances["node1"] - node2 = cluster.instances["node2"] - - node1.query( - f""" - CREATE TABLE {test_table} ON CLUSTER test_cluster (num UInt64, date DateTime) - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{test_table}', '{{replica}}') - ORDER BY date PARTITION BY date - SETTINGS storage_policy='hybrid' - """ - ) - - date = "2024-10-23" - node2.query(f"SYSTEM STOP FETCHES {test_table}") - - node1.query(f"INSERT INTO {test_table} VALUES (1, '{date}')") - # Pause moving after part cloning, but before swapping - node1.query("SYSTEM ENABLE FAILPOINT stop_moving_part_before_swap_with_active") - - def move(node): - node.query_and_get_error( - f"ALTER TABLE {test_table} MOVE PARTITION '{date}' TO VOLUME 'external'" - ) - - # Start moving - t1 = threading.Thread(target=move, args=[node1]) - t1.start() - - with PartitionManager() as pm: - pm.drop_instance_zk_connections(node1) - # Continue moving and try to swap - node1.query("SYSTEM DISABLE FAILPOINT stop_moving_part_before_swap_with_active") - t1.join() - - # Previous MOVE was failed, try another one after zk connection is restored - # It should not failed due to leftovers of previous attempt (temporary cloned files) - node1.query_with_retry( - f"ALTER 
TABLE {test_table} MOVE PARTITION '{date}' TO VOLUME 'external'" ) - - # Sanity check - node2.query(f"SYSTEM START FETCHES {test_table}") - wait_for_active_parts(node2, 1, test_table, disk_name="s31") - assert node2.query(f"SELECT sum(num) FROM {test_table}") == "1\n" - - node1.query(f"DROP TABLE IF EXISTS {test_table} SYNC") - node2.query(f"DROP TABLE IF EXISTS {test_table} SYNC") From dcd07e25ac53248e6557eb253c884d293f099915 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 13:20:47 +0100 Subject: [PATCH 23/56] Check proper sorting in debug builds --- src/Interpreters/sortBlock.cpp | 58 ++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 7b19d338ee8..bdf672623da 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -272,6 +272,56 @@ bool isAlreadySortedImpl(size_t rows, Comparator compare) return true; } +template +void checkSortedWithPermutationImpl(size_t rows, Comparator compare, UInt64 limit, const IColumn::Permutation & permutation) +{ + if (limit && limit < rows) + rows = limit; + + for (size_t i = 1; i < rows; ++i) + { + const size_t current_row = permutation[i]; + const size_t previous_row = permutation[i - 1]; + + if (compare(current_row, previous_row)) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Rows are not sorted with permuation, position {}, previous_row index {}, current_row index {}", i, previous_row, current_row); + } +} + +void checkSortedWithPermutation(const Block & block, const SortDescription & description, UInt64 limit, const IColumn::Permutation & permutation) +{ + if (!block) + return; + + ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); + bool is_collation_required = false; + + for (auto & column_with_sort_desc : columns_with_sort_desc) + { + if (isCollationRequired(column_with_sort_desc.description)) + { + is_collation_required = true; + break; + } + } + + size_t rows = block.rows(); + + if (is_collation_required) + { + PartialSortingLessWithCollation less(columns_with_sort_desc); + checkSortedWithPermutationImpl(rows, less, limit, permutation); + return; + } + else + { + PartialSortingLess less(columns_with_sort_desc); + checkSortedWithPermutationImpl(rows, less, limit, permutation); + return; + } +} + } void sortBlock(Block & block, const SortDescription & description, UInt64 limit) @@ -279,6 +329,10 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) IColumn::Permutation permutation; getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Unstable, limit, permutation); +#ifndef NDEBUG + checkSortedWithPermutation(block, description, limit, permutation); +#endif + if (permutation.empty()) return; @@ -303,6 +357,10 @@ void stableGetPermutation(const Block & block, const SortDescription & descripti return; getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, out_permutation); + +#ifndef NDEBUG + checkSortedWithPermutation(block, description, 0, out_permutation); +#endif } bool isAlreadySorted(const Block & block, const SortDescription & description) From 22de29a70f4dd7db92076b25d7b11b078858e429 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 14:04:05 +0100 Subject: [PATCH 24/56] style fixes --- src/Interpreters/sortBlock.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/sortBlock.cpp
b/src/Interpreters/sortBlock.cpp index bdf672623da..3ebfde8b7a3 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -16,6 +16,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_COLLATION; + extern const int LOGICAL_ERROR; } /// Column with description for sort @@ -285,7 +286,7 @@ void checkSortedWithPermutationImpl(size_t rows, Comparator compare, UInt64 limi if (compare(current_row, previous_row)) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Rows are not sorted with permuation, position {}, previous_row index {}, current_row index {}", i, previous_row, current_row); + "Rows are not sorted with permutation, position {}, previous_row index {}, current_row index {}", i, previous_row, current_row); } } From 115df8705dcb8a6e7bfc09e1565f1c13f9eb4af8 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 14:14:32 +0100 Subject: [PATCH 25/56] Move dictionary sorting out of template method --- src/Columns/ColumnLowCardinality.cpp | 26 +++++++++++++------------- src/Columns/ColumnLowCardinality.h | 4 ++-- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 2317cca01fa..284b00d621a 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -460,19 +460,12 @@ struct LowCardinalityComparator template void ColumnLowCardinality::updatePermutationWithIndexType( - IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const + IColumn::PermutationSortStability stability, size_t limit, const PaddedPODArray & position_by_index, + IColumn::Permutation & res, EqualRanges & equal_ranges) const { /// Cast indexes column to the real type so that compareAt and getUInt methods can be inlined. const IndexColumn * real_indexes = assert_cast(&getIndexes()); - IColumn::Permutation dict_perm; - getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); - - PaddedPODArray position_by_index(dict_perm.size()); - for (size_t i = 0; i < dict_perm.size(); ++i) - position_by_index[dict_perm[i]] = i; - auto equal_comparator = [real_indexes](size_t lhs, size_t rhs) { return real_indexes->getUInt(lhs) == real_indexes->getUInt(rhs); @@ -488,20 +481,27 @@ void ColumnLowCardinality::updatePermutationWithIndexType( void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const { + IColumn::Permutation dict_perm; + getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); + + PaddedPODArray position_by_index(dict_perm.size()); + for (size_t i = 0; i < dict_perm.size(); ++i) + position_by_index[dict_perm[i]] = i; + /// Dispatch by index column type. 
switch (idx.getSizeOfIndexType()) { case sizeof(UInt8): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(stability, limit, position_by_index, res, equal_ranges); return; case sizeof(UInt16): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(stability, limit, position_by_index, res, equal_ranges); return; case sizeof(UInt32): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(stability, limit, position_by_index, res, equal_ranges); return; case sizeof(UInt64): - updatePermutationWithIndexType(*this, direction, stability, limit, nan_direction_hint, res, equal_ranges); + updatePermutationWithIndexType(stability, limit, position_by_index, res, equal_ranges); return; default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of index type for low cardinality column."); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 7380b5eff07..e09c51e2054 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -392,8 +392,8 @@ private: template void updatePermutationWithIndexType( - IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability, - size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const; + IColumn::PermutationSortStability stability, size_t limit, const PaddedPODArray & position_by_index, + IColumn::Permutation & res, EqualRanges & equal_ranges) const; }; bool isColumnLowCardinalityNullable(const IColumn & column); From cc0f8271e21a30c5aec495060533cac7ef44e3d6 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Mon, 25 Nov 2024 13:14:55 +0000 Subject: [PATCH 26/56] add a warning to lagInFrame/leadInFrame --- .../window-functions/lagInFrame.md | 18 +++++++--- .../window-functions/leadInFrame.md | 36 ++++++++++++++----- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index 01bf809e76e..c4b7b377761 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -8,11 +8,17 @@ sidebar_position: 9 Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +:::warning +`lagInFrame` behavior differs from the standard SQL `lag` window function. +ClickHouse window function `lagInFrame` respects the window frame. +To get behavior identical to `lag`, use `rows between unbounded preceding and unbounded following`. +::: + **Syntax** ```sql lagInFrame(x[, offset[, default]]) - OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) @@ -21,7 +27,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). **Parameters** -- `x` — Column name. - `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default).
- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - default value of column type when omitted). @@ -59,11 +65,13 @@ INSERT INTO stock_prices FORMAT Values SELECT date, close, - lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close, + lagInFrame(close, 1, close) OVER (ORDER BY date ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS previous_day_close, COALESCE(ROUND(close - previous_day_close, 2)) AS delta, COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change FROM stock_prices -ORDER BY date DESC; +ORDER BY date DESC ``` Result: @@ -76,4 +84,4 @@ Result: 4. │ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │ 5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │ └────────────┴────────┴────────────────────┴───────┴────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index dae4353b582..16c7aefd81a 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -8,11 +8,17 @@ sidebar_position: 10 Returns a value evaluated at the row that is offset rows after the current row within the ordered frame. +:::warning +`leadInFrame` behavior differs from the standard SQL `lead` window function. +ClickHouse window function `leadInFrame` respects the window frame. +To get behavior identical to `lead`, use `rows between unbounded preceding and unbounded following`. +::: + **Syntax** ```sql leadInFrame(x[, offset[, default]]) - OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] [ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name]) FROM table_name WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]) @@ -36,25 +42,37 @@ This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe Query: ```sql -CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *; +CREATE OR REPLACE VIEW nobel_prize_laureates +AS SELECT * +FROM file('nobel_laureates_data.csv'); ``` ```sql -FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9; +SELECT + fullName, + leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS year, + category, + motivation +FROM nobel_prize_laureates +WHERE category = 'physics' +ORDER BY year DESC +LIMIT 9 ``` Result: ```response ┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ -1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ -2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ -3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +1. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +2.
│ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ +3. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │ 4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ 5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ 6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │ -7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ +7. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ 8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ -9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │ +9. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │ └──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ -``` \ No newline at end of file +``` From fc61f33bea945099c27759f49cfadcbc19a51cde Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 14:29:29 +0100 Subject: [PATCH 27/56] Move all debug code under #ifndef NDEBUG --- src/Interpreters/sortBlock.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 3ebfde8b7a3..1c4754b1ef2 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -16,7 +16,9 @@ namespace DB namespace ErrorCodes { extern const int BAD_COLLATION; +#ifndef NDEBUG extern const int LOGICAL_ERROR; +#endif } /// Column with description for sort @@ -273,6 +275,7 @@ bool isAlreadySortedImpl(size_t rows, Comparator compare) return true; } +#ifndef NDEBUG template void checkSortedWithPermutationImpl(size_t rows, Comparator compare, UInt64 limit, const IColumn::Permutation & permutation) { @@ -322,6 +325,7 @@ void checkSortedWithPermutation(const Block & block, const SortDescription & des return; } } +#endif } From bf23cc865e16cf07ed81186042c8b7b9c1ac2756 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 25 Nov 2024 13:47:06 +0000 Subject: [PATCH 28/56] Fix data race in Squashing with LowCardinality --- src/Columns/ColumnLowCardinality.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 3cc1c8919c0..1e9d576b423 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -190,6 +190,26 @@ public: callback(dictionary.getColumnUniquePtr()); } + void forEachSubcolumnRecursively(RecursiveColumnCallback callback) const override + { + /** It is important to have both const and non-const versions here. 
+ * The behavior of ColumnUnique::forEachSubcolumnRecursively differs between const and non-const versions. + * The non-const version will update a field in ColumnUnique. + * In the meantime, the default implementation IColumn::forEachSubcolumnRecursively uses const_cast, + * so when the const version is called, the field will still be mutated. + * This can lead to a data race if constness is expected. + */ + callback(*idx.getPositionsPtr()); + idx.getPositionsPtr()->forEachSubcolumnRecursively(callback); + + /// Column doesn't own dictionary if it's shared. + if (!dictionary.isShared()) + { + callback(*dictionary.getColumnUniquePtr()); + dictionary.getColumnUniquePtr()->forEachSubcolumnRecursively(callback); + } + } + void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override { callback(*idx.getPositionsPtr()); From 0073a74881d6392ca4a263dc79ca0a1d6a65333e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 25 Nov 2024 14:09:11 +0000 Subject: [PATCH 29/56] fix mismatched types after optimize_function_to_subcolumns --- .../Passes/FunctionToSubcolumnsPass.cpp | 25 +++++++++++++------ ...03276_functions_to_subcolumns_lc.reference | 1 + .../03276_functions_to_subcolumns_lc.sql | 14 +++++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03276_functions_to_subcolumns_lc.reference create mode 100644 tests/queries/0_stateless/03276_functions_to_subcolumns_lc.sql diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index abd0a95c6f2..92dc0301fd6 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -46,7 +46,7 @@ using NodeToSubcolumnTransformer = std::function()}; @@ -56,8 +56,8 @@ void optimizeFunctionLength(QueryTreeNodePtr & node, FunctionNode &, ColumnConte template void optimizeFunctionEmpty(QueryTreeNodePtr &, FunctionNode & function_node, ColumnContext & ctx) { - /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive - /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive + /// Replace `empty(argument)` with `equals(argument.size0, 0)` if positive. + /// Replace `notEmpty(argument)` with `notEquals(argument.size0, 0)` if not positive. /// `argument` may be Array or Map. 
NameAndTypePair column{ctx.column.name + ".size0", std::make_shared()}; @@ -136,19 +136,25 @@ std::map, NodeToSubcolumnTransformer> node_transfor }, { {TypeIndex::Map, "mapKeys"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `mapKeys(map_argument)` with `map_argument.keys` - NameAndTypePair column{ctx.column.name + ".keys", function_node.getResultType()}; + const auto & data_type_map = assert_cast(*ctx.column.type); + auto key_type = std::make_shared(data_type_map.getKeyType()); + + NameAndTypePair column{ctx.column.name + ".keys", key_type}; node = std::make_shared(column, ctx.column_source); }, }, { {TypeIndex::Map, "mapValues"}, - [](QueryTreeNodePtr & node, FunctionNode & function_node, ColumnContext & ctx) + [](QueryTreeNodePtr & node, FunctionNode &, ColumnContext & ctx) { /// Replace `mapValues(map_argument)` with `map_argument.values` - NameAndTypePair column{ctx.column.name + ".values", function_node.getResultType()}; + const auto & data_type_map = assert_cast(*ctx.column.type); + auto value_type = std::make_shared(data_type_map.getValueType()); + + NameAndTypePair column{ctx.column.name + ".values", value_type}; node = std::make_shared(column, ctx.column_source); }, }, @@ -439,11 +445,16 @@ public: if (!identifiers_to_optimize.contains(qualified_name)) return; + auto result_type = function_node->getResultType(); auto transformer_it = node_transformers.find({column.type->getTypeId(), function_node->getFunctionName()}); + if (transformer_it != node_transformers.end()) { ColumnContext ctx{std::move(column), first_argument_column_node->getColumnSource(), getContext()}; transformer_it->second(node, *function_node, ctx); + + if (!result_type->equals(*node->getResultType())) + node = buildCastFunction(node, result_type, getContext()); } } }; diff --git a/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.reference b/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.reference new file mode 100644 index 00000000000..3bc835eaeac --- /dev/null +++ b/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.reference @@ -0,0 +1 @@ +['foo'] ['bar'] diff --git a/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.sql b/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.sql new file mode 100644 index 00000000000..b3b8c1a79f8 --- /dev/null +++ b/tests/queries/0_stateless/03276_functions_to_subcolumns_lc.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS t_map_lc; + +CREATE TABLE t_map_lc +( + kv Map(LowCardinality(String), LowCardinality(String)), + k Array(LowCardinality(String)) ALIAS mapKeys(kv), + v Array(LowCardinality(String)) ALIAS mapValues(kv) +) ENGINE = Memory; + +INSERT INTO t_map_lc VALUES (map('foo', 'bar')); + +SELECT k, v FROM t_map_lc SETTINGS optimize_functions_to_subcolumns=1; + +DROP TABLE t_map_lc; From 207cc837f5103e27a45296db89aee9eddb69175a Mon Sep 17 00:00:00 2001 From: Pavel Kruglov <48961922+Avogar@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:29:03 +0100 Subject: [PATCH 30/56] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 4f468ac600e..eb5b939469c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -91,6 +91,9 @@ static std::initializer_list Date: Mon, 25 Nov 2024 16:13:02 +0100 Subject: [PATCH 31/56] Rename setting in error 
message --- src/DataTypes/DataTypeObject.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index eaae7ffebca..9a60d1e55b8 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -550,7 +550,7 @@ static DataTypePtr createJSON(const ASTPtr & arguments) if (context->getSettingsRef()[Setting::allow_experimental_object_type] && context->getSettingsRef()[Setting::use_json_alias_for_old_object_type]) { if (arguments && !arguments->children.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use new JSON type, set settings allow_experimental_json_type = 1 and use_json_alias_for_old_object_type = 0"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Experimental Object type doesn't support any arguments. If you want to use the new JSON type, set settings enable_json_type = 1 and use_json_alias_for_old_object_type = 0"); return std::make_shared("JSON", false); } From 84fc0fa6fdb96306b9db44dc6e73e9da1dfd760a Mon Sep 17 00:00:00 2001 From: Pavel Kruglov <48961922+Avogar@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:14:20 +0100 Subject: [PATCH 32/56] Update parseColumnsListForTableFunction.h --- src/Interpreters/parseColumnsListForTableFunction.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/parseColumnsListForTableFunction.h b/src/Interpreters/parseColumnsListForTableFunction.h index 39b9f092d89..f44e8d0c398 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.h +++ b/src/Interpreters/parseColumnsListForTableFunction.h @@ -19,12 +19,12 @@ struct DataTypeValidationSettings bool allow_suspicious_low_cardinality_types = true; bool allow_experimental_object_type = true; bool allow_suspicious_fixed_string_types = true; - bool allow_experimental_variant_type = true; + bool enable_variant_type = true; bool allow_experimental_bfloat16_type = true; bool allow_suspicious_variant_types = true; bool validate_nested_types = true; - bool allow_experimental_dynamic_type = true; - bool allow_experimental_json_type = true; + bool enable_dynamic_type = true; + bool enable_json_type = true; }; void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings); From c97cdedd9bf38decda521026364b837f068e91a0 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov <48961922+Avogar@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:16:12 +0100 Subject: [PATCH 33/56] Rename settings in error messages --- .../parseColumnsListForTableFunction.cpp | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index d3bf6f860f3..a375cb18297 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -16,10 +16,10 @@ namespace DB { namespace Setting { - extern const SettingsBool allow_experimental_dynamic_type; - extern const SettingsBool allow_experimental_json_type; + extern const SettingsBool enable_dynamic_type; + extern const SettingsBool enable_json_type; extern const SettingsBool allow_experimental_object_type; - extern const SettingsBool allow_experimental_variant_type; + extern const SettingsBool enable_variant_type; extern const SettingsBool allow_experimental_bfloat16_type; extern const SettingsBool allow_suspicious_fixed_string_types; extern const SettingsBool
allow_suspicious_low_cardinality_types; @@ -42,12 +42,12 @@ DataTypeValidationSettings::DataTypeValidationSettings(const DB::Settings & sett : allow_suspicious_low_cardinality_types(settings[Setting::allow_suspicious_low_cardinality_types]) , allow_experimental_object_type(settings[Setting::allow_experimental_object_type]) , allow_suspicious_fixed_string_types(settings[Setting::allow_suspicious_fixed_string_types]) - , allow_experimental_variant_type(settings[Setting::allow_experimental_variant_type]) + , enable_variant_type(settings[Setting::enable_variant_type]) , allow_experimental_bfloat16_type(settings[Setting::allow_experimental_bfloat16_type]) , allow_suspicious_variant_types(settings[Setting::allow_suspicious_variant_types]) , validate_nested_types(settings[Setting::validate_experimental_and_suspicious_types_inside_nested_types]) - , allow_experimental_dynamic_type(settings[Setting::allow_experimental_dynamic_type]) - , allow_experimental_json_type(settings[Setting::allow_experimental_json_type]) + , enable_dynamic_type(settings[Setting::enable_dynamic_type]) + , enable_json_type(settings[Setting::enable_json_type]) { } @@ -95,14 +95,14 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio } } - if (!settings.allow_experimental_variant_type) + if (!settings.enable_variant_type) { if (isVariant(data_type)) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Variant type is not allowed. " - "Set setting allow_experimental_variant_type = 1 in order to allow it", + "Cannot create column with type '{}' because Variant type is not allowed. " + "Set setting enable_variant_type = 1 in order to allow it", data_type.getName()); } } @@ -151,27 +151,27 @@ void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidatio } } - if (!settings.allow_experimental_dynamic_type) + if (!settings.enable_dynamic_type) { if (isDynamic(data_type)) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Dynamic type is not allowed. " - "Set setting allow_experimental_dynamic_type = 1 in order to allow it", + "Cannot create column with type '{}' because Dynamic type is not allowed. " + "Set setting enable_dynamic_type = 1 in order to allow it", data_type.getName()); } } - if (!settings.allow_experimental_json_type) + if (!settings.enable_json_type) { const auto * object_type = typeid_cast(&data_type); if (object_type && object_type->getSchemaFormat() == DataTypeObject::SchemaFormat::JSON) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental JSON type is not allowed. " - "Set setting allow_experimental_json_type = 1 in order to allow it", + "Cannot create column with type '{}' because JSON type is not allowed. 
" + "Set setting enable_json_type = 1 in order to allow it", data_type.getName()); } } From 2fb3871a62b94e532fa7d385b53bd7c6d11e26c0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 22 Nov 2024 23:02:46 +0100 Subject: [PATCH 34/56] fix uncaught exception --- src/Interpreters/HashJoin/ScatteredBlock.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/HashJoin/ScatteredBlock.h b/src/Interpreters/HashJoin/ScatteredBlock.h index 729377f6758..31ff773d04d 100644 --- a/src/Interpreters/HashJoin/ScatteredBlock.h +++ b/src/Interpreters/HashJoin/ScatteredBlock.h @@ -302,10 +302,11 @@ struct ScatteredBlock : private boost::noncopyable /// Cut first `num_rows` rows from `block` in place and returns block with remaining rows ScatteredBlock cut(size_t num_rows) { - SCOPE_EXIT(filterBySelector()); - if (num_rows >= rows()) + { + filterBySelector(); return ScatteredBlock{Block{}}; + } chassert(block); @@ -314,6 +315,7 @@ struct ScatteredBlock : private boost::noncopyable auto remaining = ScatteredBlock{block, std::move(remaining_selector)}; selector = std::move(first_num_rows); + filterBySelector(); return remaining; } From 12040bbf249f13e6a57ddfd90a6f92801ff5be9e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 18:30:54 +0100 Subject: [PATCH 35/56] Fix for empty permutation --- src/Interpreters/sortBlock.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 1c4754b1ef2..064632d4c7b 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -282,10 +282,12 @@ void checkSortedWithPermutationImpl(size_t rows, Comparator compare, UInt64 limi if (limit && limit < rows) rows = limit; + const bool no_permutaiton = permutation.empty(); + for (size_t i = 1; i < rows; ++i) { - const size_t current_row = permutation[i]; - const size_t previous_row = permutation[i - 1]; + const size_t current_row = no_permutaiton ? i : permutation[i]; + const size_t previous_row = no_permutaiton ? (i - 1) : permutation[i - 1]; if (compare(current_row, previous_row)) throw Exception(ErrorCodes::LOGICAL_ERROR, From 7773873e726708a96857a4c3f9e5d91c84840826 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 25 Nov 2024 17:34:17 +0000 Subject: [PATCH 36/56] Restore deleted server setting --- src/Core/ServerSettings.cpp | 1 + src/Core/Settings.cpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index b920c905a54..d622d60221c 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -197,6 +197,7 @@ namespace DB DECLARE(UInt64, parts_kill_delay_period_random_add, 10, "Add uniformly distributed value from 0 to x seconds to kill_delay_period to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables. Only available in ClickHouse Cloud", 0) \ DECLARE(UInt64, parts_killer_pool_size, 128, "Threads for cleanup of shared merge tree outdated threads. Only available in ClickHouse Cloud", 0) \ DECLARE(UInt64, keeper_multiread_batch_size, 10'000, "Maximum size of batch for MultiRead request to [Zoo]Keeper that support batching. If set to 0, batching is disabled. 
Available only in ClickHouse Cloud.", 0) \ + DECLARE(Bool, use_legacy_mongodb_integration, true, "Obsolete, has no effect", 0) \ \ DECLARE(UInt64, prefetch_threadpool_pool_size, 100, "Size of background pool for prefetches for remote object storages", 0) \ DECLARE(UInt64, prefetch_threadpool_queue_size, 1000000, "Number of tasks which is possible to push into prefetches pool", 0) \ diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 698ceab0f76..178a9167d69 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -5731,7 +5731,7 @@ Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting If enabled, only allow identifiers containing alphanumeric characters and underscores. )", 0) \ DECLARE(Bool, mongodb_throw_on_unsupported_query, true, R"( -If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option is not applied when 'allow_experimental_analyzer=0'. +If enabled, MongoDB tables will return an error when a MongoDB query cannot be built. Otherwise, ClickHouse reads the full table and processes it locally. This option is not applied when 'enable_analyzer=0'. )", 0) \ DECLARE(Bool, implicit_select, false, R"( Allow writing simple SELECT queries without the leading SELECT keyword, which makes it simple for calculator-style usage, e.g. `1 + 2` becomes a valid query. From 658ac29111b1db51d181e21b51fa162195bbcc1b Mon Sep 17 00:00:00 2001 From: Pavel Kruglov <48961922+Avogar@users.noreply.github.com> Date: Mon, 25 Nov 2024 20:00:05 +0100 Subject: [PATCH 37/56] Update parseColumnsListForTableFunction.cpp --- .../parseColumnsListForTableFunction.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index a375cb18297..737e28447d7 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -16,10 +16,10 @@ namespace DB { namespace Setting { - extern const SettingsBool enable_dynamic_type; - extern const SettingsBool enable_json_type; + extern const SettingsBool allow_experimental_dynamic_type; + extern const SettingsBool allow_experimental_json_type; extern const SettingsBool allow_experimental_object_type; - extern const SettingsBool enable_variant_type; + extern const SettingsBool allow_experimental_variant_type; extern const SettingsBool allow_experimental_bfloat16_type; extern const SettingsBool allow_suspicious_fixed_string_types; extern const SettingsBool allow_suspicious_low_cardinality_types; @@ -42,12 +42,12 @@ DataTypeValidationSettings::DataTypeValidationSettings(const DB::Settings & sett : allow_suspicious_low_cardinality_types(settings[Setting::allow_suspicious_low_cardinality_types]) , allow_experimental_object_type(settings[Setting::allow_experimental_object_type]) , allow_suspicious_fixed_string_types(settings[Setting::allow_suspicious_fixed_string_types]) - , enable_variant_type(settings[Setting::enable_variant_type]) + , enable_variant_type(settings[Setting::allow_experimental_variant_type]) , allow_experimental_bfloat16_type(settings[Setting::allow_experimental_bfloat16_type]) , allow_suspicious_variant_types(settings[Setting::allow_suspicious_variant_types]) , validate_nested_types(settings[Setting::validate_experimental_and_suspicious_types_inside_nested_types]) - , enable_dynamic_type(settings[Setting::enable_dynamic_type]) 
- , enable_json_type(settings[Setting::enable_json_type]) + , enable_dynamic_type(settings[Setting::allow_experimental_dynamic_type]) + , enable_json_type(settings[Setting::allow_experimental_json_type]) { } From 10160fed8e893a0b175f681761e4e90d2e951061 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 20:09:03 +0100 Subject: [PATCH 38/56] Check dictionary permutation size --- src/Columns/ColumnLowCardinality.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 284b00d621a..82d2092907b 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -484,6 +484,12 @@ void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection d IColumn::Permutation dict_perm; getDictionary().getNestedColumn()->getPermutation(direction, stability, 0, nan_direction_hint, dict_perm); + /// This is a paranoid check, but elsewhere in the code an empty permutation is used to indicate that no sorting is needed. + if (dict_perm.size() != getDictionary().size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Dictionary permutation size {} is not equal to dictionary size {}. It is a bug.", + dict_perm.size(), getDictionary().size()); + PaddedPODArray position_by_index(dict_perm.size()); for (size_t i = 0; i < dict_perm.size(); ++i) position_by_index[dict_perm[i]] = i; From a35971f7498e15000d17ff8754a7840b81a43ed6 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 25 Nov 2024 20:19:46 +0100 Subject: [PATCH 39/56] style --- src/Columns/ColumnLowCardinality.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 82d2092907b..ae8971c96aa 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -489,7 +489,6 @@ void ColumnLowCardinality::updatePermutation(IColumn::PermutationSortDirection d throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary permutation size {} is not equal to dictionary size {}.
It is a bug.", dict_perm.size(), getDictionary().size()); - PaddedPODArray position_by_index(dict_perm.size()); for (size_t i = 0; i < dict_perm.size(); ++i) position_by_index[dict_perm[i]] = i; From cb7d33dc6ef931f34466ff76b795f96bf070d939 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 25 Nov 2024 21:10:14 +0100 Subject: [PATCH 40/56] Fix build --- programs/extract-from-config/CMakeLists.txt | 1 + programs/keeper/CMakeLists.txt | 1 + programs/server/CMakeLists.txt | 1 + src/CMakeLists.txt | 2 ++ src/Common/Config/CMakeLists.txt | 1 + src/Common/ZooKeeper/CMakeLists.txt | 11 +++++++++-- src/Common/ZooKeeper/examples/CMakeLists.txt | 6 +++--- src/Storages/System/CMakeLists.txt | 1 + src/Storages/examples/CMakeLists.txt | 2 +- utils/zookeeper-cli/CMakeLists.txt | 1 + utils/zookeeper-dump-tree/CMakeLists.txt | 1 + utils/zookeeper-remove-by-list/CMakeLists.txt | 1 + 12 files changed, 23 insertions(+), 6 deletions(-) diff --git a/programs/extract-from-config/CMakeLists.txt b/programs/extract-from-config/CMakeLists.txt index ff2d7937117..50585a564a7 100644 --- a/programs/extract-from-config/CMakeLists.txt +++ b/programs/extract-from-config/CMakeLists.txt @@ -5,6 +5,7 @@ set (CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK boost::program_options clickhouse_common_config clickhouse_common_io + clickhouse_common_zookeeper_base clickhouse_common_zookeeper ) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 9b931c49c24..eaba1581ee4 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -7,6 +7,7 @@ set (CLICKHOUSE_KEEPER_LINK PRIVATE clickhouse_common_config clickhouse_common_io + clickhouse_common_zookeeper_base clickhouse_common_zookeeper daemon clickhouse-keeper-converter-lib diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index be696ff2afe..d3565211d14 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -8,6 +8,7 @@ set (CLICKHOUSE_SERVER_LINK clickhouse_aggregate_functions clickhouse_common_config clickhouse_common_io + clickhouse_common_zookeeper_base clickhouse_common_zookeeper clickhouse_functions clickhouse_parsers diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ac03f40cd93..a3631c856cb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -415,6 +415,7 @@ dbms_target_link_libraries ( boost::filesystem boost::program_options clickhouse_common_config + clickhouse_common_zookeeper_base clickhouse_common_zookeeper clickhouse_dictionaries_embedded clickhouse_parsers @@ -660,6 +661,7 @@ if (ENABLE_TESTS) clickhouse_parsers clickhouse_storages_system dbms + clickhouse_common_zookeeper_base clickhouse_common_config clickhouse_common_zookeeper hilite_comparator) diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index e91a01568d5..05c2663b3f5 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -11,6 +11,7 @@ set (SRCS add_library(clickhouse_common_config ${SRCS}) target_link_libraries(clickhouse_common_config PUBLIC + clickhouse_common_zookeeper_base clickhouse_common_zookeeper common Poco::XML diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index b70a2299ba6..20a490ce085 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -1,10 +1,13 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_common_zookeeper .) 
+# Needed to build without Keeper (FreeBSD and other builds) +list(APPEND clickhouse_common_zookeeper_sources ${CMAKE_CURRENT_SOURCE_DIR}/../../../src/Coordination/KeeperFeatureFlags.cpp) # Needs to be built differently depending on ZOOKEEPER_LOG list(REMOVE_ITEM clickhouse_common_zookeeper_sources "ZooKeeperImpl.cpp") + add_library(clickhouse_common_zookeeper_base ${clickhouse_common_zookeeper_headers} ${clickhouse_common_zookeeper_sources}) target_link_libraries (clickhouse_common_zookeeper_base PUBLIC @@ -18,14 +21,18 @@ add_library(clickhouse_common_zookeeper ZooKeeperImpl.cpp) target_compile_definitions (clickhouse_common_zookeeper PRIVATE -DZOOKEEPER_LOG) target_link_libraries (clickhouse_common_zookeeper PUBLIC - clickhouse_common_zookeeper_base + clickhouse_common_io + clickhouse_compression + common ) # for examples -- no logging (to avoid extra dependencies) add_library(clickhouse_common_zookeeper_no_log ZooKeeperImpl.cpp) target_link_libraries (clickhouse_common_zookeeper_no_log PUBLIC - clickhouse_common_zookeeper_base + clickhouse_common_io + clickhouse_compression + common ) if (ENABLE_EXAMPLES) add_subdirectory(examples) diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index 678b302a512..dd738e993de 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -1,15 +1,15 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) target_link_libraries(zkutil_test_commands PRIVATE - clickhouse_common_zookeeper_no_log + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log dbms) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) target_link_libraries(zkutil_test_commands_new_lib PRIVATE - clickhouse_common_zookeeper_no_log + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log clickhouse_compression dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE - clickhouse_common_zookeeper_no_log + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log dbms) diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index c7583713d2d..90fc7bf0541 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -47,6 +47,7 @@ add_library(clickhouse_storages_system ${storages_system_sources}) target_link_libraries(clickhouse_storages_system PRIVATE dbms common + clickhouse_common_zookeeper_base clickhouse_common_zookeeper clickhouse_parsers Poco::JSON diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index b4786b7313b..0c9a5b46801 100644 --- a/src/Storages/examples/CMakeLists.txt +++ b/src/Storages/examples/CMakeLists.txt @@ -5,4 +5,4 @@ clickhouse_add_executable (merge_selector2 merge_selector2.cpp) target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) -target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) +target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper_base clickhouse_common_zookeeper) diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index fd2fa669f40..2d0769b7bf2 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ 
-2,6 +2,7 @@ clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) target_link_libraries(clickhouse-zookeeper-cli PRIVATE + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log dbms clickhouse_functions diff --git a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index 3f3df65776a..835d37bd1cd 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,5 +1,6 @@ clickhouse_add_executable (zookeeper-dump-tree main.cpp ${SRCS}) target_link_libraries(zookeeper-dump-tree PRIVATE + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log clickhouse_common_io dbms diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index a4d7dccef65..4365c716596 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,5 +1,6 @@ clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) target_link_libraries(zookeeper-remove-by-list PRIVATE + clickhouse_common_zookeeper_base clickhouse_common_zookeeper_no_log dbms clickhouse_functions From 33e5bc136a873f8668cc61a1bce221df57ccb389 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 25 Nov 2024 21:24:10 +0100 Subject: [PATCH 41/56] Remove build hack --- src/Common/ZooKeeper/CMakeLists.txt | 2 -- src/Common/ZooKeeper/IKeeper.h | 2 +- src/{Coordination => Common/ZooKeeper}/KeeperFeatureFlags.cpp | 2 +- src/{Coordination => Common/ZooKeeper}/KeeperFeatureFlags.h | 0 src/Common/ZooKeeper/TestKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 2 +- src/Common/ZooKeeper/ZooKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeperImpl.h | 2 +- src/Coordination/FourLetterCommand.cpp | 2 +- src/Coordination/KeeperContext.cpp | 2 +- src/Coordination/KeeperContext.h | 2 +- src/Coordination/tests/gtest_coordination.cpp | 2 +- src/Storages/System/StorageSystemZooKeeperConnection.cpp | 2 +- 13 files changed, 11 insertions(+), 13 deletions(-) rename src/{Coordination => Common/ZooKeeper}/KeeperFeatureFlags.cpp (98%) rename src/{Coordination => Common/ZooKeeper}/KeeperFeatureFlags.h (100%) diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 20a490ce085..12aa3270359 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -1,8 +1,6 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_common_zookeeper .) 
-# Needed to build without Keeper (FreeBSD and other builds) -list(APPEND clickhouse_common_zookeeper_sources ${CMAKE_CURRENT_SOURCE_DIR}/../../../src/Coordination/KeeperFeatureFlags.cpp) # Needs to be built differently depending on ZOOKEEPER_LOG list(REMOVE_ITEM clickhouse_common_zookeeper_sources "ZooKeeperImpl.cpp") diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index d6f1716c781..bc949aefa41 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Coordination/KeeperFeatureFlags.cpp b/src/Common/ZooKeeper/KeeperFeatureFlags.cpp similarity index 98% rename from src/Coordination/KeeperFeatureFlags.cpp rename to src/Common/ZooKeeper/KeeperFeatureFlags.cpp index 2aad6cbed32..5c875c2d163 100644 --- a/src/Coordination/KeeperFeatureFlags.cpp +++ b/src/Common/ZooKeeper/KeeperFeatureFlags.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Coordination/KeeperFeatureFlags.h b/src/Common/ZooKeeper/KeeperFeatureFlags.h similarity index 100% rename from src/Coordination/KeeperFeatureFlags.h rename to src/Common/ZooKeeper/KeeperFeatureFlags.h diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 4cf08c9c8f6..52b045cdd3e 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace Coordination diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index 5bc9ad5df0a..9ce927b9715 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -1,5 +1,5 @@ #include "ZooKeeper.h" -#include "Coordination/KeeperFeatureFlags.h" +#include "Common/ZooKeeper/KeeperFeatureFlags.h" #include "ZooKeeperImpl.h" #include "KeeperException.h" #include "TestKeeper.h" diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 5782c49a7f3..fb693b7b267 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 20926bbd5fd..5a1695b34a9 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 728dca75bad..7643bd95420 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -11,7 +11,7 @@ #include #include #include -#include "Coordination/KeeperFeatureFlags.h" +#include "Common/ZooKeeper/KeeperFeatureFlags.h" #include #include #include diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 167f40bc991..a634ba87599 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index a13459d7c65..02cdd9809d7 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include diff --git a/src/Coordination/tests/gtest_coordination.cpp 
b/src/Coordination/tests/gtest_coordination.cpp index 4cd0eaa8657..7e4f44f6194 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index 9a83e518058..c75efdb865d 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include From 66b874dafd769bb6ad9df5e4f040305175683f89 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 26 Nov 2024 03:34:17 +0100 Subject: [PATCH 42/56] Omit database names from backup query when we do not need to specify them --- src/Parsers/ASTBackupQuery.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ASTBackupQuery.cpp b/src/Parsers/ASTBackupQuery.cpp index 0372d0e16da..9766190fe45 100644 --- a/src/Parsers/ASTBackupQuery.cpp +++ b/src/Parsers/ASTBackupQuery.cpp @@ -46,7 +46,7 @@ namespace } } - void formatExceptTables(const std::set & except_tables, const IAST::FormatSettings & format) + void formatExceptTables(const std::set & except_tables, const IAST::FormatSettings & format, bool only_table_names=false) { if (except_tables.empty()) return; @@ -60,7 +60,7 @@ namespace if (std::exchange(need_comma, true)) format.ostr << ", "; - if (!table_name.first.empty()) + if (!table_name.first.empty() && !only_table_names) format.ostr << backQuoteIfNeed(table_name.first) << "."; format.ostr << backQuoteIfNeed(table_name.second); } @@ -117,7 +117,7 @@ namespace format.ostr << backQuoteIfNeed(element.new_database_name); } - formatExceptTables(element.except_tables, format); + formatExceptTables(element.except_tables, format, /*only_table_names*/true); break; } From 81f6e993e9723bb89f5ce4b68a5eb61774c4ab76 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 26 Nov 2024 03:45:15 +0100 Subject: [PATCH 43/56] Allow parsing both variants of BACKUP EXCEPT TABLES query --- src/Parsers/ParserBackupQuery.cpp | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/Parsers/ParserBackupQuery.cpp b/src/Parsers/ParserBackupQuery.cpp index 6d2f4d8311d..f6993fdb811 100644 --- a/src/Parsers/ParserBackupQuery.cpp +++ b/src/Parsers/ParserBackupQuery.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -16,6 +17,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + namespace { using Kind = ASTBackupQuery::Kind; @@ -78,19 +84,20 @@ namespace auto parse_list_element = [&] { DatabaseAndTableName table_name; - if (database_name) - { - ASTPtr ast; - if (!ParserIdentifier{}.parse(pos, ast, expected)) - return false; + + if (!parseDatabaseAndTableName(pos, expected, table_name.first, table_name.second)) + return false; + + if (database_name && table_name.first.empty()) table_name.first = *database_name; - table_name.second = getIdentifierName(ast); - } - else - { - if (!parseDatabaseAndTableName(pos, expected, table_name.first, table_name.second)) - return false; - } + + if (database_name && table_name.first != *database_name) + throw Exception( + ErrorCodes::SYNTAX_ERROR, + "Database name in EXCEPT TABLES clause doesn't match the database name in DATABASE clause: {} != {}", + table_name.first, + *database_name + );
result.emplace(std::move(table_name)); return true; From b2d358c27bca164ad3c1ddba8414b96a60b7976b Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Tue, 26 Nov 2024 05:06:14 +0100 Subject: [PATCH 44/56] Add tests --- .../test_backup_restore_new/test.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index a7a22be1cf8..af141ad4c98 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1560,6 +1560,36 @@ def test_backup_all(exclude_system_log_tables): instance.query("DROP USER u1") +@pytest.mark.parametrize("include_database_name", [False, True]) +def test_backup_database_except(include_database_name): + create_and_fill_table() + + session_id = new_session_id() + instance.query( + "CREATE TABLE test.omit_table (s String) ENGINE = MergeTree ORDER BY s", + ) + + omit_table_name = "test.omit_table" if include_database_name else "omit_table" + backup_name = new_backup_name() + backup_command = f"BACKUP DATABASE test EXCEPT TABLES {omit_table_name} TO {backup_name}" + + instance.http_query(backup_command, params={"session_id": session_id}) + + instance.query("DROP TABLE test.table") + instance.query("DROP TABLE test.omit_table") + + restore_command = f"RESTORE ALL FROM {backup_name}" + + session_id = new_session_id() + instance.http_query( + restore_command, params={"session_id": session_id}, method="POST" + ) + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + assert instance.query("EXISTS TABLE test.omit_table") == "0\n" + + instance.query("DROP TABLE test.table") + def test_operation_id(): create_and_fill_table(n=30) From 7ed67942e8ca47d7bb92cfa640dbd4ce635cc034 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 26 Nov 2024 04:14:28 +0000 Subject: [PATCH 45/56] Automatic style fix --- tests/integration/test_backup_restore_new/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index af141ad4c98..762a4de2fbe 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1571,7 +1571,9 @@ def test_backup_database_except(include_database_name): omit_table_name = "test.omit_table" if include_database_name else "omit_table" backup_name = new_backup_name() - backup_command = f"BACKUP DATABASE test EXCEPT TABLES {omit_table_name} TO {backup_name}" + backup_command = ( + f"BACKUP DATABASE test EXCEPT TABLES {omit_table_name} TO {backup_name}" + ) instance.http_query(backup_command, params={"session_id": session_id}) @@ -1590,6 +1592,7 @@ def test_backup_database_except(include_database_name): instance.query("DROP TABLE test.table") + def test_operation_id(): create_and_fill_table(n=30) From 6e0f888347103fa7d9716e02f474db45587ae331 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 26 Nov 2024 11:18:54 +0000 Subject: [PATCH 46/56] Bump Google test to latest HEAD --- contrib/googletest | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/googletest b/contrib/googletest index a7f443b80b1..35d0c365609 160000 --- a/contrib/googletest +++ b/contrib/googletest @@ -1 +1 @@ -Subproject commit a7f443b80b105f940225332ed3c31f2790092f47 +Subproject commit 35d0c365609296fa4730d62057c487e3cfa030ff From 6762c30a883f29fb57c85a7a1c744ff2b7e1b0dc Mon Sep 17 00:00:00 2001 
From: Robert Schulze Date: Tue, 26 Nov 2024 11:02:49 +0000 Subject: [PATCH 47/56] Use std::string::contains where possible --- .clang-tidy | 2 +- programs/disks/DisksClient.cpp | 2 +- src/Client/ClientBase.cpp | 2 +- .../Config/AbstractConfigurationComparison.cpp | 2 +- src/Common/FileRenamer.cpp | 2 +- src/Common/Macros.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 2 +- src/Common/getNumberOfCPUCoresToUse.cpp | 4 ++-- src/Common/mysqlxx/PoolWithFailover.cpp | 2 +- src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp | 2 +- src/Common/parseGlobs.cpp | 2 +- src/Compression/tests/gtest_compressionCodec.cpp | 2 +- .../Serializations/SerializationNullable.cpp | 7 +++---- src/Databases/DatabaseReplicated.cpp | 12 ++++++------ src/Databases/MySQL/MaterializeMetadata.cpp | 8 ++++---- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 3 +-- ...kObjectStorageRemoteMetadataRestoreHelper.cpp | 4 ++-- src/Formats/CapnProtoSchema.cpp | 4 ++-- src/Functions/FunctionsConversion.h | 3 +-- src/IO/CompressionMethod.cpp | 16 ++++++++-------- src/IO/S3/Client.cpp | 4 ++-- src/IO/S3/URI.cpp | 2 +- src/Interpreters/Cluster.cpp | 2 +- src/Interpreters/Context.cpp | 2 +- .../GatherFunctionQuantileVisitor.cpp | 4 ++-- .../MySQL/InterpretersMySQLDDLQuery.cpp | 4 ++-- .../MySQL/tests/gtest_create_rewritten.cpp | 2 +- .../KustoFunctions/KQLDateTimeFunctions.cpp | 2 +- src/Parsers/ParserDataType.cpp | 2 +- .../Formats/Impl/CSVRowInputFormat.cpp | 2 +- src/Processors/Merges/Algorithms/Graphite.cpp | 2 +- src/Server/HTTPHandlerFactory.h | 2 +- src/Server/HTTPHandlerRequestFilter.h | 2 +- src/Server/PostgreSQLHandler.cpp | 2 +- src/Storages/ColumnsDescription.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeIndexGranularityInfo.cpp | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 2 +- src/Storages/StorageFile.cpp | 4 ++-- src/Storages/StorageURL.cpp | 4 ++-- src/Storages/System/StorageSystemZooKeeper.cpp | 2 +- 42 files changed, 67 insertions(+), 70 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index d7e35f16ad5..8c079b9692f 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -10,7 +10,7 @@ HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$' Checks: [ '*', - '-abseil-*', + '-abseil-string-find-str-contains', # disabled to avoid a misleading suggestion (obsolete absl::StrContains() instead of C++23 std::string::contains()) '-altera-*', diff --git a/programs/disks/DisksClient.cpp b/programs/disks/DisksClient.cpp index dcfb51c420e..4a0c758c686 100644 --- a/programs/disks/DisksClient.cpp +++ b/programs/disks/DisksClient.cpp @@ -100,7 +100,7 @@ void DiskWithPath::setPath(const String & any_path) String DiskWithPath::validatePathAndGetAsRelative(const String & path) { String lexically_normal_path = fs::path(path).lexically_normal(); - if (lexically_normal_path.find("..") != std::string::npos) + if (lexically_normal_path.contains("..")) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Path {} is not normalized", path); /// If path is absolute we should keep it as relative inside disk, so disk will look like diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c0f5744a4d5..93dea07a43f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2614,7 +2614,7 @@ bool ClientBase::addMergeTreeSettings(ASTCreateQuery & ast_create) || !ast_create.storage || !ast_create.storage->isExtendedStorageDefinition() || !ast_create.storage->engine - || ast_create.storage->engine->name.find("MergeTree") == 
std::string::npos) + || !ast_create.storage->engine->name.contains("MergeTree")) return false; auto all_changed = cmd_merge_tree_settings.changes(); diff --git a/src/Common/Config/AbstractConfigurationComparison.cpp b/src/Common/Config/AbstractConfigurationComparison.cpp index 73f305ce669..9001a3659dc 100644 --- a/src/Common/Config/AbstractConfigurationComparison.cpp +++ b/src/Common/Config/AbstractConfigurationComparison.cpp @@ -41,7 +41,7 @@ namespace #if defined(DEBUG_OR_SANITIZER_BUILD) /// Compound `ignore_keys` are not yet implemented. for (const auto & ignore_key : *ignore_keys) - chassert(ignore_key.find('.') == std::string_view::npos); + chassert(!ignore_key.contains('.')); #endif } diff --git a/src/Common/FileRenamer.cpp b/src/Common/FileRenamer.cpp index b43b870b94e..f500c89dcc8 100644 --- a/src/Common/FileRenamer.cpp +++ b/src/Common/FileRenamer.cpp @@ -38,7 +38,7 @@ String FileRenamer::generateNewFilename(const String & filename) const // Get current timestamp in microseconds String timestamp; - if (rule.find("%t") != String::npos) + if (rule.contains("%t")) { auto now = std::chrono::system_clock::now(); timestamp = std::to_string(timeInMicroseconds(now)); diff --git a/src/Common/Macros.cpp b/src/Common/Macros.cpp index 4b5300985e3..cd0fef21283 100644 --- a/src/Common/Macros.cpp +++ b/src/Common/Macros.cpp @@ -53,7 +53,7 @@ String Macros::expand(const String & s, /// Do not allow recursion if we expand only special macros, because it will be infinite recursion assert(info.level == 0 || !info.expand_special_macros_only); - if (s.find('{') == String::npos) + if (!s.contains('{')) return s; if (info.level && s.size() > 65536) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 00f095cd0e3..32dfa4261bd 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -206,7 +206,7 @@ std::string ZooKeeperAuthRequest::toStringImpl(bool /*short_format*/) const void ZooKeeperCreateRequest::writeImpl(WriteBuffer & out) const { /// See https://github.com/ClickHouse/clickhouse-private/issues/3029 - if (path.starts_with("/clickhouse/tables/") && path.find("/parts/") != std::string::npos) + if (path.starts_with("/clickhouse/tables/") && path.contains("/parts/")) { LOG_TRACE(getLogger(__PRETTY_FUNCTION__), "Creating part at path {}", path); } diff --git a/src/Common/getNumberOfCPUCoresToUse.cpp b/src/Common/getNumberOfCPUCoresToUse.cpp index e6eff773a9d..77ac726cd1d 100644 --- a/src/Common/getNumberOfCPUCoresToUse.cpp +++ b/src/Common/getNumberOfCPUCoresToUse.cpp @@ -143,13 +143,13 @@ try std::string key = line.substr(0, pos); std::string val = line.substr(pos + 1); - if (key.find("physical id") != std::string::npos) + if (key.contains("physical id")) { cur_core_entry.first = std::stoi(val); continue; } - if (key.find("core id") != std::string::npos) + if (key.contains("core id")) { cur_core_entry.second = std::stoi(val); core_entries.insert(cur_core_entry); diff --git a/src/Common/mysqlxx/PoolWithFailover.cpp b/src/Common/mysqlxx/PoolWithFailover.cpp index bbf077d3aa1..8c7dae739b7 100644 --- a/src/Common/mysqlxx/PoolWithFailover.cpp +++ b/src/Common/mysqlxx/PoolWithFailover.cpp @@ -165,7 +165,7 @@ PoolWithFailover::Entry PoolWithFailover::get() } catch (const Poco::Exception & e) { - if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos) /// NOTE: String comparison is trashy code. + if (e.displayText().contains("mysqlxx::Pool is full")) /// NOTE: String comparison is trashy code. 
{ full_pool = &pool; } diff --git a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp index 121767edc84..5c7473f721b 100644 --- a/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp +++ b/src/Common/mysqlxx/tests/mysqlxx_pool_test.cpp @@ -26,7 +26,7 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool) } catch (const Poco::Exception & e) { - if (e.displayText().find("mysqlxx::Pool is full") != std::string::npos) + if (e.displayText().contains("mysqlxx::Pool is full")) { std::cerr << e.displayText() << std::endl; } diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 72e67619859..de6caec3149 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -46,7 +46,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob std::string buffer(matched); oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '('; - if (buffer.find(',') == std::string::npos) + if (!buffer.contains(',')) { size_t range_begin = 0; size_t range_end = 0; diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 8265ba63fc2..4317eb413ac 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -521,7 +521,7 @@ public: TEST_P(CodecTest, TranscodingWithDataType) { /// Gorilla can only be applied to floating point columns - bool codec_is_gorilla = std::get<0>(GetParam()).codec_statement.find("Gorilla") != std::string::npos; + bool codec_is_gorilla = std::get<0>(GetParam()).codec_statement.contains("Gorilla"); WhichDataType which(std::get<1>(GetParam()).data_type.get()); bool data_is_float = which.isFloat(); if (codec_is_gorilla && !data_is_float) diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index e72dd3a42f5..cf12ed5972a 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -370,10 +370,10 @@ ReturnType deserializeTextEscapedAndRawImpl(IColumn & column, ReadBuffer & istr if constexpr (!throw_exception) return ReturnType(false); - if (null_representation.find('\t') != std::string::npos || null_representation.find('\n') != std::string::npos) + if (null_representation.contains('\t') || null_representation.contains('\n')) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\t' or '\\n' may not work correctly for large input."); - if (settings.tsv.crlf_end_of_line_input && null_representation.find('\r') != std::string::npos) + if (settings.tsv.crlf_end_of_line_input && null_representation.contains('\r')) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "TSV custom null representation " "containing '\\r' may not work correctly for large input."); @@ -747,8 +747,7 @@ ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const For if constexpr (!throw_exception) return ReturnType(false); - if (null_representation.find(settings.csv.delimiter) != std::string::npos || null_representation.find('\r') != std::string::npos - || null_representation.find('\n') != std::string::npos) + if (null_representation.contains(settings.csv.delimiter) || null_representation.contains('\r') || null_representation.contains('\n')) throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "CSV custom null representation 
containing " "format_csv_delimiter, '\\r' or '\\n' may not work correctly for large input."); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 8992a9d8548..9ea6ec27df1 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -140,9 +140,9 @@ DatabaseReplicated::DatabaseReplicated( { if (zookeeper_path.empty() || shard_name.empty() || replica_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ZooKeeper path, shard and replica names must be non-empty"); - if (shard_name.find('/') != std::string::npos || replica_name.find('/') != std::string::npos) + if (shard_name.contains('/') || replica_name.contains('/')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Shard and replica names should not contain '/'"); - if (shard_name.find('|') != std::string::npos || replica_name.find('|') != std::string::npos) + if (shard_name.contains('|') || replica_name.contains('|')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Shard and replica names should not contain '|'"); if (zookeeper_path.back() == '/') @@ -1105,9 +1105,9 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context) { - bool looks_like_replicated = metadata.find("Replicated") != std::string::npos; - bool looks_like_shared = metadata.find("Shared") != std::string::npos; - bool looks_like_merge_tree = metadata.find("MergeTree") != std::string::npos; + bool looks_like_replicated = metadata.contains("Replicated"); + bool looks_like_shared = metadata.contains("Shared"); + bool looks_like_merge_tree = metadata.contains("MergeTree"); if (!(looks_like_replicated || looks_like_shared) || !looks_like_merge_tree) return UUIDHelpers::Nil; @@ -1539,7 +1539,7 @@ void DatabaseReplicated::dropReplica( String full_replica_name = shard.empty() ? 
replica : getFullReplicaName(shard, replica); - if (full_replica_name.find('/') != std::string::npos) + if (full_replica_name.contains('/')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid replica name, '/' is not allowed: {}", full_replica_name); auto zookeeper = Context::getGlobalContextInstance()->getZooKeeper(); diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index b187b2904f2..52a33f2872f 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -165,11 +165,11 @@ static bool checkSyncUserPrivImpl(const mysqlxx::PoolWithFailover::Entry & conne grants_query = (*block.getByPosition(0).column)[index].safeGet<String>(); out << grants_query << "; "; sub_privs = grants_query.substr(0, grants_query.find(" ON ")); - if (sub_privs.find("ALL PRIVILEGES") == std::string::npos) + if (!sub_privs.contains("ALL PRIVILEGES")) { - if ((sub_privs.find("RELOAD") != std::string::npos and - sub_privs.find("REPLICATION SLAVE") != std::string::npos and - sub_privs.find("REPLICATION CLIENT") != std::string::npos)) + if ((sub_privs.contains("RELOAD") and + sub_privs.contains("REPLICATION SLAVE") and + sub_privs.contains("REPLICATION CLIENT"))) return true; } else diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 8d5a6998d80..b4a87b60117 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -135,8 +135,7 @@ DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local bool DatabasePostgreSQL::checkPostgresTable(const String & table_name) const { - if (table_name.find('\'') != std::string::npos - || table_name.find('\\') != std::string::npos) + if (table_name.contains('\'') || table_name.contains('\\')) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "PostgreSQL table name cannot contain single quote or backslash characters, passed {}", table_name); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index b9f963c4590..67eddd634a5 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -367,7 +367,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path); /// Skip file operations objects. They will be processed separately. - if (object->relative_path.find("/operations/") != String::npos) + if (object->relative_path.contains("/operations/")) continue; const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path); @@ -541,7 +541,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject for (const auto & path : renames) { /// Skip already detached parts. - if (path.find("/detached/") != std::string::npos) + if (path.contains("/detached/")) continue; /// Skip not finished parts. They shouldn't be in 'detached' directory, because CH wouldn't be able to finish processing them.
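The rewrite in all of these hunks is the same mechanical transformation: the `find(...) != npos` idiom is replaced by the C++23 `contains()` member of `std::string` and `std::string_view`, which states the membership test directly and leaves no room for the easy-to-miss `==`/`!=` npos polarity mistake. A minimal standalone sketch of the before/after shape, modelled on the `urlWithGlobs` hunk further down (illustrative code, not part of the patch):

```cpp
#include <iostream>
#include <string>

// Old shape: the reader has to decode a comparison against std::string::npos.
bool hasWildcardOld(const std::string & uri)
{
    return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos)
        || uri.find('|') != std::string::npos;
}

// New shape: identical behavior, expressed as a direct membership test (C++23).
bool hasWildcardNew(const std::string & uri)
{
    return (uri.contains('{') && uri.contains('}')) || uri.contains('|');
}

int main()
{
    std::cout << hasWildcardOld("s3://bucket/{a,b}.csv") << ' '   // 1
              << hasWildcardNew("s3://bucket/plain.csv") << '\n'; // 0
}
```

`contains()` has overloads for `char`, `std::string_view`, and `const char *`, so every `find` variant touched by this patch maps onto it one-to-one.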
diff --git a/src/Formats/CapnProtoSchema.cpp b/src/Formats/CapnProtoSchema.cpp index 6076dae4157..a09d5f963a5 100644 --- a/src/Formats/CapnProtoSchema.cpp +++ b/src/Formats/CapnProtoSchema.cpp @@ -43,10 +43,10 @@ capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaIn /// That's not good to determine the type of error by its description, but /// this is the only way to do it here, because kj doesn't specify the type of error. auto description = std::string_view(e.getDescription().cStr()); - if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos || description.find("no such file") != String::npos) + if (description.contains("No such file or directory") || description.contains("no such directory") || description.contains("no such file")) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exists", schema_info.absoluteSchemaPath()); - if (description.find("Parse error") != String::npos) + if (description.contains("Parse error")) throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 09640bc1d2b..2a0f349e25a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -2473,8 +2473,7 @@ public: if (!isStringOrFixedString(arguments[0].type)) { - if (this->getName().find("OrZero") != std::string::npos || - this->getName().find("OrNull") != std::string::npos) + if (this->getName().contains("OrZero") || this->getName().contains("OrNull")) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", arguments[0].type->getName(), getName()); diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index 82a7a0d6340..e6b7df5b73f 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -58,21 +58,21 @@ CompressionMethod chooseHTTPCompressionMethod(const std::string & list) { /// The compression methods are ordered from most to least preferred. 
- if (std::string::npos != list.find("zstd")) + if (list.contains("zstd")) return CompressionMethod::Zstd; - if (std::string::npos != list.find("br")) + if (list.contains("br")) return CompressionMethod::Brotli; - if (std::string::npos != list.find("lz4")) + if (list.contains("lz4")) return CompressionMethod::Lz4; - if (std::string::npos != list.find("snappy")) + if (list.contains("snappy")) return CompressionMethod::Snappy; - if (std::string::npos != list.find("gzip")) + if (list.contains("gzip")) return CompressionMethod::Gzip; - if (std::string::npos != list.find("deflate")) + if (list.contains("deflate")) return CompressionMethod::Zlib; - if (std::string::npos != list.find("xz")) + if (list.contains("xz")) return CompressionMethod::Xz; - if (std::string::npos != list.find("bz2")) + if (list.contains("bz2")) return CompressionMethod::Bzip2; return CompressionMethod::None; } diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 088087458c7..84eb8ff20a3 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -156,10 +156,10 @@ namespace ProviderType deduceProviderType(const std::string & url) { - if (url.find(".amazonaws.com") != std::string::npos) + if (url.contains(".amazonaws.com")) return ProviderType::AWS; - if (url.find("storage.googleapis.com") != std::string::npos) + if (url.contains("storage.googleapis.com")) return ProviderType::GCS; return ProviderType::UNKNOWN; diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index aefe3ff338c..e2a10b78733 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -99,7 +99,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// '?' can not be used as a wildcard, otherwise it will be ambiguous. /// If no "versionId" in the http parameter, '?' can be used as a wildcard. /// It is necessary to encode '?' to avoid deletion during parsing path. - if (!has_version_id && uri_.find('?') != String::npos) + if (!has_version_id && uri_.contains('?')) { String uri_with_question_mark_encode; Poco::URI::encode(uri_, "?", uri_with_question_mark_encode); diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 006e3f75937..910a1f9b4ea 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -383,7 +383,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf continue; } - if (key.find('.') != String::npos) + if (key.contains('.')) throw Exception(ErrorCodes::SYNTAX_ERROR, "Cluster names with dots are not supported: '{}'", key); /// If old config is set and cluster config wasn't changed, don't update this cluster. diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 78d41a336b6..d50fd66f3ac 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3893,7 +3893,7 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const auto zookeeper = shared->auxiliary_zookeepers.find(name); if (zookeeper == shared->auxiliary_zookeepers.end()) { - if (name.find(':') != std::string::npos || name.find('/') != std::string::npos) + if (name.contains(':') || name.contains('/')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid auxiliary ZooKeeper name {}: ':' and '/' are not allowed", name); const auto & config = shared->auxiliary_zookeepers_config ? 
*shared->auxiliary_zookeepers_config : getConfigRef(); diff --git a/src/Interpreters/GatherFunctionQuantileVisitor.cpp b/src/Interpreters/GatherFunctionQuantileVisitor.cpp index 6b6dc362771..03bd68e1a09 100644 --- a/src/Interpreters/GatherFunctionQuantileVisitor.cpp +++ b/src/Interpreters/GatherFunctionQuantileVisitor.cpp @@ -71,12 +71,12 @@ void GatherFunctionQuantileData::FuseQuantileAggregatesData::addFuncNode(ASTPtr if (arguments.size() != (need_two_args ? 2 : 1)) return; - if (arguments[0]->getColumnName().find(',') != std::string::npos) + if (arguments[0]->getColumnName().contains(',')) return; String arg_name = arguments[0]->getColumnName(); if (need_two_args) { - if (arguments[1]->getColumnName().find(',') != std::string::npos) + if (arguments[1]->getColumnName().contains(',')) return; arg_name += "," + arguments[1]->getColumnName(); } diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index c7fd800cacc..26092da09b8 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -103,7 +103,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) if (is_unsigned) { /// For example(in MySQL): CREATE TABLE test(column_name INT NOT NULL ... UNSIGNED) - if (type_name_upper.find("INT") != String::npos && !endsWith(type_name_upper, "SIGNED") + if (type_name_upper.contains("INT") && !endsWith(type_name_upper, "SIGNED") && !endsWith(type_name_upper, "UNSIGNED")) data_type_node->name = type_name_upper + " UNSIGNED"; } @@ -115,7 +115,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) /// For example ENUM('a', 'b', 'c') -> ENUM('a'=1, 'b'=2, 'c'=3) /// Elements on a position further than 32767 are assigned negative values, starting with -32768. /// Note: Enum would be transformed to Enum8 if number of elements is less then 128, otherwise it would be transformed to Enum16. 
- if (type_name_upper.find("ENUM") != String::npos) + if (type_name_upper.contains("ENUM")) { UInt16 i = 0; for (ASTPtr & child : data_type_node->arguments->children) diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 81e6e6a8761..43b17c3a606 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -62,7 +62,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType) MATERIALIZEDMYSQL_TABLE_COLUMNS + ") ENGINE = " "ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY tuple(key)"); - if (Poco::toUpper(test_type).find("INT") != std::string::npos) + if (Poco::toUpper(test_type).contains("INT")) { EXPECT_EQ(queryToString(tryRewrittenCreateQuery( "CREATE TABLE `test_database`.`test_table_1`(`key` INT NOT NULL PRIMARY KEY, test " + test_type + " UNSIGNED)", context_holder.context)), diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp index f059fd9aa6b..a8af4d1bb69 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDateTimeFunctions.cpp @@ -331,7 +331,7 @@ bool FormatDateTime::convertImpl(String & out, IParser::Pos & pos) i = i + arg.size(); } } - if (decimal > 0 && formatspecifier.find('.') != String::npos) + if (decimal > 0 && formatspecifier.contains('.')) { out = std::format( "concat(" diff --git a/src/Parsers/ParserDataType.cpp b/src/Parsers/ParserDataType.cpp index d86b659df90..cd2dfcf2fa1 100644 --- a/src/Parsers/ParserDataType.cpp +++ b/src/Parsers/ParserDataType.cpp @@ -189,7 +189,7 @@ bool ParserDataType::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (ParserKeyword(Keyword::PRECISION).ignore(pos)) type_name_suffix = toStringView(Keyword::PRECISION); } - else if (type_name_upper.find("INT") != std::string::npos) + else if (type_name_upper.contains("INT")) { /// Support SIGNED and UNSIGNED integer type modifiers for compatibility with MySQL if (ParserKeyword(Keyword::SIGNED).ignore(pos, expected)) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index c89268d98fb..0eaf07bee9a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -34,7 +34,7 @@ namespace return; } constexpr std::string_view bad_delimiters = " \t\"'.UL"; - if (bad_delimiters.find(delimiter) != std::string_view::npos) + if (bad_delimiters.contains(delimiter)) throw Exception( ErrorCodes::BAD_ARGUMENTS, "CSV format may not work correctly with delimiter '{}'. 
Try use CustomSeparated format instead", diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index ceb86d89500..718de3655af 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -93,7 +93,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param if (params.patterns_typed) { std::string_view path_view = path; - if (path_view.find("?"sv) == std::string::npos) + if (!path_view.contains("?"sv)) return params.patterns_plain; return params.patterns_tagged; } diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index cbb0cdee1dd..b3451e0a433 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -90,7 +90,7 @@ public: { addFilter([](const auto & request) { - return (request.getURI().find('?') != std::string::npos + return (request.getURI().contains('?') && (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET || request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD)) || request.getMethod() == Poco::Net::HTTPRequest::HTTP_OPTIONS diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index de1920bd535..ba204ce5623 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -78,7 +78,7 @@ static inline auto emptyQueryStringFilter() return [](const HTTPServerRequest & request) { const auto & uri = request.getURI(); - return std::string::npos == uri.find('?'); + return !uri.contains('?'); }; } diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index 5dad826cde4..048239f5a86 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -281,7 +281,7 @@ void PostgreSQLHandler::processQuery() } bool psycopg2_cond = query->query == "BEGIN" || query->query == "COMMIT"; // psycopg2 starts and ends queries with BEGIN/COMMIT commands - bool jdbc_cond = query->query.find("SET extra_float_digits") != String::npos || query->query.find("SET application_name") != String::npos; // jdbc starts with setting this parameter + bool jdbc_cond = query->query.contains("SET extra_float_digits") || query->query.contains("SET application_name"); // jdbc starts with setting this parameter if (psycopg2_cond || jdbc_cond) { message_transport->send( diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 18514f0b58a..95d0f08fdf0 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -970,7 +970,7 @@ std::vector<String> ColumnsDescription::getAllRegisteredNames() const names.reserve(columns.size()); for (const auto & column : columns) { - if (column.name.find('.') == std::string::npos) + if (!column.name.contains('.')) names.push_back(column.name); } return names; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 907fa0bc418..99f2d766b11 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6466,7 +6466,7 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const void MergeTreeData::validateDetachedPartName(const String & name) { - if (name.find('/') != std::string::npos || name == "."
|| name == "..") throw DB::Exception(ErrorCodes::INCORRECT_FILE_NAME, "Invalid part name '{}'", name); if (startsWith(name, "attaching_") || startsWith(name, "deleting_")) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 9211ab51ad5..57edaae0f8e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -69,7 +69,7 @@ MarkType::MarkType(bool adaptive_, bool compressed_, MergeTreeDataPartType::Valu bool MarkType::isMarkFileExtension(std::string_view extension) { - return extension.find("mrk") != std::string_view::npos; + return extension.contains("mrk"); } std::string MarkType::getFileExtension() const diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index a68ec7d9948..9f66a079998 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -515,7 +515,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (zookeeper_info.replica_name.empty()) throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); // '\t' and '\n' will interrupt parsing 'source replica' in ReplicatedMergeTreeLogEntryData::readText - if (zookeeper_info.replica_name.find('\t') != String::npos || zookeeper_info.replica_name.find('\n') != String::npos) + if (zookeeper_info.replica_name.contains('\t') || zookeeper_info.replica_name.contains('\n')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must not contain '\\t' or '\\n'"); arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments. diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index abbdd91caf2..6ad1155e888 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -943,7 +943,7 @@ bool MaterializedPostgreSQLConsumer::consume() /// https://github.com/postgres/postgres/blob/master/src/backend/replication/pgoutput/pgoutput.c#L1128 /// So at some point will get out of limit and then they will be cleaned. std::string error_message = e.what(); - if (error_message.find("out of relcache_callback_list slots") == std::string::npos) + if (!error_message.contains("out of relcache_callback_list slots")) tryLogCurrentException(__PRETTY_FUNCTION__); connection->tryUpdateConnection(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index a629812e114..f5f72e1b68a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -385,7 +385,7 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user String path = fs::absolute(fs_table_path).lexically_normal(); /// Normalize path. 
bool can_be_directory = true; - if (path.find(PartitionedSink::PARTITION_ID_WILDCARD) != std::string::npos) + if (path.contains(PartitionedSink::PARTITION_ID_WILDCARD)) { paths.push_back(path); } @@ -1976,7 +1976,7 @@ SinkToStoragePtr StorageFile::write( if (context->getSettingsRef()[Setting::engine_file_truncate_on_insert]) flags |= O_TRUNC; - bool has_wildcards = path_for_partitioned_write.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; + bool has_wildcards = path_for_partitioned_write.contains(PartitionedSink::PARTITION_ID_WILDCARD); const auto * insert_query = dynamic_cast<const ASTInsertQuery *>(query.get()); bool is_partitioned_implementation = insert_query && insert_query->partition_by && has_wildcards; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 3ba8d1fa304..e6bd4df6d7a 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -116,7 +116,7 @@ static const std::vector> optional_regex_keys = { bool urlWithGlobs(const String & uri) { - return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos; + return (uri.contains('{') && uri.contains('}')) || uri.contains('|'); } String getSampleURI(String uri, ContextPtr context) @@ -1338,7 +1338,7 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad if (http_method.empty()) http_method = Poco::Net::HTTPRequest::HTTP_POST; - bool has_wildcards = uri.find(PartitionedSink::PARTITION_ID_WILDCARD) != String::npos; + bool has_wildcards = uri.contains(PartitionedSink::PARTITION_ID_WILDCARD); const auto * insert_query = dynamic_cast<const ASTInsertQuery *>(query.get()); auto partition_by_ast = insert_query ? (insert_query->partition_by ? insert_query->partition_by : partition_by) : nullptr; bool is_partitioned_implementation = partition_by_ast && has_wildcards; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 468fa3c58fa..000098af80d 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -141,7 +141,7 @@ public: String path = block.getByPosition(2).column->getDataAt(i).toString(); /// We don't expect a "name" contains a path.
- if (name.find('/') != std::string::npos) + if (name.contains('/')) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `name` should not contain '/'"); } From b487f59496ff33d53180975b566ffb19a4bfc946 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 26 Nov 2024 12:30:57 +0100 Subject: [PATCH 48/56] Rename allowed_feature_tier to allow_feature_tier --- CHANGELOG.md | 2 +- .../server-configuration-parameters/settings.md | 2 +- programs/local/LocalServer.cpp | 4 ++-- programs/server/Server.cpp | 4 ++-- src/Access/SettingsConstraints.cpp | 4 ++-- src/Core/ServerSettings.cpp | 4 ++-- src/Interpreters/Context.cpp | 2 +- src/Storages/MergeTree/MergeTreeSettings.cpp | 4 ++-- .../__init__.py | 0 .../configs/allow_feature_tier.xml | 3 +++ .../configs/users.d/users.xml | 0 .../test.py | 12 ++++++------ .../configs/allowed_feature_tier.xml | 3 --- 13 files changed, 22 insertions(+), 22 deletions(-) rename tests/integration/{test_allowed_feature_tier => test_allow_feature_tier}/__init__.py (100%) create mode 100644 tests/integration/test_allow_feature_tier/configs/allow_feature_tier.xml rename tests/integration/{test_allowed_feature_tier => test_allow_feature_tier}/configs/users.d/users.xml (100%) rename tests/integration/{test_allowed_feature_tier => test_allow_feature_tier}/test.py (96%) delete mode 100644 tests/integration/test_allowed_feature_tier/configs/allowed_feature_tier.xml diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dfc32d2821..a915b9be7d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,7 @@ * When retrieving data directly from a dictionary using Dictionary storage, dictionary table function, or direct SELECT from the dictionary itself, it is now enough to have `SELECT` permission or `dictGet` permission for the dictionary. This aligns with previous attempts to prevent ACL bypasses: https://github.com/ClickHouse/ClickHouse/pull/57362 and https://github.com/ClickHouse/ClickHouse/pull/65359. It also makes the latter one backward compatible. [#72051](https://github.com/ClickHouse/ClickHouse/pull/72051) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Experimental feature -* Implement `allowed_feature_tier` as a global switch to disable all experimental / beta features. [#71841](https://github.com/ClickHouse/ClickHouse/pull/71841) [#71145](https://github.com/ClickHouse/ClickHouse/pull/71145) ([Raúl Marín](https://github.com/Algunenano)). +* Implement `allow_feature_tier` as a global switch to disable all experimental / beta features. [#71841](https://github.com/ClickHouse/ClickHouse/pull/71841) [#71145](https://github.com/ClickHouse/ClickHouse/pull/71145) ([Raúl Marín](https://github.com/Algunenano)). * Fix possible error `No such file or directory` due to unescaped special symbols in files for JSON subcolumns. [#71182](https://github.com/ClickHouse/ClickHouse/pull/71182) ([Pavel Kruglov](https://github.com/Avogar)). * Support alter from String to JSON. This PR also changes the serialization of JSON and Dynamic types to new version V2. Old version V1 can be still used by enabling setting `merge_tree_use_v1_object_and_dynamic_serialization` (can be used during upgrade to be able to rollback the version without issues). [#70442](https://github.com/ClickHouse/ClickHouse/pull/70442) ([Pavel Kruglov](https://github.com/Avogar)). * Implement simple CAST from Map/Tuple/Object to new JSON through serialization/deserialization from JSON string. 
[#71320](https://github.com/ClickHouse/ClickHouse/pull/71320) ([Pavel Kruglov](https://github.com/Avogar)). diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 9442aad230b..006f022c744 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -3287,7 +3287,7 @@ Type: Bool Default value: `true`. -## allowed_feature_tier +## allow_feature_tier Controls if the user can change settings related to the different feature tiers. 0 - Changes to any setting are allowed (experimental, beta, production). diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3ecc6ecf24d..4fc1970e353 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -79,7 +79,7 @@ namespace Setting namespace ServerSetting { - extern const ServerSettingsUInt32 allowed_feature_tier; + extern const ServerSettingsUInt32 allow_feature_tier; extern const ServerSettingsDouble cache_size_to_ram_max_ratio; extern const ServerSettingsUInt64 compiled_expression_cache_elements_size; extern const ServerSettingsUInt64 compiled_expression_cache_size; @@ -791,7 +791,7 @@ void LocalServer::processConfig() global_context->setQueryCache(0, 0, 0, 0); /// Initialize allowed tiers - global_context->getAccessControl().setAllowTierSettings(server_settings[ServerSetting::allowed_feature_tier]); + global_context->getAccessControl().setAllowTierSettings(server_settings[ServerSetting::allow_feature_tier]); #if USE_EMBEDDED_COMPILER size_t compiled_expression_cache_max_size_in_bytes = server_settings[ServerSetting::compiled_expression_cache_size]; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index af383334128..8f8e3a34f45 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -166,7 +166,7 @@ namespace MergeTreeSetting namespace ServerSetting { - extern const ServerSettingsUInt32 allowed_feature_tier; + extern const ServerSettingsUInt32 allow_feature_tier; extern const ServerSettingsUInt32 asynchronous_heavy_metrics_update_period_s; extern const ServerSettingsUInt32 asynchronous_metrics_update_period_s; extern const ServerSettingsBool asynchronous_metrics_enable_heavy_metrics; @@ -1772,7 +1772,7 @@ try global_context->setMaxDictionaryNumToWarn(new_server_settings[ServerSetting::max_dictionary_num_to_warn]); global_context->setMaxDatabaseNumToWarn(new_server_settings[ServerSetting::max_database_num_to_warn]); global_context->setMaxPartNumToWarn(new_server_settings[ServerSetting::max_part_num_to_warn]); - global_context->getAccessControl().setAllowTierSettings(new_server_settings[ServerSetting::allowed_feature_tier]); + global_context->getAccessControl().setAllowTierSettings(new_server_settings[ServerSetting::allow_feature_tier]); /// Only for system.server_settings global_context->setConfigReloaderInterval(new_server_settings[ServerSetting::config_reload_interval_ms]); diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index cb1d433766a..67f13f8430a 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -414,13 +414,13 @@ SettingsConstraints::Checker SettingsConstraints::getChecker(const Settings & cu if (setting_tier == SettingsTierType::EXPERIMENTAL && !allowed_experimental) return Checker( PreformattedMessage::create( - "Cannot modify setting '{}'. 
Changes to EXPERIMENTAL settings are disabled in the server config ('allowed_feature_tier')", + "Cannot modify setting '{}'. Changes to EXPERIMENTAL settings are disabled in the server config ('allow_feature_tier')", setting_name), ErrorCodes::READONLY); if (setting_tier == SettingsTierType::BETA && !allowed_beta) return Checker( PreformattedMessage::create( - "Cannot modify setting '{}'. Changes to BETA settings are disabled in the server config ('allowed_feature_tier')", + "Cannot modify setting '{}'. Changes to BETA settings are disabled in the server config ('allow_feature_tier')", setting_name), ErrorCodes::READONLY); } diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index 4bea23d4e90..4c6361cb113 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -205,7 +205,7 @@ namespace DB DECLARE(UInt64, load_marks_threadpool_queue_size, 1000000, "Number of tasks which is possible to push into prefetches pool", 0) \ DECLARE(UInt64, threadpool_writer_pool_size, 100, "Size of background pool for write requests to object storages", 0) \ DECLARE(UInt64, threadpool_writer_queue_size, 1000000, "Number of tasks which is possible to push into background pool for write requests to object storages", 0) \ - DECLARE(UInt32, allowed_feature_tier, 0, "0 - All feature tiers allowed (experimental, beta, production). 1 - Only beta and production feature tiers allowed. 2 - Only production feature tier allowed", 0) \ + DECLARE(UInt32, allow_feature_tier, 0, "0 - All feature tiers allowed (experimental, beta, production). 1 - Only beta and production feature tiers allowed. 2 - Only production feature tier allowed", 0) \ // clang-format on @@ -324,7 +324,7 @@ void ServerSettings::dumpToSystemServerSettingsColumns(ServerSettingColumnsParam {"mutation_workload", {context->getMutationWorkload(), ChangeableWithoutRestart::Yes}}, {"config_reload_interval_ms", {std::to_string(context->getConfigReloaderInterval()), ChangeableWithoutRestart::Yes}}, - {"allowed_feature_tier", + {"allow_feature_tier", {std::to_string(context->getAccessControl().getAllowTierSettings()), ChangeableWithoutRestart::Yes}}, }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 78d41a336b6..b418a9a0df7 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4991,7 +4991,7 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi /// Don't check for constraints on first load. This makes the default profile consistent with other users, where /// the default value set in the config might be outside of the constraints range - /// It makes it possible to change the value of experimental settings with `allowed_feature_tier` != 2 + /// It makes it possible to change the value of experimental settings with `allow_feature_tier` != 2 bool check_constraints = false; setCurrentProfile(shared->system_profile_name, check_constraints); diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index 097c5b7036d..07533b593f3 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -394,14 +394,14 @@ void MergeTreeSettingsImpl::sanityCheck(size_t background_pool_tasks, bool allow throw Exception( ErrorCodes::READONLY, "Cannot modify setting '{}'. 
Changes to EXPERIMENTAL settings are disabled in the server config " - "('allowed_feature_tier')", + "('allow_feature_tier')", setting.getName()); } if (!allow_beta && tier == BETA) { throw Exception( ErrorCodes::READONLY, - "Cannot modify setting '{}'. Changes to BETA settings are disabled in the server config ('allowed_feature_tier')", + "Cannot modify setting '{}'. Changes to BETA settings are disabled in the server config ('allow_feature_tier')", setting.getName()); } } diff --git a/tests/integration/test_allowed_feature_tier/__init__.py b/tests/integration/test_allow_feature_tier/__init__.py similarity index 100% rename from tests/integration/test_allowed_feature_tier/__init__.py rename to tests/integration/test_allow_feature_tier/__init__.py diff --git a/tests/integration/test_allow_feature_tier/configs/allow_feature_tier.xml b/tests/integration/test_allow_feature_tier/configs/allow_feature_tier.xml new file mode 100644 index 00000000000..a0bd0fa6c24 --- /dev/null +++ b/tests/integration/test_allow_feature_tier/configs/allow_feature_tier.xml @@ -0,0 +1,3 @@ +<clickhouse> + <allow_feature_tier>0</allow_feature_tier> +</clickhouse> diff --git a/tests/integration/test_allowed_feature_tier/configs/users.d/users.xml b/tests/integration/test_allow_feature_tier/configs/users.d/users.xml similarity index 100% rename from tests/integration/test_allowed_feature_tier/configs/users.d/users.xml rename to tests/integration/test_allow_feature_tier/configs/users.d/users.xml diff --git a/tests/integration/test_allowed_feature_tier/test.py b/tests/integration/test_allow_feature_tier/test.py similarity index 96% rename from tests/integration/test_allowed_feature_tier/test.py rename to tests/integration/test_allow_feature_tier/test.py index fff1ff76906..dd649a68a93 100644 --- a/tests/integration/test_allowed_feature_tier/test.py +++ b/tests/integration/test_allow_feature_tier/test.py @@ -5,14 +5,14 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( "instance", - main_configs=["configs/allowed_feature_tier.xml"], + main_configs=["configs/allow_feature_tier.xml"], user_configs=[ "configs/users.d/users.xml", ], stay_alive=True, ) -feature_tier_path = "/etc/clickhouse-server/config.d/allowed_feature_tier.xml" +feature_tier_path = "/etc/clickhouse-server/config.d/allow_feature_tier.xml" @pytest.fixture(scope="module") def start_cluster(): @@ -26,12 +26,12 @@ def start_cluster(): def get_current_tier_value(instance): query_with_current_tier_value = ( - "SELECT value FROM system.server_settings where name = 'allowed_feature_tier'" + "SELECT value FROM system.server_settings where name = 'allow_feature_tier'" ) return instance.query(query_with_current_tier_value).strip() -def test_allowed_feature_tier_in_general_settings(start_cluster): +def test_allow_feature_tier_in_general_settings(start_cluster): # We use these settings as an example. If it fails in the future because you've changed the tier of the setting # please change it to another setting in the same tier.
If there is none, feel free to comment out the test for that tier query_with_experimental_setting = ( @@ -82,7 +82,7 @@ def test_allowed_feature_tier_in_general_settings(start_cluster): assert "0" == get_current_tier_value(instance) -def test_allowed_feature_tier_in_mergetree_settings(start_cluster): +def test_allow_feature_tier_in_mergetree_settings(start_cluster): assert "0" == get_current_tier_value(instance) instance.query("DROP TABLE IF EXISTS test_experimental") @@ -170,7 +170,7 @@ def test_allowed_feature_tier_in_mergetree_settings(start_cluster): instance.query("DROP TABLE IF EXISTS test_experimental") -def test_allowed_feature_tier_in_user(start_cluster): +def test_allow_feature_tier_in_user(start_cluster): instance.query("DROP USER IF EXISTS user_experimental") assert "0" == get_current_tier_value(instance) diff --git a/tests/integration/test_allowed_feature_tier/configs/allowed_feature_tier.xml b/tests/integration/test_allowed_feature_tier/configs/allowed_feature_tier.xml deleted file mode 100644 index f24c54711f4..00000000000 --- a/tests/integration/test_allowed_feature_tier/configs/allowed_feature_tier.xml +++ /dev/null @@ -1,3 +0,0 @@ -<clickhouse> - <allowed_feature_tier>0</allowed_feature_tier> -</clickhouse> From 9865ca5fd52f993fe8df121798a6cc4ae094b48f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 26 Nov 2024 11:56:59 +0000 Subject: [PATCH 49/56] Remove duplicates of removed bugprone-reserved-identifier --- .clang-tidy | 2 -- 1 file changed, 2 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 8c079b9692f..b8ab780c7ee 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -32,8 +32,6 @@ Checks: [ '-bugprone-crtp-constructor-accessibility', '-cert-dcl16-c', - '-cert-dcl37-c', - '-cert-dcl51-cpp', '-cert-err58-cpp', '-cert-msc32-c', '-cert-msc51-cpp', From 2ed07b21d350d36aef5bbb6d2c3391fbd0f48881 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 26 Nov 2024 12:43:08 +0000 Subject: [PATCH 50/56] Fix test_mask_sensitive_info --- tests/integration/README.md | 2 +- tests/integration/test_mask_sensitive_info/test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index b246eeb0674..b857ca42bfa 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -47,7 +47,7 @@ sudo -H pip install \ nats-py ``` -(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose-v2 python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python python3-pytest-timeout python3-minio` +(highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker.io docker-compose-v2 python3-pytest python3-dicttoxml python3-docker python3-pymysql python3-protobuf python3-pymongo python3-tzlocal python3-kazoo python3-psycopg2 kafka-python3 python3-pytest-timeout python3-minio` Some tests have other dependencies, e.g. spark. See docker/test/integration/runner/Dockerfile for how to install those. See docker/test/integration/runner/dockerd-entrypoint.sh for environment variables that need to be set (e.g. JAVA_PATH).
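The `test_mask_sensitive_info` expectations below pin down the behavior under test: credentials passed as engine arguments must be echoed back as `[HIDDEN]` when the DDL is shown in logs and system tables. As a rough sketch of that kind of redaction, under the assumption of a simple regex pass over the statement text (illustrative only; this is not ClickHouse's actual masking code, which operates on the parsed query rather than raw text):

```cpp
#include <iostream>
#include <regex>
#include <string>

// Replace the value of any `password = '...'` argument with a placeholder
// before the statement is written to logs or system tables.
std::string maskSensitiveInfo(const std::string & ddl)
{
    static const std::regex password_arg(R"(password\s*=\s*'[^']*')", std::regex::icase);
    return std::regex_replace(ddl, password_arg, "password = '[HIDDEN]'");
}

int main()
{
    std::cout << maskSensitiveInfo(
        "CREATE TABLE t (x int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', "
        "port = 5432, database = 'mongo_db', collection = 'mongo_col', "
        "user = 'mongo_user', password = 'secret')") << '\n';
}
```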
diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index d6a0cfb282e..97a0e33e8bd 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -194,7 +194,7 @@ def test_create_table(): f"MySQL(named_collection_2, database = 'mysql_db', host = 'mysql80', port = 3306, password = '{password}', table = 'mysql_table', user = 'mysql_user')", f"MySQL(named_collection_3, database = 'mysql_db', host = 'mysql80', port = 3306, table = 'mysql_table')", f"PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '{password}')", - f"MongoDB(named_collection_5, host = 'mongo1', port = 5432, db = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '{password}')", + f"MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '{password}')", f"S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '{password}', format = 'CSV')", f"S3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV', 'gzip')", f"S3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '{password}')", @@ -264,7 +264,7 @@ def test_create_table(): "CREATE TABLE table9 (`x` int) ENGINE = MySQL(named_collection_2, database = 'mysql_db', host = 'mysql80', port = 3306, password = '[HIDDEN]', `table` = 'mysql_table', user = 'mysql_user')", "CREATE TABLE table10 (x int) ENGINE = MySQL(named_collection_3, database = 'mysql_db', host = 'mysql80', port = 3306, table = 'mysql_table')", "CREATE TABLE table11 (`x` int) ENGINE = PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', `table` = 'postgres_table', user = 'postgres_user', password = '[HIDDEN]')", - "CREATE TABLE table12 (`x` int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', port = 5432, db = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '[HIDDEN]'", + "CREATE TABLE table12 (`x` int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '[HIDDEN]'", "CREATE TABLE table13 (`x` int) ENGINE = S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]', format = 'CSV')", "CREATE TABLE table14 (x int) ENGINE = S3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV', 'gzip')", "CREATE TABLE table15 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')", From 0ebee19f2e3494df1c53b1d260fce31472772dcb Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 26 Nov 2024 09:21:58 -0400 Subject: [PATCH 51/56] Update docs/en/sql-reference/window-functions/leadInFrame.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- docs/en/sql-reference/window-functions/leadInFrame.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index 16c7aefd81a..c2bc2f525c5 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -11,7 +11,7 @@ Returns a value evaluated at the row that is offset rows after 
the current row w :::warning `leadInFrame` behavior differs from the standard SQL `lead` window function. Clickhouse window function `leadInFrame` respects the window frame. -To get behavior identical to the `lead`, use `rows between unbounded preceding and unbounded following`. +To get behavior identical to the `lead`, use `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. ::: **Syntax** From 5b1bdef54f047e3395031f8bc1bc4acba0c15eaf Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Tue, 26 Nov 2024 09:24:42 -0400 Subject: [PATCH 52/56] Update lagInFrame.md --- docs/en/sql-reference/window-functions/lagInFrame.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index c4b7b377761..fb311ab6fcb 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -11,7 +11,7 @@ Returns a value evaluated at the row that is at a specified physical offset row :::warning `lagInFrame` behavior differs from the standard SQL `lag` window function. Clickhouse window function `lagInFrame` respects the window frame. -To get behavior identical to the `lag`, use `rows between unbounded preceding and unbounded following`. +To get behavior identical to the `lag`, use `ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. ::: **Syntax** From 934ae95e396035b7229d25cc02a727e0c2398902 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 26 Nov 2024 17:34:27 +0000 Subject: [PATCH 53/56] Fix bad conflict resolution --- docs/en/operations/server-configuration-parameters/settings.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 044a650744b..fe38e00b637 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -3277,6 +3277,8 @@ Type: UInt64 Default value: 100 +Zero means unlimited + ## allow_feature_tier Controls if the user can change settings related to the different feature tiers. 
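Read together with the `SettingsConstraints.cpp` and `MergeTreeSettings.cpp` hunks earlier in this series, the setting documented above is a three-level gate: `0` allows changing experimental, beta, and production settings, `1` only beta and production, `2` only production. A minimal sketch of that gate with illustrative types and names (not the actual ClickHouse classes):

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

enum class SettingsTier : uint8_t { Production, Beta, Experimental };

// allow_feature_tier: 0 = all tiers changeable, 1 = beta + production, 2 = production only.
void checkSettingIsChangeable(const std::string & name, SettingsTier tier, uint32_t allow_feature_tier)
{
    const bool allow_experimental = allow_feature_tier == 0;
    const bool allow_beta = allow_feature_tier <= 1;

    if (tier == SettingsTier::Experimental && !allow_experimental)
        throw std::runtime_error("Cannot modify setting '" + name
            + "'. Changes to EXPERIMENTAL settings are disabled in the server config ('allow_feature_tier')");
    if (tier == SettingsTier::Beta && !allow_beta)
        throw std::runtime_error("Cannot modify setting '" + name
            + "'. Changes to BETA settings are disabled in the server config ('allow_feature_tier')");
    // Production-tier settings are always changeable (subject to other constraints).
}

int main()
{
    try
    {
        checkSettingIsChangeable("some_experimental_setting", SettingsTier::Experimental, /*allow_feature_tier=*/2);
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n'; // prints the EXPERIMENTAL error above
    }
}
```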
From 22f34a2fe9393543f345de03990150fce12108b7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 26 Nov 2024 18:03:24 +0000 Subject: [PATCH 54/56] Update version_date.tsv and changelogs after v24.11.1.2557-stable --- SECURITY.md | 1 + docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.11.1.2557-stable.md | 376 ++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 381 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.11.1.2557-stable.md diff --git a/SECURITY.md b/SECURITY.md index 1b0648dc489..5722b896b52 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,6 +14,7 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| +| 24.11 | ✔️ | | 24.10 | ✔️ | | 24.9 | ✔️ | | 24.8 | ✔️ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f3f25c1a247..b933b426e01 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -38,7 +38,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.10.3.21" +ARG VERSION="24.11.1.2557" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3bf23767150..36ceb78e6ef 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -35,7 +35,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.10.3.21" +ARG VERSION="24.11.1.2557" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 31fdcb8a490..b2732613142 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.10.3.21" +ARG VERSION="24.11.1.2557" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.11.1.2557-stable.md b/docs/changelogs/v24.11.1.2557-stable.md new file mode 100644 index 00000000000..4d18897263c --- /dev/null +++ b/docs/changelogs/v24.11.1.2557-stable.md @@ -0,0 +1,376 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.11.1.2557-stable (1574e794bf9) FIXME as compared to v24.11.1.1-new (c82cf25b3e5) + +#### Backward Incompatible Change +* Remove system tables `generate_series` and `generateSeries`. They were added by mistake here: [#59390](https://github.com/ClickHouse/ClickHouse/issues/59390). [#71091](https://github.com/ClickHouse/ClickHouse/pull/71091) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `StorageExternalDistributed`. Closes [#70600](https://github.com/ClickHouse/ClickHouse/issues/70600). ### Documentation entry for user-facing changes. [#71176](https://github.com/ClickHouse/ClickHouse/pull/71176) ([flynn](https://github.com/ucasfl)). 
+* Fix possible error `No such file or directory` due to unescaped special symbols in files for JSON subcolumns. [#71182](https://github.com/ClickHouse/ClickHouse/pull/71182) ([Pavel Kruglov](https://github.com/Avogar)). +* The table engines Kafka, NATS and RabbitMQ are now covered by their own grants in the `SOURCES` hierarchy. Add grants to any non-default database users that create tables with these engine types. [#71250](https://github.com/ClickHouse/ClickHouse/pull/71250) ([Christoph Wurm](https://github.com/cwurm)). +* Check the full mutation query before executing it (including subqueries). This prevents accidentally running an invalid query and building up dead mutations that block valid mutations. [#71300](https://github.com/ClickHouse/ClickHouse/pull/71300) ([Christoph Wurm](https://github.com/cwurm)). +* Rename filesystem cache setting `skip_download_if_exceeds_query_cache` to `filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit`. [#71578](https://github.com/ClickHouse/ClickHouse/pull/71578) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove support for `Enum` as well as `UInt128` and `UInt256` arguments in `deltaSumTimestamp`. Remove support for `Int8`, `UInt8`, `Int16`, and `UInt16` of the second ("timestamp") argument of `deltaSumTimestamp`. [#71790](https://github.com/ClickHouse/ClickHouse/pull/71790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* A new data type, `BFloat16`, represents 16-bit floating point numbers with 8-bit exponent, sign, and 7-bit mantissa. This closes [#44206](https://github.com/ClickHouse/ClickHouse/issues/44206). This closes [#49937](https://github.com/ClickHouse/ClickHouse/issues/49937). [#64712](https://github.com/ClickHouse/ClickHouse/pull/64712) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ~~Added an option to select the side of the join that will act as the inner table in the query plan. This is controlled by `query_plan_join_inner_table_selection`, which can be set to `auto`. In this mode, ClickHouse will try to choose the table with the smallest number of rows.~~ Resubmitted https://github.com/ClickHouse/ClickHouse/pull/71577. [#68682](https://github.com/ClickHouse/ClickHouse/pull/68682) ([Vladimir Cherkasov](https://github.com/vdimir)). +* Add `CHECK GRANT` query to check whether the current user/role has been granted the specific privilege and whether the corresponding table/column exists in the memory. [#68885](https://github.com/ClickHouse/ClickHouse/pull/68885) ([Unalian](https://github.com/Unalian)). +* Added SQL syntax to describe workload and resource management. https://clickhouse.com/docs/en/operations/workload-scheduling. [#69187](https://github.com/ClickHouse/ClickHouse/pull/69187) ([Sergei Trifonov](https://github.com/serxa)). +* Added server setting `async_load_system_database` that allows the server to start with not fully loaded system database. This helps to start ClickHouse faster if there are many system tables. [#69847](https://github.com/ClickHouse/ClickHouse/pull/69847) ([Sergei Trifonov](https://github.com/serxa)). +* Allow each authentication method to have its own expiration date, remove from user entity. [#70090](https://github.com/ClickHouse/ClickHouse/pull/70090) ([Arthur Passos](https://github.com/arthurpassos)). +* Push external user roles from query originator to other nodes in cluster. Helpful when only originator has access to the external authenticator (like LDAP). 
[#70332](https://github.com/ClickHouse/ClickHouse/pull/70332) ([Andrey Zvonov](https://github.com/zvonand)). +* Support alter from String to JSON. This PR also changes the serialization of JSON and Dynamic types to new version V2. Old version V1 can be still used by enabling setting `merge_tree_use_v1_object_and_dynamic_serialization` (can be used during upgrade to be able to rollback the version without issues). [#70442](https://github.com/ClickHouse/ClickHouse/pull/70442) ([Pavel Kruglov](https://github.com/Avogar)). +* Added a new header type for S3 endpoints for user authentication (`access_header`). This allows to get some access header with the lowest priority, which will be overwritten with `access_key_id` from any other source (for example, a table schema or a named collection). [#71011](https://github.com/ClickHouse/ClickHouse/pull/71011) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Initial implementation of settings tiers. [#71145](https://github.com/ClickHouse/ClickHouse/pull/71145) ([Raúl Marín](https://github.com/Algunenano)). +* Add support for staleness clause in order by with fill operator. [#71151](https://github.com/ClickHouse/ClickHouse/pull/71151) ([Mikhail Artemenko](https://github.com/Michicosun)). +* Implement simple CAST from Map/Tuple/Object to new JSON through serialization/deserialization from JSON string. [#71320](https://github.com/ClickHouse/ClickHouse/pull/71320) ([Pavel Kruglov](https://github.com/Avogar)). +* Added aliases `anyRespectNulls`, `firstValueRespectNulls`, and `anyValueRespectNulls` for aggregation function `any`. Also added aliases `anyLastRespectNulls` and `lastValueRespectNulls` for aggregation function `anyLast`. This allows using more natural camel-case-only syntax rather than mixed camel-case/underscore syntax, for example: `SELECT anyLastRespectNullsStateIf` instead of `anyLast_respect_nullsStateIf`. [#71403](https://github.com/ClickHouse/ClickHouse/pull/71403) ([Peter Nguyen](https://github.com/petern48)). +* Added the configuration `date_time_utc` parameter, enabling JSON log formatting to support UTC date-time in RFC 3339/ISO8601 format. [#71560](https://github.com/ClickHouse/ClickHouse/pull/71560) ([Ali](https://github.com/xogoodnow)). +* Optimized memory usage for values of index granularity if granularity is constant for part. Added an ability to always select constant granularity for part (setting `use_const_adaptive_granularity`), which helps to ensure that it is always optimized in memory. It helps in large workloads (trillions of rows in shared storage) to avoid constantly growing memory usage by metadata (values of index granularity) of data parts. [#71786](https://github.com/ClickHouse/ClickHouse/pull/71786) ([Anton Popov](https://github.com/CurtizJ)). +* Implement `allowed_feature_tier` as a global switch to disable all experimental / beta features. [#71841](https://github.com/ClickHouse/ClickHouse/pull/71841) ([Raúl Marín](https://github.com/Algunenano)). +* Add `iceberg[S3;HDFS;Azure]Cluster`, `deltaLakeCluster`, `hudiCluster` table functions. [#72045](https://github.com/ClickHouse/ClickHouse/pull/72045) ([Mikhail Artemenko](https://github.com/Michicosun)). + +#### Performance Improvement +* Add 2 new settings `short_circuit_function_evaluation_for_nulls` and `short_circuit_function_evaluation_for_nulls_threshold` that allow to execute functions over `Nullable` columns in short-circuit manner when the ratio of NULL values in the block of data exceeds the specified threshold. 
+* Input block columns are no longer copied for `join_algorithm='parallel_hash'` when distributing them between threads for parallel processing. [#67782](https://github.com/ClickHouse/ClickHouse/pull/67782) ([Nikita Taranov](https://github.com/nickitat)).
+* Optimized the `Replacing` merge algorithm for non-intersecting parts. [#70977](https://github.com/ClickHouse/ClickHouse/pull/70977) ([Anton Popov](https://github.com/CurtizJ)).
+* Do not list detached parts from readonly and write-once disks for metrics and `system.detached_parts`. [#71086](https://github.com/ClickHouse/ClickHouse/pull/71086) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Do not calculate heavy asynchronous metrics by default. The feature was introduced in [#40332](https://github.com/ClickHouse/ClickHouse/issues/40332), but it isn't good to have a heavy background job that is needed for only a single customer. [#71087](https://github.com/ClickHouse/ClickHouse/pull/71087) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve the performance and accuracy of the `system.query_metric_log` collection interval by reducing the critical region. [#71473](https://github.com/ClickHouse/ClickHouse/pull/71473) ([Pablo Marcos](https://github.com/pamarcos)).
+
+#### Improvement
+* Higher-order functions with constant arrays and constant captured arguments will return constants. [#58400](https://github.com/ClickHouse/ClickHouse/pull/58400) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Read-in-order optimization via generating virtual rows, so that less data is read during merge sort; this is especially useful when multiple parts exist. [#62125](https://github.com/ClickHouse/ClickHouse/pull/62125) ([Shichao Jin](https://github.com/jsc0218)).
+* Query plan step names (`EXPLAIN PLAN json=1`) and pipeline processor names (`EXPLAIN PIPELINE compact=0,graph=1`) now have a unique id as a suffix. This makes it possible to match processor profiler output and OpenTelemetry traces with explain output. [#63518](https://github.com/ClickHouse/ClickHouse/pull/63518) ([qhsong](https://github.com/qhsong)).
+* Added an option to check whether an object exists after writing it to Azure Blob Storage; this is controlled by the setting `check_objects_after_upload`. [#64847](https://github.com/ClickHouse/ClickHouse/pull/64847) ([Smita Kulkarni](https://github.com/SmitaRKulkarni)).
+* Fix use-after-dtor logic in the HashTable `destroyElements`. [#65279](https://github.com/ClickHouse/ClickHouse/pull/65279) ([cangyin](https://github.com/cangyin)).
+* Use the `Atomic` database by default in `clickhouse-local`. Address items 1 and 5 from [#50647](https://github.com/ClickHouse/ClickHouse/issues/50647). Closes [#44817](https://github.com/ClickHouse/ClickHouse/issues/44817). [#68024](https://github.com/ClickHouse/ClickHouse/pull/68024) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* A write buffer has to be canceled or finalized explicitly. Exceptions break the HTTP protocol in order to alert the client about an error. [#68800](https://github.com/ClickHouse/ClickHouse/pull/68800) ([Sema Checherinda](https://github.com/CheSema)).
+* Report running DDLWorker hosts by creating `replica_dir` and marking replicas active in DDLWorker. [#69658](https://github.com/ClickHouse/ClickHouse/pull/69658) ([tuanpach](https://github.com/tuanpach)).
+* 1. Refactor `DDLQueryStatusSource`: rename `DDLQueryStatusSource` to `DistributedQueryStatusSource` and make it a base class; create two subclasses, `DDLOnClusterQueryStatusSource` and `ReplicatedDatabaseQueryStatusSource`, derived from it to query the status of DDL tasks from `DDL ON CLUSTER` queries and Replicated databases respectively. 2. Support stopping the wait for offline hosts in `DDLOnClusterQueryStatusSource`. [#69660](https://github.com/ClickHouse/ClickHouse/pull/69660) ([tuanpach](https://github.com/tuanpach)).
+* Don't allow Variant/Dynamic types in ORDER BY/GROUP BY/PARTITION BY/PRIMARY KEY by default, because it may lead to unexpected results. [#69731](https://github.com/ClickHouse/ClickHouse/pull/69731) ([Pavel Kruglov](https://github.com/Avogar)).
+* Better error handling and cancellation of `ON CLUSTER` backups and restores: if a backup or restore fails on one host, it is cancelled on the other hosts automatically; no weird errors are produced because some hosts failed while other hosts continued their work; if a backup or restore is cancelled on one host, it is cancelled on the other hosts automatically; fixed issues with `test_disallow_concurrency` - now disabling of concurrency works better; backups and restores are now much more resistant to ZooKeeper disconnects. [#70027](https://github.com/ClickHouse/ClickHouse/pull/70027) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Enable `parallel_replicas_local_plan` by default. Building a full-fledged local plan on the query initiator improves parallel replicas performance with less resource consumption and provides opportunities to apply more query optimizations. [#70171](https://github.com/ClickHouse/ClickHouse/pull/70171) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix the `metadata_version` record in ZooKeeper in the restarting thread rather than in the attach thread. [#70297](https://github.com/ClickHouse/ClickHouse/pull/70297) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
+* Add the ability to set a user/password in `http_handlers` (for `dynamic_query_handler`/`predefined_query_handler`). [#70725](https://github.com/ClickHouse/ClickHouse/pull/70725) ([Azat Khuzhin](https://github.com/azat)).
+* Support `ALTER TABLE ... MODIFY/RESET SETTING ...` for certain settings in the S3Queue storage. [#70811](https://github.com/ClickHouse/ClickHouse/pull/70811) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Do not call the object storage API when listing directories, as this may be cost-inefficient. Instead, store the list of filenames in memory. The trade-offs are increased initial load time and the memory required to store the filenames. [#70823](https://github.com/ClickHouse/ClickHouse/pull/70823) ([Julia Kartseva](https://github.com/jkartseva)).
+* Add a `--threads` parameter to `clickhouse-compressor`, which allows compressing data in parallel. [#70860](https://github.com/ClickHouse/ClickHouse/pull/70860) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix the issue where ClickHouse in Docker containers printed "get_mempolicy: Operation not permitted" to stderr due to restricted syscalls. [#70900](https://github.com/ClickHouse/ClickHouse/pull/70900) ([filimonov](https://github.com/filimonov)).
+* Added the ability to reload client certificates in the same way as the procedure for reloading server certificates. [#70997](https://github.com/ClickHouse/ClickHouse/pull/70997) ([Roman Antonov](https://github.com/Romeo58rus)).
+* Refactored the internal structure of the files which work with DataLake storages. [#71012](https://github.com/ClickHouse/ClickHouse/pull/71012) ([Daniil Ivanik](https://github.com/divanik)).
+* Make the Replxx client history size configurable. [#71014](https://github.com/ClickHouse/ClickHouse/pull/71014) ([Jiří Kozlovský](https://github.com/jirislav)).
+* Added a setting `prewarm_mark_cache` which enables loading marks into the mark cache on inserts, merges, fetches of parts, and on table startup (see the usage sketch below). [#71053](https://github.com/ClickHouse/ClickHouse/pull/71053) ([Anton Popov](https://github.com/CurtizJ)).
+* Boolean support for the Parquet native reader. [#71055](https://github.com/ClickHouse/ClickHouse/pull/71055) ([Arthur Passos](https://github.com/arthurpassos)).
+* Retry more errors when interacting with S3, such as "Malformed message". [#71088](https://github.com/ClickHouse/ClickHouse/pull/71088) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Lower the log level for some messages about S3. [#71090](https://github.com/ClickHouse/ClickHouse/pull/71090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support writing HDFS files with spaces. [#71105](https://github.com/ClickHouse/ClickHouse/pull/71105) ([exmy](https://github.com/exmy)).
+* `system.session_log` is quite okay. This closes [#51760](https://github.com/ClickHouse/ClickHouse/issues/51760). [#71150](https://github.com/ClickHouse/ClickHouse/pull/71150) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixes RIGHT/FULL joins in queries with parallel replicas. Now RIGHT joins can be executed with parallel replicas (right-table reading is distributed). FULL joins can't be parallelized among nodes and are executed locally. [#71162](https://github.com/ClickHouse/ClickHouse/pull/71162) ([Igor Nikonov](https://github.com/devcrafter)).
+* Added settings limiting the number of replicated tables, dictionaries and views. [#71179](https://github.com/ClickHouse/ClickHouse/pull/71179) ([Kirill](https://github.com/kirillgarbar)).
+* Use `AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE` instead of `AWS_CONTAINER_AUTHORIZATION_TOKEN` if the former is available. Fixes [#71074](https://github.com/ClickHouse/ClickHouse/issues/71074). [#71269](https://github.com/ClickHouse/ClickHouse/pull/71269) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Remove the `metadata_version` ZooKeeper node creation from the RMT restarting thread. The only scenario where we need to create this node is when the user updated from a version earlier than 20.4 straight to one later than 24.10. ClickHouse does not support upgrades that span more than a year, so we should throw an exception and ask the user to update gradually, instead of creating the node. [#71385](https://github.com/ClickHouse/ClickHouse/pull/71385) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
+* Add per-host dashboards `Overview (host)` and `Cloud overview (host)` to the advanced dashboard. [#71422](https://github.com/ClickHouse/ClickHouse/pull/71422) ([alesapin](https://github.com/alesapin)).
+* The methods `removeObject` and `removeObjects` are not idempotent. When retries happen due to network errors, the result could be `object not found` because the object was deleted in previous attempts. [#71529](https://github.com/ClickHouse/ClickHouse/pull/71529) ([Sema Checherinda](https://github.com/CheSema)).
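+A minimal sketch of the mark-cache prewarming from [#71053]. Whether `prewarm_mark_cache` is applied as a MergeTree table setting, and the exact form of the `SYSTEM PREWARM MARK CACHE` command from [#71368] (listed further below), are assumptions here, not confirmed syntax:
+```sql
+-- Assumed: prewarm_mark_cache exposed as a table-level MergeTree setting.
+ALTER TABLE hits MODIFY SETTING prewarm_mark_cache = 1;
+-- Assumed companion command from [#71368]: prewarm the mark cache on demand.
+SYSTEM PREWARM MARK CACHE hits;
+```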
+* Added new functions `parseDateTime64`, `parseDateTime64OrNull` and `parseDateTime64OrZero`. Compared to the existing function `parseDateTime` (and variants), they return a value of type `DateTime64` instead of `DateTime` (see the sketch below). [#71581](https://github.com/ClickHouse/ClickHouse/pull/71581) ([kevinyhzou](https://github.com/KevinyhZou)).
+* Allow using `clickhouse` with a file argument as `--queries-file`. [#71589](https://github.com/ClickHouse/ClickHouse/pull/71589) ([Raúl Marín](https://github.com/Algunenano)).
+* Shrink-to-fit the `index_granularity` array in memory to reduce the memory footprint for the MergeTree table engine family. [#71595](https://github.com/ClickHouse/ClickHouse/pull/71595) ([alesapin](https://github.com/alesapin)).
+* `clickhouse-local` uses an implicit SELECT by default, which allows using it as a calculator. Improve the syntax highlighting for the implicit SELECT mode. [#71620](https://github.com/ClickHouse/ClickHouse/pull/71620) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The command-line applications will highlight syntax even for multi-statement queries. [#71622](https://github.com/ClickHouse/ClickHouse/pull/71622) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Command-line applications will return non-zero exit codes on errors. In previous versions, the `disks` application returned zero on errors, and other applications returned zero for errors 256 (`PARTITION_ALREADY_EXISTS`) and 512 (`SET_NON_GRANTED_ROLE`). [#71623](https://github.com/ClickHouse/ClickHouse/pull/71623) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* When a user/group was given as an ID, `clickhouse su` failed. This patch fixes it to accept `UID:GID` as well. [#71626](https://github.com/ClickHouse/ClickHouse/pull/71626) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* The `Vertical` format (which is also activated when you end your query with `\G`) gets the features of the Pretty formats, such as highlighting thousands groups in numbers and printing a readable number tip. [#71630](https://github.com/ClickHouse/ClickHouse/pull/71630) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Allow disabling the memory buffer increase for the filesystem cache via the setting `filesystem_cache_prefer_bigger_buffer_size`. [#71640](https://github.com/ClickHouse/ClickHouse/pull/71640) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add a separate setting `background_download_max_file_segment_size` for the maximum file segment size of background downloads in the filesystem cache. [#71648](https://github.com/ClickHouse/ClickHouse/pull/71648) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Forbid Dynamic/Variant types in min/max functions to avoid confusion. [#71761](https://github.com/ClickHouse/ClickHouse/pull/71761) ([Pavel Kruglov](https://github.com/Avogar)).
+* Changes the default value of `enable_http_compression` from 0 to 1. Closes [#71591](https://github.com/ClickHouse/ClickHouse/issues/71591). [#71774](https://github.com/ClickHouse/ClickHouse/pull/71774) ([Peter Nguyen](https://github.com/petern48)).
+* Slightly better JSON type parsing: if the current block for a JSON path contains values of several types, try to choose the best type by trying the types in a special best-effort order. [#71785](https://github.com/ClickHouse/ClickHouse/pull/71785) ([Pavel Kruglov](https://github.com/Avogar)).
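+A sketch of the new `parseDateTime64*` functions from [#71581]. The exact signatures are not spelled out in the entry, so the MySQL-style format string below is an assumption borrowed from `parseDateTime`:
+```sql
+-- Returns DateTime64 (fractional seconds preserved) instead of DateTime.
+SELECT parseDateTime64('2024-11-16 00:28:54.123', '%Y-%m-%d %H:%i:%s.%f');
+SELECT parseDateTime64OrNull('not a date', '%Y-%m-%d %H:%i:%s.%f'); -- NULL instead of an exception
+SELECT parseDateTime64OrZero('not a date', '%Y-%m-%d %H:%i:%s.%f'); -- zero value instead of an exception
+```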
+* Previously, reading from `system.asynchronous_metrics` would wait for a concurrent update to finish. This could take a long time if the system was under heavy load. With this change, the previously collected values can always be read. [#71798](https://github.com/ClickHouse/ClickHouse/pull/71798) ([Alexander Gololobov](https://github.com/davenger)).
+* Set `polling_max_timeout_ms` to 10 minutes and `polling_backoff_ms` to 30 seconds. [#71817](https://github.com/ClickHouse/ClickHouse/pull/71817) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Turn off the filesystem cache setting `boundary_alignment` for non-disk reads. [#71827](https://github.com/ClickHouse/ClickHouse/pull/71827) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update `HostResolver` three times in a `history` period. [#71863](https://github.com/ClickHouse/ClickHouse/pull/71863) ([Sema Checherinda](https://github.com/CheSema)).
+* Queries like `SELECT * FROM t LIMIT 1` used to load part indexes even though they were not used. [#71866](https://github.com/ClickHouse/ClickHouse/pull/71866) ([Alexander Gololobov](https://github.com/davenger)).
+* `allow_reorder_prewhere_conditions` is on by default with old compatibility settings. [#71867](https://github.com/ClickHouse/ClickHouse/pull/71867) ([Raúl Marín](https://github.com/Algunenano)).
+* Do not increment the `ILLEGAL_TYPE_OF_ARGUMENT` counter in the `system.errors` table when the `bitmapTransform` function is used and the argument types are valid. [#71971](https://github.com/ClickHouse/ClickHouse/pull/71971) ([Dmitry Novik](https://github.com/novikd)).
+* When retrieving data directly from a dictionary using Dictionary storage, the dictionary table function, or a direct SELECT from the dictionary itself, it is now enough to have `SELECT` permission or `dictGet` permission for the dictionary (see the sketch at the end of this section). This aligns with previous attempts to prevent ACL bypasses: https://github.com/ClickHouse/ClickHouse/pull/57362 and https://github.com/ClickHouse/ClickHouse/pull/65359. It also makes the latter one backward compatible. [#72051](https://github.com/ClickHouse/ClickHouse/pull/72051) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Added a dropdown selector for the dashboard from the `system.dashboards` table on the advanced dashboard HTML page. [#72081](https://github.com/ClickHouse/ClickHouse/pull/72081) ([Sergei Trifonov](https://github.com/serxa)).
+* Backported in [#72471](https://github.com/ClickHouse/ClickHouse/issues/72471): Move the JSON/Dynamic/Variant types from experimental features to beta. [#72294](https://github.com/ClickHouse/ClickHouse/pull/72294) ([Pavel Kruglov](https://github.com/Avogar)).
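+A sketch of the permission change from [#72051]; the database, dictionary, and user names are hypothetical:
+```sql
+-- Either of these grants is now sufficient to read the dictionary directly:
+GRANT dictGet ON db.my_dict TO analyst;
+-- or: GRANT SELECT ON db.my_dict TO analyst;
+SELECT * FROM db.my_dict;                           -- direct SELECT from the dictionary
+SELECT dictGet('db.my_dict', 'value', toUInt64(1)); -- classic dictGet access
+```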
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* The parts deduplicated during an `ATTACH PART` query don't get stuck with the `attaching_` prefix anymore. [#65636](https://github.com/ClickHouse/ClickHouse/pull/65636) ([Kirill](https://github.com/kirillgarbar)).
+* Fix for the bug where `DateTime64` was losing precision with the `IN` function. [#67230](https://github.com/ClickHouse/ClickHouse/pull/67230) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Fix a possible logical error when using functions with `IGNORE/RESPECT NULLS` in `ORDER BY ... WITH FILL`; close [#57609](https://github.com/ClickHouse/ClickHouse/issues/57609). [#68234](https://github.com/ClickHouse/ClickHouse/pull/68234) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Fixed rare logical errors in asynchronous inserts with format `Native` when the memory limit was reached. [#68965](https://github.com/ClickHouse/ClickHouse/pull/68965) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix COMMENT in CREATE TABLE for an EPHEMERAL column. [#70458](https://github.com/ClickHouse/ClickHouse/pull/70458) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix a logical error in `JSONExtract` with `LowCardinality(Nullable)`. [#70549](https://github.com/ClickHouse/ClickHouse/pull/70549) ([Pavel Kruglov](https://github.com/Avogar)).
+* Allow `SYSTEM DROP REPLICA` by ZooKeeper path when there is another replica with the same ZooKeeper path. [#70642](https://github.com/ClickHouse/ClickHouse/pull/70642) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* `GroupArraySortedData` uses a PODArray with non-POD elements, manually calling constructors and destructors for the elements as needed. But it wasn't careful enough: in two places it forgot to call the destructor, and in one place it left elements uninitialized when an exception was thrown while deserializing previous elements. `GroupArraySortedData`'s destructor then called destructors on uninitialized elements and crashed with a segmentation fault in `DB::Field::~Field()` while merging parts (the original report contains the full stack trace, going through `DB::SerializationAggregateFunction::deserializeBinaryBulk`, `DB::MergeTreeReaderCompact::readData`, and `DB::MergeTask`, along with the changed settings of the affected server). [#70820](https://github.com/ClickHouse/ClickHouse/pull/70820) ([Michael Kolupaev](https://github.com/al13n321)).
+* Add the ability to override Content-Type with user headers in the URL engine. [#70859](https://github.com/ClickHouse/ClickHouse/pull/70859) ([Artem Iurin](https://github.com/ortyomka)).
+* Fix the logical error in `StorageS3Queue` "Cannot create a persistent node in /processed since it already exists". [#70984](https://github.com/ClickHouse/ClickHouse/pull/70984) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed named sessions not being closed and hanging forever under certain circumstances. [#70998](https://github.com/ClickHouse/ClickHouse/pull/70998) ([Márcio Martins](https://github.com/marcio-absmartly)).
+* Fix a bug where the `_row_exists` column was not considered in the rebuild option of projection lightweight deletes. [#71089](https://github.com/ClickHouse/ClickHouse/pull/71089) ([Shichao Jin](https://github.com/jsc0218)).
+* Fix the `AT_* is out of range` problem when running on Oracle Linux UEK 6.10. [#71109](https://github.com/ClickHouse/ClickHouse/pull/71109) ([Örjan Fors](https://github.com/op)).
+* Fix a wrong value in `system.query_metric_log` due to an unexpected race condition. [#71124](https://github.com/ClickHouse/ClickHouse/pull/71124) ([Pablo Marcos](https://github.com/pamarcos)).
+* Check if the default DB is present after authorization. Fixes [#71097](https://github.com/ClickHouse/ClickHouse/issues/71097). [#71140](https://github.com/ClickHouse/ClickHouse/pull/71140) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Fix the mismatched aggregate function name of `quantileExactWeightedInterpolated`. The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/69619. [#71168](https://github.com/ClickHouse/ClickHouse/pull/71168) ([李扬](https://github.com/taiyang-li)).
+* Fix a `bad_weak_ptr` exception with Dynamic in comparison functions. [#71183](https://github.com/ClickHouse/ClickHouse/pull/71183) ([Pavel Kruglov](https://github.com/Avogar)).
+* Check that a 7z file being read is on the local machine. [#71184](https://github.com/ClickHouse/ClickHouse/pull/71184) ([Daniil Ivanik](https://github.com/divanik)).
+* Don't delete a blob when there are nodes using it in ReplicatedMergeTree with zero-copy replication. [#71186](https://github.com/ClickHouse/ClickHouse/pull/71186) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix ignoring format settings in the Native format via HTTP and async inserts. [#71193](https://github.com/ClickHouse/ClickHouse/pull/71193) ([Pavel Kruglov](https://github.com/Avogar)).
+* SELECT queries run with the setting `use_query_cache = 1` are no longer rejected if the name of a system table appears as a literal; e.g. `SELECT * FROM users WHERE name = 'system.metrics' SETTINGS use_query_cache = true;` now works. [#71254](https://github.com/ClickHouse/ClickHouse/pull/71254) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix a bug where memory usage increased if `enable_filesystem_cache=1` but the disk in the storage configuration did not have any cache configuration. [#71261](https://github.com/ClickHouse/ClickHouse/pull/71261) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix possible `Cannot read all data` errors during deserialization of a LowCardinality dictionary from a Dynamic column. [#71299](https://github.com/ClickHouse/ClickHouse/pull/71299) ([Pavel Kruglov](https://github.com/Avogar)).
+* Fix incomplete cleanup of the parallel output format in the client. [#71304](https://github.com/ClickHouse/ClickHouse/pull/71304) ([Raúl Marín](https://github.com/Algunenano)).
+* Added missing unescaping in named collections; without the fix, clickhouse-server couldn't start. [#71308](https://github.com/ClickHouse/ClickHouse/pull/71308) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
+* Fix async inserts with empty blocks via the native protocol. [#71312](https://github.com/ClickHouse/ClickHouse/pull/71312) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix inconsistent AST formatting when granting wrong wildcard grants [#71309](https://github.com/ClickHouse/ClickHouse/issues/71309). [#71332](https://github.com/ClickHouse/ClickHouse/pull/71332) ([pufit](https://github.com/pufit)).
+* Add try/catch to data part destructors to avoid terminate. [#71364](https://github.com/ClickHouse/ClickHouse/pull/71364) ([alesapin](https://github.com/alesapin)).
+* Check suspicious and experimental types in JSON type hints. [#71369](https://github.com/ClickHouse/ClickHouse/pull/71369) ([Pavel Kruglov](https://github.com/Avogar)).
+* Start the memory worker thread on non-Linux OSes too (fixes [#71051](https://github.com/ClickHouse/ClickHouse/issues/71051)). [#71384](https://github.com/ClickHouse/ClickHouse/pull/71384) ([Alexandre Snarskii](https://github.com/snar)).
+* Fix the error `Invalid number of rows in Chunk` with a Variant column. [#71388](https://github.com/ClickHouse/ClickHouse/pull/71388) ([Pavel Kruglov](https://github.com/Avogar)).
+* Fix the error `column "attgenerated" does not exist` for old PostgreSQL versions; fix [#60651](https://github.com/ClickHouse/ClickHouse/issues/60651). [#71396](https://github.com/ClickHouse/ClickHouse/pull/71396) ([0xMihalich](https://github.com/0xMihalich)).
+* To avoid spamming the server logs, failing authentication attempts are now logged at level `DEBUG` instead of `ERROR`. [#71405](https://github.com/ClickHouse/ClickHouse/pull/71405) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix a crash in the `mongodb` table function when passing wrong arguments (e.g. `NULL`). [#71426](https://github.com/ClickHouse/ClickHouse/pull/71426) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Fix a crash with `optimize_rewrite_array_exists_to_has`. [#71432](https://github.com/ClickHouse/ClickHouse/pull/71432) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#72336](https://github.com/ClickHouse/ClickHouse/issues/72336): Fix a NoSuchKey error during transaction rollback when creating a directory fails for the `plain_rewritable` disk. [#71439](https://github.com/ClickHouse/ClickHouse/pull/71439) ([Julia Kartseva](https://github.com/jkartseva)).
+* Fixed the usage of the setting `max_insert_delayed_streams_for_parallel_write` in inserts. Previously it worked incorrectly, which could lead to high memory usage in inserts that write data into several partitions. [#71474](https://github.com/ClickHouse/ClickHouse/pull/71474) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix the possible error `Argument for function must be constant` (old analyzer) in the case when `arrayJoin` can apparently appear in the `WHERE` condition. Regression after https://github.com/ClickHouse/ClickHouse/pull/65414. [#71476](https://github.com/ClickHouse/ClickHouse/pull/71476) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Prevent a crash in SortCursor with 0 columns (old analyzer). [#71494](https://github.com/ClickHouse/ClickHouse/pull/71494) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix date32 out-of-range values caused by uninitialized ORC data. For more details, refer to https://github.com/apache/incubator-gluten/issues/7823. [#71500](https://github.com/ClickHouse/ClickHouse/pull/71500) ([李扬](https://github.com/taiyang-li)).
+* Fix counting column sizes in wide parts for the Dynamic and JSON types. [#71526](https://github.com/ClickHouse/ClickHouse/pull/71526) ([Pavel Kruglov](https://github.com/Avogar)).
+* Analyzer fix when a query inside a materialized view uses IN with a CTE. Closes [#65598](https://github.com/ClickHouse/ClickHouse/issues/65598). [#71538](https://github.com/ClickHouse/ClickHouse/pull/71538) ([Maksim Kita](https://github.com/kitaisreal)).
+* Avoid a crash when using a UDF in a constraint. [#71541](https://github.com/ClickHouse/ClickHouse/pull/71541) ([Raúl Marín](https://github.com/Algunenano)).
+* Return 0 or the default char instead of throwing an error in bitShift functions in case of out-of-bounds shifts (see the sketch below). [#71580](https://github.com/ClickHouse/ClickHouse/pull/71580) ([Pablo Marcos](https://github.com/pamarcos)).
+* Fix server crashes when using a materialized view with certain engines. [#71593](https://github.com/ClickHouse/ClickHouse/pull/71593) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
+* An array join with a nested data structure that contains an alias to a constant array was leading to a null pointer dereference. This closes [#71677](https://github.com/ClickHouse/ClickHouse/issues/71677). [#71678](https://github.com/ClickHouse/ClickHouse/pull/71678) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
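+A sketch of the new out-of-bounds behavior described in [#71580] above:
+```sql
+SELECT bitShiftLeft(toUInt8(1), 100);     -- now returns 0; previously threw an exception
+SELECT bitShiftRight(toUInt16(1024), 300); -- 0 as well
+```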
+* Fix a LOGICAL_ERROR when doing ALTER with an empty tuple. This fixes [#71647](https://github.com/ClickHouse/ClickHouse/issues/71647). [#71679](https://github.com/ClickHouse/ClickHouse/pull/71679) ([Amos Bird](https://github.com/amosbird)).
+* Don't transform a constant set in predicates over partition columns in the case of the NOT IN operator. [#71695](https://github.com/ClickHouse/ClickHouse/pull/71695) ([Eduard Karacharov](https://github.com/korowa)).
+* Fix the Docker init script failure log message for clearer understanding. [#71734](https://github.com/ClickHouse/ClickHouse/pull/71734) ([Андрей](https://github.com/andreineustroev)).
+* Fix CAST from LowCardinality(Nullable) to Dynamic. Previously it could lead to the error `Bad cast from type DB::ColumnVector to DB::ColumnNullable`. [#71742](https://github.com/ClickHouse/ClickHouse/pull/71742) ([Pavel Kruglov](https://github.com/Avogar)).
+* Acquire the zero-copy shared lock before moving a part to a zero-copy disk to prevent possible data loss if Keeper is unavailable. [#71845](https://github.com/ClickHouse/ClickHouse/pull/71845) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Fix an exception for `toDayOfWeek` in a WHERE condition with a primary key of DateTime64 type. [#71849](https://github.com/ClickHouse/ClickHouse/pull/71849) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fixed filling of defaults after parsing into sparse columns. [#71854](https://github.com/ClickHouse/ClickHouse/pull/71854) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix a GROUPING function error when the input is an ALIAS on a distributed table; close [#68602](https://github.com/ClickHouse/ClickHouse/issues/68602). [#71855](https://github.com/ClickHouse/ClickHouse/pull/71855) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Fix a possible crash when using `allow_experimental_join_condition`; close [#71693](https://github.com/ClickHouse/ClickHouse/issues/71693). [#71857](https://github.com/ClickHouse/ClickHouse/pull/71857) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Fixed SELECT statements that use the `WITH TIES` clause, which might not return enough rows. [#71886](https://github.com/ClickHouse/ClickHouse/pull/71886) ([wxybear](https://github.com/wxybear)).
+* Fix a TOO_LARGE_ARRAY_SIZE exception raised when a column produced by `arrayWithConstant` evaluation was mistakenly considered to cross the array size limit. [#71894](https://github.com/ClickHouse/ClickHouse/pull/71894) ([Udi](https://github.com/udiz)).
+* `clickhouse-benchmark` reported wrong metrics for queries taking longer than one second. [#71898](https://github.com/ClickHouse/ClickHouse/pull/71898) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix a data race between the progress indicator and the progress table in clickhouse-client. This issue is visible when FROM INFILE is used. Intercept keystrokes during INSERT queries to toggle the progress table display. [#71901](https://github.com/ClickHouse/ClickHouse/pull/71901) ([Julia Kartseva](https://github.com/jkartseva)).
+* Use auxiliary keepers for cluster autodiscovery. [#71911](https://github.com/ClickHouse/ClickHouse/pull/71911) ([Anton Ivashkin](https://github.com/ianton-ru)).
+* Backported in [#72275](https://github.com/ClickHouse/ClickHouse/issues/72275): Fix serialization of Dynamic values in Pretty JSON formats. [#71923](https://github.com/ClickHouse/ClickHouse/pull/71923) ([Pavel Kruglov](https://github.com/Avogar)).
+* Fix the `rows_processed` column in `system.s3/azure_queue_log`, broken in 24.6. Closes [#69975](https://github.com/ClickHouse/ClickHouse/issues/69975). [#71946](https://github.com/ClickHouse/ClickHouse/pull/71946) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed a case when the `s3`/`s3Cluster` functions could return an incomplete result or throw an exception (see the sketch below). It involved using a glob pattern in the S3 URI (like `pattern/*`) together with an empty object existing under the key `pattern/` (such objects are created automatically by the S3 console). Also, the default value of the setting `s3_skip_empty_files` was changed from `false` to `true`. [#71947](https://github.com/ClickHouse/ClickHouse/pull/71947) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix a crash in clickhouse-client syntax highlighting. Closes [#71864](https://github.com/ClickHouse/ClickHouse/issues/71864). [#71949](https://github.com/ClickHouse/ClickHouse/pull/71949) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix the `Illegal type` error for `MergeTree` tables with a binary monotonic function in `ORDER BY` when the first argument is constant. Fixes [#71941](https://github.com/ClickHouse/ClickHouse/issues/71941). [#71966](https://github.com/ClickHouse/ClickHouse/pull/71966) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Allow only SELECT queries in EXPLAIN AST used inside a subquery. Other types of queries lead to the logical error 'Bad cast from type DB::ASTCreateQuery to DB::ASTSelectWithUnionQuery' or `Inconsistent AST formatting`. [#71982](https://github.com/ClickHouse/ClickHouse/pull/71982) ([Pavel Kruglov](https://github.com/Avogar)).
+* When inserting a record via `clickhouse-client`, the client reads column descriptions from the server, but there was a bug causing the descriptions to be written in the wrong order; the correct order is [statistics, ttl, settings]. [#71991](https://github.com/ClickHouse/ClickHouse/pull/71991) ([Han Fei](https://github.com/hanfei1991)).
+* Fix formatting of `MOVE PARTITION ... TO TABLE ...` alter commands when `format_alter_commands_with_parentheses` is enabled. [#72080](https://github.com/ClickHouse/ClickHouse/pull/72080) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#72300](https://github.com/ClickHouse/ClickHouse/issues/72300): Fix bugs when using a UDF in a join-on expression with the old analyzer. [#72179](https://github.com/ClickHouse/ClickHouse/pull/72179) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#72378](https://github.com/ClickHouse/ClickHouse/issues/72378): Fixed a crash in `SimpleSquashingChunksTransform` that occurred in rare cases when processing sparse columns. [#72226](https://github.com/ClickHouse/ClickHouse/pull/72226) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Backported in [#72329](https://github.com/ClickHouse/ClickHouse/issues/72329): Fixed a data race in `GraceHashJoin` as a result of which some rows might be missing in the join output. [#72233](https://github.com/ClickHouse/ClickHouse/pull/72233) ([Nikita Taranov](https://github.com/nickitat)).
+* Backported in [#72372](https://github.com/ClickHouse/ClickHouse/issues/72372): Fixed `ALTER DELETE` queries with a materialized `_block_number` column (if the setting `enable_block_number_column` is enabled). [#72261](https://github.com/ClickHouse/ClickHouse/pull/72261) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#72318](https://github.com/ClickHouse/ClickHouse/issues/72318): Fixed a data race when `ColumnDynamic::dumpStructure()` is called concurrently, e.g. in the `ConcurrentHashJoin` constructor. [#72278](https://github.com/ClickHouse/ClickHouse/pull/72278) ([Nikita Taranov](https://github.com/nickitat)).
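+A sketch of the fixed glob case from [#71947]; the bucket URL is hypothetical:
+```sql
+-- Previously this could fail or return an incomplete result when an empty
+-- placeholder object existed under the key `pattern/`; empty files are now
+-- skipped by default (s3_skip_empty_files = 1).
+SELECT count()
+FROM s3('https://my-bucket.s3.amazonaws.com/pattern/*', 'CSVWithNames');
+```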
+* Backported in [#72420](https://github.com/ClickHouse/ClickHouse/issues/72420): Use `AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE` instead of `AWS_CONTAINER_AUTHORIZATION_TOKEN_PATH`. Fixes [#71074](https://github.com/ClickHouse/ClickHouse/issues/71074). [#72397](https://github.com/ClickHouse/ClickHouse/pull/72397) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+
+#### Build/Testing/Packaging Improvement
+* Add the script to update the sources of the [docker official library](https://github.com/ClickHouse/docker-library). [#57203](https://github.com/ClickHouse/ClickHouse/pull/57203) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* The build system will prevent libraries with unexpected licenses. [#70988](https://github.com/ClickHouse/ClickHouse/pull/70988) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Upgrade the Docker base image for clickhouse-server and keeper to `ubuntu:22.04`. **Breaking change**: the minimal supported Docker version is `20.10.10`. [#71505](https://github.com/ClickHouse/ClickHouse/pull/71505) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Improve the clickhouse-server Dockerfile.ubuntu. Deprecate the `CLICKHOUSE_UID`/`CLICKHOUSE_GID` envs. Remove the `CLICKHOUSE_DOCKER_RESTART_ON_EXIT` processing to comply with the requirements. Make `clickhouse/clickhouse-server/clickhouse-keeper` execution consistent so that it is not plain `clickhouse` in one place and `/usr/bin/clickhouse*` in another. [#71573](https://github.com/ClickHouse/ClickHouse/pull/71573) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### NO CL CATEGORY
+
+* Backported in [#72289](https://github.com/ClickHouse/ClickHouse/issues/72289):. [#72258](https://github.com/ClickHouse/ClickHouse/pull/72258) ([Raúl Marín](https://github.com/Algunenano)).
+* Backported in [#72343](https://github.com/ClickHouse/ClickHouse/issues/72343):. [#72319](https://github.com/ClickHouse/ClickHouse/pull/72319) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Miscellaneous"'. [#71083](https://github.com/ClickHouse/ClickHouse/pull/71083) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Revert "Miscellaneous""'. [#71084](https://github.com/ClickHouse/ClickHouse/pull/71084) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "[RFC] Fix optimize_functions_to_subcolumns optimization"'. [#71220](https://github.com/ClickHouse/ClickHouse/pull/71220) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "SQL syntax for workload and resource management"'. [#71251](https://github.com/ClickHouse/ClickHouse/pull/71251) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Revert "SQL syntax for workload and resource management""'. [#71266](https://github.com/ClickHouse/ClickHouse/pull/71266) ([Sergei Trifonov](https://github.com/serxa)).
+* NO CL ENTRY: 'Revert "Selection of hash join inner table"'. [#71527](https://github.com/ClickHouse/ClickHouse/pull/71527) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Enable enable_job_stack_trace by default"'. [#71619](https://github.com/ClickHouse/ClickHouse/pull/71619) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Remove ridiculous code bloat"'. [#71914](https://github.com/ClickHouse/ClickHouse/pull/71914) ([Raúl Marín](https://github.com/Algunenano)).
+* NO CL ENTRY: 'Revert "Revert "Remove ridiculous code bloat""'. [#71945](https://github.com/ClickHouse/ClickHouse/pull/71945) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "CI: Functional Tests with praktika"'. [#71974](https://github.com/ClickHouse/ClickHouse/pull/71974) ([Max Kainov](https://github.com/maxknv)).
+* NO CL ENTRY: 'CI: Functional Tests with praktika'. [#71976](https://github.com/ClickHouse/ClickHouse/pull/71976) ([Max Kainov](https://github.com/maxknv)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Refactor TempDataOnDisk. [#66606](https://github.com/ClickHouse/ClickHouse/pull/66606) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Bump krb5 from v1.21.2 to v1.21.3. [#69360](https://github.com/ClickHouse/ClickHouse/pull/69360) ([Robert Schulze](https://github.com/rschu1ze)).
+* USearch: Enable the SimSIMD backend and enable dynamic dispatch. [#69387](https://github.com/ClickHouse/ClickHouse/pull/69387) ([Robert Schulze](https://github.com/rschu1ze)).
+* CI: enable libfuzzer. [#70112](https://github.com/ClickHouse/ClickHouse/pull/70112) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Use `clang-19`. [#70414](https://github.com/ClickHouse/ClickHouse/pull/70414) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Randomize Keeper feature flags in integration tests. [#70523](https://github.com/ClickHouse/ClickHouse/pull/70523) ([Antonio Andelic](https://github.com/antonio2368)).
+* All the patches for arrow were re-applied in this PR: https://github.com/ClickHouse/arrow/pull/68. [#70691](https://github.com/ClickHouse/ClickHouse/pull/70691) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Check the number of arguments for functions with a Dynamic argument. [#70749](https://github.com/ClickHouse/ClickHouse/pull/70749) ([Nikita Taranov](https://github.com/nickitat)).
+* Add a setting `filesystem_cache_enable_background_download_for_metadata_files` for the filesystem cache to allow disabling background downloads of the filesystem cache for metadata files. This is needed for a private code feature; for the public version it does not make much sense. [#70806](https://github.com/ClickHouse/ClickHouse/pull/70806) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Make ParquetMetadata say whether a bloom filter is present. [#70947](https://github.com/ClickHouse/ClickHouse/pull/70947) ([Michael Kolupaev](https://github.com/al13n321)).
+* Test for reproducing that ReplacingMergeTree depends on the order of part attachment. [#71010](https://github.com/ClickHouse/ClickHouse/pull/71010) ([Konstantin Morozov](https://github.com/k-morozov)).
+* CI: Build Job with praktika. [#71015](https://github.com/ClickHouse/ClickHouse/pull/71015) ([Max Kainov](https://github.com/maxknv)).
+* Fix the bad test `01524_do_not_merge_across_partitions_select_final.sql`. [#71035](https://github.com/ClickHouse/ClickHouse/pull/71035) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `enable_job_stack_trace` by default. [#71039](https://github.com/ClickHouse/ClickHouse/pull/71039) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix two logical errors when reading from stdin in clickhouse-local. [#71046](https://github.com/ClickHouse/ClickHouse/pull/71046) ([Michael Kolupaev](https://github.com/al13n321)).
+* Sync changes to `ProtocolServerAdapter`. [#71058](https://github.com/ClickHouse/ClickHouse/pull/71058) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix a typo. [#71067](https://github.com/ClickHouse/ClickHouse/pull/71067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Miscellaneous. [#71070](https://github.com/ClickHouse/ClickHouse/pull/71070) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove the bad test `test_system_replicated_fetches`. [#71071](https://github.com/ClickHouse/ClickHouse/pull/71071) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update the version after release. [#71076](https://github.com/ClickHouse/ClickHouse/pull/71076) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* CI: Functional tests for the ARM + ASAN binary. [#71079](https://github.com/ClickHouse/ClickHouse/pull/71079) ([Max Kainov](https://github.com/maxknv)).
+* CI: Functional Tests with praktika. [#71081](https://github.com/ClickHouse/ClickHouse/pull/71081) ([Max Kainov](https://github.com/maxknv)).
+* Fixup of TrivialMergeSelector. [#71082](https://github.com/ClickHouse/ClickHouse/pull/71082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Sync an integration test with private. [#71096](https://github.com/ClickHouse/ClickHouse/pull/71096) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Unique symbols in the `system.coverage_log`. [#71099](https://github.com/ClickHouse/ClickHouse/pull/71099) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better log messages. [#71102](https://github.com/ClickHouse/ClickHouse/pull/71102) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix an error in the Replicated database. [#71103](https://github.com/ClickHouse/ClickHouse/pull/71103) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Update the error message for the JSONAsObject format. [#71123](https://github.com/ClickHouse/ClickHouse/pull/71123) ([Pavel Kruglov](https://github.com/Avogar)).
+* Initial changelog for 24.10. [#71127](https://github.com/ClickHouse/ClickHouse/pull/71127) ([Raúl Marín](https://github.com/Algunenano)).
+* Followup [#70520](https://github.com/ClickHouse/ClickHouse/issues/70520). [#71129](https://github.com/ClickHouse/ClickHouse/pull/71129) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Update the compatibility setting for `hnsw_candidate_list_size_for_search`. [#71133](https://github.com/ClickHouse/ClickHouse/pull/71133) ([Robert Schulze](https://github.com/rschu1ze)).
+* Try to fix rabbitmq. [#71143](https://github.com/ClickHouse/ClickHouse/pull/71143) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Randomize the setting `enable_vertical_final`. [#71144](https://github.com/ClickHouse/ClickHouse/pull/71144) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix the bad test `02561_sorting_constants_and_distinct_crash`. [#71147](https://github.com/ClickHouse/ClickHouse/pull/71147) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test to verify that [#62308](https://github.com/ClickHouse/ClickHouse/issues/62308) works. [#71149](https://github.com/ClickHouse/ClickHouse/pull/71149) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Fix 02932_refreshable_materialized_views_1 flakiness. [#71160](https://github.com/ClickHouse/ClickHouse/pull/71160) ([Michael Kolupaev](https://github.com/al13n321)).
+* Use `_minmax_count_projection` instead of `Optimized trivial count` for the `ReadFromPreparedSource` node in trivial-count-optimized query plans, providing a more descriptive representation of the projection-based trivial count optimization. This addresses [#70939](https://github.com/ClickHouse/ClickHouse/issues/70939). [#71166](https://github.com/ClickHouse/ClickHouse/pull/71166) ([Amos Bird](https://github.com/amosbird)).
+* Close [#8687](https://github.com/ClickHouse/ClickHouse/issues/8687). [#71169](https://github.com/ClickHouse/ClickHouse/pull/71169) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixes for interactive metrics. [#71173](https://github.com/ClickHouse/ClickHouse/pull/71173) ([Julia Kartseva](https://github.com/jkartseva)).
+* Maybe not GWPAsan by default. [#71174](https://github.com/ClickHouse/ClickHouse/pull/71174) ([Antonio Andelic](https://github.com/antonio2368)).
+* Print the compression method in `clickhouse-compressor --stat`. Useful for inspecting random data files. [#71192](https://github.com/ClickHouse/ClickHouse/pull/71192) ([Amos Bird](https://github.com/amosbird)).
+* Move the events into the recent category and add the New York event. [#71194](https://github.com/ClickHouse/ClickHouse/pull/71194) ([Zoe Steinkamp](https://github.com/zoesteinkamp)).
+* Improve error and log messages around memory usage. [#71195](https://github.com/ClickHouse/ClickHouse/pull/71195) ([Raúl Marín](https://github.com/Algunenano)).
+* Minor test adjustments. [#71199](https://github.com/ClickHouse/ClickHouse/pull/71199) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix a LOGICAL_ERROR on a wrong scalar subquery argument to table functions. [#71216](https://github.com/ClickHouse/ClickHouse/pull/71216) ([Raúl Marín](https://github.com/Algunenano)).
+* Disable `enable_named_columns_in_function_tuple` for 24.10. [#71219](https://github.com/ClickHouse/ClickHouse/pull/71219) ([Raúl Marín](https://github.com/Algunenano)).
+* Update README.md - Update meetups. [#71223](https://github.com/ClickHouse/ClickHouse/pull/71223) ([Tanya Bragin](https://github.com/tbragin)).
+* Fix `WITH TOTALS` in a subquery with parallel replicas. [#71224](https://github.com/ClickHouse/ClickHouse/pull/71224) ([Nikita Taranov](https://github.com/nickitat)).
+* Ignore `No such key` exceptions in some cases. [#71236](https://github.com/ClickHouse/ClickHouse/pull/71236) ([Antonio Andelic](https://github.com/antonio2368)).
+* Make the cloud sync title shorter. [#71255](https://github.com/ClickHouse/ClickHouse/pull/71255) ([Raúl Marín](https://github.com/Algunenano)).
+* Update README.md - Meetups update. [#71271](https://github.com/ClickHouse/ClickHouse/pull/71271) ([Tanya Bragin](https://github.com/tbragin)).
+* Improve `system.query_metric_log` to remove flakiness. [#71295](https://github.com/ClickHouse/ClickHouse/pull/71295) ([Pablo Marcos](https://github.com/pamarcos)).
+* Fix the debug log timestamp. [#71311](https://github.com/ClickHouse/ClickHouse/pull/71311) ([Pablo Marcos](https://github.com/pamarcos)).
+* Expose one more simple merge selector setting. [#71313](https://github.com/ClickHouse/ClickHouse/pull/71313) ([alesapin](https://github.com/alesapin)).
+* Better style for some server-level settings. [#71319](https://github.com/ClickHouse/ClickHouse/pull/71319) ([alesapin](https://github.com/alesapin)).
+* Sync some changes. [#71321](https://github.com/ClickHouse/ClickHouse/pull/71321) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add a library to the requirements for style-check and fix a warning. [#71322](https://github.com/ClickHouse/ClickHouse/pull/71322) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fix the test `test_workload_entity_keeper_storage`: add more retries. [#71325](https://github.com/ClickHouse/ClickHouse/pull/71325) ([Sergei Trifonov](https://github.com/serxa)).
+* Debug build faild with clang-18 after https://github.com/ClickHouse/ClickHouse/pull/71266, don't know why it's ok in release build, simply changing _ to _1 is ok for both release and debug build. [#71335](https://github.com/ClickHouse/ClickHouse/pull/71335) ([Chang chen](https://github.com/baibaichen)). +* Exempt refreshable materialized views from ignore_empty_sql_security_in_create_view_query. [#71336](https://github.com/ClickHouse/ClickHouse/pull/71336) ([Michael Kolupaev](https://github.com/al13n321)). +* CI: Remove deprecated release script. [#71341](https://github.com/ClickHouse/ClickHouse/pull/71341) ([Max Kainov](https://github.com/maxknv)). +* Update version_date.tsv and changelog after v24.10.1.2812-stable. [#71343](https://github.com/ClickHouse/ClickHouse/pull/71343) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Bump USearch to 2.16.0 and add more tests. [#71344](https://github.com/ClickHouse/ClickHouse/pull/71344) ([Robert Schulze](https://github.com/rschu1ze)). +* check-doc-aspell: Print full path to script in CI report. [#71345](https://github.com/ClickHouse/ClickHouse/pull/71345) ([Vladimir Cherkasov](https://github.com/vdimir)). +* CI: Fix fedora version in create release workflow. [#71347](https://github.com/ClickHouse/ClickHouse/pull/71347) ([Max Kainov](https://github.com/maxknv)). +* fs cache: add assertions. [#71348](https://github.com/ClickHouse/ClickHouse/pull/71348) ([Kseniia Sumarokova](https://github.com/kssenii)). +* More info in TOO_SLOW exception. [#71365](https://github.com/ClickHouse/ClickHouse/pull/71365) ([Vladimir Cherkasov](https://github.com/vdimir)). +* Fix unused variables around WorkloadEntityStorageBase. [#71367](https://github.com/ClickHouse/ClickHouse/pull/71367) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Allow to prewarm mark cache by system command without enabled setting. [#71368](https://github.com/ClickHouse/ClickHouse/pull/71368) ([Anton Popov](https://github.com/CurtizJ)). +* Fix after https://github.com/ClickHouse/ClickHouse/pull/64847. [#71380](https://github.com/ClickHouse/ClickHouse/pull/71380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Rename `compare8` to `compare16` for consistency. [#71416](https://github.com/ClickHouse/ClickHouse/pull/71416) ([Nikita Taranov](https://github.com/nickitat)). +* Fix missing cluster startup for test_quorum_inserts::test_insert_quorum_with_keeper_fail. [#71418](https://github.com/ClickHouse/ClickHouse/pull/71418) ([Azat Khuzhin](https://github.com/azat)). +* [Experiment] Analyzer: Check what happens after if-condition removal. [#71425](https://github.com/ClickHouse/ClickHouse/pull/71425) ([Dmitry Novik](https://github.com/novikd)). +* Update version_date.tsv and changelog after v24.8.6.70-lts. [#71428](https://github.com/ClickHouse/ClickHouse/pull/71428) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix upgrade check (24.11). [#71438](https://github.com/ClickHouse/ClickHouse/pull/71438) ([Raúl Marín](https://github.com/Algunenano)). +* Fix assert during insert into vector similarity index in presence of other skipping indexes. [#71457](https://github.com/ClickHouse/ClickHouse/pull/71457) ([Robert Schulze](https://github.com/rschu1ze)). +* Avoid `seconds left [-3]` in cache await. [#71468](https://github.com/ClickHouse/ClickHouse/pull/71468) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Added tests for corner cases for 24.10. 
[#71469](https://github.com/ClickHouse/ClickHouse/pull/71469) ([Max Vostrikov](https://github.com/max-vostrikov)). +* Expose base setting for merge selector. [#71497](https://github.com/ClickHouse/ClickHouse/pull/71497) ([alesapin](https://github.com/alesapin)). +* Fixed incorrect settings order `max_parser_depth` and `max_parser_backtracks`. [#71498](https://github.com/ClickHouse/ClickHouse/pull/71498) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support the endpoint of oss accelerator. [#71502](https://github.com/ClickHouse/ClickHouse/pull/71502) ([Kai Zhu](https://github.com/nauu)). +* Fix flaky test_drop_complex_columns. [#71504](https://github.com/ClickHouse/ClickHouse/pull/71504) ([Ilya Golshtein](https://github.com/ilejn)). +* Move bitShift function changelog entries to backward incompatible. [#71510](https://github.com/ClickHouse/ClickHouse/pull/71510) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix copy/paste error. [#71513](https://github.com/ClickHouse/ClickHouse/pull/71513) ([Denny Crane](https://github.com/den-crane)). +* Allow specifying cmdline flags in integration test. It's needed by [#71452](https://github.com/ClickHouse/ClickHouse/issues/71452) to validate a bugfix. [#71523](https://github.com/ClickHouse/ClickHouse/pull/71523) ([Amos Bird](https://github.com/amosbird)). +* Add ProfileEvents for merge selector timings. [#71524](https://github.com/ClickHouse/ClickHouse/pull/71524) ([alesapin](https://github.com/alesapin)). +* Minor: Remove "experimental" mention of analyzer. [#71525](https://github.com/ClickHouse/ClickHouse/pull/71525) ([Robert Schulze](https://github.com/rschu1ze)). +* Our builds, jobs, and hosts are called aarch64, so we make the code consistent with the content. [#71530](https://github.com/ClickHouse/ClickHouse/pull/71530) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add test to check that accessing system.functions does not populate query_log used_functions. [#71535](https://github.com/ClickHouse/ClickHouse/pull/71535) ([Raúl Marín](https://github.com/Algunenano)). +* Improve `query_plan_merge_filters` optimization. Fixes [#71408](https://github.com/ClickHouse/ClickHouse/issues/71408). [#71539](https://github.com/ClickHouse/ClickHouse/pull/71539) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix compatibility with refreshable materialized views created by old clickhouse servers. [#71556](https://github.com/ClickHouse/ClickHouse/pull/71556) ([Michael Kolupaev](https://github.com/al13n321)). +* Vector similarity index: Re-introduce support for legacy index creation syntax. [#71572](https://github.com/ClickHouse/ClickHouse/pull/71572) ([Robert Schulze](https://github.com/rschu1ze)). +* Avoid port clash in CoordinationTest/0.TestSummingRaft1. [#71584](https://github.com/ClickHouse/ClickHouse/pull/71584) ([Raúl Marín](https://github.com/Algunenano)). +* Fix for `00180_no_seek_avoiding_when_reading_from_cache`. [#71596](https://github.com/ClickHouse/ClickHouse/pull/71596) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix for `test_storage_s3_queue::test_shards_distributed[ordered-2]`. [#71597](https://github.com/ClickHouse/ClickHouse/pull/71597) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Enable enable_job_stack_trace by default, second attempt. [#71625](https://github.com/ClickHouse/ClickHouse/pull/71625) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update version_date.tsv and changelog after v24.3.13.40-lts. 
+* Fix a bug with the max rows/bytes to read limits. [#71634](https://github.com/ClickHouse/ClickHouse/pull/71634) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* SimSIMD: Improve suppression for msan false positive. [#71635](https://github.com/ClickHouse/ClickHouse/pull/71635) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add the `min_parts_to_merge_at_once` MergeTree setting, which introduces a lower limit on the number of data parts merged at once. The main motivation for this setting is Trifonov's theorem, which states that it is not effective to merge fewer than `e` (2.71...) data parts at once because doing so increases both write amplification and the number of parts (see the sketch below). [#71637](https://github.com/ClickHouse/ClickHouse/pull/71637) ([alesapin](https://github.com/alesapin)).
+* Fix broken 03247_ghdata_string_to_json_alter. [#71638](https://github.com/ClickHouse/ClickHouse/pull/71638) ([Pavel Kruglov](https://github.com/Avogar)).
+* Update test. [#71654](https://github.com/ClickHouse/ClickHouse/pull/71654) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* CI: Fix mysql containers using an improper log directory. [#71655](https://github.com/ClickHouse/ClickHouse/pull/71655) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Update README.md - Update meetups. [#71657](https://github.com/ClickHouse/ClickHouse/pull/71657) ([Tanya Bragin](https://github.com/tbragin)).
+* Add an index granularity size column to system.parts. [#71658](https://github.com/ClickHouse/ClickHouse/pull/71658) ([alesapin](https://github.com/alesapin)).
+* Update PULL_REQUEST_TEMPLATE.md. [#71687](https://github.com/ClickHouse/ClickHouse/pull/71687) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Historically we have had a strange naming scheme for cherry-pick branches, e.g. `cherrypick/24.3/5849aeb8c3ca5402f7d8e16e780598c88774371e`. The form `cherrypick/24.3/62297` looks nicer and more straightforward. [#71698](https://github.com/ClickHouse/ClickHouse/pull/71698) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/70027. [#71715](https://github.com/ClickHouse/ClickHouse/pull/71715) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix flaky test test_prometheus_protocols. [#71772](https://github.com/ClickHouse/ClickHouse/pull/71772) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix issues we face on orphaned backport branches and closed release PRs, when fake-master events are sent to the check DB. [#71782](https://github.com/ClickHouse/ClickHouse/pull/71782) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix processors profile not being collected for subqueries in some cases. [#71787](https://github.com/ClickHouse/ClickHouse/pull/71787) ([Nikita Taranov](https://github.com/nickitat)).
+* Lint some stuff. [#71795](https://github.com/ClickHouse/ClickHouse/pull/71795) ([Konstantin Bogdanov](https://github.com/thevar1able)).
+* Backported in [#72280](https://github.com/ClickHouse/ClickHouse/issues/72280): After https://github.com/ClickHouse/ClickHouse/pull/70442 (not included in any release yet) we use the new Native serialization for JSON and Dynamic, but it breaks new-client to old-server communication. This PR fixes that. It also adds a setting `merge_tree_use_v1_object_and_dynamic_serialization` that allows using the V1 serialization in newer versions; it can be used during an upgrade to a new version to keep the ability to roll back (see the sketch below). [#71816](https://github.com/ClickHouse/ClickHouse/pull/71816) ([Pavel Kruglov](https://github.com/Avogar)).
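+
+A sketch for the `merge_tree_use_v1_object_and_dynamic_serialization` entry above; whether the setting is session-level or table-level is not stated, so the session-level form here is an assumption:
+
+```sql
+-- Assumed session-level usage: keep the older V1 serialization for JSON and
+-- Dynamic so that a rollback to an older server stays possible mid-upgrade.
+SET merge_tree_use_v1_object_and_dynamic_serialization = 1;
+```
+
+And a sketch for the `min_parts_to_merge_at_once` entry above; the table definition is hypothetical, and the value 3 is chosen only because merging fewer than `e` (about 2.71) parts at once is ineffective per the cited reasoning:
+
+```sql
+-- Hypothetical MergeTree table with a lower bound on parts merged at once.
+CREATE TABLE t_parts_demo (x UInt64) ENGINE = MergeTree ORDER BY x
+SETTINGS min_parts_to_merge_at_once = 3;
+```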
+* Closes [#71780](https://github.com/ClickHouse/ClickHouse/issues/71780). [#71818](https://github.com/ClickHouse/ClickHouse/pull/71818) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Relax the memory limit for 00755_avg_value_size_hint_passing.sql. [#71820](https://github.com/ClickHouse/ClickHouse/pull/71820) ([Sema Checherinda](https://github.com/CheSema)).
+* The change has already been applied to https://github.com/docker-library/official-images/pull/17876. Backport it to every branch to have a proper `Dockerfile.ubuntu` there. [#71825](https://github.com/ClickHouse/ClickHouse/pull/71825) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add tests for `parseDateTime64InJodaSyntax`. [#71829](https://github.com/ClickHouse/ClickHouse/pull/71829) ([Max Vostrikov](https://github.com/max-vostrikov)).
+* Enable build profiling in pull requests. [#71847](https://github.com/ClickHouse/ClickHouse/pull/71847) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add check and assertion. [#71856](https://github.com/ClickHouse/ClickHouse/pull/71856) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Some healthcheck is better than nothing. [#71865](https://github.com/ClickHouse/ClickHouse/pull/71865) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* More accurate `calculateCacheKey` implementation. [#71868](https://github.com/ClickHouse/ClickHouse/pull/71868) ([Nikita Taranov](https://github.com/nickitat)).
+* Add test 03248_max_parts_to_move. [#71869](https://github.com/ClickHouse/ClickHouse/pull/71869) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Update README.md - Update meetups. [#71872](https://github.com/ClickHouse/ClickHouse/pull/71872) ([Tanya Bragin](https://github.com/tbragin)).
+* Prevent listing files from S3 while inserting. [#71889](https://github.com/ClickHouse/ClickHouse/pull/71889) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Keep query_plan_merge_filters disabled by default. [#71890](https://github.com/ClickHouse/ClickHouse/pull/71890) ([Raúl Marín](https://github.com/Algunenano)).
+* Remove useless code. [#71900](https://github.com/ClickHouse/ClickHouse/pull/71900) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* TreeRewriter: fix typo: `parititon` => `partition`. [#71907](https://github.com/ClickHouse/ClickHouse/pull/71907) ([yun](https://github.com/yokofly)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/70027 and https://github.com/ClickHouse/ClickHouse/pull/71715. [#71912](https://github.com/ClickHouse/ClickHouse/pull/71912) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix test_storage_mongodb/test.py::test_secure_connection_uri. [#71924](https://github.com/ClickHouse/ClickHouse/pull/71924) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Adapt some tests to run in non-CI configurations. [#71928](https://github.com/ClickHouse/ClickHouse/pull/71928) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix build after [#71179](https://github.com/ClickHouse/ClickHouse/issues/71179). Clang-19 checks for unused variables inside if-conditions. [#71929](https://github.com/ClickHouse/ClickHouse/pull/71929) ([Dmitry Novik](https://github.com/novikd)).
+* Fix flaky test 03262_column_sizes_with_dynamic_structure. [#71931](https://github.com/ClickHouse/ClickHouse/pull/71931) ([Pavel Kruglov](https://github.com/Avogar)).
+* Don't randomise settings in 02354_distributed_with_external_aggregation_memory_usage. [#71944](https://github.com/ClickHouse/ClickHouse/pull/71944) ([Nikita Taranov](https://github.com/nickitat)).
+* Enable `query_plan_merge_filters` again after [#71890](https://github.com/ClickHouse/ClickHouse/issues/71890) (see the sketch below). [#71964](https://github.com/ClickHouse/ClickHouse/pull/71964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add a comment about bf16 to the CMake docs. [#71973](https://github.com/ClickHouse/ClickHouse/pull/71973) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix 00098_primary_key_memory_allocated. [#71977](https://github.com/ClickHouse/ClickHouse/pull/71977) ([Alexander Gololobov](https://github.com/davenger)).
+* Add a test for [#71908](https://github.com/ClickHouse/ClickHouse/issues/71908). [#71986](https://github.com/ClickHouse/ClickHouse/pull/71986) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Miscellaneous. [#71987](https://github.com/ClickHouse/ClickHouse/pull/71987) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Minor follow-up to [#71581](https://github.com/ClickHouse/ClickHouse/issues/71581). [#71993](https://github.com/ClickHouse/ClickHouse/pull/71993) ([Robert Schulze](https://github.com/rschu1ze)).
+* CI: Stress test with praktika. [#71995](https://github.com/ClickHouse/ClickHouse/pull/71995) ([Max Kainov](https://github.com/maxknv)).
+* Fix prewarm of the mark cache after adding a new column. [#71996](https://github.com/ClickHouse/ClickHouse/pull/71996) ([Anton Popov](https://github.com/CurtizJ)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/70027, https://github.com/ClickHouse/ClickHouse/pull/71715, and https://github.com/ClickHouse/ClickHouse/pull/71912. [#72018](https://github.com/ClickHouse/ClickHouse/pull/72018) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix comments. [#72023](https://github.com/ClickHouse/ClickHouse/pull/72023) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for issue 33604. [#72026](https://github.com/ClickHouse/ClickHouse/pull/72026) ([Nikita Taranov](https://github.com/nickitat)).
+* CI: Remove unsafe secret_envs input from yml workflows. [#72028](https://github.com/ClickHouse/ClickHouse/pull/72028) ([Max Kainov](https://github.com/maxknv)).
+* Update version_date.tsv and changelog after v24.10.2.80-stable. [#72029](https://github.com/ClickHouse/ClickHouse/pull/72029) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelog after v24.8.7.41-lts. [#72037](https://github.com/ClickHouse/ClickHouse/pull/72037) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelog after v24.9.3.128-stable. [#72041](https://github.com/ClickHouse/ClickHouse/pull/72041) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelog after v24.3.14.35-lts. [#72042](https://github.com/ClickHouse/ClickHouse/pull/72042) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Save several minutes of build time. [#72046](https://github.com/ClickHouse/ClickHouse/pull/72046) ([Raúl Marín](https://github.com/Algunenano)).
+* Update README.md - Update meetups. [#72048](https://github.com/ClickHouse/ClickHouse/pull/72048) ([Tanya Bragin](https://github.com/tbragin)).
+* Fix client syntax highlighting that was broken in https://github.com/ClickHouse/ClickHouse/pull/71949. [#72049](https://github.com/ClickHouse/ClickHouse/pull/72049) ([Nikolay Degterinsky](https://github.com/evillique)).
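+
+A sketch for the `query_plan_merge_filters` entry above; only the setting name comes from the entries, the query itself is a hypothetical illustration:
+
+```sql
+-- Inspect the query plan with merged filter steps enabled.
+EXPLAIN PLAN
+SELECT count() FROM numbers(100) WHERE number > 10 AND number % 2 = 0
+SETTINGS query_plan_merge_filters = 1;
+```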
+* Fix test_disk_over_web_server/. [#72075](https://github.com/ClickHouse/ClickHouse/pull/72075) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Minor improvement for the system.query_metric_log stateless test. [#72076](https://github.com/ClickHouse/ClickHouse/pull/72076) ([Pablo Marcos](https://github.com/pamarcos)).
+* A follow-up for [#72057](https://github.com/ClickHouse/ClickHouse/issues/72057) and https://github.com/ClickHouse/ClickHouse/pull/71505. [#72079](https://github.com/ClickHouse/ClickHouse/pull/72079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add google-cloud-cpp submodule. [#72092](https://github.com/ClickHouse/ClickHouse/pull/72092) ([Pablo Marcos](https://github.com/pamarcos)).
+* CI: Enable fuzzer job in Nightly workflow. [#72101](https://github.com/ClickHouse/ClickHouse/pull/72101) ([Max Kainov](https://github.com/maxknv)).
+* Get rid of code duplication after adding `CHECK GRANT` in https://github.com/ClickHouse/ClickHouse/pull/68885 (see the sketch below). [#72103](https://github.com/ClickHouse/ClickHouse/pull/72103) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add jwt-cpp submodule. [#72104](https://github.com/ClickHouse/ClickHouse/pull/72104) ([Pablo Marcos](https://github.com/pamarcos)).
+* Fix list-licenses.sh with OSX. [#72107](https://github.com/ClickHouse/ClickHouse/pull/72107) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix cancellation for `PartitionedSink`. [#72126](https://github.com/ClickHouse/ClickHouse/pull/72126) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix 02374_analyzer_join_using. [#72145](https://github.com/ClickHouse/ClickHouse/pull/72145) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Fixed a test which was extremely flaky. [#72147](https://github.com/ClickHouse/ClickHouse/pull/72147) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Backported in [#72236](https://github.com/ClickHouse/ClickHouse/issues/72236): Revert "CI: Stress test with praktika". [#72231](https://github.com/ClickHouse/ClickHouse/pull/72231) ([Max Kainov](https://github.com/maxknv)).
+* Backported in [#72345](https://github.com/ClickHouse/ClickHouse/issues/72345): Apply colors correctly to terminal output. [#72283](https://github.com/ClickHouse/ClickHouse/pull/72283) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#72361](https://github.com/ClickHouse/ClickHouse/issues/72361): Disable a cloud setting. [#72292](https://github.com/ClickHouse/ClickHouse/pull/72292) ([Han Fei](https://github.com/hanfei1991)).
+* Backported in [#72469](https://github.com/ClickHouse/ClickHouse/issues/72469): Remove flaky test test_move_shared_lock_fail_keeper_unavailable and extend the stable one. [#72357](https://github.com/ClickHouse/ClickHouse/pull/72357) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Backported in [#72463](https://github.com/ClickHouse/ClickHouse/issues/72463): Fix data race in Squashing with LowCardinality. [#72392](https://github.com/ClickHouse/ClickHouse/pull/72392) ([Vladimir Cherkasov](https://github.com/vdimir)).
+* Backported in [#72453](https://github.com/ClickHouse/ClickHouse/issues/72453): Make `operations_to_execute` a shared pointer. [#72400](https://github.com/ClickHouse/ClickHouse/pull/72400) ([Sema Checherinda](https://github.com/CheSema)).
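+
+A sketch for the `CHECK GRANT` entry above; the statement name comes from the referenced PR, while the privilege and the table are assumptions for illustration:
+
+```sql
+-- Check whether the current user holds SELECT on a (hypothetical) table.
+CHECK GRANT SELECT ON default.hits;
+```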
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 7591f7050cb..d2f31d4780b 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v24.11.1.2557-stable 2024-11-26
 v24.10.3.21-stable 2024-11-22
 v24.10.2.80-stable 2024-11-18
 v24.10.1.2812-stable 2024-11-01

From 1166e93447c9ab70aa97314a1f6cd9ab198a8dd9 Mon Sep 17 00:00:00 2001
From: Nikita Fomichev
Date: Wed, 27 Nov 2024 00:56:03 +0100
Subject: [PATCH 55/56] SettingsChangesHistory: disable allow_experimental_shared_set_join

---
 src/Core/SettingsChangesHistory.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp
index 5cc5a54639a..682d5845f73 100644
--- a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -117,7 +117,7 @@ static std::initializer_list

Date: Tue, 26 Nov 2024 16:57:49 -0800
Subject: [PATCH 56/56] [Docs] Specify that Replicated is default engine for cloud

---
 docs/en/engines/database-engines/atomic.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md
index 0224e1aba21..e2d6025f98f 100644
--- a/docs/en/engines/database-engines/atomic.md
+++ b/docs/en/engines/database-engines/atomic.md
@@ -6,7 +6,7 @@ sidebar_position: 10

 # Atomic

-It supports non-blocking [DROP TABLE](#drop-detach-table) and [RENAME TABLE](#rename-table) queries and atomic [EXCHANGE TABLES](#exchange-tables) queries. `Atomic` database engine is used by default.
+It supports non-blocking [DROP TABLE](#drop-detach-table) and [RENAME TABLE](#rename-table) queries and atomic [EXCHANGE TABLES](#exchange-tables) queries. `Atomic` database engine is used by default. Note that on ClickHouse Cloud, the `Replicated` database engine is used by default.

 ## Creating a Database {#creating-a-database}
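
A sketch for the docs change above; the database name is hypothetical, and on ClickHouse Cloud the same check is expected to show `Replicated` instead of `Atomic`:

```sql
-- Create a database without an explicit engine and inspect the default.
CREATE DATABASE example_db;
SHOW CREATE DATABASE example_db; -- expected to show ENGINE = Atomic on a self-managed server
```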