ClickHouse/src/Dictionaries/MongoDBDictionarySource.cpp

263 lines
9.2 KiB
C++
Raw Normal View History

#include "MongoDBDictionarySource.h"
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
2019-12-15 06:34:43 +00:00
#include "registerDictionaries.h"
2021-09-03 11:16:32 +00:00
#include <Storages/ExternalDataSourceConfiguration.h>
namespace DB
{
2021-12-29 10:02:18 +00:00
/// Configuration keys recognised for the `mongodb` dictionary source.
/// Used as the key predicate handed to getExternalDataSourceConfiguration.
static const std::unordered_set<std::string_view> dictionary_allowed_keys{
    "host",
    "port",
    "user",
    "password",
    "db",
    "database",
    "uri",
    "collection",
    "name",
    "method",
};
/// Registers the "mongodb" source type in the dictionary source factory.
void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
{
    /// Creator callback: reads the `<root prefix>.mongodb` section and builds a MongoDBDictionarySource.
    auto create_source = [](
        const DictionaryStructure & dict_struct,
        const Poco::Util::AbstractConfiguration & config,
        const std::string & root_config_prefix,
        Block & sample_block,
        ContextPtr context,
        const std::string & /* default_database */,
        bool /* created_from_ddl */)
    {
        const auto mongodb_prefix = root_config_prefix + ".mongodb";

        ExternalDataSourceConfiguration settings;

        auto is_allowed_key = [](const String & key) { return dictionary_allowed_keys.contains(key); };
        auto from_named_collection = getExternalDataSourceConfiguration(config, mongodb_prefix, context, is_allowed_key);

        if (!from_named_collection)
        {
            /// Nothing was resolved by getExternalDataSourceConfiguration:
            /// read the connection settings straight from the config section.
            settings.host = config.getString(mongodb_prefix + ".host", "");
            settings.port = config.getUInt(mongodb_prefix + ".port", 0);
            settings.username = config.getString(mongodb_prefix + ".user", "");
            settings.password = config.getString(mongodb_prefix + ".password", "");
            settings.database = config.getString(mongodb_prefix + ".db", "");
        }
        else
        {
            settings = from_named_collection->configuration;
        }

        /// `uri`, `method` and `collection` are always taken from the config section itself;
        /// `collection` has no default value here.
        return std::make_unique<MongoDBDictionarySource>(
            dict_struct,
            config.getString(mongodb_prefix + ".uri", ""),
            settings.host,
            settings.port,
            settings.username,
            settings.password,
            config.getString(mongodb_prefix + ".method", ""),
            settings.database,
            config.getString(mongodb_prefix + ".collection"),
            sample_block);
    };

    factory.registerSource("mongodb", create_source);
}
}
2017-12-01 20:21:35 +00:00
2021-10-02 07:13:14 +00:00
#include <base/logger_useful.h>
#include <Poco/MongoDB/Array.h>
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Cursor.h>
#include <Poco/MongoDB/Database.h>
#include <Poco/MongoDB/ObjectId.h>
2020-05-19 20:12:10 +00:00
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Poco/Version.h>
// IO/WriteHelpers.h must be included only after the Poco headers because of a naming conflict:
// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value);
// src/IO/WriteHelpers.h:146 #define writeCString(s, buf)
#include <IO/WriteHelpers.h>
2021-10-08 14:03:54 +00:00
#include <Processors/Transforms/MongoDBSource.h>
2016-12-08 02:49:04 +00:00
namespace DB
{
namespace ErrorCodes
{
2021-05-08 19:01:59 +00:00
extern const int NOT_IMPLEMENTED;
extern const int UNSUPPORTED_METHOD;
extern const int MONGODB_CANNOT_AUTHENTICATE;
2016-12-08 02:49:04 +00:00
}
2019-02-10 16:55:12 +00:00
/// Block size passed to every MongoDBSource created in this file.
static constexpr UInt64 max_block_size = 8192;
2016-12-08 02:49:04 +00:00
2016-12-08 02:49:04 +00:00
MongoDBDictionarySource::MongoDBDictionarySource(
2019-08-03 11:02:40 +00:00
const DictionaryStructure & dict_struct_,
2020-05-14 11:36:19 +00:00
const std::string & uri_,
2019-08-03 11:02:40 +00:00
const std::string & host_,
UInt16 port_,
const std::string & user_,
const std::string & password_,
const std::string & method_,
const std::string & db_,
const std::string & collection_,
const Block & sample_block_)
: dict_struct{dict_struct_}
2020-05-14 11:36:19 +00:00
, uri{uri_}
2019-08-03 11:02:40 +00:00
, host{host_}
, port{port_}
, user{user_}
, password{password_}
, method{method_}
, db{db_}
, collection{collection_}
, sample_block{sample_block_}
2020-05-19 20:12:10 +00:00
, connection{std::make_shared<Poco::MongoDB::Connection>()}
2016-12-08 02:49:04 +00:00
{
2020-05-14 11:36:19 +00:00
if (!uri.empty())
{
2020-05-19 20:12:10 +00:00
Poco::URI poco_uri(uri);
// Parse database from URI. This is required for correctness -- the
// cursor is created using database name and colleciton name, so we have
// to specify them properly.
db = poco_uri.getPath();
// getPath() may return a leading slash, remove it.
if (!db.empty() && db[0] == '/')
{
db.erase(0, 1);
}
// Parse some other parts from URI, for logging and display purposes.
host = poco_uri.getHost();
port = poco_uri.getPort();
user = poco_uri.getUserInfo();
if (size_t separator = user.find(':'); separator != std::string::npos)
{
user.resize(separator);
}
// Connect with URI.
2020-05-14 11:36:19 +00:00
Poco::MongoDB::Connection::SocketFactory socket_factory;
connection->connect(uri, socket_factory);
}
else
{
2020-05-19 20:12:10 +00:00
// Connect with host/port/user/etc.
2020-05-14 11:36:19 +00:00
connection->connect(host, port);
if (!user.empty())
{
#if POCO_VERSION >= 0x01070800
2020-05-14 11:36:19 +00:00
Poco::MongoDB::Database poco_db(db);
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
2021-04-10 18:48:36 +00:00
throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password");
#else
2020-05-14 11:36:19 +00:00
authenticate(*connection, db, user, password);
#endif
2020-05-14 11:36:19 +00:00
}
}
2016-12-08 02:49:04 +00:00
}
MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other)
: MongoDBDictionarySource{
2020-05-14 11:36:19 +00:00
other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block}
2016-12-08 02:49:04 +00:00
{
}
/// Defaulted destructor, defined out of line in this translation unit.
MongoDBDictionarySource::~MongoDBDictionarySource() = default;
/// Returns a pipe over a cursor for `db`.`collection`; no selector is added here.
/// The connection, sample block and max_block_size are forwarded to MongoDBSource.
Pipe MongoDBDictionarySource::loadAll()
{
    auto cursor = createCursor(db, collection, sample_block);
    auto source = std::make_shared<MongoDBSource>(connection, std::move(cursor), sample_block, max_block_size);
    return Pipe(source);
}
/** Returns a pipe over the documents whose `dict_struct.id->name` field is one of `ids`.
  *
  * The filter added to the cursor selector is `{ <id name>: { $in: [ids...] } }`.
  *
  * Throws UNSUPPORTED_METHOD if the dictionary structure has no 'id'.
  */
Pipe MongoDBDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    if (!dict_struct.id)
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is required for selective loading");

    auto cursor = createCursor(db, collection, sample_block);

    /** NOTE: While building array, Poco::MongoDB requires passing of different unused element names, along with values.
      * In general, Poco::MongoDB is quite inefficient and bulky.
      */
    Poco::MongoDB::Array::Ptr ids_array(new Poco::MongoDB::Array);

    for (const UInt64 id : ids)
    {
        /// Pass the id as a 64-bit BSON integer. Casting to Int32 silently truncated ids
        /// above INT32_MAX and made the query look for a different value.
        /// MongoDB compares numeric types by value, so fields stored as int32 still match.
        /// BSON has no unsigned 64-bit type, so ids above INT64_MAX still wrap.
        ids_array->add(DB::toString(id), static_cast<Poco::Int64>(id));
    }

    cursor->query().selector().addNewDocument(dict_struct.id->name).add("$in", ids_array);

    return Pipe(std::make_shared<MongoDBSource>(connection, std::move(cursor), sample_block, max_block_size));
}
/** Returns a pipe over the documents matching any of the requested complex keys.
  *
  * For every index in `requested_rows` a sub-document is built that maps each key attribute
  * name to the value found in the corresponding column of `key_columns` at that row.
  * All sub-documents are combined with `$or` in the cursor selector.
  *
  * Throws UNSUPPORTED_METHOD if the dictionary structure has no 'key', and NOT_IMPLEMENTED
  * for key attributes that are not integers, floats or strings.
  */
Pipe MongoDBDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    if (!dict_struct.key)
        throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is required for selective loading");

    auto cursor = createCursor(db, collection, sample_block);

    Poco::MongoDB::Array::Ptr keys_array(new Poco::MongoDB::Array);

    for (const auto row_idx : requested_rows)
    {
        /// Poco::MongoDB needs a distinct element name for each array entry; the row index is used.
        auto & key = keys_array->addNewDocument(DB::toString(row_idx));

        const auto & key_attributes = *dict_struct.key;
        for (size_t attribute_index = 0; attribute_index < key_attributes.size(); ++attribute_index)
        {
            const auto & key_attribute = key_attributes[attribute_index];

            switch (key_attribute.underlying_type)
            {
                case AttributeUnderlyingType::UInt8:
                case AttributeUnderlyingType::UInt16:
                case AttributeUnderlyingType::UInt32:
                case AttributeUnderlyingType::UInt64:
                case AttributeUnderlyingType::Int8:
                case AttributeUnderlyingType::Int16:
                case AttributeUnderlyingType::Int32:
                case AttributeUnderlyingType::Int64:
                {
                    /// Pass integers as 64-bit BSON values. Casting to Int32 silently truncated
                    /// every key outside the 32-bit range (possible for UInt32, UInt64 and Int64).
                    /// MongoDB compares numeric types by value, so int32 fields still match.
                    /// BSON has no unsigned 64-bit type, so UInt64 above INT64_MAX still wraps.
                    key.add(key_attribute.name, static_cast<Poco::Int64>(key_columns[attribute_index]->get64(row_idx)));
                    break;
                }
                case AttributeUnderlyingType::Float32:
                case AttributeUnderlyingType::Float64:
                {
                    key.add(key_attribute.name, key_columns[attribute_index]->getFloat64(row_idx));
                    break;
                }
                case AttributeUnderlyingType::String:
                {
                    String loaded_str(get<String>((*key_columns[attribute_index])[row_idx]));

                    /// Convert string to ObjectID
                    if (key_attribute.is_object_id)
                    {
                        Poco::MongoDB::ObjectId::Ptr loaded_id(new Poco::MongoDB::ObjectId(loaded_str));
                        key.add(key_attribute.name, loaded_id);
                    }
                    else
                    {
                        key.add(key_attribute.name, loaded_str);
                    }
                    break;
                }
                default:
                    /// Code-first form, matching the other throw sites in this file.
                    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported dictionary attribute type for MongoDB dictionary source");
            }
        }
    }

    /// If more than one key we should use $or
    cursor->query().selector().add("$or", keys_array);

    return Pipe(std::make_shared<MongoDBSource>(connection, std::move(cursor), sample_block, max_block_size));
}
/// Human-readable description: "MongoDB: <db>.<collection>, [<user>@]<host>:<port>".
std::string MongoDBDictionarySource::toString() const
{
    std::string description = "MongoDB: " + db + '.' + collection + ',';
    description += ' ';

    /// The "<user>@" part appears only when a user is set.
    if (!user.empty())
    {
        description += user;
        description += '@';
    }

    description += host;
    description += ':';
    description += DB::toString(port);
    return description;
}
}