2018-12-10 15:25:45 +00:00
|
|
|
#include "MongoDBDictionarySource.h"
|
2018-11-28 11:37:12 +00:00
|
|
|
#include "DictionarySourceFactory.h"
|
|
|
|
#include "DictionaryStructure.h"
|
2019-12-15 06:34:43 +00:00
|
|
|
#include "registerDictionaries.h"
|
2018-11-28 11:37:12 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Registers the creator of MongoDB dictionary sources in `factory` under the name "mongodb".
void registerDictionarySourceMongoDB(DictionarySourceFactory & factory)
{
    /// Reads the "<root_config_prefix>.mongodb" section of the configuration and builds the source from it.
    /// Every setting except "collection" is optional and falls back to an empty string (or 0 for the port).
    auto make_mongodb_source = [](
        const DictionaryStructure & dict_struct,
        const Poco::Util::AbstractConfiguration & config,
        const std::string & root_config_prefix,
        Block & sample_block,
        const Context & /* context */,
        bool /* check_config */)
    {
        const std::string section = root_config_prefix + ".mongodb";

        /// Optional string setting: empty when the key is absent.
        const auto optional_string = [&](const char * key_suffix)
        {
            return config.getString(section + key_suffix, "");
        };

        return std::make_unique<MongoDBDictionarySource>(
            dict_struct,
            optional_string(".uri"),
            optional_string(".host"),
            config.getUInt(section + ".port", 0),
            optional_string(".user"),
            optional_string(".password"),
            optional_string(".method"),
            optional_string(".db"),
            /// No default here: the collection name has to be present in the configuration.
            config.getString(section + ".collection"),
            sample_block);
    };

    factory.registerSource("mongodb", make_mongodb_source);
}
|
|
|
|
|
|
|
|
}
|
2017-12-01 20:21:35 +00:00
|
|
|
|
2018-11-28 11:37:12 +00:00
|
|
|
|
2020-05-19 20:12:10 +00:00
|
|
|
#include <common/logger_useful.h>
|
2020-05-08 14:11:19 +00:00
|
|
|
#include <Poco/MongoDB/Array.h>
|
|
|
|
#include <Poco/MongoDB/Connection.h>
|
|
|
|
#include <Poco/MongoDB/Cursor.h>
|
|
|
|
#include <Poco/MongoDB/Database.h>
|
|
|
|
#include <Poco/MongoDB/ObjectId.h>
|
2020-05-19 20:12:10 +00:00
|
|
|
#include <Poco/URI.h>
|
2020-05-08 14:11:19 +00:00
|
|
|
#include <Poco/Util/AbstractConfiguration.h>
|
|
|
|
#include <Poco/Version.h>
|
2017-03-28 20:30:57 +00:00
|
|
|
|
|
|
|
// only after poco
|
|
|
|
// naming conflict:
|
2018-08-24 05:25:00 +00:00
|
|
|
// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value);
|
2020-04-03 15:14:31 +00:00
|
|
|
// src/IO/WriteHelpers.h:146 #define writeCString(s, buf)
|
2020-05-08 14:11:19 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
|
|
|
#include <Common/FieldVisitors.h>
|
|
|
|
#include <ext/enumerate.h>
|
|
|
|
#include "MongoDBBlockInputStream.h"
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
extern const int UNSUPPORTED_METHOD;
|
|
|
|
extern const int MONGODB_CANNOT_AUTHENTICATE;
|
2016-12-08 02:49:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-02-10 16:55:12 +00:00
|
|
|
/// Block size limit handed to every MongoDBBlockInputStream created by this source.
static const UInt64 max_block_size = 8192;
|
2016-12-08 02:49:04 +00:00
|
|
|
|
2016-12-11 09:43:16 +00:00
|
|
|
|
2020-05-08 14:11:19 +00:00
|
|
|
#if POCO_VERSION < 0x01070800
|
2016-12-11 09:43:16 +00:00
|
|
|
/// See https://pocoproject.org/forum/viewtopic.php?f=10&t=6326&p=11426&hilit=mongodb+auth#p11485
|
2020-05-13 23:20:45 +00:00
|
|
|
/** Authenticates `user` in `database` over an already established `connection`
  * with a two-step challenge-response exchange: 'getnonce', then 'authenticate'.
  * Compiled only for Poco versions older than 1.7.8 (see the surrounding #if).
  *
  * Throws Exception with code MONGODB_CANNOT_AUTHENTICATE when a reply is empty,
  * is not acknowledged with ok == 1, has no nonce, or lacks a required field.
  */
void authenticate(Poco::MongoDB::Connection & connection, const std::string & database, const std::string & user, const std::string & password)
{
    Poco::MongoDB::Database db(database);

    /// Hex representation of the MD5 digest of `data`.
    const auto hex_md5 = [](const std::string & data)
    {
        Poco::MD5Engine engine;
        engine.update(data);
        return std::string(Poco::DigestEngine::digestToHex(engine.digest()));
    };

    /// Challenge-response authentication.
    std::string nonce;

    /// First step: request nonce.
    {
        auto nonce_request = db.createCommand();
        nonce_request->setNumberToReturn(1);
        nonce_request->selector().add<Int32>("getnonce", 1);

        Poco::MongoDB::ResponseMessage nonce_reply;
        connection.sendRequest(*nonce_request, nonce_reply);

        if (nonce_reply.documents().empty())
            throw Exception(
                "Cannot authenticate in MongoDB: server returned empty response for 'getnonce' command",
                ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);

        auto reply_document = nonce_reply.documents()[0];
        try
        {
            /// A missing 'ok' field is read as 0 and therefore rejected as well.
            if (reply_document->get<double>("ok", 0) != 1)
                throw Exception(
                    "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that"
                    " has field 'ok' missing or having wrong value",
                    ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);

            nonce = reply_document->get<std::string>("nonce", "");
            if (nonce.empty())
                throw Exception(
                    "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that"
                    " has field 'nonce' missing or empty",
                    ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
        }
        catch (const Poco::NotFoundException & e)
        {
            throw Exception(
                "Cannot authenticate in MongoDB: server returned response for 'getnonce' command that has missing required field: "
                    + e.displayText(),
                ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
        }
    }

    /// Second step: use nonce to calculate digest and send it back to the server.
    /// Digest is hex_md5(n.nonce + username + hex_md5(username + ":mongo:" + password))
    {
        const std::string password_digest = hex_md5(user + ":mongo:" + password);
        const std::string key = hex_md5(nonce + user + password_digest);

        auto auth_request = db.createCommand();
        auth_request->setNumberToReturn(1);
        auth_request->selector()
            .add<Int32>("authenticate", 1)
            .add<std::string>("user", user)
            .add<std::string>("nonce", nonce)
            .add<std::string>("key", key);

        Poco::MongoDB::ResponseMessage auth_reply;
        connection.sendRequest(*auth_request, auth_reply);

        if (auth_reply.empty())
            throw Exception(
                "Cannot authenticate in MongoDB: server returned empty response for 'authenticate' command",
                ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);

        auto reply_document = auth_reply.documents()[0];
        try
        {
            if (reply_document->get<double>("ok", 0) != 1)
                throw Exception(
                    "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that"
                    " has field 'ok' missing or having wrong value",
                    ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
        }
        catch (const Poco::NotFoundException & e)
        {
            throw Exception(
                "Cannot authenticate in MongoDB: server returned response for 'authenticate' command that has missing required field: "
                    + e.displayText(),
                ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
        }
    }
}
|
2020-05-08 14:11:19 +00:00
|
|
|
#endif
|
2016-12-08 02:49:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
MongoDBDictionarySource::MongoDBDictionarySource(
|
2019-08-03 11:02:40 +00:00
|
|
|
const DictionaryStructure & dict_struct_,
|
2020-05-14 11:36:19 +00:00
|
|
|
const std::string & uri_,
|
2019-08-03 11:02:40 +00:00
|
|
|
const std::string & host_,
|
|
|
|
UInt16 port_,
|
|
|
|
const std::string & user_,
|
|
|
|
const std::string & password_,
|
|
|
|
const std::string & method_,
|
|
|
|
const std::string & db_,
|
|
|
|
const std::string & collection_,
|
|
|
|
const Block & sample_block_)
|
|
|
|
: dict_struct{dict_struct_}
|
2020-05-14 11:36:19 +00:00
|
|
|
, uri{uri_}
|
2019-08-03 11:02:40 +00:00
|
|
|
, host{host_}
|
|
|
|
, port{port_}
|
|
|
|
, user{user_}
|
|
|
|
, password{password_}
|
|
|
|
, method{method_}
|
|
|
|
, db{db_}
|
|
|
|
, collection{collection_}
|
|
|
|
, sample_block{sample_block_}
|
2020-05-19 20:12:10 +00:00
|
|
|
, connection{std::make_shared<Poco::MongoDB::Connection>()}
|
2016-12-08 02:49:04 +00:00
|
|
|
{
|
2020-05-14 11:36:19 +00:00
|
|
|
if (!uri.empty())
|
|
|
|
{
|
2020-05-19 20:12:10 +00:00
|
|
|
Poco::URI poco_uri(uri);
|
|
|
|
|
|
|
|
// Parse database from URI. This is required for correctness -- the
|
|
|
|
// cursor is created using database name and colleciton name, so we have
|
|
|
|
// to specify them properly.
|
|
|
|
db = poco_uri.getPath();
|
|
|
|
// getPath() may return a leading slash, remove it.
|
|
|
|
if (!db.empty() && db[0] == '/')
|
|
|
|
{
|
|
|
|
db.erase(0, 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Parse some other parts from URI, for logging and display purposes.
|
|
|
|
host = poco_uri.getHost();
|
|
|
|
port = poco_uri.getPort();
|
|
|
|
user = poco_uri.getUserInfo();
|
|
|
|
if (size_t separator = user.find(':'); separator != std::string::npos)
|
|
|
|
{
|
|
|
|
user.resize(separator);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Connect with URI.
|
2020-05-14 11:36:19 +00:00
|
|
|
Poco::MongoDB::Connection::SocketFactory socket_factory;
|
|
|
|
connection->connect(uri, socket_factory);
|
|
|
|
}
|
|
|
|
else
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2020-05-19 20:12:10 +00:00
|
|
|
// Connect with host/port/user/etc.
|
2020-05-14 11:36:19 +00:00
|
|
|
connection->connect(host, port);
|
|
|
|
if (!user.empty())
|
|
|
|
{
|
2020-05-08 14:11:19 +00:00
|
|
|
#if POCO_VERSION >= 0x01070800
|
2020-05-14 11:36:19 +00:00
|
|
|
Poco::MongoDB::Database poco_db(db);
|
|
|
|
if (!poco_db.authenticate(*connection, user, password, method.empty() ? Poco::MongoDB::Database::AUTH_SCRAM_SHA1 : method))
|
|
|
|
throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
|
2020-05-08 14:11:19 +00:00
|
|
|
#else
|
2020-05-14 11:36:19 +00:00
|
|
|
authenticate(*connection, db, user, password);
|
2020-05-08 14:11:19 +00:00
|
|
|
#endif
|
2020-05-14 11:36:19 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-12-08 02:49:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
MongoDBDictionarySource::MongoDBDictionarySource(const MongoDBDictionarySource & other)
|
2020-05-08 14:11:19 +00:00
|
|
|
: MongoDBDictionarySource{
|
2020-05-14 11:36:19 +00:00
|
|
|
other.dict_struct, other.uri, other.host, other.port, other.user, other.password, other.method, other.db, other.collection, other.sample_block}
|
2016-12-08 02:49:04 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Compiler-generated destructor, defined out of line in this translation unit.
MongoDBDictionarySource::~MongoDBDictionarySource() = default;
|
|
|
|
|
|
|
|
|
2020-05-13 23:20:45 +00:00
|
|
|
std::unique_ptr<Poco::MongoDB::Cursor> createCursor(const std::string & database, const std::string & collection, const Block & sample_block_to_select)
|
2016-12-11 09:43:16 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
auto cursor = std::make_unique<Poco::MongoDB::Cursor>(database, collection);
|
2016-12-11 09:43:16 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Looks like selecting _id column is implicit by default.
|
|
|
|
if (!sample_block_to_select.has("_id"))
|
|
|
|
cursor->query().returnFieldSelector().add("_id", 0);
|
2016-12-11 09:43:16 +00:00
|
|
|
|
2017-12-18 02:37:08 +00:00
|
|
|
for (const auto & column : sample_block_to_select)
|
2017-04-01 07:20:54 +00:00
|
|
|
cursor->query().returnFieldSelector().add(column.name, 1);
|
|
|
|
return cursor;
|
2016-12-11 09:43:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-12-08 02:49:04 +00:00
|
|
|
/// Creates a stream over the whole collection: the cursor carries no filter, only the field selection.
BlockInputStreamPtr MongoDBDictionarySource::loadAll()
{
    auto unfiltered_cursor = createCursor(db, collection, sample_block);
    return std::make_shared<MongoDBBlockInputStream>(connection, std::move(unfiltered_cursor), sample_block, max_block_size);
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Creates a stream that loads only the documents whose id attribute is one of `ids`.
  * The filter has the form { <id attribute name>: { $in: [ids...] } }.
  *
  * Throws Exception with code UNSUPPORTED_METHOD if the dictionary structure has no 'id'.
  */
BlockInputStreamPtr MongoDBDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
    if (!dict_struct.id)
        throw Exception{"'id' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD};

    auto cursor = createCursor(db, collection, sample_block);

    /** NOTE: While building array, Poco::MongoDB requires passing of different unused element names, along with values.
      * In general, Poco::MongoDB is quite inefficient and bulky.
      */
    Poco::MongoDB::Array::Ptr ids_array(new Poco::MongoDB::Array);
    for (const UInt64 id : ids)
    {
        /// Send ids as 64-bit BSON integers. Narrowing to Int32 silently wrapped every id above 2^31 - 1
        /// to a different value, so the query selected the wrong documents.
        /// MongoDB compares numbers by value across BSON numeric types, so ids stored as 32-bit integers still match.
        /// BSON has no unsigned 64-bit integer, so ids above 2^63 - 1 still cannot be represented exactly.
        ids_array->add(DB::toString(id), static_cast<Poco::Int64>(id));
    }

    cursor->query().selector().addNewDocument(dict_struct.id->name).add("$in", ids_array);

    return std::make_shared<MongoDBBlockInputStream>(connection, std::move(cursor), sample_block, max_block_size);
}
|
|
|
|
|
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
/** Creates a stream that loads only the documents matching one of the requested composite keys.
  * For every row index in `requested_rows` a sub-document { <key attribute>: <value>, ... } is built
  * from `key_columns`, and all sub-documents are combined with "$or".
  *
  * Throws Exception with code UNSUPPORTED_METHOD if the dictionary structure has no 'key'.
  */
BlockInputStreamPtr MongoDBDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
    if (!dict_struct.key)
        throw Exception{"'key' is required for selective loading", ErrorCodes::UNSUPPORTED_METHOD};

    auto cursor = createCursor(db, collection, sample_block);

    Poco::MongoDB::Array::Ptr keys_array(new Poco::MongoDB::Array);

    for (const auto row_idx : requested_rows)
    {
        /// Poco::MongoDB requires a name for every array element; the row index serves as one.
        auto & key = keys_array->addNewDocument(DB::toString(row_idx));

        for (const auto attr : ext::enumerate(*dict_struct.key))
        {
            const auto column_idx = attr.first;
            const auto & attribute = attr.second;

            switch (attribute.underlying_type)
            {
                case AttributeUnderlyingType::utUInt8:
                case AttributeUnderlyingType::utUInt16:
                case AttributeUnderlyingType::utUInt32:
                case AttributeUnderlyingType::utUInt64:
                case AttributeUnderlyingType::utUInt128:
                case AttributeUnderlyingType::utInt8:
                case AttributeUnderlyingType::utInt16:
                case AttributeUnderlyingType::utInt32:
                case AttributeUnderlyingType::utInt64:
                case AttributeUnderlyingType::utDecimal32:
                case AttributeUnderlyingType::utDecimal64:
                case AttributeUnderlyingType::utDecimal128:
                    /// Keep all 64 bits returned by get64(): narrowing to Int32 silently wrapped values
                    /// outside the 32-bit range, which made the filter match the wrong documents.
                    /// MongoDB compares numbers by value across BSON numeric types, so 32-bit stored values still match.
                    key.add(attribute.name, static_cast<Poco::Int64>(key_columns[column_idx]->get64(row_idx)));
                    break;

                case AttributeUnderlyingType::utFloat32:
                case AttributeUnderlyingType::utFloat64:
                    key.add(attribute.name, key_columns[column_idx]->getFloat64(row_idx));
                    break;

                case AttributeUnderlyingType::utString:
                {
                    String loaded_str(get<String>((*key_columns[column_idx])[row_idx]));

                    /// Convert string to ObjectID
                    if (attribute.is_object_id)
                    {
                        Poco::MongoDB::ObjectId::Ptr loaded_id(new Poco::MongoDB::ObjectId(loaded_str));
                        key.add(attribute.name, loaded_id);
                    }
                    else
                    {
                        key.add(attribute.name, loaded_str);
                    }
                    break;
                }
            }
        }
    }

    /// If more than one key we should use $or
    cursor->query().selector().add("$or", keys_array);

    return std::make_shared<MongoDBBlockInputStream>(connection, std::move(cursor), sample_block, max_block_size);
}
|
|
|
|
|
|
|
|
|
|
|
|
std::string MongoDBDictionarySource::toString() const
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
return "MongoDB: " + db + '.' + collection + ',' + (user.empty() ? " " : " " + user + '@') + host + ':' + DB::toString(port);
|
2016-12-08 02:49:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|