2022-06-23 20:04:06 +00:00
|
|
|
#include <Storages/StorageMongoDB.h>
|
|
|
|
#include <Storages/StorageMongoDBSocketFactory.h>
|
|
|
|
#include <Storages/StorageFactory.h>
|
|
|
|
#include <Storages/checkAndGetLiteralArgument.h>
|
2020-05-13 23:20:45 +00:00
|
|
|
|
|
|
|
#include <Poco/MongoDB/Connection.h>
|
|
|
|
#include <Poco/MongoDB/Cursor.h>
|
|
|
|
#include <Poco/MongoDB/Database.h>
|
|
|
|
#include <Poco/Version.h>
|
|
|
|
#include <Interpreters/evaluateConstantExpression.h>
|
|
|
|
#include <Core/Settings.h>
|
|
|
|
#include <Interpreters/Context.h>
|
|
|
|
#include <Common/parseAddress.h>
|
|
|
|
#include <IO/Operators.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
2021-10-16 14:03:50 +00:00
|
|
|
#include <QueryPipeline/Pipe.h>
|
2021-10-08 14:03:54 +00:00
|
|
|
#include <Processors/Transforms/MongoDBSource.h>
|
2022-06-05 13:14:02 +00:00
|
|
|
#include <Processors/Sinks/SinkToStorage.h>
|
2020-05-13 23:20:45 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
|
|
|
extern const int MONGODB_CANNOT_AUTHENTICATE;
|
2021-09-04 18:46:09 +00:00
|
|
|
extern const int BAD_ARGUMENTS;
|
2020-05-13 23:20:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
StorageMongoDB::StorageMongoDB(
|
|
|
|
const StorageID & table_id_,
|
|
|
|
const std::string & host_,
|
2020-05-26 22:20:25 +00:00
|
|
|
uint16_t port_,
|
2020-05-13 23:20:45 +00:00
|
|
|
const std::string & database_name_,
|
|
|
|
const std::string & collection_name_,
|
|
|
|
const std::string & username_,
|
|
|
|
const std::string & password_,
|
2021-03-23 15:01:13 +00:00
|
|
|
const std::string & options_,
|
2020-05-13 23:20:45 +00:00
|
|
|
const ColumnsDescription & columns_,
|
2021-04-23 12:18:23 +00:00
|
|
|
const ConstraintsDescription & constraints_,
|
|
|
|
const String & comment)
|
2020-05-13 23:20:45 +00:00
|
|
|
: IStorage(table_id_)
|
|
|
|
, database_name(database_name_)
|
|
|
|
, collection_name(collection_name_)
|
|
|
|
, username(username_)
|
|
|
|
, password(password_)
|
2021-07-28 15:28:30 +00:00
|
|
|
, uri("mongodb://" + host_ + ":" + std::to_string(port_) + "/" + database_name_ + "?" + options_)
|
2020-05-13 23:20:45 +00:00
|
|
|
{
|
2020-06-26 14:28:00 +00:00
|
|
|
StorageInMemoryMetadata storage_metadata;
|
|
|
|
storage_metadata.setColumns(columns_);
|
|
|
|
storage_metadata.setConstraints(constraints_);
|
2021-04-23 12:18:23 +00:00
|
|
|
storage_metadata.setComment(comment);
|
2020-06-26 14:28:00 +00:00
|
|
|
setInMemoryMetadata(storage_metadata);
|
2020-05-13 23:20:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-02-05 08:47:02 +00:00
|
|
|
void StorageMongoDB::connectIfNotConnected()
|
|
|
|
{
|
|
|
|
std::lock_guard lock{connection_mutex};
|
|
|
|
if (!connection)
|
2021-03-23 15:01:13 +00:00
|
|
|
{
|
|
|
|
StorageMongoDBSocketFactory factory;
|
|
|
|
connection = std::make_shared<Poco::MongoDB::Connection>(uri, factory);
|
|
|
|
}
|
2021-02-05 08:47:02 +00:00
|
|
|
|
2021-06-28 17:02:22 +00:00
|
|
|
if (!authenticated)
|
2021-02-05 08:47:02 +00:00
|
|
|
{
|
2021-12-13 16:03:24 +00:00
|
|
|
Poco::URI poco_uri(uri);
|
|
|
|
auto query_params = poco_uri.getQueryParameters();
|
|
|
|
auto auth_source = std::find_if(query_params.begin(), query_params.end(),
|
|
|
|
[&](const std::pair<std::string, std::string> & param) { return param.first == "authSource"; });
|
|
|
|
auto auth_db = database_name;
|
|
|
|
if (auth_source != query_params.end())
|
|
|
|
auth_db = auth_source->second;
|
2022-05-05 20:31:59 +00:00
|
|
|
#if POCO_VERSION >= 0x01070800
|
|
|
|
if (!username.empty() && !password.empty())
|
|
|
|
{
|
|
|
|
Poco::MongoDB::Database poco_db(auth_db);
|
|
|
|
if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1))
|
|
|
|
throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
authenticate(*connection, database_name, username, password);
|
|
|
|
#endif
|
2021-06-28 17:02:22 +00:00
|
|
|
authenticated = true;
|
2021-02-05 08:47:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2022-06-05 13:14:02 +00:00
|
|
|
class StorageMongoDBSink : public SinkToStorage
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
explicit StorageMongoDBSink(
|
|
|
|
const std::string & collection_name_,
|
|
|
|
const std::string & db_name_,
|
|
|
|
const StorageMetadataPtr & metadata_snapshot_,
|
2022-07-13 09:27:43 +00:00
|
|
|
std::shared_ptr<Poco::MongoDB::Connection> connection_)
|
2022-06-05 13:14:02 +00:00
|
|
|
: SinkToStorage(metadata_snapshot_->getSampleBlock())
|
|
|
|
, collection_name(collection_name_)
|
|
|
|
, db_name(db_name_)
|
|
|
|
, metadata_snapshot{metadata_snapshot_}
|
|
|
|
, connection(connection_)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2022-07-12 15:20:20 +00:00
|
|
|
String getName() const override { return "StorageMongoDBSink"; }
|
2022-06-05 13:14:02 +00:00
|
|
|
|
|
|
|
void consume(Chunk chunk) override
|
|
|
|
{
|
|
|
|
Poco::MongoDB::Database db(db_name);
|
|
|
|
Poco::MongoDB::Document::Ptr index = new Poco::MongoDB::Document();
|
|
|
|
|
2022-07-13 09:27:43 +00:00
|
|
|
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
|
|
|
|
|
|
|
|
size_t num_rows = block.rows();
|
|
|
|
size_t num_cols = block.columns();
|
|
|
|
|
2022-06-05 13:14:02 +00:00
|
|
|
const auto columns = block.getColumns();
|
|
|
|
const auto data_types = block.getDataTypes();
|
|
|
|
const auto data_names = block.getNames();
|
2022-07-13 09:27:43 +00:00
|
|
|
|
2022-06-05 13:14:02 +00:00
|
|
|
std::vector<std::string> row(num_cols);
|
|
|
|
for (const auto i : collections::range(0, num_rows))
|
|
|
|
{
|
|
|
|
for (const auto j : collections::range(0, num_cols))
|
|
|
|
{
|
|
|
|
WriteBufferFromOwnString ostr;
|
|
|
|
data_types[j]->getDefaultSerialization()->serializeText(*columns[j], i, ostr, FormatSettings{});
|
|
|
|
row[j] = ostr.str();
|
|
|
|
index->add(data_names[j], row[j]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Poco::SharedPtr<Poco::MongoDB::InsertRequest> insert_request = db.createInsertRequest(collection_name);
|
|
|
|
insert_request->documents().push_back(index);
|
|
|
|
connection->sendRequest(*insert_request);
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
String collection_name;
|
|
|
|
String db_name;
|
|
|
|
StorageMetadataPtr metadata_snapshot;
|
|
|
|
std::shared_ptr<Poco::MongoDB::Connection> connection;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2020-08-03 13:54:14 +00:00
|
|
|
/// Produces a single-source pipe that reads the requested columns from the collection.
/// The query info, context, processing stage and stream count are not used.
Pipe StorageMongoDB::read(
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & /*query_info*/,
    ContextPtr /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    size_t /*num_streams*/)
{
    connectIfNotConnected();

    storage_snapshot->check(column_names);

    /// Header of the result: the requested columns with the types of their physical counterparts.
    Block header;
    for (const auto & requested_name : column_names)
    {
        const auto physical_column = storage_snapshot->metadata->getColumns().getPhysical(requested_name);
        header.insert({ physical_column.type, physical_column.name });
    }

    auto source = std::make_shared<MongoDBSource>(
        connection,
        createCursor(database_name, collection_name, header),
        header,
        max_block_size);

    return Pipe(std::move(source));
}
|
|
|
|
|
2022-07-12 16:37:19 +00:00
|
|
|
/// Returns a sink that inserts data into this storage's collection.
/// The connection is established (and authenticated) first; the query and context are not used.
SinkToStoragePtr StorageMongoDB::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr /* context */)
{
    connectIfNotConnected();

    auto sink = std::make_shared<StorageMongoDBSink>(collection_name, database_name, metadata_snapshot, connection);
    return sink;
}
|
2021-09-03 11:16:32 +00:00
|
|
|
|
|
|
|
/// Builds the MongoDB storage configuration from the engine arguments.
///
/// Two forms are accepted:
///  - a named collection (resolved by getExternalDataSourceConfiguration), where the only
///    storage-specific key allowed on top of the common ones is `options`;
///  - positional arguments: 'host:port', database, collection, 'user', 'password' [, 'options'].
///
/// In both cases the resulting host and port are checked against the context's remote host filter.
///
/// Throws BAD_ARGUMENTS for an unknown key in the named-collection form and
/// NUMBER_OF_ARGUMENTS_DOESNT_MATCH when the positional form has fewer than 5 or more than 6 arguments.
StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, ContextPtr context)
{
    StorageMongoDBConfiguration configuration;
    if (auto named_collection = getExternalDataSourceConfiguration(engine_args, context))
    {
        auto [common_configuration, storage_specific_args, _] = named_collection.value();
        configuration.set(common_configuration);

        for (const auto & [arg_name, arg_value] : storage_specific_args)
        {
            if (arg_name == "options")
                configuration.options = checkAndGetLiteralArgument<String>(arg_value, "options");
            else
                throw Exception(ErrorCodes::BAD_ARGUMENTS,
                    "Unexpected key-value argument. "
                    "Got: {}, but expected one of: "
                    "host, port, username, password, database, table, options.", arg_name);
        }
    }
    else
    {
        if (engine_args.size() < 5 || engine_args.size() > 6)
            throw Exception(
                "Storage MongoDB requires from 5 to 6 parameters: MongoDB('host:port', database, collection, 'user', 'password' [, 'options']).",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        for (auto & engine_arg : engine_args)
            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);

        /// 27017 is the default MongoDB port.
        auto parsed_host_port = parseAddress(checkAndGetLiteralArgument<String>(engine_args[0], "host:port"), 27017);

        configuration.host = parsed_host_port.first;
        configuration.port = parsed_host_port.second;
        configuration.database = checkAndGetLiteralArgument<String>(engine_args[1], "database");
        configuration.table = checkAndGetLiteralArgument<String>(engine_args[2], "table");
        configuration.username = checkAndGetLiteralArgument<String>(engine_args[3], "username");
        configuration.password = checkAndGetLiteralArgument<String>(engine_args[4], "password");

        /// The label must name the argument being validated (presumably it ends up in the
        /// error reported for a non-literal argument), hence "options" and not "database".
        if (engine_args.size() >= 6)
            configuration.options = checkAndGetLiteralArgument<String>(engine_args[5], "options");
    }

    context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port));

    return configuration;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Registers the "MongoDB" table engine in the storage factory.
void registerStorageMongoDB(StorageFactory & factory)
{
    /// Creator: parses the engine arguments and constructs the storage from them.
    auto create_storage = [](const StorageFactory::Arguments & args)
    {
        const auto config = StorageMongoDB::getConfiguration(args.engine_args, args.getLocalContext());

        return std::make_shared<StorageMongoDB>(
            args.table_id,
            config.host,
            config.port,
            config.database,
            config.table,
            config.username,
            config.password,
            config.options,
            args.columns,
            args.constraints,
            args.comment);
    };

    factory.registerStorage(
        "MongoDB",
        create_storage,
        {
            .source_access_type = AccessType::MONGO,
        });
}
|
|
|
|
|
|
|
|
}
|