ClickHouse/src/Storages/StorageMongoDB.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

320 lines
12 KiB
C++
Raw Normal View History

#include <Storages/StorageMongoDB.h>
#include <Storages/StorageMongoDBSocketFactory.h>
#include <Storages/StorageFactory.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/NamedCollectionsHelpers.h>
2020-05-13 23:20:45 +00:00
#include <Poco/MongoDB/Connection.h>
#include <Poco/MongoDB/Cursor.h>
#include <Poco/MongoDB/Database.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <Common/parseAddress.h>
#include <Common/NamedCollections/NamedCollections.h>
2020-05-13 23:20:45 +00:00
#include <IO/Operators.h>
#include <Parsers/ASTLiteral.h>
2021-10-16 14:03:50 +00:00
#include <QueryPipeline/Pipe.h>
#include <Processors/Sources/MongoDBSource.h>
2022-06-05 13:14:02 +00:00
#include <Processors/Sinks/SinkToStorage.h>
2023-02-24 19:38:40 +00:00
#include <unordered_set>
2020-05-13 23:20:45 +00:00
2023-07-06 05:05:27 +00:00
#include <DataTypes/DataTypeArray.h>
2020-05-13 23:20:45 +00:00
namespace DB
{
/// Error codes used in this file. They are only declared here (`extern`);
/// the definitions live elsewhere in the project.
namespace ErrorCodes
{
/// Thrown by StorageMongoDB::getConfiguration() when the engine gets fewer than 5 or more than 6 arguments.
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
/// Thrown by StorageMongoDB::connectIfNotConnected() when MongoDB rejects the supplied credentials.
extern const int MONGODB_CANNOT_AUTHENTICATE;
}
/// Creates a table backed by a MongoDB collection.
///
/// The constructor only remembers the connection parameters and installs the
/// in-memory metadata (columns, constraints, comment). The `connection` member
/// is not touched here; it is created later by connectIfNotConnected().
///
/// The connection string is assembled as
///     mongodb://<host_>:<port_>/<database_name_>?<options_>
/// (the '?' is appended even when `options_` is empty).
StorageMongoDB::StorageMongoDB(
    const StorageID & table_id_,
    const std::string & host_,
    uint16_t port_,
    const std::string & database_name_,
    const std::string & collection_name_,
    const std::string & username_,
    const std::string & password_,
    const std::string & options_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    const String & comment)
    : IStorage(table_id_)
    , database_name(database_name_)
    , collection_name(collection_name_)
    , username(username_)
    , password(password_)
    , uri("mongodb://" + host_ + ":" + std::to_string(port_) + "/" + database_name_ + "?" + options_)
{
    StorageInMemoryMetadata metadata;
    metadata.setColumns(columns_);
    metadata.setConstraints(constraints_);
    metadata.setComment(comment);

    setInMemoryMetadata(metadata);
}
void StorageMongoDB::connectIfNotConnected()
{
std::lock_guard lock{connection_mutex};
if (!connection)
2021-03-23 15:01:13 +00:00
{
StorageMongoDBSocketFactory factory;
connection = std::make_shared<Poco::MongoDB::Connection>(uri, factory);
}
2021-06-28 17:02:22 +00:00
if (!authenticated)
{
2021-12-13 16:03:24 +00:00
Poco::URI poco_uri(uri);
auto query_params = poco_uri.getQueryParameters();
auto auth_source = std::find_if(query_params.begin(), query_params.end(),
[&](const std::pair<std::string, std::string> & param) { return param.first == "authSource"; });
auto auth_db = database_name;
if (auth_source != query_params.end())
auth_db = auth_source->second;
2022-12-28 18:02:06 +00:00
if (!username.empty() && !password.empty())
{
Poco::MongoDB::Database poco_db(auth_db);
if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1))
throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password");
}
2022-12-28 18:02:06 +00:00
2021-06-28 17:02:22 +00:00
authenticated = true;
}
}
2022-06-05 13:14:02 +00:00
class StorageMongoDBSink : public SinkToStorage
{
public:
explicit StorageMongoDBSink(
const std::string & collection_name_,
const std::string & db_name_,
const StorageMetadataPtr & metadata_snapshot_,
2022-07-13 09:27:43 +00:00
std::shared_ptr<Poco::MongoDB::Connection> connection_)
2022-06-05 13:14:02 +00:00
: SinkToStorage(metadata_snapshot_->getSampleBlock())
, collection_name(collection_name_)
, db_name(db_name_)
, metadata_snapshot{metadata_snapshot_}
, connection(connection_)
2023-05-22 02:02:09 +00:00
, is_wire_protocol_old(isMongoDBWireProtocolOld(*connection_))
2022-06-05 13:14:02 +00:00
{
}
2022-07-12 15:20:20 +00:00
String getName() const override { return "StorageMongoDBSink"; }
2022-06-05 13:14:02 +00:00
void consume(Chunk chunk) override
{
Poco::MongoDB::Database db(db_name);
2023-05-22 02:02:09 +00:00
Poco::MongoDB::Document::Vector documents;
2022-06-05 13:14:02 +00:00
2022-07-13 09:27:43 +00:00
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
size_t num_rows = block.rows();
size_t num_cols = block.columns();
2022-06-05 13:14:02 +00:00
const auto columns = block.getColumns();
const auto data_types = block.getDataTypes();
const auto data_names = block.getNames();
2022-07-13 09:27:43 +00:00
2023-05-22 02:02:09 +00:00
documents.reserve(num_rows);
2022-06-05 13:14:02 +00:00
for (const auto i : collections::range(0, num_rows))
{
2023-05-22 02:02:09 +00:00
Poco::MongoDB::Document::Ptr document = new Poco::MongoDB::Document();
2022-06-05 13:14:02 +00:00
for (const auto j : collections::range(0, num_cols))
{
2023-07-06 05:05:27 +00:00
insertValueIntoMongoDB(*document, data_names[j], *data_types[j], *columns[j], i);
2022-06-05 13:14:02 +00:00
}
2023-05-22 02:02:09 +00:00
documents.push_back(std::move(document));
}
if (is_wire_protocol_old)
{
Poco::SharedPtr<Poco::MongoDB::InsertRequest> insert_request = db.createInsertRequest(collection_name);
insert_request->documents() = std::move(documents);
connection->sendRequest(*insert_request);
}
else
{
Poco::SharedPtr<Poco::MongoDB::OpMsgMessage> insert_request = db.createOpMsgMessage(collection_name);
insert_request->setCommandName(Poco::MongoDB::OpMsgMessage::CMD_INSERT);
insert_request->documents() = std::move(documents);
connection->sendRequest(*insert_request);
2022-06-05 13:14:02 +00:00
}
}
private:
2023-07-06 05:05:27 +00:00
void insertValueIntoMongoDB(
Poco::MongoDB::Document & document,
const std::string & name,
const IDataType & data_type,
const IColumn & column,
size_t idx)
{
WhichDataType which(data_type);
if (which.isArray())
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[idx - 1];
size_t next_offset = offsets[idx];
const IColumn & nested_column = column_array.getData();
const auto * array_type = assert_cast<const DataTypeArray *>(&data_type);
const DataTypePtr & nested_type = array_type->getNestedType();
Poco::MongoDB::Array::Ptr array = new Poco::MongoDB::Array();
for (size_t i = 0; i + offset < next_offset; ++i)
{
insertValueIntoMongoDB(*array, Poco::NumberFormatter::format(i), *nested_type, nested_column, i + offset);
}
document.add(name, array);
return;
}
/// MongoDB does not support UInt64 type, so just cast it to Int64
if (which.isNativeUInt())
2023-07-06 06:31:09 +00:00
document.add(name, static_cast<Poco::Int64>(column.getUInt(idx)));
2023-07-06 05:05:27 +00:00
else if (which.isNativeInt())
2023-07-06 06:31:09 +00:00
document.add(name, static_cast<Poco::Int64>(column.getInt(idx)));
2023-07-06 05:05:27 +00:00
else if (which.isFloat32())
document.add(name, static_cast<Float64>(column.getFloat32(idx)));
else if (which.isFloat64())
document.add(name, static_cast<Float64>(column.getFloat64(idx)));
else if (which.isDate())
document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000));
else if (which.isDateTime())
document.add(name, Poco::Timestamp(column.getUInt(idx) * 1000000));
else
{
WriteBufferFromOwnString ostr;
data_type.getDefaultSerialization()->serializeText(column, idx, ostr, FormatSettings{});
document.add(name, ostr.str());
}
}
2022-06-05 13:14:02 +00:00
String collection_name;
String db_name;
StorageMetadataPtr metadata_snapshot;
std::shared_ptr<Poco::MongoDB::Connection> connection;
2023-05-22 02:02:09 +00:00
const bool is_wire_protocol_old;
2022-06-05 13:14:02 +00:00
};
2020-08-03 13:54:14 +00:00
/// Builds the reading pipe for a SELECT.
///
/// Ensures the connection is established, verifies the requested columns
/// against the snapshot and creates a single MongoDBSource producing blocks of
/// at most `max_block_size` rows with exactly the requested columns.
/// The query info, context, processing stage and stream count are not used.
Pipe StorageMongoDB::read(
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & /*query_info*/,
    ContextPtr /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    size_t /*num_streams*/)
{
    connectIfNotConnected();

    storage_snapshot->check(column_names);

    /// Header of the result: the requested physical columns, in the requested order.
    Block header;
    for (const String & requested_name : column_names)
    {
        auto physical = storage_snapshot->metadata->getColumns().getPhysical(requested_name);
        header.insert({ physical.type, physical.name });
    }

    /// An empty document is handed to the source (presumably the query/filter - confirm against MongoDBSource).
    auto source = std::make_shared<MongoDBSource>(
        connection, database_name, collection_name, Poco::MongoDB::Document{}, header, max_block_size);

    return Pipe(std::move(source));
}
/// Creates the sink used by INSERT.
///
/// Ensures the connection is established first, then hands the shared
/// connection together with the collection and database names to a new
/// StorageMongoDBSink. The query, context and async-insert flag are not used.
SinkToStoragePtr StorageMongoDB::write(
    const ASTPtr & /* query */,
    const StorageMetadataPtr & metadata_snapshot,
    ContextPtr /* context */,
    bool /*async_insert*/)
{
    connectIfNotConnected();

    return std::make_shared<StorageMongoDBSink>(
        collection_name,
        database_name,
        metadata_snapshot,
        connection);
}
2021-09-03 11:16:32 +00:00
/// Extracts the MongoDB connection settings from the engine arguments.
///
/// Two forms are accepted:
///  - a named collection (optionally with overrides), from which host, port,
///    user, password, database and collection are read, plus optional `options`;
///  - positional arguments
///        MongoDB('host:port', database, collection, 'user', 'password' [, 'options'])
///    where every argument is first evaluated to a literal.
///
/// In both cases the resulting host and port are passed to the context's
/// remote host filter before the configuration is returned.
///
/// @param engine_args  engine arguments; taken by value because the positional
///                     branch rewrites each element with its evaluated literal
/// @param context      query context used for named collections, constant
///                     evaluation and the remote host filter
/// @return the filled Configuration
/// @throws Exception(NUMBER_OF_ARGUMENTS_DOESNT_MATCH) when the positional form
///         has fewer than 5 or more than 6 arguments
StorageMongoDB::Configuration StorageMongoDB::getConfiguration(ASTs engine_args, ContextPtr context)
{
    Configuration configuration;

    if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context))
    {
        validateNamedCollection(
            *named_collection,
            ValidateKeysMultiset<MongoDBEqualKeysSet>{"host", "port", "user", "username", "password", "database", "db", "collection", "table"},
            {"options"});

        /// NOTE(review): "hostname" is accepted here although it is not listed in the key set
        /// above - presumably MongoDBEqualKeysSet treats it as an alias of "host"; confirm.
        configuration.host = named_collection->getAny<String>({"host", "hostname"});
        configuration.port = static_cast<UInt16>(named_collection->get<UInt64>("port"));
        configuration.username = named_collection->getAny<String>({"user", "username"});
        configuration.password = named_collection->get<String>("password");
        configuration.database = named_collection->getAny<String>({"database", "db"});
        configuration.table = named_collection->getAny<String>({"collection", "table"});
        configuration.options = named_collection->getOrDefault<String>("options", "");
    }
    else
    {
        if (engine_args.size() < 5 || engine_args.size() > 6)
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
                            "Storage MongoDB requires from 5 to 6 parameters: "
                            "MongoDB('host:port', database, collection, 'user', 'password' [, 'options']).");

        for (auto & engine_arg : engine_args)
            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context);

        /// 27017 is the default MongoDB port.
        auto parsed_host_port = parseAddress(checkAndGetLiteralArgument<String>(engine_args[0], "host:port"), 27017);

        configuration.host = parsed_host_port.first;
        configuration.port = parsed_host_port.second;
        configuration.database = checkAndGetLiteralArgument<String>(engine_args[1], "database");
        configuration.table = checkAndGetLiteralArgument<String>(engine_args[2], "table");
        configuration.username = checkAndGetLiteralArgument<String>(engine_args[3], "username");
        configuration.password = checkAndGetLiteralArgument<String>(engine_args[4], "password");

        /// The second argument is the name reported when the literal has the wrong type,
        /// so it must name this argument ("options"), not "database".
        if (engine_args.size() >= 6)
            configuration.options = checkAndGetLiteralArgument<String>(engine_args[5], "options");
    }

    context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port));

    return configuration;
}
/// Registers the "MongoDB" table engine in the storage factory.
///
/// The creator parses the engine arguments with StorageMongoDB::getConfiguration()
/// and constructs the storage from the result plus the table id, columns,
/// constraints and comment supplied by the factory. The engine is registered
/// with source access type AccessType::MONGO.
void registerStorageMongoDB(StorageFactory & factory)
{
    auto create_storage = [](const StorageFactory::Arguments & args)
    {
        auto config = StorageMongoDB::getConfiguration(args.engine_args, args.getLocalContext());

        return std::make_shared<StorageMongoDB>(
            args.table_id,
            config.host,
            config.port,
            config.database,
            config.table,
            config.username,
            config.password,
            config.options,
            args.columns,
            args.constraints,
            args.comment);
    };

    factory.registerStorage(
        "MongoDB",
        create_storage,
        {
            .source_access_type = AccessType::MONGO,
        });
}
}