ClickHouse/src/Storages/StorageMySQL.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

383 lines
14 KiB
C++
Raw Normal View History

#include "StorageMySQL.h"
#if USE_MYSQL
#include <Storages/StorageFactory.h>
2017-12-26 21:34:06 +00:00
#include <Storages/transformQueryForExternalDatabase.h>
2021-12-13 22:06:46 +00:00
#include <Storages/MySQL/MySQLHelpers.h>
#include <Storages/checkAndGetLiteralArgument.h>
2021-10-15 20:18:20 +00:00
#include <Processors/Sources/MySQLSource.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <DataTypes/DataTypeString.h>
#include <Formats/FormatFactory.h>
2021-10-11 16:11:50 +00:00
#include <Processors/Formats/IOutputFormat.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTCreateQuery.h>
#include <mysqlxx/Transaction.h>
2021-07-23 19:33:59 +00:00
#include <Processors/Sinks/SinkToStorage.h>
2021-10-16 14:03:50 +00:00
#include <QueryPipeline/Pipe.h>
#include <Common/parseRemoteDescription.h>
#include <Common/quoteString.h>
2022-04-27 15:05:45 +00:00
#include <Common/logger_useful.h>
2023-02-20 20:37:38 +00:00
#include <Storages/NamedCollectionsHelpers.h>
#include <Databases/MySQL/FetchTablesColumnsList.h>
namespace DB
{
namespace ErrorCodes
{
    /// Error codes thrown from this file; the constants are only declared here.
    extern const int BAD_ARGUMENTS;                     /// Conflicting or invalid engine options / settings.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;  /// Wrong number of positional engine arguments.
    extern const int UNKNOWN_TABLE;                     /// Requested table is missing from the fetched column list.
}
/// Constructs the storage for one remote MySQL table.
///
/// The connection pool is stored behind a shared pointer, and the object keeps the
/// global context obtained from `context_`. When `columns_` is empty the table
/// structure is obtained through getTableStructureFromData(); otherwise the given
/// column list is used as is.
StorageMySQL::StorageMySQL(
    const StorageID & table_id_,
    mysqlxx::PoolWithFailover && pool_,
    const std::string & remote_database_name_,
    const std::string & remote_table_name_,
    const bool replace_query_,
    const std::string & on_duplicate_clause_,
    const ColumnsDescription & columns_,
    const ConstraintsDescription & constraints_,
    const String & comment,
    ContextPtr context_,
    const MySQLSettings & mysql_settings_)
    : IStorage(table_id_)
    , WithContext(context_->getGlobalContext())
    , remote_database_name(remote_database_name_)
    , remote_table_name(remote_table_name_)
    , replace_query{replace_query_}
    , on_duplicate_clause{on_duplicate_clause_}
    , mysql_settings(mysql_settings_)
    , pool(std::make_shared<mysqlxx::PoolWithFailover>(pool_))
    , log(getLogger("StorageMySQL (" + table_id_.table_name + ")"))
{
    StorageInMemoryMetadata metadata;

    if (!columns_.empty())
    {
        metadata.setColumns(columns_);
    }
    else
    {
        /// No explicit column list was given: look the structure up via the pool.
        /// The caller-supplied (not the stored global) context provides the settings.
        metadata.setColumns(getTableStructureFromData(*pool, remote_database_name, remote_table_name, context_));
    }

    metadata.setConstraints(constraints_);
    metadata.setComment(comment);
    setInMemoryMetadata(metadata);
}
/// Returns the column description of `table` in `database`, looked up through `pool_`.
///
/// Calls fetchTablesColumnsList() for the single requested table, passing the settings
/// of `context_` together with its `mysql_datatypes_support_level` setting.
///
/// Throws UNKNOWN_TABLE if the fetched result has no entry for `table`.
ColumnsDescription StorageMySQL::getTableStructureFromData(
    mysqlxx::PoolWithFailover & pool_,
    const String & database,
    const String & table,
    const ContextPtr & context_)
{
    const auto & settings = context_->getSettingsRef();
    const auto tables_and_columns = fetchTablesColumnsList(pool_, database, {table}, settings, settings.mysql_datatypes_support_level);

    const auto columns = tables_and_columns.find(table);
    if (columns == tables_and_columns.end())
    {
        /// Always name the table in the message; only the database qualifier is optional.
        /// (An empty database name used to yield an empty identifier: "MySQL table  doesn't exist.")
        const String qualified_name = (database.empty() ? String{} : backQuote(database) + ".") + backQuote(table);
        throw Exception(ErrorCodes::UNKNOWN_TABLE, "MySQL table {} doesn't exist.", qualified_name);
    }

    return columns->second;
}
2020-08-03 13:54:14 +00:00
/// Builds a pipe that reads `column_names_` from the remote table.
///
/// The SELECT is rewritten by transformQueryForExternalDatabase() with MySQL backtick
/// quoting and sent through MySQLWithFailoverSource. The processing stage, block size
/// and stream count arguments are ignored.
Pipe StorageMySQL::read(
    const Names & column_names_,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & query_info_,
    ContextPtr context_,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t /*max_block_size*/,
    size_t /*num_streams*/)
{
    storage_snapshot->check(column_names_);

    const auto & table_columns = storage_snapshot->metadata->getColumns();

    String query = transformQueryForExternalDatabase(
        query_info_,
        column_names_,
        table_columns.getOrdinary(),
        IdentifierQuotingStyle::BackticksMySQL,
        LiteralEscapingStyle::Regular,
        remote_database_name,
        remote_table_name,
        context_);
    LOG_TRACE(log, "Query: {}", query);

    /// Header describing the columns handed to the source.
    Block header;
    for (const String & name : column_names_)
    {
        auto column = table_columns.getPhysical(name);

        /// Enum columns are declared as String in the header.
        if (WhichDataType(column.type).isEnum())
            column.type = std::make_shared<DataTypeString>();

        header.insert({ column.type, column.name });
    }

    StreamSettings stream_settings(context_->getSettingsRef(), mysql_settings.connection_auto_close);
    return Pipe(std::make_shared<MySQLWithFailoverSource>(pool, query, header, stream_settings));
}
2017-12-26 21:34:06 +00:00
2021-07-23 19:33:59 +00:00
class StorageMySQLSink : public SinkToStorage
{
public:
2021-07-23 19:33:59 +00:00
explicit StorageMySQLSink(
const StorageMySQL & storage_,
const StorageMetadataPtr & metadata_snapshot_,
2019-08-03 11:02:40 +00:00
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const mysqlxx::PoolWithFailover::Entry & entry_,
2018-05-14 11:00:22 +00:00
const size_t & mysql_max_rows_to_insert)
2021-07-26 10:08:40 +00:00
: SinkToStorage(metadata_snapshot_->getSampleBlock())
2021-07-23 19:33:59 +00:00
, storage{storage_}
, metadata_snapshot{metadata_snapshot_}
2019-08-03 11:02:40 +00:00
, remote_database_name{remote_database_name_}
, remote_table_name{remote_table_name_}
, entry{entry_}
, max_batch_rows{mysql_max_rows_to_insert}
{
}
2021-07-23 19:33:59 +00:00
String getName() const override { return "StorageMySQLSink"; }
2018-05-14 11:00:22 +00:00
2021-07-23 19:33:59 +00:00
void consume(Chunk chunk) override
2018-05-14 11:00:22 +00:00
{
auto block = getHeader().cloneWithColumns(chunk.detachColumns());
2018-05-14 11:00:22 +00:00
auto blocks = splitBlocks(block, max_batch_rows);
mysqlxx::Transaction trans(entry);
try
{
for (const Block & batch_data : blocks)
{
writeBlockData(batch_data);
}
trans.commit();
}
catch (...)
2018-05-14 11:00:22 +00:00
{
trans.rollback();
throw;
}
}
void writeBlockData(const Block & block)
{
WriteBufferFromOwnString sqlbuf;
sqlbuf << (storage.replace_query ? "REPLACE" : "INSERT") << " INTO ";
if (!remote_database_name.empty())
sqlbuf << backQuoteMySQL(remote_database_name) << ".";
sqlbuf << backQuoteMySQL(remote_table_name);
2018-11-24 01:48:06 +00:00
sqlbuf << " (" << dumpNamesWithBackQuote(block) << ") VALUES ";
2018-05-14 11:00:22 +00:00
2021-10-11 16:11:50 +00:00
auto writer = FormatFactory::instance().getOutputFormat("Values", sqlbuf, metadata_snapshot->getSampleBlock(), storage.getContext());
2018-05-14 11:00:22 +00:00
writer->write(block);
if (!storage.on_duplicate_clause.empty())
sqlbuf << " ON DUPLICATE KEY " << storage.on_duplicate_clause;
sqlbuf << ";";
auto query = this->entry->query(sqlbuf.str());
query.execute();
}
Blocks splitBlocks(const Block & block, const size_t & max_rows) const
{
/// Avoid Excessive copy when block is small enough
if (block.rows() <= max_rows)
return {block};
2018-05-14 11:00:22 +00:00
2022-09-11 01:21:34 +00:00
const size_t split_block_size = static_cast<size_t>(ceil(block.rows() * 1.0 / max_rows));
2021-06-28 17:02:22 +00:00
Blocks split_blocks(split_block_size);
2018-05-14 11:00:22 +00:00
2021-06-28 17:02:22 +00:00
for (size_t idx = 0; idx < split_block_size; ++idx)
split_blocks[idx] = block.cloneEmpty();
2018-05-14 11:00:22 +00:00
const size_t columns = block.columns();
const size_t rows = block.rows();
size_t offsets = 0;
2019-02-10 15:17:45 +00:00
UInt64 limits = max_batch_rows;
2021-06-28 17:02:22 +00:00
for (size_t idx = 0; idx < split_block_size; ++idx)
2018-05-14 11:00:22 +00:00
{
/// For last batch, limits should be the remain size
2021-06-28 17:02:22 +00:00
if (idx == split_block_size - 1) limits = rows - offsets;
2018-05-14 11:00:22 +00:00
for (size_t col_idx = 0; col_idx < columns; ++col_idx)
{
2021-06-28 17:02:22 +00:00
split_blocks[idx].getByPosition(col_idx).column = block.getByPosition(col_idx).column->cut(offsets, limits);
2018-05-14 11:00:22 +00:00
}
offsets += max_batch_rows;
}
2021-06-28 17:02:22 +00:00
return split_blocks;
2018-05-14 11:00:22 +00:00
}
2020-03-18 00:57:00 +00:00
static std::string dumpNamesWithBackQuote(const Block & block)
2018-05-14 11:00:22 +00:00
{
WriteBufferFromOwnString out;
for (auto it = block.begin(); it != block.end(); ++it)
{
if (it != block.begin())
out << ", ";
out << backQuoteMySQL(it->name);
2018-05-14 11:00:22 +00:00
}
return out.str();
}
private:
2018-05-14 11:00:22 +00:00
const StorageMySQL & storage;
StorageMetadataPtr metadata_snapshot;
2018-05-14 11:00:22 +00:00
std::string remote_database_name;
std::string remote_table_name;
mysqlxx::PoolWithFailover::Entry entry;
size_t max_batch_rows;
};
/// Creates a sink for inserting into the remote table.
///
/// A connection is taken from the pool for the sink, and the row limit is read from the
/// `mysql_max_rows_to_insert` setting of `local_context`. The query AST and the
/// async-insert flag are ignored.
SinkToStoragePtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
{
    const auto & settings = local_context->getSettingsRef();
    auto connection = pool->get();

    return std::make_shared<StorageMySQLSink>(
        *this,
        metadata_snapshot,
        remote_database_name,
        remote_table_name,
        connection,
        settings.mysql_max_rows_to_insert);
}
2023-02-20 20:37:38 +00:00
/// Converts a named collection into a storage configuration.
///
/// The collection is validated first: user/username, password and database/db are
/// required (plus "table" when `require_table` is set); connection options and every
/// MySQLSettings name are optional. Any MySQLSettings key present in the collection is
/// then applied to `storage_settings`.
StorageMySQL::Configuration StorageMySQL::processNamedCollectionResult(
    const NamedCollection & named_collection, MySQLSettings & storage_settings, ContextPtr context_, bool require_table)
{
    using KeysSet = ValidateKeysMultiset<ExternalDatabaseEqualKeysSet>;

    auto all_settings = storage_settings.all();

    KeysSet required_arguments = {"user", "username", "password", "database", "db"};
    if (require_table)
        required_arguments.insert("table");

    KeysSet optional_arguments = {"replace_query", "on_duplicate_clause", "addresses_expr", "host", "hostname", "port"};
    for (const auto & setting : all_settings)
        optional_arguments.insert(setting.getName());

    validateNamedCollection<KeysSet>(named_collection, required_arguments, optional_arguments);

    StorageMySQL::Configuration configuration;

    configuration.addresses_expr = named_collection.getOrDefault<String>("addresses_expr", "");
    if (!configuration.addresses_expr.empty())
    {
        /// An address pattern was given: expand it, using 3306 as the default port.
        size_t max_addresses = context_->getSettingsRef().glob_expansion_max_elements;
        configuration.addresses = parseRemoteDescriptionForExternalDatabase(
            configuration.addresses_expr, max_addresses, 3306);
    }
    else
    {
        /// Single address given as separate host and port keys.
        configuration.host = named_collection.getAnyOrDefault<String>({"host", "hostname"}, "");
        configuration.port = static_cast<UInt16>(named_collection.get<UInt64>("port"));
        configuration.addresses = {std::make_pair(configuration.host, configuration.port)};
    }

    configuration.username = named_collection.getAny<String>({"username", "user"});
    configuration.password = named_collection.get<String>("password");
    configuration.database = named_collection.getAny<String>({"db", "database"});
    if (require_table)
        configuration.table = named_collection.get<String>("table");
    configuration.replace_query = named_collection.getOrDefault<UInt64>("replace_query", false);
    configuration.on_duplicate_clause = named_collection.getOrDefault<String>("on_duplicate_clause", "");

    /// Override storage settings with the values found in the collection.
    for (const auto & setting : all_settings)
    {
        const auto & setting_name = setting.getName();
        if (!named_collection.has(setting_name))
            continue;
        storage_settings.set(setting_name, named_collection.get<String>(setting_name));
    }

    return configuration;
}
/// Builds the storage configuration from the engine arguments.
///
/// The arguments are either a named collection (handled by processNamedCollectionResult)
/// or 5 to 7 positional values:
/// MySQL('host:port' | 'addresses_pattern', database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause']).
///
/// Every resulting address is checked against the remote host filter.
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH for a wrong positional argument count and
/// BAD_ARGUMENTS when both replace_query and on_duplicate_clause are set.
StorageMySQL::Configuration StorageMySQL::getConfiguration(ASTs engine_args, ContextPtr context_, MySQLSettings & storage_settings)
{
    StorageMySQL::Configuration configuration;

    auto named_collection = tryGetNamedCollectionWithOverrides(engine_args, context_);
    if (named_collection)
    {
        configuration = StorageMySQL::processNamedCollectionResult(*named_collection, storage_settings, context_);
    }
    else
    {
        const size_t args_count = engine_args.size();
        if (!(args_count >= 5 && args_count <= 7))
            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage MySQL requires 5-7 parameters: "
                            "MySQL('host:port' (or 'addresses_pattern'), database, table, "
                            "'user', 'password'[, replace_query, 'on_duplicate_clause']).");

        /// Fold each argument into a literal before reading it.
        for (auto & engine_arg : engine_args)
            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context_);

        configuration.addresses_expr = checkAndGetLiteralArgument<String>(engine_args[0], "host:port");
        size_t max_addresses = context_->getSettingsRef().glob_expansion_max_elements;
        configuration.addresses = parseRemoteDescriptionForExternalDatabase(configuration.addresses_expr, max_addresses, 3306);

        configuration.database = checkAndGetLiteralArgument<String>(engine_args[1], "database");
        configuration.table = checkAndGetLiteralArgument<String>(engine_args[2], "table");
        configuration.username = checkAndGetLiteralArgument<String>(engine_args[3], "username");
        configuration.password = checkAndGetLiteralArgument<String>(engine_args[4], "password");

        /// Optional trailing arguments.
        if (args_count >= 6)
            configuration.replace_query = checkAndGetLiteralArgument<UInt64>(engine_args[5], "replace_query");
        if (args_count == 7)
            configuration.on_duplicate_clause = checkAndGetLiteralArgument<String>(engine_args[6], "on_duplicate_clause");
    }

    for (const auto & [host, port] : configuration.addresses)
        context_->getRemoteHostFilter().checkHostAndPort(host, toString(port));

    const bool has_on_duplicate_clause = !configuration.on_duplicate_clause.empty();
    if (configuration.replace_query && has_on_duplicate_clause)
        throw Exception(ErrorCodes::BAD_ARGUMENTS,
                        "Only one of 'replace_query' and 'on_duplicate_clause' can be specified, or none of them");

    return configuration;
}
/// Registers the "MySQL" table engine in `factory`.
///
/// The creator parses the engine arguments, applies the SETTINGS clause of the storage
/// definition (if any), rejects a zero `connection_pool_size`, creates the connection
/// pool and constructs the StorageMySQL instance.
void registerStorageMySQL(StorageFactory & factory)
{
    auto create_storage = [](const StorageFactory::Arguments & args)
    {
        MySQLSettings mysql_settings; /// TODO: move some arguments from the arguments to the SETTINGS.
        auto configuration = StorageMySQL::getConfiguration(args.engine_args, args.getLocalContext(), mysql_settings);

        /// Settings from the query are loaded after the configuration has been parsed.
        if (args.storage_def->settings)
            mysql_settings.loadFromQuery(*args.storage_def);

        if (!mysql_settings.connection_pool_size)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "connection_pool_size cannot be zero.");

        mysqlxx::PoolWithFailover pool = createMySQLPoolWithFailover(configuration, mysql_settings);

        return std::make_shared<StorageMySQL>(
            args.table_id,
            std::move(pool),
            configuration.database,
            configuration.table,
            configuration.replace_query,
            configuration.on_duplicate_clause,
            args.columns,
            args.constraints,
            args.comment,
            args.getContext(),
            mysql_settings);
    };

    factory.registerStorage("MySQL", create_storage,
    {
        .supports_settings = true,
        .supports_schema_inference = true,
        .source_access_type = AccessType::MYSQL,
    });
}
}
#endif