ClickHouse/dbms/src/Storages/StorageMySQL.cpp

251 lines
8.1 KiB
C++
Raw Normal View History

#include "StorageMySQL.h"
#if USE_MYSQL
#include <Storages/StorageFactory.h>
2017-12-26 21:34:06 +00:00
#include <Storages/transformQueryForExternalDatabase.h>
2019-02-15 11:46:07 +00:00
#include <Formats/MySQLBlockInputStream.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Core/Settings.h>
#include <Interpreters/Context.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Formats/FormatFactory.h>
#include <Common/parseAddress.h>
#include <IO/Operators.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ASTLiteral.h>
#include <mysqlxx/Transaction.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
2018-05-14 11:00:22 +00:00
extern const int BAD_ARGUMENTS;
}
/// Returns the identifier `x` as written by writeBackQuotedStringMySQL (MySQL back-quote style).
String backQuoteMySQL(const String & x)
{
    /// The result string starts out as long as the input and serves as the buffer's backing storage.
    String quoted(x.size(), '\0');

    {
        /// The buffer is scoped so that it is destroyed before `quoted` is returned
        /// (presumably its destructor finalizes the string's size - confirm in WriteBufferFromString).
        WriteBufferFromString out(quoted);
        writeBackQuotedStringMySQL(x, out);
    }

    return quoted;
}
/// Constructs the storage.
///  database_name_ / table_name_               - stored as this table's own database and table name;
///  pool_                                      - MySQL connection pool, moved into the storage;
///  remote_database_name_ / remote_table_name_ - database and table on the MySQL side;
///  replace_query_                             - when true, writes use REPLACE instead of INSERT;
///  on_duplicate_clause_                       - text appended after " ON DUPLICATE KEY " on writes (may be empty);
///  columns_                                   - column description passed to the IStorage base;
///  context_                                   - kept as `global_context`.
StorageMySQL::StorageMySQL(
    const std::string & database_name_,
    const std::string & table_name_,
    mysqlxx::Pool && pool_,
    const std::string & remote_database_name_,
    const std::string & remote_table_name_,
    const bool replace_query_,
    const std::string & on_duplicate_clause_,
    const ColumnsDescription & columns_,
    const Context & context_)
    : IStorage{columns_}
    , table_name(table_name_)
    , database_name(database_name_)
    , remote_database_name(remote_database_name_)
    , remote_table_name(remote_table_name_)
    , replace_query(replace_query_)
    , on_duplicate_clause(on_duplicate_clause_)
    , pool(std::move(pool_))
    , global_context(context_)
{
}
/// Reads from the remote MySQL table.
/// The incoming SELECT is rewritten by transformQueryForExternalDatabase (MySQL back-tick quoting)
/// against the remote database/table, and a single MySQLBlockInputStream over a pooled
/// connection is returned. The processing stage and the last (unnamed) argument are unused.
BlockInputStreams StorageMySQL::read(
    const Names & column_names_,
    const SelectQueryInfo & query_info_,
    const Context & context_,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size_,
    unsigned)
{
    check(column_names_);

    String remote_query = transformQueryForExternalDatabase(
        *query_info_.query,
        getColumns().getOrdinary(),
        IdentifierQuotingStyle::BackticksMySQL,
        remote_database_name,
        remote_table_name,
        context_);

    /// Header of the result: the requested columns, in the requested order.
    Block header;
    for (const String & requested_name : column_names_)
    {
        auto column = getColumn(requested_name);
        header.insert({ column.type, column.name });
    }

    auto stream = std::make_shared<MySQLBlockInputStream>(pool.Get(), remote_query, header, max_block_size_);
    return { stream };
}
2017-12-26 21:34:06 +00:00
class StorageMySQLBlockOutputStream : public IBlockOutputStream
{
public:
2019-08-03 11:02:40 +00:00
explicit StorageMySQLBlockOutputStream(const StorageMySQL & storage_,
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const mysqlxx::PoolWithFailover::Entry & entry_,
2018-05-14 11:00:22 +00:00
const size_t & mysql_max_rows_to_insert)
2019-08-03 11:02:40 +00:00
: storage{storage_}
, remote_database_name{remote_database_name_}
, remote_table_name{remote_table_name_}
, entry{entry_}
, max_batch_rows{mysql_max_rows_to_insert}
{
}
2018-05-14 11:00:22 +00:00
Block getHeader() const override { return storage.getSampleBlock(); }
void write(const Block & block) override
{
auto blocks = splitBlocks(block, max_batch_rows);
mysqlxx::Transaction trans(entry);
try
{
for (const Block & batch_data : blocks)
{
writeBlockData(batch_data);
}
trans.commit();
}
catch (...)
2018-05-14 11:00:22 +00:00
{
trans.rollback();
throw;
}
}
void writeBlockData(const Block & block)
{
WriteBufferFromOwnString sqlbuf;
sqlbuf << (storage.replace_query ? "REPLACE" : "INSERT") << " INTO ";
sqlbuf << backQuoteMySQL(remote_database_name) << "." << backQuoteMySQL(remote_table_name);
2018-11-24 01:48:06 +00:00
sqlbuf << " (" << dumpNamesWithBackQuote(block) << ") VALUES ";
2018-05-14 11:00:22 +00:00
auto writer = FormatFactory::instance().getOutput("Values", sqlbuf, storage.getSampleBlock(), storage.global_context);
2018-05-14 11:00:22 +00:00
writer->write(block);
if (!storage.on_duplicate_clause.empty())
sqlbuf << " ON DUPLICATE KEY " << storage.on_duplicate_clause;
sqlbuf << ";";
auto query = this->entry->query(sqlbuf.str());
query.execute();
}
Blocks splitBlocks(const Block & block, const size_t & max_rows) const
{
/// Avoid Excessive copy when block is small enough
if (block.rows() <= max_rows)
return Blocks{std::move(block)};
const size_t splited_block_size = ceil(block.rows() * 1.0 / max_rows);
Blocks splitted_blocks(splited_block_size);
for (size_t idx = 0; idx < splited_block_size; ++idx)
splitted_blocks[idx] = block.cloneEmpty();
const size_t columns = block.columns();
const size_t rows = block.rows();
size_t offsets = 0;
2019-02-10 15:17:45 +00:00
UInt64 limits = max_batch_rows;
2018-05-14 11:00:22 +00:00
for (size_t idx = 0; idx < splited_block_size; ++idx)
{
/// For last batch, limits should be the remain size
if (idx == splited_block_size - 1) limits = rows - offsets;
for (size_t col_idx = 0; col_idx < columns; ++col_idx)
{
splitted_blocks[idx].getByPosition(col_idx).column = block.getByPosition(col_idx).column->cut(offsets, limits);
}
offsets += max_batch_rows;
}
return splitted_blocks;
}
std::string dumpNamesWithBackQuote(const Block & block) const
{
WriteBufferFromOwnString out;
for (auto it = block.begin(); it != block.end(); ++it)
{
if (it != block.begin())
out << ", ";
out << backQuoteMySQL(it->name);
2018-05-14 11:00:22 +00:00
}
return out.str();
}
private:
2018-05-14 11:00:22 +00:00
const StorageMySQL & storage;
std::string remote_database_name;
std::string remote_table_name;
mysqlxx::PoolWithFailover::Entry entry;
size_t max_batch_rows;
};
/// Creates an output stream that inserts into the remote table over a connection taken from the pool.
/// The per-statement row limit comes from the `mysql_max_rows_to_insert` setting of `context`.
BlockOutputStreamPtr StorageMySQL::write(
    const ASTPtr & /*query*/, const Context & context)
{
    const auto & settings = context.getSettingsRef();

    return std::make_shared<StorageMySQLBlockOutputStream>(
        *this,
        remote_database_name,
        remote_table_name,
        pool.Get(),
        settings.mysql_max_rows_to_insert);
}
/// Registers the "MySQL" table engine in `factory`.
/// Engine arguments:
///   MySQL('host:port', database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause'])
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH for a wrong argument count and BAD_ARGUMENTS
/// when both replace_query and a non-empty on_duplicate_clause are given.
void registerStorageMySQL(StorageFactory & factory)
{
    factory.registerStorage("MySQL", [](const StorageFactory::Arguments & args)
    {
        ASTs & engine_args = args.engine_args;
        const size_t args_count = engine_args.size();

        if (!(args_count >= 5 && args_count <= 7))
            throw Exception(
                "Storage MySQL requires 5-7 parameters: MySQL('host:port', database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause']).",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        /// Turn every argument into a literal (constant expressions and bare identifiers included).
        for (auto & engine_arg : engine_args)
            engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.local_context);

        /// Reads engine argument `index` as a String literal; result type mirrors safeGet<String>().
        const auto string_argument = [&engine_args](size_t index) -> decltype(auto)
        {
            return engine_args[index]->as<ASTLiteral &>().value.safeGet<String>();
        };

        /// 3306 is the default MySQL port.
        auto parsed_host_port = parseAddress(string_argument(0), 3306);

        const String & remote_database = string_argument(1);
        const String & remote_table = string_argument(2);
        const String & username = string_argument(3);
        const String & password = string_argument(4);

        mysqlxx::Pool pool(remote_database, parsed_host_port.first, username, password, parsed_host_port.second);

        /// Optional trailing arguments.
        bool replace_query = false;
        std::string on_duplicate_clause;

        if (args_count >= 6)
            replace_query = engine_args[5]->as<ASTLiteral &>().value.safeGet<UInt64>();

        if (args_count == 7)
            on_duplicate_clause = string_argument(6);

        if (replace_query && !on_duplicate_clause.empty())
            throw Exception(
                "Only one of 'replace_query' and 'on_duplicate_clause' can be specified, or none of them",
                ErrorCodes::BAD_ARGUMENTS);

        return StorageMySQL::create(
            args.database_name,
            args.table_name,
            std::move(pool),
            remote_database,
            remote_table,
            replace_query,
            on_duplicate_clause,
            args.columns,
            args.context);
    });
}
}
#endif