2014-01-28 16:45:10 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <DB/TableFunctions/ITableFunction.h>
|
2014-02-07 15:11:57 +00:00
|
|
|
|
#include <DB/Storages/StorageDistributed.h>
|
|
|
|
|
#include <DB/Parsers/ASTIdentifier.h>
|
2015-05-28 03:49:28 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypeFactory.h>
|
2014-02-07 15:11:57 +00:00
|
|
|
|
#include <DB/DataStreams/RemoteBlockInputStream.h>
|
2014-09-23 16:02:04 +00:00
|
|
|
|
#include <DB/Interpreters/reinterpretAsIdentifier.h>
|
2015-04-16 06:12:35 +00:00
|
|
|
|
#include <DB/Interpreters/Cluster.h>
|
2014-09-23 15:28:21 +00:00
|
|
|
|
|
2014-01-28 16:45:10 +00:00
|
|
|
|
|
2014-02-07 15:11:57 +00:00
|
|
|
|
struct data;
|
2014-01-28 16:45:10 +00:00
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* remote('address', db, table) - создаёт временный StorageDistributed.
|
|
|
|
|
* Чтобы получить структуру таблицы, делается запрос DESC TABLE на удалённый сервер.
|
|
|
|
|
* Например:
|
2014-02-23 00:37:25 +00:00
|
|
|
|
* SELECT count() FROM remote('example01-01-1', merge, hits) - пойти на example01-01-1, в БД merge, таблицу hits.
|
|
|
|
|
* В качестве имени хоста может быть указано также выражение, генерирующее множество шардов и реплик - см. ниже.
|
2014-01-28 16:45:10 +00:00
|
|
|
|
*/
|
|
|
|
|
|
2014-02-23 00:37:25 +00:00
|
|
|
|
class TableFunctionRemote : public ITableFunction
|
2014-01-28 16:45:10 +00:00
|
|
|
|
{
|
|
|
|
|
public:
|
2014-02-07 15:11:57 +00:00
|
|
|
|
/// Максимальное количество различных шардов и максимальное количество реплик одного шарда
|
2015-05-16 08:33:32 +00:00
|
|
|
|
const size_t MAX_ADDRESSES = 1000; /// TODO Перенести в Settings.
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2015-01-21 04:17:02 +00:00
|
|
|
|
std::string getName() const override { return "remote"; }
|
2014-01-28 16:45:10 +00:00
|
|
|
|
|
2014-02-25 17:37:50 +00:00
|
|
|
|
StoragePtr execute(ASTPtr ast_function, Context & context) const override
|
2014-01-28 16:45:10 +00:00
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
ASTs & args_func = typeid_cast<ASTFunction &>(*ast_function).children;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2015-02-27 21:05:56 +00:00
|
|
|
|
const char * err = "Table function 'remote' requires from 2 to 5 parameters: "
|
|
|
|
|
"addresses pattern, name of remote database, name of remote table, [username, [password]].";
|
2014-03-28 16:02:12 +00:00
|
|
|
|
|
2014-02-07 15:11:57 +00:00
|
|
|
|
if (args_func.size() != 1)
|
2014-03-28 16:02:12 +00:00
|
|
|
|
throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2014-03-28 16:02:12 +00:00
|
|
|
|
if (args.size() < 2 || args.size() > 5)
|
|
|
|
|
throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2015-02-27 21:05:56 +00:00
|
|
|
|
String description;
|
|
|
|
|
String remote_database;
|
|
|
|
|
String remote_table;
|
|
|
|
|
String username;
|
|
|
|
|
String password;
|
2014-03-28 16:02:12 +00:00
|
|
|
|
|
2015-02-27 21:05:56 +00:00
|
|
|
|
size_t arg_num = 0;
|
|
|
|
|
|
|
|
|
|
auto getStringLiteral = [](const IAST & node, const char * description)
|
2014-03-28 16:02:12 +00:00
|
|
|
|
{
|
2015-02-27 21:05:56 +00:00
|
|
|
|
const ASTLiteral * lit = typeid_cast<const ASTLiteral *>(&node);
|
|
|
|
|
if (!lit)
|
|
|
|
|
throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
|
|
|
|
|
if (lit->value.getType() != Field::Types::String)
|
|
|
|
|
throw Exception(description + String(" must be string literal (in single quotes)."), ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
|
|
|
|
|
return safeGet<const String &>(lit->value);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
description = getStringLiteral(*args[arg_num], "Hosts pattern");
|
|
|
|
|
++arg_num;
|
|
|
|
|
|
|
|
|
|
remote_database = reinterpretAsIdentifier(args[arg_num], context).name;
|
|
|
|
|
++arg_num;
|
|
|
|
|
|
|
|
|
|
size_t dot = remote_database.find('.');
|
|
|
|
|
if (dot != String::npos)
|
|
|
|
|
{
|
|
|
|
|
/// NOTE Плохо - не поддерживаются идентификаторы в обратных кавычках.
|
2014-03-28 16:02:12 +00:00
|
|
|
|
remote_table = remote_database.substr(dot + 1);
|
|
|
|
|
remote_database = remote_database.substr(0, dot);
|
|
|
|
|
}
|
2015-02-27 21:05:56 +00:00
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
if (arg_num >= args.size())
|
|
|
|
|
throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
|
|
remote_table = reinterpretAsIdentifier(args[arg_num], context).name;
|
|
|
|
|
++arg_num;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (arg_num < args.size())
|
|
|
|
|
{
|
|
|
|
|
username = getStringLiteral(*args[arg_num], "Username");
|
|
|
|
|
++arg_num;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
username = "default";
|
|
|
|
|
|
|
|
|
|
if (arg_num < args.size())
|
|
|
|
|
{
|
|
|
|
|
password = getStringLiteral(*args[arg_num], "Password");
|
|
|
|
|
++arg_num;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (arg_num < args.size())
|
|
|
|
|
throw Exception(err, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
|
|
|
|
/// В InterpreterSelectQuery будет создан ExpressionAnalzyer, который при обработке запроса наткнется на эти Identifier.
|
2015-02-27 21:05:56 +00:00
|
|
|
|
/// Нам необходимо их пометить как имя базы данных или таблицы, поскольку по умолчанию стоит значение column.
|
|
|
|
|
for (auto & arg : args)
|
|
|
|
|
if (ASTIdentifier * id = typeid_cast<ASTIdentifier *>(arg.get()))
|
|
|
|
|
id->kind = ASTIdentifier::Table;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2015-02-27 21:05:56 +00:00
|
|
|
|
std::vector<std::vector<String>> names;
|
|
|
|
|
std::vector<String> shards = parseDescription(description, 0, description.size(), ',');
|
2014-03-18 12:01:13 +00:00
|
|
|
|
|
2014-02-07 15:11:57 +00:00
|
|
|
|
for (size_t i = 0; i < shards.size(); ++i)
|
|
|
|
|
names.push_back(parseDescription(shards[i], 0, shards[i].size(), '|'));
|
|
|
|
|
|
2014-03-18 12:01:13 +00:00
|
|
|
|
if (names.empty())
|
|
|
|
|
throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
|
2015-05-28 03:49:28 +00:00
|
|
|
|
SharedPtr<Cluster> cluster = new Cluster(context.getSettings(), names, username, password);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
2014-02-22 22:42:36 +00:00
|
|
|
|
return StorageDistributed::create(getName(), chooseColumns(*cluster, remote_database, remote_table, context),
|
2014-03-10 04:17:17 +00:00
|
|
|
|
remote_database, remote_table, cluster, context);
|
2014-01-28 16:45:10 +00:00
|
|
|
|
}
|
|
|
|
|
|
2014-02-07 15:11:57 +00:00
|
|
|
|
private:
|
|
|
|
|
/// Узнать имена и типы столбцов для создания таблицы
|
2014-02-22 22:42:36 +00:00
|
|
|
|
NamesAndTypesListPtr chooseColumns(Cluster & cluster, const String & database, const String & table, const Context & context) const
|
2014-02-07 15:11:57 +00:00
|
|
|
|
{
|
|
|
|
|
/// Запрос на описание таблицы
|
2015-09-05 01:22:09 +00:00
|
|
|
|
String query = "DESC TABLE " + backQuoteIfNeed(database) + "." + backQuoteIfNeed(table);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
Settings settings = context.getSettings();
|
|
|
|
|
NamesAndTypesList res;
|
2014-02-22 22:42:36 +00:00
|
|
|
|
|
2014-04-07 00:00:23 +00:00
|
|
|
|
/// Отправляем на первый попавшийся шард
|
2014-12-17 11:53:17 +00:00
|
|
|
|
BlockInputStreamPtr input{
|
|
|
|
|
new RemoteBlockInputStream{
|
2015-02-10 20:48:17 +00:00
|
|
|
|
cluster.pools.front().get(), query, &settings, nullptr,
|
|
|
|
|
Tables(), QueryProcessingStage::Complete, context}
|
2014-12-17 11:53:17 +00:00
|
|
|
|
};
|
2014-02-22 22:42:36 +00:00
|
|
|
|
input->readPrefix();
|
|
|
|
|
|
2015-05-28 03:49:28 +00:00
|
|
|
|
const DataTypeFactory & data_type_factory = DataTypeFactory::instance();
|
|
|
|
|
|
2014-02-22 22:42:36 +00:00
|
|
|
|
while (true)
|
2014-02-07 15:11:57 +00:00
|
|
|
|
{
|
2014-02-22 22:42:36 +00:00
|
|
|
|
Block current = input->read();
|
|
|
|
|
if (!current)
|
|
|
|
|
break;
|
|
|
|
|
ColumnPtr name = current.getByName("name").column;
|
|
|
|
|
ColumnPtr type = current.getByName("type").column;
|
|
|
|
|
size_t size = name->size();
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
2014-02-07 15:11:57 +00:00
|
|
|
|
{
|
2014-02-22 22:42:36 +00:00
|
|
|
|
String column_name = (*name)[i].get<const String &>();
|
|
|
|
|
String data_type_name = (*type)[i].get<const String &>();
|
|
|
|
|
|
2015-05-28 03:49:28 +00:00
|
|
|
|
res.emplace_back(column_name, data_type_factory.get(data_type_name));
|
2014-02-07 15:11:57 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-02-22 22:42:36 +00:00
|
|
|
|
|
|
|
|
|
return new NamesAndTypesList(std::move(res));
|
2014-02-07 15:11:57 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Декартово произведение двух множеств строк, результат записываем на место первого аргумента
|
|
|
|
|
void append(std::vector<String> & to, const std::vector<String> & what) const
|
|
|
|
|
{
|
|
|
|
|
if (what.empty()) return;
|
|
|
|
|
if (to.empty())
|
|
|
|
|
{
|
|
|
|
|
to = what;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if (what.size() * to.size() > MAX_ADDRESSES)
|
|
|
|
|
throw Exception("Storage Distributed, first argument generates too many result addresses",
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
std::vector<String> res;
|
|
|
|
|
for (size_t i = 0; i < to.size(); ++i)
|
|
|
|
|
for (size_t j = 0; j < what.size(); ++j)
|
|
|
|
|
res.push_back(to[i] + what[j]);
|
|
|
|
|
to.swap(res);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Парсим число из подстроки
|
2014-02-22 22:42:36 +00:00
|
|
|
|
static bool parseNumber(const String & description, size_t l, size_t r, size_t & res)
|
2014-02-07 15:11:57 +00:00
|
|
|
|
{
|
|
|
|
|
res = 0;
|
|
|
|
|
for (size_t pos = l; pos < r; pos ++)
|
|
|
|
|
{
|
|
|
|
|
if (!isdigit(description[pos]))
|
|
|
|
|
return false;
|
|
|
|
|
res = res * 10 + description[pos] - '0';
|
|
|
|
|
if (res > 1e15)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2015-05-28 21:41:28 +00:00
|
|
|
|
/* Парсит строку, генерирующую шарды и реплики. Разделитель - один из двух символов | или ,
|
2014-02-07 15:11:57 +00:00
|
|
|
|
* в зависимости от того генерируются шарды или реплики.
|
|
|
|
|
* Например:
|
|
|
|
|
* host1,host2,... - порождает множество шардов из host1, host2, ...
|
|
|
|
|
* host1|host2|... - порождает множество реплик из host1, host2, ...
|
|
|
|
|
* abc{8..10}def - порождает множество шардов abc8def, abc9def, abc10def.
|
|
|
|
|
* abc{08..10}def - порождает множество шардов abc08def, abc09def, abc10def.
|
|
|
|
|
* abc{x,yy,z}def - порождает множество шардов abcxdef, abcyydef, abczdef.
|
|
|
|
|
* abc{x|yy|z}def - порождает множество реплик abcxdef, abcyydef, abczdef.
|
|
|
|
|
* abc{1..9}de{f,g,h} - прямое произведение, 27 шардов.
|
|
|
|
|
* abc{1..9}de{0|1} - прямое произведение, 9 шардов, в каждом 2 реплики.
|
|
|
|
|
*/
|
2015-05-28 21:41:28 +00:00
|
|
|
|
std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator) const
|
2014-02-07 15:11:57 +00:00
|
|
|
|
{
|
|
|
|
|
std::vector<String> res;
|
|
|
|
|
std::vector<String> cur;
|
|
|
|
|
|
|
|
|
|
/// Пустая подстрока, означает множество из пустой строки
|
|
|
|
|
if (l >= r)
|
|
|
|
|
{
|
|
|
|
|
res.push_back("");
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = l; i < r; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Либо числовой интервал (8..10) либо аналогичное выражение в скобках
|
|
|
|
|
if (description[i] == '{')
|
|
|
|
|
{
|
|
|
|
|
int cnt = 1;
|
2014-02-07 15:12:58 +00:00
|
|
|
|
int last_dot = -1; /// Самая правая пара точек, запоминаем индекс правой из двух
|
2014-02-07 15:11:57 +00:00
|
|
|
|
size_t m;
|
|
|
|
|
std::vector<String> buffer;
|
2014-02-25 17:37:50 +00:00
|
|
|
|
bool have_splitter = false;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
|
|
|
|
|
/// Ищем соответствующую нашей закрывающую скобку
|
|
|
|
|
for (m = i + 1; m < r; ++m)
|
|
|
|
|
{
|
|
|
|
|
if (description[m] == '{') ++cnt;
|
|
|
|
|
if (description[m] == '}') --cnt;
|
|
|
|
|
if (description[m] == '.' && description[m-1] == '.') last_dot = m;
|
2015-05-28 21:41:28 +00:00
|
|
|
|
if (description[m] == separator) have_splitter = true;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
if (cnt == 0) break;
|
|
|
|
|
}
|
|
|
|
|
if (cnt != 0)
|
|
|
|
|
throw Exception("Storage Distributed, incorrect brace sequence in first argument",
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
/// Наличие точки означает, что числовой интервал
|
|
|
|
|
if (last_dot != -1)
|
|
|
|
|
{
|
|
|
|
|
size_t left, right;
|
|
|
|
|
if (description[last_dot - 1] != '.')
|
|
|
|
|
throw Exception("Storage Distributed, incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1),
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
2014-02-22 22:42:36 +00:00
|
|
|
|
if (!parseNumber(description, i + 1, last_dot - 1, left))
|
2014-02-07 15:11:57 +00:00
|
|
|
|
throw Exception("Storage Distributed, incorrect argument in braces (Incorrect left number): "
|
|
|
|
|
+ description.substr(i, m - i + 1),
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
2014-02-22 22:42:36 +00:00
|
|
|
|
if (!parseNumber(description, last_dot + 1, m, right))
|
2014-02-07 15:11:57 +00:00
|
|
|
|
throw Exception("Storage Distributed, incorrect argument in braces (Incorrect right number): "
|
|
|
|
|
+ description.substr(i, m - i + 1),
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
if (left > right)
|
|
|
|
|
throw Exception("Storage Distributed, incorrect argument in braces (left number is greater then right): "
|
|
|
|
|
+ description.substr(i, m - i + 1),
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
if (right - left + 1 > MAX_ADDRESSES)
|
|
|
|
|
throw Exception("Storage Distributed, first argument generates too many result addresses",
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
2014-05-15 10:39:52 +00:00
|
|
|
|
bool add_leading_zeroes = false;
|
|
|
|
|
size_t len = last_dot - 1 - (i + 1);
|
|
|
|
|
/// Если у левой и правой границы поровну цифр, значит необходимо дополнять лидирующими нулями.
|
|
|
|
|
if (last_dot - 1 - (i + 1) == m - (last_dot + 1))
|
|
|
|
|
add_leading_zeroes = true;
|
2014-02-07 15:11:57 +00:00
|
|
|
|
for (size_t id = left; id <= right; ++id)
|
2014-05-15 10:39:52 +00:00
|
|
|
|
{
|
|
|
|
|
String cur = toString<uint64>(id);
|
|
|
|
|
if (add_leading_zeroes)
|
|
|
|
|
{
|
|
|
|
|
while (cur.size() < len)
|
|
|
|
|
cur = "0" + cur;
|
|
|
|
|
}
|
|
|
|
|
buffer.push_back(cur);
|
|
|
|
|
}
|
2014-02-25 17:37:50 +00:00
|
|
|
|
} else if (have_splitter) /// Если внутри есть текущий разделитель, то сгенерировать множество получаемых строк
|
2015-05-28 21:41:28 +00:00
|
|
|
|
buffer = parseDescription(description, i + 1, m, separator);
|
2014-02-07 15:11:57 +00:00
|
|
|
|
else /// Иначе просто скопировать, порождение произойдет при вызове с правильным разделителем
|
|
|
|
|
buffer.push_back(description.substr(i, m - i + 1));
|
|
|
|
|
/// К текущему множеству строк добавить все возможные полученные продолжения
|
|
|
|
|
append(cur, buffer);
|
|
|
|
|
i = m;
|
2015-05-28 21:41:28 +00:00
|
|
|
|
} else if (description[i] == separator) {
|
2014-02-07 15:11:57 +00:00
|
|
|
|
/// Если разделитель, то добавляем в ответ найденные строки
|
|
|
|
|
res.insert(res.end(), cur.begin(), cur.end());
|
|
|
|
|
cur.clear();
|
|
|
|
|
} else {
|
|
|
|
|
/// Иначе просто дописываем символ к текущим строкам
|
|
|
|
|
std::vector<String> buffer;
|
|
|
|
|
buffer.push_back(description.substr(i, 1));
|
|
|
|
|
append(cur, buffer);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
res.insert(res.end(), cur.begin(), cur.end());
|
|
|
|
|
if (res.size() > MAX_ADDRESSES)
|
|
|
|
|
throw Exception("Storage Distributed, first argument generates too many result addresses",
|
|
|
|
|
ErrorCodes::BAD_ARGUMENTS);
|
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
};
|
2014-01-28 16:45:10 +00:00
|
|
|
|
|
|
|
|
|
}
|