ClickHouse/dbms/src/Storages/StorageFactory.cpp

835 lines
34 KiB
C++
Raw Normal View History

#include <Poco/Util/Application.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <DB/Core/FieldVisitors.h>
#include <DB/Common/StringUtils.h>
2012-05-22 19:32:56 +00:00
#include <DB/Parsers/ASTCreateQuery.h>
#include <DB/Parsers/ASTIdentifier.h>
2012-05-30 05:53:09 +00:00
#include <DB/Parsers/ASTLiteral.h>
2012-05-22 19:32:56 +00:00
#include <DB/Interpreters/Context.h>
#include <DB/Interpreters/evaluateConstantExpression.h>
#include <DB/Interpreters/getClusterName.h>
2012-05-22 19:32:56 +00:00
2011-10-31 17:30:44 +00:00
#include <DB/Storages/StorageLog.h>
2012-06-25 00:17:19 +00:00
#include <DB/Storages/StorageTinyLog.h>
#include <DB/Storages/StorageStripeLog.h>
2011-10-31 17:55:06 +00:00
#include <DB/Storages/StorageMemory.h>
#include <DB/Storages/StorageBuffer.h>
#include <DB/Storages/StorageNull.h>
2012-05-30 05:53:09 +00:00
#include <DB/Storages/StorageMerge.h>
2012-07-18 19:30:33 +00:00
#include <DB/Storages/StorageMergeTree.h>
2012-05-22 19:32:56 +00:00
#include <DB/Storages/StorageDistributed.h>
2015-09-24 03:50:09 +00:00
#include <DB/Storages/System/StorageSystemNumbers.h>
#include <DB/Storages/System/StorageSystemOne.h>
2011-10-31 17:30:44 +00:00
#include <DB/Storages/StorageFactory.h>
#include <DB/Storages/StorageView.h>
#include <DB/Storages/StorageMaterializedView.h>
2014-03-21 19:17:59 +00:00
#include <DB/Storages/StorageReplicatedMergeTree.h>
2015-01-27 00:52:03 +00:00
#include <DB/Storages/StorageSet.h>
#include <DB/Storages/StorageJoin.h>
#include <DB/Storages/StorageFile.h>
2016-04-24 09:44:47 +00:00
#include <DB/AggregateFunctions/AggregateFunctionFactory.h>
2011-10-31 17:30:44 +00:00
#include <unistd.h>
2011-10-31 17:30:44 +00:00
namespace DB
{
namespace ErrorCodes
{
extern const int TABLE_MUST_NOT_BE_CREATED_MANUALLY;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_STORAGE;
extern const int NO_REPLICA_NAME_GIVEN;
2016-04-24 09:44:47 +00:00
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int UNKNOWN_ELEMENT_IN_CONFIG;
extern const int UNKNOWN_IDENTIFIER;
}
2014-03-18 17:20:44 +00:00
/** Для StorageMergeTree: достать первичный ключ в виде ASTExpressionList.
* Он может быть указан в кортеже: (CounterID, Date),
* или как один столбец: CounterID.
*/
2014-11-22 02:22:30 +00:00
static ASTPtr extractPrimaryKey(const ASTPtr & node)
2014-03-18 17:20:44 +00:00
{
const ASTFunction * primary_expr_func = typeid_cast<const ASTFunction *>(&*node);
2014-03-18 17:20:44 +00:00
if (primary_expr_func && primary_expr_func->name == "tuple")
{
/// Первичный ключ указан в кортеже.
return primary_expr_func->children.at(0);
}
else
{
/// Первичный ключ состоит из одного столбца.
auto res = std::make_shared<ASTExpressionList>();
2014-03-18 17:20:44 +00:00
res->children.push_back(node);
return res;
2014-03-18 17:20:44 +00:00
}
}
2014-11-22 02:22:30 +00:00
/** Для StorageMergeTree: достать список имён столбцов.
* Он может быть указан в кортеже: (Clicks, Cost),
* или как один столбец: Clicks.
*/
static Names extractColumnNames(const ASTPtr & node)
{
const ASTFunction * expr_func = typeid_cast<const ASTFunction *>(&*node);
if (expr_func && expr_func->name == "tuple")
{
const auto & elements = expr_func->children.at(0)->children;
Names res;
res.reserve(elements.size());
for (const auto & elem : elements)
res.push_back(typeid_cast<const ASTIdentifier &>(*elem).name);
return res;
}
else
{
return { typeid_cast<const ASTIdentifier &>(*node).name };
}
}
2014-03-18 17:20:44 +00:00
2016-04-24 09:44:47 +00:00
/** Прочитать настройки прореживания старых данных Графита из конфига.
* Пример:
*
* <graphite_rollup>
* <path_column_name>Path</path_column_name>
* <pattern>
* <regexp>click_cost</regexp>
* <function>any</function>
* <retention>
* <age>0</age>
* <precision>3600</precision>
* </retention>
* <retention>
* <age>86400</age>
* <precision>60</precision>
* </retention>
* </pattern>
* <default>
* <function>max</function>
* <retention>
* <age>0</age>
* <precision>60</precision>
* </retention>
* <retention>
* <age>3600</age>
* <precision>300</precision>
* </retention>
* <retention>
* <age>86400</age>
* <precision>3600</precision>
* </retention>
* </default>
* </graphite_rollup>
*/
static void appendGraphitePattern(const Context & context,
const Poco::Util::AbstractConfiguration & config, const String & config_element, Graphite::Patterns & patterns)
{
Graphite::Pattern pattern;
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_element, keys);
for (const auto & key : keys)
{
if (key == "regexp")
{
pattern.regexp = std::make_shared<OptimizedRegularExpression>(config.getString(config_element + ".regexp"));
}
else if (key == "function")
{
/// TODO Не только Float64
pattern.function = context.getAggregateFunctionFactory().get(
config.getString(config_element + ".function"), { std::make_shared<DataTypeFloat64>() });
2016-04-24 09:44:47 +00:00
}
else if (startsWith(key, "retention"))
{
pattern.retentions.emplace_back(
Graphite::Retention{
.age = config.getUInt(config_element + "." + key + ".age"),
.precision = config.getUInt(config_element + "." + key + ".precision")});
}
else
throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
if (!pattern.function)
throw Exception("Aggregate function is mandatory for retention patterns in GraphiteMergeTree",
ErrorCodes::NO_ELEMENTS_IN_CONFIG);
/// retention-ы должны идти по убыванию возраста.
std::sort(pattern.retentions.begin(), pattern.retentions.end(),
[] (const Graphite::Retention & a, const Graphite::Retention & b) { return a.age > b.age; });
patterns.emplace_back(pattern);
}
static void setGraphitePatternsFromConfig(const Context & context,
const String & config_element, Graphite::Params & params)
{
const Poco::Util::AbstractConfiguration & config = Poco::Util::Application::instance().config();
2016-04-24 10:39:18 +00:00
if (!config.has(config_element))
throw Exception("No '" + config_element + "' element in configuration file",
ErrorCodes::NO_ELEMENTS_IN_CONFIG);
2016-04-24 09:44:47 +00:00
params.path_column_name = config.getString(config_element + ".path_column_name", "Path");
params.time_column_name = config.getString(config_element + ".time_column_name", "Time");
params.value_column_name = config.getString(config_element + ".value_column_name", "Value");
params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp");
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_element, keys);
for (const auto & key : keys)
{
if (startsWith(key, "pattern"))
{
appendGraphitePattern(context, config, config_element + "." + key, params.patterns);
}
else if (key == "default")
{
/// Ниже.
}
else
throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
}
if (config.has(config_element + ".default"))
appendGraphitePattern(context, config, config_element + "." + ".default", params.patterns);
}
2011-10-31 17:30:44 +00:00
StoragePtr StorageFactory::get(
const String & name,
const String & data_path,
const String & table_name,
const String & database_name,
Context & local_context,
2012-05-22 19:32:56 +00:00
Context & context,
ASTPtr & query,
NamesAndTypesListPtr columns,
const NamesAndTypesList & materialized_columns,
const NamesAndTypesList & alias_columns,
const ColumnDefaults & column_defaults,
bool attach,
bool has_force_restore_data_flag) const
2011-10-31 17:30:44 +00:00
{
if (name == "Log")
2012-05-22 19:32:56 +00:00
{
return StorageLog::create(
data_path, table_name, columns,
materialized_columns, alias_columns, column_defaults,
context.getSettings().max_compress_block_size);
2012-06-25 00:17:19 +00:00
}
else if (name == "View")
{
return StorageView::create(
table_name, database_name, context, query, columns,
materialized_columns, alias_columns, column_defaults);
}
else if (name == "MaterializedView")
{
return StorageMaterializedView::create(
table_name, database_name, context, query, columns,
materialized_columns, alias_columns, column_defaults,
attach);
}
2012-06-25 00:51:23 +00:00
else if (name == "TinyLog")
2012-06-25 00:17:19 +00:00
{
return StorageTinyLog::create(
data_path, table_name, columns,
materialized_columns, alias_columns, column_defaults,
attach, context.getSettings().max_compress_block_size);
2012-05-22 19:32:56 +00:00
}
else if (name == "StripeLog")
{
return StorageStripeLog::create(
data_path, table_name, columns,
materialized_columns, alias_columns, column_defaults,
attach, context.getSettings().max_compress_block_size);
}
else if (name == "File")
{
auto & func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage);
auto & args = typeid_cast<ASTExpressionList &>(*func.arguments).children;
constexpr auto error_msg = "Storage File requires 1 or 2 arguments: name of used format and source.";
if (func.parameters)
throw Exception(error_msg, ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS);
if (args.empty() || args.size() > 2)
throw Exception(error_msg, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
String format_name = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
int source_fd = -1;
String source_path;
if (args.size() >= 2)
{
/// Will use FD if args[1] is int literal or identifier with std* name
if (ASTIdentifier * identifier = typeid_cast<ASTIdentifier *>(args[1].get()))
{
if (identifier->name == "stdin")
source_fd = STDIN_FILENO;
else if (identifier->name == "stdout")
source_fd = STDOUT_FILENO;
else if (identifier->name == "stderr")
source_fd = STDERR_FILENO;
else
throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor",
ErrorCodes::UNKNOWN_IDENTIFIER);
}
if (ASTLiteral * literal = typeid_cast<ASTLiteral *>(args[1].get()))
{
auto type = literal->value.getType();
if (type == Field::Types::Int64)
source_fd = static_cast<int>(literal->value.get<Int64>());
else if (type == Field::Types::UInt64)
source_fd = static_cast<int>(literal->value.get<UInt64>());
}
args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
source_path = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
}
return StorageFile::create(
source_path, source_fd,
data_path, table_name, format_name, columns,
materialized_columns, alias_columns, column_defaults,
context);
}
2015-01-27 00:52:03 +00:00
else if (name == "Set")
{
return StorageSet::create(
data_path, table_name, columns,
materialized_columns, alias_columns, column_defaults);
}
else if (name == "Join")
{
/// Join(ANY, LEFT, k1, k2, ...)
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
constexpr auto params_error_message = "Storage Join requires at least 3 parameters: Join(ANY|ALL, LEFT|INNER, keys...).";
if (args_func.size() != 1)
throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() < 3)
throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const ASTIdentifier * strictness_id = typeid_cast<ASTIdentifier *>(&*args[0]);
if (!strictness_id)
throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS);
const String strictness_str = Poco::toLower(strictness_id->name);
ASTTableJoin::Strictness strictness;
if (strictness_str == "any")
strictness = ASTTableJoin::Strictness::Any;
else if (strictness_str == "all")
strictness = ASTTableJoin::Strictness::All;
else
throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS);
const ASTIdentifier * kind_id = typeid_cast<ASTIdentifier *>(&*args[1]);
if (!kind_id)
throw Exception("Second parameter of storage Join must be LEFT or INNER (without quotes).", ErrorCodes::BAD_ARGUMENTS);
const String kind_str = Poco::toLower(kind_id->name);
ASTTableJoin::Kind kind;
if (kind_str == "left")
kind = ASTTableJoin::Kind::Left;
else if (kind_str == "inner")
kind = ASTTableJoin::Kind::Inner;
else if (kind_str == "right")
kind = ASTTableJoin::Kind::Right;
else if (kind_str == "full")
kind = ASTTableJoin::Kind::Full;
else
throw Exception("Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes).", ErrorCodes::BAD_ARGUMENTS);
Names key_names;
key_names.reserve(args.size() - 2);
for (size_t i = 2, size = args.size(); i < size; ++i)
{
const ASTIdentifier * key = typeid_cast<ASTIdentifier *>(&*args[i]);
if (!key)
throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS);
key_names.push_back(key->name);
}
return StorageJoin::create(
data_path, table_name,
key_names, kind, strictness,
columns, materialized_columns, alias_columns, column_defaults);
}
2011-10-31 17:55:06 +00:00
else if (name == "Memory")
2012-05-22 19:32:56 +00:00
{
return StorageMemory::create(table_name, columns, materialized_columns, alias_columns, column_defaults);
2012-05-22 19:32:56 +00:00
}
else if (name == "Null")
{
return StorageNull::create(table_name, columns, materialized_columns, alias_columns, column_defaults);
}
2012-05-30 05:53:09 +00:00
else if (name == "Merge")
{
/** В запросе в качестве аргумента для движка указано имя БД, в которой находятся таблицы-источники,
* а также регексп для имён таблиц-источников.
*/
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
2012-05-30 05:53:09 +00:00
if (args_func.size() != 1)
throw Exception("Storage Merge requires exactly 2 parameters"
" - name of source database and regexp for table names.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
2012-05-30 05:53:09 +00:00
if (args.size() != 2)
throw Exception("Storage Merge requires exactly 2 parameters"
" - name of source database and regexp for table names.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
args[1] = evaluateConstantExpressionAsLiteral(args[1], local_context);
String source_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
String table_name_regexp = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
return StorageMerge::create(
table_name, columns,
materialized_columns, alias_columns, column_defaults,
source_database, table_name_regexp, context);
2012-05-30 05:53:09 +00:00
}
2012-05-22 19:32:56 +00:00
else if (name == "Distributed")
{
/** Arguments of engine is following:
* - name of cluster in configuration;
* - name of remote database;
* - name of remote table;
*
* Remote database may be specified in following form:
* - identifier;
* - constant expression with string result, like currentDatabase();
* -- string literal as specific case;
* - empty string means 'use default database from cluster'.
2012-05-22 19:32:56 +00:00
*/
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
2012-05-22 20:18:45 +00:00
2015-05-16 08:33:32 +00:00
const auto params_error_message = "Storage Distributed requires 3 or 4 parameters"
" - name of configuration section with list of remote servers, name of remote database, name of remote table,"
" sharding key expression (optional).";
2012-05-22 20:18:45 +00:00
if (args_func.size() != 1)
2015-05-16 08:33:32 +00:00
throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() != 3 && args.size() != 4)
2015-05-16 08:33:32 +00:00
throw Exception(params_error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
String cluster_name = getClusterName(*args[0]);
args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
args[2] = evaluateConstantExpressionOrIdentidierAsLiteral(args[2], local_context);
String remote_database = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
String remote_table = static_cast<const ASTLiteral &>(*args[2]).value.safeGet<String>();
const auto & sharding_key = args.size() == 4 ? args[3] : nullptr;
return StorageDistributed::create(
table_name, columns,
materialized_columns, alias_columns, column_defaults,
remote_database, remote_table, cluster_name,
context, sharding_key, data_path);
2012-05-22 19:32:56 +00:00
}
else if (name == "Buffer")
{
/** Buffer(db, table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
*
* db, table - в какую таблицу сбрасывать данные из буфера.
* num_buckets - уровень параллелизма.
* min_time, max_time, min_rows, max_rows, min_bytes, max_bytes - условия вытеснения из буфера.
*/
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
if (args_func.size() != 1)
throw Exception("Storage Buffer requires 9 parameters: "
" destination database, destination table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
ASTs & args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
if (args.size() != 9)
throw Exception("Storage Buffer requires 9 parameters: "
" destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
2016-08-26 18:06:24 +00:00
args[0] = evaluateConstantExpressionOrIdentidierAsLiteral(args[0], local_context);
args[1] = evaluateConstantExpressionOrIdentidierAsLiteral(args[1], local_context);
2016-08-26 18:06:24 +00:00
String destination_database = static_cast<const ASTLiteral &>(*args[0]).value.safeGet<String>();
String destination_table = static_cast<const ASTLiteral &>(*args[1]).value.safeGet<String>();
size_t num_buckets = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[2]).value);
time_t min_time = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[3]).value);
time_t max_time = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[4]).value);
size_t min_rows = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[5]).value);
size_t max_rows = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[6]).value);
size_t min_bytes = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[7]).value);
size_t max_bytes = apply_visitor(FieldVisitorConvertToNumber<size_t>(), typeid_cast<ASTLiteral &>(*args[8]).value);
return StorageBuffer::create(
table_name, columns,
materialized_columns, alias_columns, column_defaults,
context,
num_buckets, {min_time, min_rows, min_bytes}, {max_time, max_rows, max_bytes},
destination_database, destination_table);
}
2014-03-21 19:17:59 +00:00
else if (endsWith(name, "MergeTree"))
2012-07-18 19:30:33 +00:00
{
2016-04-24 09:44:47 +00:00
/** Движки [Replicated][|Summing|Collapsing|Aggregating|Unsorted|Replacing|Graphite]MergeTree (2 * 7 комбинаций)
2014-03-21 19:17:59 +00:00
* В качестве аргумента для движка должно быть указано:
* - (для Replicated) Путь к таблице в ZooKeeper
* - (для Replicated) Имя реплики в ZooKeeper
2012-07-18 19:30:33 +00:00
* - имя столбца с датой;
2016-04-15 17:42:51 +00:00
* - (не обязательно) выражение для семплирования
* (запрос с SAMPLE x будет выбирать строки, у которых в этом столбце значение меньше, чем x * UINT32_MAX);
2014-03-21 19:17:59 +00:00
* - выражение для сортировки (либо скалярное выражение, либо tuple из нескольких);
* - index_granularity;
2016-04-15 19:09:42 +00:00
* - (для Collapsing) имя столбца типа Int8, содержащего тип строчки с изменением "визита" (принимающего значения 1 и -1).
2014-03-21 19:17:59 +00:00
* Например: ENGINE = ReplicatedCollapsingMergeTree('/tables/mytable', 'rep02', EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID), 8192, Sign).
2014-11-22 02:22:30 +00:00
* - (для Summing, не обязательно) кортеж столбцов, которых следует суммировать. Если не задано - используются все числовые столбцы, не входящие в первичный ключ.
2016-04-15 19:09:42 +00:00
* - (для Replacing, не обязательно) имя столбца одного из UInt типов, обозначающего "версию"
2014-11-22 02:22:30 +00:00
* Например: ENGINE = ReplicatedCollapsingMergeTree('/tables/mytable', 'rep02', EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID), 8192, Sign).
2016-04-24 19:46:37 +00:00
* - (для Graphite) имя параметра в конфиге с настройками правил прореживания.
2014-11-22 02:22:30 +00:00
*
* MergeTree(date, [sample_key], primary_key, index_granularity)
* CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign)
* SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum])
* AggregatingMergeTree(date, [sample_key], primary_key, index_granularity)
2016-04-15 19:09:42 +00:00
* ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column])
2016-04-24 09:44:47 +00:00
* GraphiteMergeTree(date, [sample_key], primary_key, index_granularity, Path, Time, Value, Version)
2015-03-13 21:31:23 +00:00
* UnsortedMergeTree(date, index_granularity) TODO Добавить описание ниже.
2012-07-18 19:30:33 +00:00
*/
2014-11-22 02:22:30 +00:00
const char * verbose_help = R"(
MergeTree is family of storage engines.
MergeTrees is different in two ways:
- it may be replicated and non-replicated;
- it may do different actions on merge: nothing; sign collapse; sum; apply aggregete functions.
2016-04-24 09:44:47 +00:00
So we have 14 combinations:
MergeTree, CollapsingMergeTree, SummingMergeTree, AggregatingMergeTree, ReplacingMergeTree, UnsortedMergeTree, GraphiteMergeTree
ReplicatedMergeTree, ReplicatedCollapsingMergeTree, ReplicatedSummingMergeTree, ReplicatedAggregatingMergeTree, ReplicatedReplacingMergeTree, ReplicatedUnsortedMergeTree, ReplicatedGraphiteMergeTree
2014-11-22 02:22:30 +00:00
In most of cases, you need MergeTree or ReplicatedMergeTree.
For replicated merge trees, you need to supply path in ZooKeeper and replica name as first two parameters.
Path in ZooKeeper is like '/clickhouse/tables/01/' where /clickhouse/tables/ is common prefix and 01 is shard name.
Replica name is like 'mtstat01-1' - it may be hostname or any suitable string identifying replica.
You may use macro substitutions for these parameters. It's like ReplicatedMergeTree('/clickhouse/tables/{shard}/', '{replica}'...
Look at <macros> section in server configuration file.
Next parameter (which is first for unreplicated tables and third for replicated tables) is name of date column.
Date column must exist in table and have type Date (not DateTime).
It is used for internal data partitioning and works like some kind of index.
If your source data doesn't have column of type Date, but have DateTime column, you may add values for Date column while loading,
or you may INSERT your source data to table of type Log and then transform it with INSERT INTO t SELECT toDate(time) AS date, * FROM ...
If your source data doesn't have any date or time, you may just pass any constant for date column while loading.
Next parameter is optional sampling expression. Sampling expression is used to implement SAMPLE clause in query for approximate query execution.
If you don't need approximate query execution, simply omit this parameter.
Sample expression must be one of elements of primary key tuple. For example, if your primary key is (CounterID, EventDate, intHash64(UserID)), your sampling expression might be intHash64(UserID).
Next parameter is primary key tuple. It's like (CounterID, EventDate, intHash64(UserID)) - list of column names or functional expressions in round brackets. If your primary key have just one element, you may omit round brackets.
Careful choice of primary key is extremely important for processing short-time queries.
Next parameter is index (primary key) granularity. Good value is 8192. You have no reasons to use any other value.
For Collapsing mode, last parameter is name of sign column - special column that is used to 'collapse' rows with same primary key while merge.
For Summing mode, last parameter is optional list of columns to sum while merge. List is passed in round brackets, like (PageViews, Cost).
If this parameter is omitted, storage will sum all numeric columns except columns participated in primary key.
2016-04-15 19:09:42 +00:00
For Replacing mode, last parameter is optional name of 'version' column. While merging, for all rows with same primary key, only one row is selected: last row, if version column was not specified, or last row with maximum version value, if specified.
2014-11-22 02:22:30 +00:00
Examples:
MergeTree(EventDate, (CounterID, EventDate), 8192)
MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192)
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192, (Shows, Clicks, Cost, CostCur, ShowsSumPosition, ClicksSumPosition, SessionNum, SessionLen, SessionCost, GoalsNum, SessionDepth))
ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
For further info please read the documentation: https://clickhouse.yandex/
2014-11-22 02:22:30 +00:00
)";
2014-03-21 19:17:59 +00:00
String name_part = name.substr(0, name.size() - strlen("MergeTree"));
2012-07-18 19:30:33 +00:00
2014-03-21 19:17:59 +00:00
bool replicated = startsWith(name_part, "Replicated");
if (replicated)
name_part = name_part.substr(strlen("Replicated"));
2012-07-18 19:30:33 +00:00
2016-04-15 17:42:51 +00:00
MergeTreeData::MergingParams merging_params;
merging_params.mode = MergeTreeData::MergingParams::Ordinary;
2012-07-18 19:30:33 +00:00
2014-03-21 19:17:59 +00:00
if (name_part == "Collapsing")
2016-04-15 17:42:51 +00:00
merging_params.mode = MergeTreeData::MergingParams::Collapsing;
2014-03-21 19:17:59 +00:00
else if (name_part == "Summing")
2016-04-15 17:42:51 +00:00
merging_params.mode = MergeTreeData::MergingParams::Summing;
2014-05-28 14:54:42 +00:00
else if (name_part == "Aggregating")
2016-04-15 17:42:51 +00:00
merging_params.mode = MergeTreeData::MergingParams::Aggregating;
2015-03-13 21:31:23 +00:00
else if (name_part == "Unsorted")
2016-04-15 17:42:51 +00:00
merging_params.mode = MergeTreeData::MergingParams::Unsorted;
2016-04-15 19:09:42 +00:00
else if (name_part == "Replacing")
merging_params.mode = MergeTreeData::MergingParams::Replacing;
2016-04-24 10:39:18 +00:00
else if (name_part == "Graphite")
merging_params.mode = MergeTreeData::MergingParams::Graphite;
2014-03-21 19:17:59 +00:00
else if (!name_part.empty())
2014-11-22 02:22:30 +00:00
throw Exception("Unknown storage " + name + verbose_help, ErrorCodes::UNKNOWN_STORAGE);
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
2012-08-16 17:27:40 +00:00
2014-03-24 13:59:04 +00:00
ASTs args;
2012-08-16 17:27:40 +00:00
2014-03-24 13:59:04 +00:00
if (args_func.size() == 1)
args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
2012-08-16 17:27:40 +00:00
2014-11-22 02:22:30 +00:00
/// NOTE Слегка запутанно.
2016-04-24 09:44:47 +00:00
size_t num_additional_params = (replicated ? 2 : 0)
+ (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
2016-04-24 10:39:18 +00:00
+ (merging_params.mode == MergeTreeData::MergingParams::Graphite);
2014-11-22 02:22:30 +00:00
2016-04-15 17:42:51 +00:00
if (merging_params.mode == MergeTreeData::MergingParams::Unsorted
2015-03-13 21:31:23 +00:00
&& args.size() != num_additional_params + 2)
{
String params;
if (replicated)
params +=
"\npath in ZooKeeper,"
"\nreplica name,";
params +=
"\nname of column with date,"
"\nindex granularity\n";
throw Exception("Storage " + name + " requires "
+ toString(num_additional_params + 2) + " parameters: " + params + verbose_help,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
2016-04-15 17:42:51 +00:00
if (merging_params.mode != MergeTreeData::MergingParams::Summing
2016-04-15 19:09:42 +00:00
&& merging_params.mode != MergeTreeData::MergingParams::Replacing
2016-04-15 17:42:51 +00:00
&& merging_params.mode != MergeTreeData::MergingParams::Unsorted
2014-11-22 02:22:30 +00:00
&& args.size() != num_additional_params + 3
&& args.size() != num_additional_params + 4)
2014-03-21 19:17:59 +00:00
{
String params;
2014-11-22 02:22:30 +00:00
2014-03-21 19:17:59 +00:00
if (replicated)
2014-11-22 02:22:30 +00:00
params +=
"\npath in ZooKeeper,"
"\nreplica name,";
params +=
"\nname of column with date,"
"\n[sampling element of primary key],"
"\nprimary key expression,"
"\nindex granularity\n";
2016-04-15 17:42:51 +00:00
if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
params += ", sign column";
2014-11-22 02:22:30 +00:00
throw Exception("Storage " + name + " requires "
+ toString(num_additional_params + 3) + " or "
+ toString(num_additional_params + 4) + " parameters: " + params + verbose_help,
2012-08-16 17:27:40 +00:00
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
2014-03-21 19:17:59 +00:00
}
2012-08-16 17:27:40 +00:00
2016-04-15 19:09:42 +00:00
if ((merging_params.mode == MergeTreeData::MergingParams::Summing
|| merging_params.mode == MergeTreeData::MergingParams::Replacing)
2014-11-22 02:22:30 +00:00
&& args.size() != num_additional_params + 3
&& args.size() != num_additional_params + 4
&& args.size() != num_additional_params + 5)
{
String params;
if (replicated)
params +=
"\npath in ZooKeeper,"
"\nreplica name,";
params +=
"\nname of column with date,"
"\n[sampling element of primary key],"
"\nprimary key expression,"
2016-04-15 19:09:42 +00:00
"\nindex granularity,";
if (merging_params.mode == MergeTreeData::MergingParams::Summing)
params += "\n[list of columns to sum]\n";
else
params += "\n[version]\n";
2014-11-22 02:22:30 +00:00
throw Exception("Storage " + name + " requires "
+ toString(num_additional_params + 3) + " or "
+ toString(num_additional_params + 4) + " or "
+ toString(num_additional_params + 5) + " parameters: " + params + verbose_help,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
/// Для Replicated.
2014-03-21 19:17:59 +00:00
String zookeeper_path;
String replica_name;
2014-11-22 02:22:30 +00:00
/// Для всех.
2014-03-21 19:17:59 +00:00
String date_column_name;
ASTPtr primary_expr_list;
ASTPtr sampling_expression;
UInt64 index_granularity;
2012-08-16 17:27:40 +00:00
2014-03-21 19:17:59 +00:00
if (replicated)
{
auto ast = typeid_cast<ASTLiteral *>(&*args[0]);
2014-03-21 19:17:59 +00:00
if (ast && ast->value.getType() == Field::Types::String)
zookeeper_path = safeGet<String>(ast->value);
else
2014-11-22 02:22:30 +00:00
throw Exception(String("Path in ZooKeeper must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2014-03-21 19:17:59 +00:00
ast = typeid_cast<ASTLiteral *>(&*args[1]);
2014-03-21 19:17:59 +00:00
if (ast && ast->value.getType() == Field::Types::String)
replica_name = safeGet<String>(ast->value);
else
2014-11-22 02:22:30 +00:00
throw Exception(String("Replica name must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2014-03-21 19:17:59 +00:00
2014-04-14 10:18:23 +00:00
if (replica_name.empty())
2014-11-22 02:22:30 +00:00
throw Exception(String("No replica name in config") + verbose_help, ErrorCodes::NO_REPLICA_NAME_GIVEN);
2014-04-14 10:18:23 +00:00
2014-03-21 19:17:59 +00:00
args.erase(args.begin(), args.begin() + 2);
}
2016-04-15 17:42:51 +00:00
if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
2014-03-21 19:17:59 +00:00
{
if (auto ast = typeid_cast<ASTIdentifier *>(&*args.back()))
2016-04-15 17:13:51 +00:00
merging_params.sign_column = ast->name;
2014-03-21 19:17:59 +00:00
else
2014-11-22 02:22:30 +00:00
throw Exception(String("Sign column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2014-03-21 19:17:59 +00:00
args.pop_back();
}
2016-04-15 19:09:42 +00:00
else if (merging_params.mode == MergeTreeData::MergingParams::Replacing)
{
/// Если последний элемент - не index granularity (литерал), то это - имя столбца-версии.
if (!typeid_cast<const ASTLiteral *>(&*args.back()))
{
if (auto ast = typeid_cast<ASTIdentifier *>(&*args.back()))
merging_params.version_column = ast->name;
else
throw Exception(String("Version column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
args.pop_back();
}
}
2016-04-15 17:42:51 +00:00
else if (merging_params.mode == MergeTreeData::MergingParams::Summing)
2014-11-22 02:22:30 +00:00
{
/// Если последний элемент - не index granularity (литерал), то это - список суммируемых столбцов.
if (!typeid_cast<const ASTLiteral *>(&*args.back()))
{
2016-04-15 17:13:51 +00:00
merging_params.columns_to_sum = extractColumnNames(args.back());
2014-11-22 02:22:30 +00:00
args.pop_back();
}
}
2016-04-24 09:44:47 +00:00
else if (merging_params.mode == MergeTreeData::MergingParams::Graphite)
{
String graphite_config_name;
if (auto ast = typeid_cast<ASTLiteral *>(&*args.back()))
graphite_config_name = ast->value.get<String>();
else
throw Exception(String("Last parameter of GraphiteMergeTree must be name (in single quotes) of element in configuration file with Graphite options") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2016-04-24 10:39:18 +00:00
args.pop_back();
2016-04-24 09:44:47 +00:00
setGraphitePatternsFromConfig(context, graphite_config_name, merging_params.graphite_params);
}
2014-03-21 19:17:59 +00:00
2014-11-22 02:22:30 +00:00
/// Если присутствует выражение для сэмплирования. MergeTree(date, [sample_key], primary_key, index_granularity)
2014-03-21 19:17:59 +00:00
if (args.size() == 4)
{
sampling_expression = args[1];
args.erase(args.begin() + 1);
}
2014-11-22 02:22:30 +00:00
/// Теперь осталось только три параметра - date, primary_key, index_granularity.
if (auto ast = typeid_cast<ASTIdentifier *>(&*args[0]))
2014-03-21 19:17:59 +00:00
date_column_name = ast->name;
else
2014-11-22 02:22:30 +00:00
throw Exception(String("Date column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2014-03-21 19:17:59 +00:00
2016-04-15 17:42:51 +00:00
if (merging_params.mode != MergeTreeData::MergingParams::Unsorted)
2015-03-13 21:31:23 +00:00
primary_expr_list = extractPrimaryKey(args[1]);
2014-03-21 19:17:59 +00:00
2015-03-13 21:31:23 +00:00
auto ast = typeid_cast<ASTLiteral *>(&*args.back());
2014-03-21 19:17:59 +00:00
if (ast && ast->value.getType() == Field::Types::UInt64)
index_granularity = safeGet<UInt64>(ast->value);
else
2014-11-22 02:22:30 +00:00
throw Exception(String("Index granularity must be a positive integer") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
2014-03-21 19:17:59 +00:00
if (replicated)
return StorageReplicatedMergeTree::create(
zookeeper_path, replica_name, attach, data_path, database_name, table_name,
columns, materialized_columns, alias_columns, column_defaults,
context, primary_expr_list, date_column_name,
2016-04-15 17:42:51 +00:00
sampling_expression, index_granularity, merging_params,
has_force_restore_data_flag,
2015-07-16 21:32:51 +00:00
context.getMergeTreeSettings());
2014-03-21 19:17:59 +00:00
else
return StorageMergeTree::create(
data_path, database_name, table_name,
columns, materialized_columns, alias_columns, column_defaults,
context, primary_expr_list, date_column_name,
2016-04-15 17:42:51 +00:00
sampling_expression, index_granularity, merging_params,
has_force_restore_data_flag,
2015-07-16 21:32:51 +00:00
context.getMergeTreeSettings());
2012-08-16 17:27:40 +00:00
}
2011-10-31 17:30:44 +00:00
else
throw Exception("Unknown storage " + name, ErrorCodes::UNKNOWN_STORAGE);
}
}