Alexey Milovidov 2014-11-22 05:22:30 +03:00
parent 811565b5b1
commit d7a1abbbdc
12 changed files with 335 additions and 80 deletions

View File

@ -18,9 +18,12 @@ namespace DB
class SummingSortedBlockInputStream : public MergingSortedBlockInputStream
{
public:
SummingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_, size_t max_block_size_)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_),
log(&Logger::get("SummingSortedBlockInputStream")), current_row_is_zero(false), output_is_non_empty(false)
SummingSortedBlockInputStream(BlockInputStreams inputs_,
const SortDescription & description_,
/// List of columns to sum. If empty, all numeric columns not present in the description are taken.
const Names & column_names_to_sum_,
size_t max_block_size_)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_), column_names_to_sum(column_names_to_sum_)
{
}
@ -48,18 +51,19 @@ protected:
Block readImpl() override;
private:
Logger * log;
Logger * log = &Logger::get("SummingSortedBlockInputStream");
/// Which column numbers should be summed.
Names column_names_to_sum; /// If set, it is converted to column_numbers_to_sum during initialization.
ColumnNumbers column_numbers_to_sum;
Row current_key; /// The current primary key.
Row next_key; /// The primary key of the next row.
Row current_row;
bool current_row_is_zero; /// The current row summed to zero and should be deleted.
bool current_row_is_zero = false; /// The current row summed to zero and should be deleted.
bool output_is_non_empty; /// Whether we have returned at least one row to the caller.
bool output_is_non_empty = false; /// Whether we have returned at least one row to the caller.
/** We support two different cursors - with Collation and without.
* Templates are used instead of polymorphic SortCursors and virtual function calls.
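At the DDL level, the new column_names_to_sum parameter corresponds to an optional last argument of the SummingMergeTree engine. A minimal SQL sketch (editorial, not part of this commit; the table names and the test database are hypothetical): with no list, all numeric columns outside the sort key are summed; with a list, only the named columns are; a single column may also be given without a tuple.

-- No list: all numeric non-key columns (y, z) are summed on merge.
CREATE TABLE test.sums_all (d Date, k String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (k, d), 8192);

-- Explicit list: only PageViews and Cost are summed; Other keeps a value from one of the merged rows.
CREATE TABLE test.sums_listed (d Date, k String, PageViews UInt32, Cost Float64, Other UInt32) ENGINE = SummingMergeTree(d, (k, d), 8192, (PageViews, Cost));

-- A single column may be given without a tuple.
CREATE TABLE test.sums_one (d Date, k String, PageViews UInt32) ENGINE = SummingMergeTree(d, (k, d), 8192, PageViews);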

View File

@ -594,7 +594,8 @@ public:
const ASTPtr & sampling_expression_, /// nullptr if sampling is not supported.
size_t index_granularity_,
Mode mode_,
const String & sign_column_,
const String & sign_column_, /// For Collapsing mode.
const Names & columns_to_sum_, /// For Summing mode. If empty, chosen automatically.
const MergeTreeSettings & settings_,
const String & log_name_,
bool require_part_metadata_,
@ -772,6 +773,8 @@ public:
const Mode mode;
/// For collapsing change records when the Collapsing mode of operation is used.
const String sign_column;
/// For summing, when the Summing mode of operation is used.
const Names columns_to_sum;
const MergeTreeSettings settings;

View File

@ -37,25 +37,11 @@ public:
const String & date_column_name_,
const ASTPtr & sampling_expression_, /// nullptr if sampling is not supported.
size_t index_granularity_,
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
const String & sign_column_ = "",
MergeTreeData::Mode mode_,
const String & sign_column_, /// For Collapsing mode.
const Names & columns_to_sum_, /// For Summing mode.
const MergeTreeSettings & settings_ = MergeTreeSettings());
static StoragePtr create(
const String & path_,
const String & database_name_,
const String & table_name_,
NamesAndTypesListPtr columns_,
Context & context_,
ASTPtr & primary_expr_ast_,
const String & date_column_name_,
const ASTPtr & sampling_expression_, /// nullptr if sampling is not supported.
size_t index_granularity_,
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
const String & sign_column_ = "",
const MergeTreeSettings & settings_ = MergeTreeSettings());
void shutdown() override;
~StorageMergeTree() override;
@ -193,6 +179,7 @@ private:
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_);
/** Determines which parts should be merged, and merges them.

View File

@ -39,8 +39,9 @@ public:
const String & date_column_name_,
const ASTPtr & sampling_expression_, /// nullptr if sampling is not supported.
size_t index_granularity_,
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
const String & sign_column_ = "",
MergeTreeData::Mode mode_,
const String & sign_column_, /// For Collapsing mode.
const Names & columns_to_sum_, /// For Summing mode.
const MergeTreeSettings & settings_ = MergeTreeSettings());
void shutdown() override;
@ -259,8 +260,9 @@ private:
const String & date_column_name_,
const ASTPtr & sampling_expression_,
size_t index_granularity_,
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
const String & sign_column_ = "",
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_ = MergeTreeSettings());
/// Initialization.

View File

@ -16,13 +16,13 @@ Block SummingSortedBlockInputStream::readImpl()
{
if (!children.size())
return Block();
if (children.size() == 1)
return children[0]->read();
Block merged_block;
ColumnPlainPtrs merged_columns;
init(merged_block, merged_columns);
if (merged_columns.empty())
return Block();
@ -34,7 +34,11 @@ Block SummingSortedBlockInputStream::readImpl()
current_key.resize(description.size());
next_key.resize(description.size());
/// Fill in the numbers of the columns that should be summed.
/** Fill in the numbers of the columns that should be summed.
* These can only be numeric columns that are not part of the sort key.
* If a non-empty column_names_to_sum list is specified, only those columns are taken.
* Some of the columns from column_names_to_sum may not be found. This is ignored.
*/
for (size_t i = 0; i < num_columns; ++i)
{
ColumnWithNameAndType & column = merged_block.getByPosition(i);
@ -52,7 +56,11 @@ Block SummingSortedBlockInputStream::readImpl()
if (it != description.end())
continue;
column_numbers_to_sum.push_back(i);
if (column_names_to_sum.empty()
|| column_names_to_sum.end() != std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name))
{
column_numbers_to_sum.push_back(i);
}
}
}
@ -67,9 +75,9 @@ Block SummingSortedBlockInputStream::readImpl()
template<class TSortCursor>
void SummingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
{
{
size_t merged_rows = 0;
/// Take out rows in the required order and put them into merged_block while the number of rows is not more than max_block_size
while (!queue.empty())
{

View File

@ -33,6 +33,7 @@ MergeTreeData::MergeTreeData(
size_t index_granularity_,
Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_,
const String & log_name_,
bool require_part_metadata_,
@ -40,7 +41,7 @@ MergeTreeData::MergeTreeData(
: ITableDeclaration{materialized_columns_, alias_columns_, column_defaults_}, context(context_),
date_column_name(date_column_name_), sampling_expression(sampling_expression_),
index_granularity(index_granularity_),
mode(mode_), sign_column(sign_column_),
mode(mode_), sign_column(sign_column_), columns_to_sum(columns_to_sum_),
settings(settings_), primary_expr_ast(primary_expr_ast_->clone()),
require_part_metadata(require_part_metadata_),
full_path(full_path_), columns(columns_),
@ -48,7 +49,8 @@ MergeTreeData::MergeTreeData(
log_name(log_name_), log(&Logger::get(log_name + " (Data)"))
{
/// Check that the date column exists and has type Date.
const auto check_date_exists = [this] (const NamesAndTypesList & columns) {
const auto check_date_exists = [this] (const NamesAndTypesList & columns)
{
for (const auto & column : columns)
{
if (column.name == date_column_name)
@ -67,8 +69,19 @@ MergeTreeData::MergeTreeData(
if (!check_date_exists(*columns) && !check_date_exists(materialized_columns))
throw Exception{
"Date column (" + date_column_name + ") does not exist in table declaration.",
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE
};
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE};
/// If columns_to_sum is specified, check that such columns exist.
if (!columns_to_sum.empty())
{
if (mode != Summing)
throw Exception("List of columns to sum for MergeTree can only be specified in Summing mode.", ErrorCodes::LOGICAL_ERROR);
for (const auto & column_to_sum : columns_to_sum)
if (columns->end() == std::find_if(columns->begin(), columns->end(),
[&](const NameAndTypePair & name_and_type) { return column_to_sum == name_and_type.name; }))
throw Exception("Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.");
}
/// Create the directory if it does not exist.
Poco::File(full_path).createDirectories();
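The check added just above means that a column listed for summation but not declared in the table is now rejected when the table is created. A hedged SQL illustration (editorial, not part of this commit; the table name is hypothetical):

-- Expected to fail: 'nosuchcolumn' is listed in the columns to sum but is not declared in the table.
CREATE TABLE test.bad_summing (d Date, k String, y Int8) ENGINE = SummingMergeTree(d, (k, d), 8192, (y, nosuchcolumn));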

View File

@ -380,7 +380,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMerger::mergeParts(
break;
case MergeTreeData::Summing:
merged_stream = ext::make_unique<SummingSortedBlockInputStream>(src_streams, data.getSortDescription(), DEFAULT_MERGE_BLOCK_SIZE);
merged_stream = ext::make_unique<SummingSortedBlockInputStream>(src_streams, data.getSortDescription(), data.columns_to_sum, DEFAULT_MERGE_BLOCK_SIZE);
break;
case MergeTreeData::Aggregating:

View File

@ -44,7 +44,7 @@ static bool startsWith(const std::string & s, const std::string & prefix)
* It may be specified as a tuple: (CounterID, Date),
* or as a single column: CounterID.
*/
static ASTPtr extractPrimaryKey(const ASTPtr & node, const std::string & storage_name)
static ASTPtr extractPrimaryKey(const ASTPtr & node)
{
const ASTFunction * primary_expr_func = typeid_cast<const ASTFunction *>(&*node);
@ -63,6 +63,30 @@ static ASTPtr extractPrimaryKey(const ASTPtr & node, const std::string & storage
}
}
/** For StorageMergeTree: extract the list of column names.
* It may be specified as a tuple: (Clicks, Cost),
* or as a single column: Clicks.
*/
static Names extractColumnNames(const ASTPtr & node)
{
const ASTFunction * expr_func = typeid_cast<const ASTFunction *>(&*node);
if (expr_func && expr_func->name == "tuple")
{
const auto & elements = expr_func->children.at(0)->children;
Names res;
res.reserve(elements.size());
for (const auto & elem : elements)
res.push_back(typeid_cast<const ASTIdentifier &>(*elem).name);
return res;
}
else
{
return { typeid_cast<const ASTIdentifier &>(*node).name };
}
}
StoragePtr StorageFactory::get(
const String & name,
@ -260,7 +284,7 @@ StoragePtr StorageFactory::get(
}
else if (endsWith(name, "MergeTree"))
{
/** Engines [Replicated][Summing|Collapsing|Aggregating|]MergeTree (8 combinations)
/** Engines [Replicated][Summing|Collapsing|Aggregating|]MergeTree (8 combinations)
* The engine arguments must specify:
* - (for Replicated) the path to the table in ZooKeeper
* - (for Replicated) the replica name in ZooKeeper
@ -270,8 +294,77 @@ StoragePtr StorageFactory::get(
* - index_granularity;
* - (for Collapsing) the name of the column containing the type of the row with the "visit" change (taking values 1 and -1).
* For example: ENGINE = ReplicatedCollapsingMergeTree('/tables/mytable', 'rep02', EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID), 8192, Sign).
* - (for Summing, optional) a tuple of columns to sum. If not specified, all numeric columns that are not part of the primary key are used.
*
* MergeTree(date, [sample_key], primary_key, index_granularity)
* CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign)
* SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum])
* AggregatingMergeTree(date, [sample_key], primary_key, index_granularity)
*/
const char * verbose_help = R"(
MergeTree is a family of storage engines.
MergeTree engines differ in two ways:
- they may be replicated or non-replicated;
- they may do different actions on merge: nothing; sign collapse; summation; applying aggregate functions.
So we have 8 combinations:
MergeTree, CollapsingMergeTree, SummingMergeTree, AggregatingMergeTree,
ReplicatedMergeTree, ReplicatedCollapsingMergeTree, ReplicatedSummingMergeTree, ReplicatedAggregatingMergeTree.
In most cases, you need MergeTree or ReplicatedMergeTree.
For replicated merge trees, you need to supply the path in ZooKeeper and the replica name as the first two parameters.
The path in ZooKeeper looks like '/clickhouse/tables/01/', where /clickhouse/tables/ is a common prefix and 01 is the shard name.
The replica name looks like 'mtstat01-1' - it may be the hostname or any suitable string identifying the replica.
You may use macro substitutions for these parameters, e.g. ReplicatedMergeTree('/clickhouse/tables/{shard}/', '{replica}'...
See the <macros> section in the server configuration file.
The next parameter (the first for unreplicated tables and the third for replicated tables) is the name of the date column.
The date column must exist in the table and have type Date (not DateTime).
It is used for internal data partitioning and works like some kind of index.
If your source data doesn't have a column of type Date but has a DateTime column, you may add values for the Date column while loading,
or you may INSERT your source data into a table of type Log and then transform it with INSERT INTO t SELECT toDate(time) AS date, * FROM ...
If your source data doesn't have any date or time, you may just pass any constant for the date column while loading.
The next parameter is an optional sampling expression. The sampling expression is used to implement the SAMPLE clause in queries for approximate query execution.
If you don't need approximate query execution, simply omit this parameter.
The sampling expression must be one of the elements of the primary key tuple. For example, if your primary key is (CounterID, EventDate, intHash64(UserID)), your sampling expression might be intHash64(UserID).
The next parameter is the primary key tuple. It looks like (CounterID, EventDate, intHash64(UserID)) - a list of column names or functional expressions in round brackets. If your primary key has just one element, you may omit the round brackets.
Careful choice of the primary key is extremely important for processing short queries.
The next parameter is the index (primary key) granularity. A good value is 8192. You have no reason to use any other value.
For Collapsing mode, the last parameter is the name of the sign column - a special column that is used to 'collapse' rows with the same primary key during merges.
For Summing mode, the last parameter is an optional list of columns to sum during merges. The list is passed in round brackets, like (PageViews, Cost).
If this parameter is omitted, the storage will sum all numeric columns except columns participating in the primary key.
Examples:
MergeTree(EventDate, (CounterID, EventDate), 8192)
MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192)
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192, (Shows, Clicks, Cost, CostCur, ShowsSumPosition, ClicksSumPosition, SessionNum, SessionLen, SessionCost, GoalsNum, SessionDepth))
ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
For further information, please read the documentation: http://clickhouse.yandex-team.ru/
)";
String name_part = name.substr(0, name.size() - strlen("MergeTree"));
bool replicated = startsWith(name_part, "Replicated");
@ -287,7 +380,7 @@ StoragePtr StorageFactory::get(
else if (name_part == "Aggregating")
mode = MergeTreeData::Aggregating;
else if (!name_part.empty())
throw Exception("Unknown storage " + name, ErrorCodes::UNKNOWN_STORAGE);
throw Exception("Unknown storage " + name + verbose_help, ErrorCodes::UNKNOWN_STORAGE);
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
@ -296,46 +389,93 @@ StoragePtr StorageFactory::get(
if (args_func.size() == 1)
args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
size_t additional_params = (replicated ? 2 : 0) + (mode == MergeTreeData::Collapsing ? 1 : 0);
if (args.size() != additional_params + 3 && args.size() != additional_params + 4)
/// NOTE Slightly convoluted.
size_t num_additional_params = (replicated ? 2 : 0) + (mode == MergeTreeData::Collapsing);
if (mode != MergeTreeData::Summing
&& args.size() != num_additional_params + 3
&& args.size() != num_additional_params + 4)
{
String params;
if (replicated)
params += "path in ZooKeeper, replica name or '', ";
params += "name of column with date, [name of column for sampling], primary key expression, index granularity";
params +=
"\npath in ZooKeeper,"
"\nreplica name,";
params +=
"\nname of column with date,"
"\n[sampling element of primary key],"
"\nprimary key expression,"
"\nindex granularity\n";
if (mode == MergeTreeData::Collapsing)
params += ", sign column";
throw Exception("Storage " + name + " requires " + toString(additional_params + 3) + " or "
+ toString(additional_params + 4) +" parameters: " + params,
throw Exception("Storage " + name + " requires "
+ toString(num_additional_params + 3) + " or "
+ toString(num_additional_params + 4) + " parameters: " + params + verbose_help,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (mode == MergeTreeData::Summing
&& args.size() != num_additional_params + 3
&& args.size() != num_additional_params + 4
&& args.size() != num_additional_params + 5)
{
String params;
if (replicated)
params +=
"\npath in ZooKeeper,"
"\nreplica name,";
params +=
"\nname of column with date,"
"\n[sampling element of primary key],"
"\nprimary key expression,"
"\nindex granularity,"
"\n[list of columns to sum]\n";
throw Exception("Storage " + name + " requires "
+ toString(num_additional_params + 3) + " or "
+ toString(num_additional_params + 4) + " or "
+ toString(num_additional_params + 5) + " parameters: " + params + verbose_help,
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
/// For Replicated.
String zookeeper_path;
String replica_name;
/// For all engines.
String date_column_name;
ASTPtr primary_expr_list;
ASTPtr sampling_expression;
UInt64 index_granularity;
/// For Collapsing.
String sign_column_name;
/// For Summing.
Names columns_to_sum;
if (replicated)
{
auto ast = typeid_cast<ASTLiteral *>(&*args[0]);
if (ast && ast->value.getType() == Field::Types::String)
zookeeper_path = safeGet<String>(ast->value);
else
throw Exception("Path in ZooKeeper must be a string literal", ErrorCodes::BAD_ARGUMENTS);
throw Exception(String("Path in ZooKeeper must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
ast = typeid_cast<ASTLiteral *>(&*args[1]);
if (ast && ast->value.getType() == Field::Types::String)
replica_name = safeGet<String>(ast->value);
else
throw Exception("Replica name must be a string literal", ErrorCodes::BAD_ARGUMENTS);
throw Exception(String("Replica name must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
if (replica_name.empty())
throw Exception("No replica name in config", ErrorCodes::NO_REPLICA_NAME_GIVEN);
throw Exception(String("No replica name in config") + verbose_help, ErrorCodes::NO_REPLICA_NAME_GIVEN);
args.erase(args.begin(), args.begin() + 2);
}
@ -345,42 +485,54 @@ StoragePtr StorageFactory::get(
if (auto ast = typeid_cast<ASTIdentifier *>(&*args.back()))
sign_column_name = ast->name;
else
throw Exception("Sign column name must be an unquoted string", ErrorCodes::BAD_ARGUMENTS);
throw Exception(String("Sign column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
args.pop_back();
}
else if (mode == MergeTreeData::Summing)
{
/// If the last element is not the index granularity (a literal), then it is the list of columns to sum.
if (!typeid_cast<const ASTLiteral *>(&*args.back()))
{
columns_to_sum = extractColumnNames(args.back());
args.pop_back();
}
}
/// If a sampling expression is present: MergeTree(date, [sample_key], primary_key, index_granularity)
if (args.size() == 4)
{
sampling_expression = args[1];
args.erase(args.begin() + 1);
}
/// Now only three parameters remain - date, primary_key, index_granularity.
if (auto ast = typeid_cast<ASTIdentifier *>(&*args[0]))
date_column_name = ast->name;
else
throw Exception("Date column name must be an unquoted string", ErrorCodes::BAD_ARGUMENTS);
throw Exception(String("Date column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
primary_expr_list = extractPrimaryKey(args[1], name);
primary_expr_list = extractPrimaryKey(args[1]);
auto ast = typeid_cast<ASTLiteral *>(&*args[2]);
if (ast && ast->value.getType() == Field::Types::UInt64)
index_granularity = safeGet<UInt64>(ast->value);
else
throw Exception("Index granularity must be a positive integer", ErrorCodes::BAD_ARGUMENTS);
throw Exception(String("Index granularity must be a positive integer") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
if (replicated)
return StorageReplicatedMergeTree::create(
zookeeper_path, replica_name, attach, data_path, database_name, table_name,
columns, materialized_columns, alias_columns, column_defaults,
context, primary_expr_list, date_column_name,
sampling_expression, index_granularity, mode, sign_column_name);
sampling_expression, index_granularity, mode, sign_column_name, columns_to_sum);
else
return StorageMergeTree::create(
data_path, database_name, table_name,
columns, materialized_columns, alias_columns, column_defaults,
context, primary_expr_list, date_column_name,
sampling_expression, index_granularity, mode, sign_column_name);
sampling_expression, index_granularity, mode, sign_column_name, columns_to_sum);
}
else
throw Exception("Unknown storage " + name, ErrorCodes::UNKNOWN_STORAGE);
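To make the merge semantics described in the help text above concrete, here is a hedged SQL sketch (editorial, not part of this commit; the table name is hypothetical). On merge, rows with equal primary keys collapse into one; only the listed columns are summed, an unlisted numeric column keeps a value from one of the source rows, and (per current_row_is_zero in SummingSortedBlockInputStream) a row whose summed columns are all zero is dropped.

CREATE TABLE test.summing_demo (EventDate Date, CounterID UInt32, Clicks UInt32, Cost Float64, Shows UInt32) ENGINE = SummingMergeTree(EventDate, (CounterID, EventDate), 8192, (Clicks, Cost));

INSERT INTO test.summing_demo VALUES ('2014-11-22', 1, 10, 1.5, 100);
INSERT INTO test.summing_demo VALUES ('2014-11-22', 1, 5, 0.5, 200);

-- Ask the engine to merge; merges otherwise happen in the background at an unspecified time.
OPTIMIZE TABLE test.summing_demo;

-- Expected after the merge: one row with Clicks = 15, Cost = 2, and Shows taken from one of the two inserts.
SELECT * FROM test.summing_demo;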

View File

@ -22,6 +22,7 @@ StorageMergeTree::StorageMergeTree(
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_)
: IStorage{materialized_columns_, alias_columns_, column_defaults_},
path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
@ -29,7 +30,7 @@ StorageMergeTree::StorageMergeTree(
data(full_path, columns_,
materialized_columns_, alias_columns_, column_defaults_,
context_, primary_expr_ast_, date_column_name_,
sampling_expression_, index_granularity_,mode_, sign_column_,
sampling_expression_, index_granularity_,mode_, sign_column_, columns_to_sum_,
settings_, database_name_ + "." + table_name, false),
reader(data), writer(data), merger(data),
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)")),
@ -54,13 +55,14 @@ StoragePtr StorageMergeTree::create(
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_)
{
auto res = new StorageMergeTree{
path_, database_name_, table_name_,
columns_, materialized_columns_, alias_columns_, column_defaults_,
context_, primary_expr_ast_, date_column_name_,
sampling_expression_, index_granularity_, mode_, sign_column_, settings_
sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_, settings_
};
StoragePtr res_ptr = res->thisPtr();
@ -69,23 +71,6 @@ StoragePtr StorageMergeTree::create(
return res_ptr;
}
StoragePtr StorageMergeTree::create(
const String & path_, const String & database_name_, const String & table_name_,
NamesAndTypesListPtr columns_,
Context & context_,
ASTPtr & primary_expr_ast_,
const String & date_column_name_,
const ASTPtr & sampling_expression_,
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const MergeTreeSettings & settings_)
{
return create(path_, database_name_, table_name_,
columns_, {}, {}, {},
context_, primary_expr_ast_, date_column_name_,
sampling_expression_, index_granularity_, mode_, sign_column_, settings_);
}
void StorageMergeTree::shutdown()
{

View File

@ -36,6 +36,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_,
const MergeTreeSettings & settings_)
: IStorage{materialized_columns_, alias_columns_, column_defaults_}, context(context_),
zookeeper(context.getZooKeeper()), database_name(database_name_),
@ -45,7 +46,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
data(full_path, columns_,
materialized_columns_, alias_columns_, column_defaults_,
context_, primary_expr_ast_, date_column_name_,
sampling_expression_, index_granularity_, mode_, sign_column_,
sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_,
settings_, database_name_ + "." + table_name, true,
std::bind(&StorageReplicatedMergeTree::enqueuePartForCheck, this, std::placeholders::_1)),
reader(data), writer(data), merger(data), fetcher(data),
@ -115,7 +116,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
unreplicated_data.reset(new MergeTreeData(unreplicated_path, columns_,
materialized_columns_, alias_columns_, column_defaults_,
context_, primary_expr_ast_,
date_column_name_, sampling_expression_, index_granularity_, mode_, sign_column_, settings_,
date_column_name_, sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_, settings_,
database_name_ + "." + table_name + "[unreplicated]", false));
unreplicated_data->loadDataParts(skip_sanity_checks);
@ -147,6 +148,7 @@ StoragePtr StorageReplicatedMergeTree::create(
size_t index_granularity_,
MergeTreeData::Mode mode_,
const String & sign_column_,
const Names & columns_to_sum_ = Names(),
const MergeTreeSettings & settings_)
{
auto res = new StorageReplicatedMergeTree{
@ -155,7 +157,7 @@ StoragePtr StorageReplicatedMergeTree::create(
columns_, materialized_columns_, alias_columns_, column_defaults_,
context_, primary_expr_ast_, date_column_name_,
sampling_expression_, index_granularity_, mode_,
sign_column_, settings_
sign_column_, columns_to_sum_, settings_
};
StoragePtr res_ptr = res->thisPtr();

View File

@ -41,7 +41,11 @@ int main(int argc, char ** argv)
if (!parser.parse(begin, end, primary_expr, expected))
throw Poco::Exception("Cannot parse " + primary_expr_str);
StoragePtr table = StorageMergeTree::create("./", "default", "test", names_and_types, context, primary_expr, "d", nullptr, 101);
StoragePtr table = StorageMergeTree::create(
"./", "default", "test",
names_and_types, context, primary_expr, "d",
nullptr, {}, 101, {},
MergeTreeData::Ordinary, {}, {}, {});
/// write into it
{

View File

@ -0,0 +1,95 @@
DROP TABLE IF EXISTS test.merge_tree;
DROP TABLE IF EXISTS test.collapsing_merge_tree;
DROP TABLE IF EXISTS test.summing_merge_tree;
DROP TABLE IF EXISTS test.summing_merge_tree_with_list_of_columns_to_sum;
DROP TABLE IF EXISTS test.aggregating_merge_tree;
CREATE TABLE test.merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111);
CREATE TABLE test.collapsing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);
CREATE TABLE test.summing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111);
CREATE TABLE test.summing_merge_tree_with_list_of_columns_to_sum
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111, (y, z));
CREATE TABLE test.aggregating_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111);
DROP TABLE IF EXISTS test.merge_tree_with_sampling;
DROP TABLE IF EXISTS test.collapsing_merge_tree_with_sampling;
DROP TABLE IF EXISTS test.summing_merge_tree_with_sampling;
DROP TABLE IF EXISTS test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum;
DROP TABLE IF EXISTS test.aggregating_merge_tree_with_sampling;
CREATE TABLE test.merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE test.collapsing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
CREATE TABLE test.summing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, (y, z));
CREATE TABLE test.aggregating_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
DROP TABLE IF EXISTS test.replicated_merge_tree;
DROP TABLE IF EXISTS test.replicated_collapsing_merge_tree;
DROP TABLE IF EXISTS test.replicated_summing_merge_tree;
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_list_of_columns_to_sum;
DROP TABLE IF EXISTS test.replicated_aggregating_merge_tree;
CREATE TABLE test.replicated_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE test.replicated_collapsing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/01/replicated_collapsing_merge_tree/', 'r1', d, (a, b), 111, y);
CREATE TABLE test.replicated_summing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE test.replicated_summing_merge_tree_with_list_of_columns_to_sum
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_list_of_columns_to_sum/', 'r1', d, (a, b), 111, (y, z));
CREATE TABLE test.replicated_aggregating_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/01/replicated_aggregating_merge_tree/', 'r1', d, (a, b), 111);
DROP TABLE IF EXISTS test.replicated_merge_tree_with_sampling;
DROP TABLE IF EXISTS test.replicated_collapsing_merge_tree_with_sampling;
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_sampling;
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum;
DROP TABLE IF EXISTS test.replicated_aggregating_merge_tree_with_sampling;
CREATE TABLE test.replicated_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE test.replicated_collapsing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/01/replicated_collapsing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
CREATE TABLE test.replicated_summing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, (y, z));
CREATE TABLE test.replicated_aggregating_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/01/replicated_aggregating_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
INSERT INTO test.merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.collapsing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.summing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.summing_merge_tree_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.aggregating_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.collapsing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.summing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.aggregating_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_collapsing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_summing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_summing_merge_tree_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_aggregating_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_collapsing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_summing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
INSERT INTO test.replicated_aggregating_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);