mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge
This commit is contained in:
parent
811565b5b1
commit
d7a1abbbdc
@ -18,9 +18,12 @@ namespace DB
|
||||
class SummingSortedBlockInputStream : public MergingSortedBlockInputStream
|
||||
{
|
||||
public:
|
||||
SummingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_, size_t max_block_size_)
|
||||
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_),
|
||||
log(&Logger::get("SummingSortedBlockInputStream")), current_row_is_zero(false), output_is_non_empty(false)
|
||||
SummingSortedBlockInputStream(BlockInputStreams inputs_,
|
||||
const SortDescription & description_,
|
||||
/// Список столбцов, которых нужно суммировать. Если пустое - берутся все числовые столбцы, не входящие в description.
|
||||
const Names & column_names_to_sum_,
|
||||
size_t max_block_size_)
|
||||
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_), column_names_to_sum(column_names_to_sum_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -48,18 +51,19 @@ protected:
|
||||
Block readImpl() override;
|
||||
|
||||
private:
|
||||
Logger * log;
|
||||
Logger * log = &Logger::get("SummingSortedBlockInputStream");
|
||||
|
||||
/// Столбцы с какими номерами надо суммировать.
|
||||
Names column_names_to_sum; /// Если задано - преобразуется в column_numbers_to_sum при инициализации.
|
||||
ColumnNumbers column_numbers_to_sum;
|
||||
|
||||
Row current_key; /// Текущий первичный ключ.
|
||||
Row next_key; /// Первичный ключ следующей строки.
|
||||
|
||||
Row current_row;
|
||||
bool current_row_is_zero; /// Текущая строчка просуммировалась в ноль, и её следует удалить.
|
||||
bool current_row_is_zero = false; /// Текущая строчка просуммировалась в ноль, и её следует удалить.
|
||||
|
||||
bool output_is_non_empty; /// Отдали ли мы наружу хоть одну строку.
|
||||
bool output_is_non_empty = false; /// Отдали ли мы наружу хоть одну строку.
|
||||
|
||||
/** Делаем поддержку двух разных курсоров - с Collation и без.
|
||||
* Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций.
|
||||
|
@ -594,7 +594,8 @@ public:
|
||||
const ASTPtr & sampling_expression_, /// nullptr, если семплирование не поддерживается.
|
||||
size_t index_granularity_,
|
||||
Mode mode_,
|
||||
const String & sign_column_,
|
||||
const String & sign_column_, /// Для Collapsing режима.
|
||||
const Names & columns_to_sum_, /// Для Summing режима. Если пустое - то выбирается автоматически.
|
||||
const MergeTreeSettings & settings_,
|
||||
const String & log_name_,
|
||||
bool require_part_metadata_,
|
||||
@ -772,6 +773,8 @@ public:
|
||||
const Mode mode;
|
||||
/// Для схлопывания записей об изменениях, если используется Collapsing режим работы.
|
||||
const String sign_column;
|
||||
/// Для суммирования, если используется Summing режим работы.
|
||||
const Names columns_to_sum;
|
||||
|
||||
const MergeTreeSettings settings;
|
||||
|
||||
|
@ -37,25 +37,11 @@ public:
|
||||
const String & date_column_name_,
|
||||
const ASTPtr & sampling_expression_, /// nullptr, если семплирование не поддерживается.
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
|
||||
const String & sign_column_ = "",
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_, /// Для Collapsing режима.
|
||||
const Names & columns_to_sum_, /// Для Summing режима.
|
||||
const MergeTreeSettings & settings_ = MergeTreeSettings());
|
||||
|
||||
static StoragePtr create(
|
||||
const String & path_,
|
||||
const String & database_name_,
|
||||
const String & table_name_,
|
||||
NamesAndTypesListPtr columns_,
|
||||
Context & context_,
|
||||
ASTPtr & primary_expr_ast_,
|
||||
const String & date_column_name_,
|
||||
const ASTPtr & sampling_expression_, /// nullptr, если семплирование не поддерживается.
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
|
||||
const String & sign_column_ = "",
|
||||
const MergeTreeSettings & settings_ = MergeTreeSettings());
|
||||
|
||||
|
||||
void shutdown() override;
|
||||
~StorageMergeTree() override;
|
||||
|
||||
@ -193,6 +179,7 @@ private:
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_);
|
||||
|
||||
/** Определяет, какие куски нужно объединять, и объединяет их.
|
||||
|
@ -39,8 +39,9 @@ public:
|
||||
const String & date_column_name_,
|
||||
const ASTPtr & sampling_expression_, /// nullptr, если семплирование не поддерживается.
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
|
||||
const String & sign_column_ = "",
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_, /// Для Collapsing режима.
|
||||
const Names & columns_to_sum_, /// Для Summing режима.
|
||||
const MergeTreeSettings & settings_ = MergeTreeSettings());
|
||||
|
||||
void shutdown() override;
|
||||
@ -259,8 +260,9 @@ private:
|
||||
const String & date_column_name_,
|
||||
const ASTPtr & sampling_expression_,
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_ = MergeTreeData::Ordinary,
|
||||
const String & sign_column_ = "",
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_ = MergeTreeSettings());
|
||||
|
||||
/// Инициализация.
|
||||
|
@ -16,13 +16,13 @@ Block SummingSortedBlockInputStream::readImpl()
|
||||
{
|
||||
if (!children.size())
|
||||
return Block();
|
||||
|
||||
|
||||
if (children.size() == 1)
|
||||
return children[0]->read();
|
||||
|
||||
Block merged_block;
|
||||
ColumnPlainPtrs merged_columns;
|
||||
|
||||
|
||||
init(merged_block, merged_columns);
|
||||
if (merged_columns.empty())
|
||||
return Block();
|
||||
@ -34,7 +34,11 @@ Block SummingSortedBlockInputStream::readImpl()
|
||||
current_key.resize(description.size());
|
||||
next_key.resize(description.size());
|
||||
|
||||
/// Заполним номера столбцов, которые должны быть просуммированы.
|
||||
/** Заполним номера столбцов, которые должны быть просуммированы.
|
||||
* Это могут быть только числовые столбцы, не входящие в ключ сортировки.
|
||||
* Если задан непустой список column_names_to_sum, то берём только эти столбцы.
|
||||
* Часть столбцов из column_names_to_sum может быть не найдена. Это игнорируется.
|
||||
*/
|
||||
for (size_t i = 0; i < num_columns; ++i)
|
||||
{
|
||||
ColumnWithNameAndType & column = merged_block.getByPosition(i);
|
||||
@ -52,7 +56,11 @@ Block SummingSortedBlockInputStream::readImpl()
|
||||
if (it != description.end())
|
||||
continue;
|
||||
|
||||
column_numbers_to_sum.push_back(i);
|
||||
if (column_names_to_sum.empty()
|
||||
|| column_names_to_sum.end() != std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name))
|
||||
{
|
||||
column_numbers_to_sum.push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -67,9 +75,9 @@ Block SummingSortedBlockInputStream::readImpl()
|
||||
|
||||
template<class TSortCursor>
|
||||
void SummingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
|
||||
{
|
||||
{
|
||||
size_t merged_rows = 0;
|
||||
|
||||
|
||||
/// Вынимаем строки в нужном порядке и кладём в merged_block, пока строк не больше max_block_size
|
||||
while (!queue.empty())
|
||||
{
|
||||
|
@ -33,6 +33,7 @@ MergeTreeData::MergeTreeData(
|
||||
size_t index_granularity_,
|
||||
Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_,
|
||||
const String & log_name_,
|
||||
bool require_part_metadata_,
|
||||
@ -40,7 +41,7 @@ MergeTreeData::MergeTreeData(
|
||||
: ITableDeclaration{materialized_columns_, alias_columns_, column_defaults_}, context(context_),
|
||||
date_column_name(date_column_name_), sampling_expression(sampling_expression_),
|
||||
index_granularity(index_granularity_),
|
||||
mode(mode_), sign_column(sign_column_),
|
||||
mode(mode_), sign_column(sign_column_), columns_to_sum(columns_to_sum_),
|
||||
settings(settings_), primary_expr_ast(primary_expr_ast_->clone()),
|
||||
require_part_metadata(require_part_metadata_),
|
||||
full_path(full_path_), columns(columns_),
|
||||
@ -48,7 +49,8 @@ MergeTreeData::MergeTreeData(
|
||||
log_name(log_name_), log(&Logger::get(log_name + " (Data)"))
|
||||
{
|
||||
/// Проверяем, что столбец с датой существует и имеет тип Date.
|
||||
const auto check_date_exists = [this] (const NamesAndTypesList & columns) {
|
||||
const auto check_date_exists = [this] (const NamesAndTypesList & columns)
|
||||
{
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
if (column.name == date_column_name)
|
||||
@ -67,8 +69,19 @@ MergeTreeData::MergeTreeData(
|
||||
if (!check_date_exists(*columns) && !check_date_exists(materialized_columns))
|
||||
throw Exception{
|
||||
"Date column (" + date_column_name + ") does not exist in table declaration.",
|
||||
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE
|
||||
};
|
||||
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE};
|
||||
|
||||
/// Если заданы columns_to_sum, проверяем, что такие столбцы существуют.
|
||||
if (!columns_to_sum.empty())
|
||||
{
|
||||
if (mode != Summing)
|
||||
throw Exception("List of columns to sum for MergeTree cannot be specified in all modes except Summing.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
for (const auto & column_to_sum : columns_to_sum)
|
||||
if (columns->end() == std::find_if(columns->begin(), columns->end(),
|
||||
[&](const NameAndTypePair & name_and_type) { return column_to_sum == name_and_type.name; }))
|
||||
throw Exception("Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.");
|
||||
}
|
||||
|
||||
/// создаём директорию, если её нет
|
||||
Poco::File(full_path).createDirectories();
|
||||
|
@ -380,7 +380,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMerger::mergeParts(
|
||||
break;
|
||||
|
||||
case MergeTreeData::Summing:
|
||||
merged_stream = ext::make_unique<SummingSortedBlockInputStream>(src_streams, data.getSortDescription(), DEFAULT_MERGE_BLOCK_SIZE);
|
||||
merged_stream = ext::make_unique<SummingSortedBlockInputStream>(src_streams, data.getSortDescription(), data.columns_to_sum, DEFAULT_MERGE_BLOCK_SIZE);
|
||||
break;
|
||||
|
||||
case MergeTreeData::Aggregating:
|
||||
|
@ -44,7 +44,7 @@ static bool startsWith(const std::string & s, const std::string & prefix)
|
||||
* Он может быть указан в кортеже: (CounterID, Date),
|
||||
* или как один столбец: CounterID.
|
||||
*/
|
||||
static ASTPtr extractPrimaryKey(const ASTPtr & node, const std::string & storage_name)
|
||||
static ASTPtr extractPrimaryKey(const ASTPtr & node)
|
||||
{
|
||||
const ASTFunction * primary_expr_func = typeid_cast<const ASTFunction *>(&*node);
|
||||
|
||||
@ -63,6 +63,30 @@ static ASTPtr extractPrimaryKey(const ASTPtr & node, const std::string & storage
|
||||
}
|
||||
}
|
||||
|
||||
/** Для StorageMergeTree: достать список имён столбцов.
|
||||
* Он может быть указан в кортеже: (Clicks, Cost),
|
||||
* или как один столбец: Clicks.
|
||||
*/
|
||||
static Names extractColumnNames(const ASTPtr & node)
|
||||
{
|
||||
const ASTFunction * expr_func = typeid_cast<const ASTFunction *>(&*node);
|
||||
|
||||
if (expr_func && expr_func->name == "tuple")
|
||||
{
|
||||
const auto & elements = expr_func->children.at(0)->children;
|
||||
Names res;
|
||||
res.reserve(elements.size());
|
||||
for (const auto & elem : elements)
|
||||
res.push_back(typeid_cast<const ASTIdentifier &>(*elem).name);
|
||||
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{
|
||||
return { typeid_cast<const ASTIdentifier &>(*node).name };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
StoragePtr StorageFactory::get(
|
||||
const String & name,
|
||||
@ -260,7 +284,7 @@ StoragePtr StorageFactory::get(
|
||||
}
|
||||
else if (endsWith(name, "MergeTree"))
|
||||
{
|
||||
/** Движки [Replicated][Summing|Collapsing|Aggregating|]MergeTree (8 комбинаций)
|
||||
/** Движки [Replicated][Summing|Collapsing|Aggregating|]MergeTree (8 комбинаций)
|
||||
* В качестве аргумента для движка должно быть указано:
|
||||
* - (для Replicated) Путь к таблице в ZooKeeper
|
||||
* - (для Replicated) Имя реплики в ZooKeeper
|
||||
@ -270,8 +294,77 @@ StoragePtr StorageFactory::get(
|
||||
* - index_granularity;
|
||||
* - (для Collapsing) имя столбца, содержащего тип строчки с изменением "визита" (принимающего значения 1 и -1).
|
||||
* Например: ENGINE = ReplicatedCollapsingMergeTree('/tables/mytable', 'rep02', EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID), 8192, Sign).
|
||||
* - (для Summing, не обязательно) кортеж столбцов, которых следует суммировать. Если не задано - используются все числовые столбцы, не входящие в первичный ключ.
|
||||
* Например: ENGINE = ReplicatedCollapsingMergeTree('/tables/mytable', 'rep02', EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID), 8192, Sign).
|
||||
*
|
||||
* MergeTree(date, [sample_key], primary_key, index_granularity)
|
||||
* CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign)
|
||||
* SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum])
|
||||
* AggregatingMergeTree(date, [sample_key], primary_key, index_granularity)
|
||||
*/
|
||||
|
||||
const char * verbose_help = R"(
|
||||
|
||||
MergeTree is family of storage engines.
|
||||
|
||||
MergeTrees is different in two ways:
|
||||
- it may be replicated and non-replicated;
|
||||
- it may do different actions on merge: nothing; sign collapse; sum; apply aggregete functions.
|
||||
|
||||
So we have 8 combinations:
|
||||
MergeTree, CollapsingMergeTree, SummingMergeTree, AggregatingMergeTree,
|
||||
ReplicatedMergeTree, ReplicatedCollapsingMergeTree, ReplicatedSummingMergeTree, ReplicatedAggregatingMergeTree.
|
||||
|
||||
In most of cases, you need MergeTree or ReplicatedMergeTree.
|
||||
|
||||
For replicated merge trees, you need to supply path in ZooKeeper and replica name as first two parameters.
|
||||
Path in ZooKeeper is like '/clickhouse/tables/01/' where /clickhouse/tables/ is common prefix and 01 is shard name.
|
||||
Replica name is like 'mtstat01-1' - it may be hostname or any suitable string identifying replica.
|
||||
You may use macro substitutions for these parameters. It's like ReplicatedMergeTree('/clickhouse/tables/{shard}/', '{replica}'...
|
||||
Look at <macros> section in server configuration file.
|
||||
|
||||
Next parameter (which is first for unreplicated tables and third for replicated tables) is name of date column.
|
||||
Date column must exist in table and have type Date (not DateTime).
|
||||
It is used for internal data partitioning and works like some kind of index.
|
||||
|
||||
If your source data doesn't have column of type Date, but have DateTime column, you may add values for Date column while loading,
|
||||
or you may INSERT your source data to table of type Log and then transform it with INSERT INTO t SELECT toDate(time) AS date, * FROM ...
|
||||
If your source data doesn't have any date or time, you may just pass any constant for date column while loading.
|
||||
|
||||
Next parameter is optional sampling expression. Sampling expression is used to implement SAMPLE clause in query for approximate query execution.
|
||||
If you don't need approximate query execution, simply omit this parameter.
|
||||
Sample expression must be one of elements of primary key tuple. For example, if your primary key is (CounterID, EventDate, intHash64(UserID)), your sampling expression might be intHash64(UserID).
|
||||
|
||||
Next parameter is primary key tuple. It's like (CounterID, EventDate, intHash64(UserID)) - list of column names or functional expressions in round brackets. If your primary key have just one element, you may omit round brackets.
|
||||
|
||||
Careful choice of primary key is extremely important for processing short-time queries.
|
||||
|
||||
Next parameter is index (primary key) granularity. Good value is 8192. You have no reasons to use any other value.
|
||||
|
||||
For Collapsing mode, last parameter is name of sign column - special column that is used to 'collapse' rows with same primary key while merge.
|
||||
|
||||
For Summing mode, last parameter is optional list of columns to sum while merge. List is passed in round brackets, like (PageViews, Cost).
|
||||
If this parameter is omitted, storage will sum all numeric columns except columns participated in primary key.
|
||||
|
||||
|
||||
Examples:
|
||||
|
||||
MergeTree(EventDate, (CounterID, EventDate), 8192)
|
||||
|
||||
MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
|
||||
|
||||
CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign)
|
||||
|
||||
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192)
|
||||
|
||||
SummingMergeTree(EventDate, (OrderID, EventDate, BannerID, PhraseID, ContextType, RegionID, PageID, IsFlat, TypeID, ResourceNo), 8192, (Shows, Clicks, Cost, CostCur, ShowsSumPosition, ClicksSumPosition, SessionNum, SessionLen, SessionCost, GoalsNum, SessionDepth))
|
||||
|
||||
ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/hits', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
|
||||
|
||||
|
||||
For further info please read the documentation: http://clickhouse.yandex-team.ru/
|
||||
)";
|
||||
|
||||
String name_part = name.substr(0, name.size() - strlen("MergeTree"));
|
||||
|
||||
bool replicated = startsWith(name_part, "Replicated");
|
||||
@ -287,7 +380,7 @@ StoragePtr StorageFactory::get(
|
||||
else if (name_part == "Aggregating")
|
||||
mode = MergeTreeData::Aggregating;
|
||||
else if (!name_part.empty())
|
||||
throw Exception("Unknown storage " + name, ErrorCodes::UNKNOWN_STORAGE);
|
||||
throw Exception("Unknown storage " + name + verbose_help, ErrorCodes::UNKNOWN_STORAGE);
|
||||
|
||||
ASTs & args_func = typeid_cast<ASTFunction &>(*typeid_cast<ASTCreateQuery &>(*query).storage).children;
|
||||
|
||||
@ -296,46 +389,93 @@ StoragePtr StorageFactory::get(
|
||||
if (args_func.size() == 1)
|
||||
args = typeid_cast<ASTExpressionList &>(*args_func.at(0)).children;
|
||||
|
||||
size_t additional_params = (replicated ? 2 : 0) + (mode == MergeTreeData::Collapsing ? 1 : 0);
|
||||
if (args.size() != additional_params + 3 && args.size() != additional_params + 4)
|
||||
/// NOTE Слегка запутанно.
|
||||
size_t num_additional_params = (replicated ? 2 : 0) + (mode == MergeTreeData::Collapsing);
|
||||
|
||||
if (mode != MergeTreeData::Summing
|
||||
&& args.size() != num_additional_params + 3
|
||||
&& args.size() != num_additional_params + 4)
|
||||
{
|
||||
String params;
|
||||
|
||||
if (replicated)
|
||||
params += "path in ZooKeeper, replica name or '', ";
|
||||
params += "name of column with date, [name of column for sampling], primary key expression, index granularity";
|
||||
params +=
|
||||
"\npath in ZooKeeper,"
|
||||
"\nreplica name,";
|
||||
|
||||
params +=
|
||||
"\nname of column with date,"
|
||||
"\n[sampling element of primary key],"
|
||||
"\nprimary key expression,"
|
||||
"\nindex granularity\n";
|
||||
|
||||
if (mode == MergeTreeData::Collapsing)
|
||||
params += ", sign column";
|
||||
throw Exception("Storage " + name + " requires " + toString(additional_params + 3) + " or "
|
||||
+ toString(additional_params + 4) +" parameters: " + params,
|
||||
|
||||
throw Exception("Storage " + name + " requires "
|
||||
+ toString(num_additional_params + 3) + " or "
|
||||
+ toString(num_additional_params + 4) + " parameters: " + params + verbose_help,
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
if (mode == MergeTreeData::Summing
|
||||
&& args.size() != num_additional_params + 3
|
||||
&& args.size() != num_additional_params + 4
|
||||
&& args.size() != num_additional_params + 5)
|
||||
{
|
||||
String params;
|
||||
|
||||
if (replicated)
|
||||
params +=
|
||||
"\npath in ZooKeeper,"
|
||||
"\nreplica name,";
|
||||
|
||||
params +=
|
||||
"\nname of column with date,"
|
||||
"\n[sampling element of primary key],"
|
||||
"\nprimary key expression,"
|
||||
"\nindex granularity,"
|
||||
"\n[list of columns to sum]\n";
|
||||
|
||||
throw Exception("Storage " + name + " requires "
|
||||
+ toString(num_additional_params + 3) + " or "
|
||||
+ toString(num_additional_params + 4) + " or "
|
||||
+ toString(num_additional_params + 5) + " parameters: " + params + verbose_help,
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
/// Для Replicated.
|
||||
String zookeeper_path;
|
||||
String replica_name;
|
||||
|
||||
/// Для всех.
|
||||
String date_column_name;
|
||||
ASTPtr primary_expr_list;
|
||||
ASTPtr sampling_expression;
|
||||
UInt64 index_granularity;
|
||||
|
||||
/// Для Collapsing.
|
||||
String sign_column_name;
|
||||
|
||||
/// Для Summing.
|
||||
Names columns_to_sum;
|
||||
|
||||
if (replicated)
|
||||
{
|
||||
auto ast = typeid_cast<ASTLiteral *>(&*args[0]);
|
||||
if (ast && ast->value.getType() == Field::Types::String)
|
||||
zookeeper_path = safeGet<String>(ast->value);
|
||||
else
|
||||
throw Exception("Path in ZooKeeper must be a string literal", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(String("Path in ZooKeeper must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
ast = typeid_cast<ASTLiteral *>(&*args[1]);
|
||||
if (ast && ast->value.getType() == Field::Types::String)
|
||||
replica_name = safeGet<String>(ast->value);
|
||||
else
|
||||
throw Exception("Replica name must be a string literal", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(String("Replica name must be a string literal") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (replica_name.empty())
|
||||
throw Exception("No replica name in config", ErrorCodes::NO_REPLICA_NAME_GIVEN);
|
||||
throw Exception(String("No replica name in config") + verbose_help, ErrorCodes::NO_REPLICA_NAME_GIVEN);
|
||||
|
||||
args.erase(args.begin(), args.begin() + 2);
|
||||
}
|
||||
@ -345,42 +485,54 @@ StoragePtr StorageFactory::get(
|
||||
if (auto ast = typeid_cast<ASTIdentifier *>(&*args.back()))
|
||||
sign_column_name = ast->name;
|
||||
else
|
||||
throw Exception("Sign column name must be an unquoted string", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(String("Sign column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
args.pop_back();
|
||||
}
|
||||
else if (mode == MergeTreeData::Summing)
|
||||
{
|
||||
/// Если последний элемент - не index granularity (литерал), то это - список суммируемых столбцов.
|
||||
if (!typeid_cast<const ASTLiteral *>(&*args.back()))
|
||||
{
|
||||
columns_to_sum = extractColumnNames(args.back());
|
||||
args.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
/// Если присутствует выражение для сэмплирования. MergeTree(date, [sample_key], primary_key, index_granularity)
|
||||
if (args.size() == 4)
|
||||
{
|
||||
sampling_expression = args[1];
|
||||
args.erase(args.begin() + 1);
|
||||
}
|
||||
|
||||
/// Теперь осталось только три параметра - date, primary_key, index_granularity.
|
||||
|
||||
if (auto ast = typeid_cast<ASTIdentifier *>(&*args[0]))
|
||||
date_column_name = ast->name;
|
||||
else
|
||||
throw Exception("Date column name must be an unquoted string", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(String("Date column name must be an unquoted string") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
primary_expr_list = extractPrimaryKey(args[1], name);
|
||||
primary_expr_list = extractPrimaryKey(args[1]);
|
||||
|
||||
auto ast = typeid_cast<ASTLiteral *>(&*args[2]);
|
||||
if (ast && ast->value.getType() == Field::Types::UInt64)
|
||||
index_granularity = safeGet<UInt64>(ast->value);
|
||||
else
|
||||
throw Exception("Index granularity must be a positive integer", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(String("Index granularity must be a positive integer") + verbose_help, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (replicated)
|
||||
return StorageReplicatedMergeTree::create(
|
||||
zookeeper_path, replica_name, attach, data_path, database_name, table_name,
|
||||
columns, materialized_columns, alias_columns, column_defaults,
|
||||
context, primary_expr_list, date_column_name,
|
||||
sampling_expression, index_granularity, mode, sign_column_name);
|
||||
sampling_expression, index_granularity, mode, sign_column_name, columns_to_sum);
|
||||
else
|
||||
return StorageMergeTree::create(
|
||||
data_path, database_name, table_name,
|
||||
columns, materialized_columns, alias_columns, column_defaults,
|
||||
context, primary_expr_list, date_column_name,
|
||||
sampling_expression, index_granularity, mode, sign_column_name);
|
||||
sampling_expression, index_granularity, mode, sign_column_name, columns_to_sum);
|
||||
}
|
||||
else
|
||||
throw Exception("Unknown storage " + name, ErrorCodes::UNKNOWN_STORAGE);
|
||||
|
@ -22,6 +22,7 @@ StorageMergeTree::StorageMergeTree(
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_)
|
||||
: IStorage{materialized_columns_, alias_columns_, column_defaults_},
|
||||
path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
|
||||
@ -29,7 +30,7 @@ StorageMergeTree::StorageMergeTree(
|
||||
data(full_path, columns_,
|
||||
materialized_columns_, alias_columns_, column_defaults_,
|
||||
context_, primary_expr_ast_, date_column_name_,
|
||||
sampling_expression_, index_granularity_,mode_, sign_column_,
|
||||
sampling_expression_, index_granularity_,mode_, sign_column_, columns_to_sum_,
|
||||
settings_, database_name_ + "." + table_name, false),
|
||||
reader(data), writer(data), merger(data),
|
||||
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)")),
|
||||
@ -54,13 +55,14 @@ StoragePtr StorageMergeTree::create(
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_)
|
||||
{
|
||||
auto res = new StorageMergeTree{
|
||||
path_, database_name_, table_name_,
|
||||
columns_, materialized_columns_, alias_columns_, column_defaults_,
|
||||
context_, primary_expr_ast_, date_column_name_,
|
||||
sampling_expression_, index_granularity_, mode_, sign_column_, settings_
|
||||
sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_, settings_
|
||||
};
|
||||
StoragePtr res_ptr = res->thisPtr();
|
||||
|
||||
@ -69,23 +71,6 @@ StoragePtr StorageMergeTree::create(
|
||||
return res_ptr;
|
||||
}
|
||||
|
||||
StoragePtr StorageMergeTree::create(
|
||||
const String & path_, const String & database_name_, const String & table_name_,
|
||||
NamesAndTypesListPtr columns_,
|
||||
Context & context_,
|
||||
ASTPtr & primary_expr_ast_,
|
||||
const String & date_column_name_,
|
||||
const ASTPtr & sampling_expression_,
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const MergeTreeSettings & settings_)
|
||||
{
|
||||
return create(path_, database_name_, table_name_,
|
||||
columns_, {}, {}, {},
|
||||
context_, primary_expr_ast_, date_column_name_,
|
||||
sampling_expression_, index_granularity_, mode_, sign_column_, settings_);
|
||||
}
|
||||
|
||||
void StorageMergeTree::shutdown()
|
||||
{
|
||||
|
@ -36,6 +36,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_,
|
||||
const MergeTreeSettings & settings_)
|
||||
: IStorage{materialized_columns_, alias_columns_, column_defaults_}, context(context_),
|
||||
zookeeper(context.getZooKeeper()), database_name(database_name_),
|
||||
@ -45,7 +46,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
data(full_path, columns_,
|
||||
materialized_columns_, alias_columns_, column_defaults_,
|
||||
context_, primary_expr_ast_, date_column_name_,
|
||||
sampling_expression_, index_granularity_, mode_, sign_column_,
|
||||
sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_,
|
||||
settings_, database_name_ + "." + table_name, true,
|
||||
std::bind(&StorageReplicatedMergeTree::enqueuePartForCheck, this, std::placeholders::_1)),
|
||||
reader(data), writer(data), merger(data), fetcher(data),
|
||||
@ -115,7 +116,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
unreplicated_data.reset(new MergeTreeData(unreplicated_path, columns_,
|
||||
materialized_columns_, alias_columns_, column_defaults_,
|
||||
context_, primary_expr_ast_,
|
||||
date_column_name_, sampling_expression_, index_granularity_, mode_, sign_column_, settings_,
|
||||
date_column_name_, sampling_expression_, index_granularity_, mode_, sign_column_, columns_to_sum_, settings_,
|
||||
database_name_ + "." + table_name + "[unreplicated]", false));
|
||||
|
||||
unreplicated_data->loadDataParts(skip_sanity_checks);
|
||||
@ -147,6 +148,7 @@ StoragePtr StorageReplicatedMergeTree::create(
|
||||
size_t index_granularity_,
|
||||
MergeTreeData::Mode mode_,
|
||||
const String & sign_column_,
|
||||
const Names & columns_to_sum_ = Names(),
|
||||
const MergeTreeSettings & settings_)
|
||||
{
|
||||
auto res = new StorageReplicatedMergeTree{
|
||||
@ -155,7 +157,7 @@ StoragePtr StorageReplicatedMergeTree::create(
|
||||
columns_, materialized_columns_, alias_columns_, column_defaults_,
|
||||
context_, primary_expr_ast_, date_column_name_,
|
||||
sampling_expression_, index_granularity_, mode_,
|
||||
sign_column_, settings_
|
||||
sign_column_, columns_to_sum_, settings_
|
||||
};
|
||||
StoragePtr res_ptr = res->thisPtr();
|
||||
|
||||
|
@ -41,7 +41,11 @@ int main(int argc, char ** argv)
|
||||
if (!parser.parse(begin, end, primary_expr, expected))
|
||||
throw Poco::Exception("Cannot parse " + primary_expr_str);
|
||||
|
||||
StoragePtr table = StorageMergeTree::create("./", "default", "test", names_and_types, context, primary_expr, "d", nullptr, 101);
|
||||
StoragePtr table = StorageMergeTree::create(
|
||||
"./", "default", "test",
|
||||
names_and_types, context, primary_expr, "d",
|
||||
nullptr, {}, 101, {},
|
||||
MergeTreeData::Ordinary, {}, {}, {});
|
||||
|
||||
/// пишем в неё
|
||||
{
|
||||
|
@ -0,0 +1,95 @@
|
||||
DROP TABLE IF EXISTS test.merge_tree;
|
||||
DROP TABLE IF EXISTS test.collapsing_merge_tree;
|
||||
DROP TABLE IF EXISTS test.summing_merge_tree;
|
||||
DROP TABLE IF EXISTS test.summing_merge_tree_with_list_of_columns_to_sum;
|
||||
DROP TABLE IF EXISTS test.aggregating_merge_tree;
|
||||
|
||||
CREATE TABLE test.merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, (a, b), 111);
|
||||
CREATE TABLE test.collapsing_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, (a, b), 111, y);
|
||||
CREATE TABLE test.summing_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111);
|
||||
CREATE TABLE test.summing_merge_tree_with_list_of_columns_to_sum
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, (a, b), 111, (y, z));
|
||||
CREATE TABLE test.aggregating_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, (a, b), 111);
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS test.merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.collapsing_merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.summing_merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum;
|
||||
DROP TABLE IF EXISTS test.aggregating_merge_tree_with_sampling;
|
||||
|
||||
CREATE TABLE test.merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
CREATE TABLE test.collapsing_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = CollapsingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
|
||||
CREATE TABLE test.summing_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
CREATE TABLE test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = SummingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, (y, z));
|
||||
CREATE TABLE test.aggregating_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS test.replicated_merge_tree;
|
||||
DROP TABLE IF EXISTS test.replicated_collapsing_merge_tree;
|
||||
DROP TABLE IF EXISTS test.replicated_summing_merge_tree;
|
||||
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_list_of_columns_to_sum;
|
||||
DROP TABLE IF EXISTS test.replicated_aggregating_merge_tree;
|
||||
|
||||
CREATE TABLE test.replicated_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
|
||||
CREATE TABLE test.replicated_collapsing_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/01/replicated_collapsing_merge_tree/', 'r1', d, (a, b), 111, y);
|
||||
CREATE TABLE test.replicated_summing_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree/', 'r1', d, (a, b), 111);
|
||||
CREATE TABLE test.replicated_summing_merge_tree_with_list_of_columns_to_sum
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_list_of_columns_to_sum/', 'r1', d, (a, b), 111, (y, z));
|
||||
CREATE TABLE test.replicated_aggregating_merge_tree
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/01/replicated_aggregating_merge_tree/', 'r1', d, (a, b), 111);
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS test.replicated_merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.replicated_collapsing_merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_sampling;
|
||||
DROP TABLE IF EXISTS test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum;
|
||||
DROP TABLE IF EXISTS test.replicated_aggregating_merge_tree_with_sampling;
|
||||
|
||||
CREATE TABLE test.replicated_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
CREATE TABLE test.replicated_collapsing_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/01/replicated_collapsing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
|
||||
CREATE TABLE test.replicated_summing_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
CREATE TABLE test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedSummingMergeTree('/clickhouse/tables/01/replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, (y, z));
|
||||
CREATE TABLE test.replicated_aggregating_merge_tree_with_sampling
|
||||
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/01/replicated_aggregating_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
|
||||
|
||||
|
||||
INSERT INTO test.merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.collapsing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.summing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.summing_merge_tree_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.aggregating_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
|
||||
INSERT INTO test.merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.collapsing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.summing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.summing_merge_tree_with_sampling_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.aggregating_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
|
||||
INSERT INTO test.replicated_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_collapsing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_summing_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_summing_merge_tree_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_aggregating_merge_tree VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
|
||||
INSERT INTO test.replicated_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_collapsing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_summing_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_summing_merge_tree_with_sampling_with_list_of_columns_to_sum VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
||||
INSERT INTO test.replicated_aggregating_merge_tree_with_sampling VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789);
|
Loading…
Reference in New Issue
Block a user