mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
parent
80cb0f0636
commit
7f93f9ad2f
@ -424,6 +424,8 @@ namespace ErrorCodes
|
||||
extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447;
|
||||
extern const int BROTLI_READ_FAILED = 448;
|
||||
extern const int BROTLI_WRITE_FAILED = 449;
|
||||
extern const int BAD_TTL_EXPRESSION = 450;
|
||||
extern const int BAD_TTL_FILE = 451;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
208
dbms/src/DataStreams/TTLBlockInputStream.cpp
Normal file
208
dbms/src/DataStreams/TTLBlockInputStream.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
#include <DataStreams/TTLBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <Interpreters/evaluateMissingDefaults.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
TTLBlockInputStream::TTLBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const MergeTreeData & storage_,
|
||||
const MergeTreeData::MutableDataPartPtr & data_part_,
|
||||
time_t current_time_)
|
||||
: storage(storage_)
|
||||
, data_part(data_part_)
|
||||
, current_time(current_time_)
|
||||
, old_ttl_infos(data_part->ttl_infos)
|
||||
, log(&Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
|
||||
, date_lut(DateLUT::instance())
|
||||
{
|
||||
children.push_back(input_);
|
||||
|
||||
const auto & column_defaults = storage.getColumns().getDefaults();
|
||||
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
|
||||
for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
|
||||
{
|
||||
if (ttl_info.min <= current_time)
|
||||
{
|
||||
new_ttl_infos.columns_ttl.emplace(name, MergeTreeDataPart::TTLInfo{});
|
||||
empty_columns.emplace(name);
|
||||
|
||||
auto it = column_defaults.find(name);
|
||||
|
||||
if (it != column_defaults.end())
|
||||
default_expr_list->children.emplace_back(
|
||||
setAlias(it->second.expression, it->first));
|
||||
}
|
||||
else
|
||||
new_ttl_infos.columns_ttl.emplace(name, ttl_info);
|
||||
}
|
||||
|
||||
if (old_ttl_infos.table_ttl.min > current_time)
|
||||
new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;
|
||||
|
||||
if (!default_expr_list->children.empty())
|
||||
{
|
||||
auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(
|
||||
default_expr_list, storage.getColumns().getAllPhysical());
|
||||
defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Block TTLBlockInputStream::getHeader() const
|
||||
{
|
||||
return children.at(0)->getHeader();
|
||||
}
|
||||
|
||||
Block TTLBlockInputStream::readImpl()
|
||||
{
|
||||
Block block = children.at(0)->read();
|
||||
if (!block)
|
||||
return block;
|
||||
|
||||
if (storage.hasTableTTL())
|
||||
{
|
||||
/// Skip all data if table ttl is expired for part
|
||||
if (old_ttl_infos.table_ttl.max <= current_time)
|
||||
{
|
||||
rows_removed = data_part->rows_count;
|
||||
return {};
|
||||
}
|
||||
|
||||
if (old_ttl_infos.table_ttl.min <= current_time)
|
||||
removeRowsWithExpiredTableTTL(block);
|
||||
}
|
||||
|
||||
removeValuesWithExpiredColumnTTL(block);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::readSuffixImpl()
|
||||
{
|
||||
for (const auto & elem : new_ttl_infos.columns_ttl)
|
||||
new_ttl_infos.updatePartMinTTL(elem.second.min);
|
||||
|
||||
new_ttl_infos.updatePartMinTTL(new_ttl_infos.table_ttl.min);
|
||||
|
||||
data_part->ttl_infos = std::move(new_ttl_infos);
|
||||
data_part->empty_columns = std::move(empty_columns);
|
||||
|
||||
if (rows_removed)
|
||||
LOG_INFO(log, "Removed " << rows_removed << " rows with expired ttl from part " << data_part->name);
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
|
||||
{
|
||||
storage.ttl_table_entry.expression->execute(block);
|
||||
|
||||
const auto & current = block.getByName(storage.ttl_table_entry.result_column);
|
||||
const IColumn * ttl_column = current.column.get();
|
||||
|
||||
MutableColumns result_columns;
|
||||
result_columns.reserve(getHeader().columns());
|
||||
for (const auto & name : storage.getColumns().getNamesOfPhysical())
|
||||
{
|
||||
auto & column_with_type = block.getByName(name);
|
||||
const IColumn * values_column = column_with_type.column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
if (cur_ttl > current_time)
|
||||
{
|
||||
new_ttl_infos.table_ttl.update(cur_ttl);
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
else
|
||||
++rows_removed;
|
||||
}
|
||||
result_columns.emplace_back(std::move(result_column));
|
||||
}
|
||||
|
||||
block = getHeader().cloneWithColumns(std::move(result_columns));
|
||||
}
|
||||
|
||||
void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
|
||||
{
|
||||
Block block_with_defaults;
|
||||
if (defaults_expression)
|
||||
{
|
||||
block_with_defaults = block;
|
||||
defaults_expression->execute(block_with_defaults);
|
||||
}
|
||||
|
||||
for (const auto & [name, ttl_entry] : storage.ttl_entries_by_name)
|
||||
{
|
||||
const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
|
||||
auto & new_ttl_info = new_ttl_infos.columns_ttl[name];
|
||||
|
||||
if (old_ttl_info.min > current_time)
|
||||
continue;
|
||||
|
||||
if (old_ttl_info.max <= current_time)
|
||||
continue;
|
||||
|
||||
if (!block.has(ttl_entry.result_column))
|
||||
ttl_entry.expression->execute(block);
|
||||
|
||||
ColumnPtr default_column = nullptr;
|
||||
if (block_with_defaults.has(name))
|
||||
default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();
|
||||
|
||||
auto & column_with_type = block.getByName(name);
|
||||
const IColumn * values_column = column_with_type.column.get();
|
||||
MutableColumnPtr result_column = values_column->cloneEmpty();
|
||||
result_column->reserve(block.rows());
|
||||
|
||||
const auto & current = block.getByName(ttl_entry.result_column);
|
||||
const IColumn * ttl_column = current.column.get();
|
||||
|
||||
for (size_t i = 0; i < block.rows(); ++i)
|
||||
{
|
||||
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
|
||||
|
||||
if (cur_ttl <= current_time)
|
||||
{
|
||||
if (default_column)
|
||||
result_column->insertFrom(*default_column, i);
|
||||
else
|
||||
result_column->insertDefault();
|
||||
}
|
||||
else
|
||||
{
|
||||
new_ttl_info.update(cur_ttl);
|
||||
empty_columns.erase(name);
|
||||
result_column->insertFrom(*values_column, i);
|
||||
}
|
||||
}
|
||||
column_with_type.column = std::move(result_column);
|
||||
}
|
||||
|
||||
for (const auto & elem : storage.ttl_entries_by_name)
|
||||
if (block.has(elem.second.result_column))
|
||||
block.erase(elem.second.result_column);
|
||||
}
|
||||
|
||||
UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
|
||||
{
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
|
||||
return date_lut.fromDayNum(DayNum(column_date->getData()[ind]));
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
|
||||
return column_date_time->getData()[ind];
|
||||
else
|
||||
throw Exception("Unexpected type of result ttl column", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
}
|
60
dbms/src/DataStreams/TTLBlockInputStream.h
Normal file
60
dbms/src/DataStreams/TTLBlockInputStream.h
Normal file
@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPart.h>
|
||||
#include <Core/Block.h>
|
||||
|
||||
#include <common/DateLUT.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class TTLBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
TTLBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const MergeTreeData & storage_,
|
||||
const MergeTreeData::MutableDataPartPtr & data_part_,
|
||||
time_t current_time
|
||||
);
|
||||
|
||||
String getName() const override { return "TTLBlockInputStream"; }
|
||||
|
||||
Block getHeader() const override;
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
|
||||
/// Finalizes ttl infos and updates data part
|
||||
void readSuffixImpl() override;
|
||||
|
||||
private:
|
||||
const MergeTreeData & storage;
|
||||
|
||||
/// ttl_infos and empty_columns are updating while reading
|
||||
const MergeTreeData::MutableDataPartPtr & data_part;
|
||||
|
||||
time_t current_time;
|
||||
|
||||
MergeTreeDataPart::TTLInfos old_ttl_infos;
|
||||
MergeTreeDataPart::TTLInfos new_ttl_infos;
|
||||
NameSet empty_columns;
|
||||
|
||||
size_t rows_removed = 0;
|
||||
Logger * log;
|
||||
DateLUTImpl date_lut;
|
||||
|
||||
std::unordered_map<String, String> defaults_result_column;
|
||||
ExpressionActionsPtr defaults_expression;
|
||||
private:
|
||||
/// Removes values with expired ttl and computes new min_ttl and empty_columns for part
|
||||
void removeValuesWithExpiredColumnTTL(Block & block);
|
||||
|
||||
/// Remove rows with expired table ttl and computes new min_ttl for part
|
||||
void removeRowsWithExpiredTableTTL(Block & block);
|
||||
|
||||
UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
|
||||
};
|
||||
|
||||
}
|
@ -233,6 +233,9 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
|
||||
column_declaration->codec = parseQuery(codec_p, codec_desc_pos, codec_desc_end, "column codec", 0);
|
||||
}
|
||||
|
||||
if (column.ttl)
|
||||
column_declaration->ttl = column.ttl;
|
||||
|
||||
columns_list->children.push_back(column_declaration_ptr);
|
||||
}
|
||||
|
||||
@ -347,6 +350,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpres
|
||||
if (col_decl.codec)
|
||||
column.codec = CompressionCodecFactory::instance().get(col_decl.codec, column.type);
|
||||
|
||||
if (col_decl.ttl)
|
||||
column.ttl = col_decl.ttl;
|
||||
|
||||
res.add(std::move(column));
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,9 @@ Block InterpreterDescribeQuery::getSampleBlock()
|
||||
col.name = "codec_expression";
|
||||
block.insert(col);
|
||||
|
||||
col.name = "ttl_expression";
|
||||
block.insert(col);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
@ -118,6 +121,11 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
|
||||
res_columns[5]->insert(column.codec->getCodecDesc());
|
||||
else
|
||||
res_columns[5]->insertDefault();
|
||||
|
||||
if (column.ttl)
|
||||
res_columns[6]->insert(queryToString(column.ttl));
|
||||
else
|
||||
res_columns[6]->insertDefault();
|
||||
}
|
||||
|
||||
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
|
||||
|
@ -40,6 +40,11 @@ ASTPtr ASTAlterCommand::clone() const
|
||||
res->predicate = predicate->clone();
|
||||
res->children.push_back(res->predicate);
|
||||
}
|
||||
if (ttl)
|
||||
{
|
||||
res->ttl = ttl->clone();
|
||||
res->children.push_back(res->ttl);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -174,6 +179,11 @@ void ASTAlterCommand::formatImpl(
|
||||
settings.ostr << " " << (settings.hilite ? hilite_none : "");
|
||||
comment->formatImpl(settings, state, frame);
|
||||
}
|
||||
else if (type == ASTAlterCommand::MODIFY_TTL)
|
||||
{
|
||||
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY TTL " << (settings.hilite ? hilite_none : "");
|
||||
ttl->formatImpl(settings, state, frame);
|
||||
}
|
||||
else
|
||||
throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
|
||||
}
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
MODIFY_COLUMN,
|
||||
COMMENT_COLUMN,
|
||||
MODIFY_ORDER_BY,
|
||||
MODIFY_TTL,
|
||||
|
||||
ADD_INDEX,
|
||||
DROP_INDEX,
|
||||
@ -84,6 +85,9 @@ public:
|
||||
/// A column comment
|
||||
ASTPtr comment;
|
||||
|
||||
/// For MODIFY TTL query
|
||||
ASTPtr ttl;
|
||||
|
||||
bool detach = false; /// true for DETACH PARTITION
|
||||
|
||||
bool part = false; /// true for ATTACH PART
|
||||
|
@ -33,6 +33,12 @@ ASTPtr ASTColumnDeclaration::clone() const
|
||||
res->children.push_back(res->comment);
|
||||
}
|
||||
|
||||
if (ttl)
|
||||
{
|
||||
res->ttl = ttl->clone();
|
||||
res->children.push_back(res->ttl);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -69,6 +75,12 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta
|
||||
settings.ostr << ' ';
|
||||
codec->formatImpl(settings, state, frame);
|
||||
}
|
||||
|
||||
if (ttl)
|
||||
{
|
||||
settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << "TTL" << (settings.hilite ? hilite_none : "") << ' ';
|
||||
ttl->formatImpl(settings, state, frame);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -17,6 +17,7 @@ public:
|
||||
ASTPtr default_expression;
|
||||
ASTPtr codec;
|
||||
ASTPtr comment;
|
||||
ASTPtr ttl;
|
||||
|
||||
String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); }
|
||||
|
||||
|
@ -23,6 +23,8 @@ ASTPtr ASTStorage::clone() const
|
||||
res->set(res->order_by, order_by->clone());
|
||||
if (sample_by)
|
||||
res->set(res->sample_by, sample_by->clone());
|
||||
if (ttl_table)
|
||||
res->set(res->ttl_table, ttl_table->clone());
|
||||
|
||||
if (settings)
|
||||
res->set(res->settings, settings->clone());
|
||||
@ -57,6 +59,11 @@ void ASTStorage::formatImpl(const FormatSettings & s, FormatState & state, Forma
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SAMPLE BY " << (s.hilite ? hilite_none : "");
|
||||
sample_by->formatImpl(s, state, frame);
|
||||
}
|
||||
if (ttl_table)
|
||||
{
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "TTL " << (s.hilite ? hilite_none : "");
|
||||
ttl_table->formatImpl(s, state, frame);
|
||||
}
|
||||
if (settings)
|
||||
{
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << "SETTINGS " << (s.hilite ? hilite_none : "");
|
||||
|
@ -18,6 +18,7 @@ public:
|
||||
IAST * primary_key = nullptr;
|
||||
IAST * order_by = nullptr;
|
||||
IAST * sample_by = nullptr;
|
||||
IAST * ttl_table = nullptr;
|
||||
ASTSetQuery * settings = nullptr;
|
||||
|
||||
String getID(char) const override { return "Storage definition"; }
|
||||
|
@ -27,6 +27,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
ParserKeyword s_modify_column("MODIFY COLUMN");
|
||||
ParserKeyword s_comment_column("COMMENT COLUMN");
|
||||
ParserKeyword s_modify_order_by("MODIFY ORDER BY");
|
||||
ParserKeyword s_modify_ttl("MODIFY TTL");
|
||||
|
||||
ParserKeyword s_add_index("ADD INDEX");
|
||||
ParserKeyword s_drop_index("DROP INDEX");
|
||||
@ -282,6 +283,12 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
|
||||
command->type = ASTAlterCommand::COMMENT_COLUMN;
|
||||
}
|
||||
else if (s_modify_ttl.ignore(pos, expected))
|
||||
{
|
||||
if (!parser_exp_elem.parse(pos, command->ttl, expected))
|
||||
return false;
|
||||
command->type = ASTAlterCommand::MODIFY_TTL;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
@ -299,6 +306,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
|
||||
command->children.push_back(command->update_assignments);
|
||||
if (command->comment)
|
||||
command->children.push_back(command->comment);
|
||||
if (command->ttl)
|
||||
command->children.push_back(command->ttl);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -210,6 +210,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
ParserKeyword s_primary_key("PRIMARY KEY");
|
||||
ParserKeyword s_order_by("ORDER BY");
|
||||
ParserKeyword s_sample_by("SAMPLE BY");
|
||||
ParserKeyword s_ttl("TTL");
|
||||
ParserKeyword s_settings("SETTINGS");
|
||||
|
||||
ParserIdentifierWithOptionalParameters ident_with_optional_params_p;
|
||||
@ -221,6 +222,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
ASTPtr primary_key;
|
||||
ASTPtr order_by;
|
||||
ASTPtr sample_by;
|
||||
ASTPtr ttl_table;
|
||||
ASTPtr settings;
|
||||
|
||||
if (!s_engine.ignore(pos, expected))
|
||||
@ -265,6 +267,14 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!ttl_table && s_ttl.ignore(pos, expected))
|
||||
{
|
||||
if (expression_p.parse(pos, ttl_table, expected))
|
||||
continue;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s_settings.ignore(pos, expected))
|
||||
{
|
||||
if (!settings_p.parse(pos, settings, expected))
|
||||
@ -280,6 +290,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
storage->set(storage->primary_key, primary_key);
|
||||
storage->set(storage->order_by, order_by);
|
||||
storage->set(storage->sample_by, sample_by);
|
||||
storage->set(storage->ttl_table, ttl_table);
|
||||
|
||||
storage->set(storage->settings, settings);
|
||||
|
||||
|
@ -123,9 +123,11 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
ParserKeyword s_alias{"ALIAS"};
|
||||
ParserKeyword s_comment{"COMMENT"};
|
||||
ParserKeyword s_codec{"CODEC"};
|
||||
ParserKeyword s_ttl{"TTL"};
|
||||
ParserTernaryOperatorExpression expr_parser;
|
||||
ParserStringLiteral string_literal_parser;
|
||||
ParserCodec codec_parser;
|
||||
ParserExpression expression_parser;
|
||||
|
||||
/// mandatory column name
|
||||
ASTPtr name;
|
||||
@ -140,6 +142,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
ASTPtr default_expression;
|
||||
ASTPtr comment_expression;
|
||||
ASTPtr codec_expression;
|
||||
ASTPtr ttl_expression;
|
||||
|
||||
if (!s_default.check_without_moving(pos, expected) &&
|
||||
!s_materialized.check_without_moving(pos, expected) &&
|
||||
@ -178,6 +181,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
return false;
|
||||
}
|
||||
|
||||
if (s_ttl.ignore(pos, expected))
|
||||
{
|
||||
if (!expression_parser.parse(pos, ttl_expression, expected))
|
||||
return false;
|
||||
}
|
||||
|
||||
const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
|
||||
node = column_declaration;
|
||||
getIdentifierName(name, column_declaration->name);
|
||||
@ -207,6 +216,12 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
|
||||
column_declaration->children.push_back(std::move(codec_expression));
|
||||
}
|
||||
|
||||
if (ttl_expression)
|
||||
{
|
||||
column_declaration->ttl = ttl_expression;
|
||||
column_declaration->children.push_back(std::move(ttl_expression));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -64,6 +66,9 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
if (command_ast->column)
|
||||
command.after_column = *getIdentifierName(command_ast->column);
|
||||
|
||||
if (ast_col_decl.ttl)
|
||||
command.ttl = ast_col_decl.ttl;
|
||||
|
||||
command.if_not_exists = command_ast->if_not_exists;
|
||||
|
||||
return command;
|
||||
@ -104,6 +109,9 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
command.comment = ast_comment.value.get<String>();
|
||||
}
|
||||
|
||||
if (ast_col_decl.ttl)
|
||||
command.ttl = ast_col_decl.ttl;
|
||||
|
||||
if (ast_col_decl.codec)
|
||||
command.codec = compression_codec_factory.get(ast_col_decl.codec, command.data_type);
|
||||
|
||||
@ -157,13 +165,20 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
|
||||
|
||||
return command;
|
||||
}
|
||||
else if (command_ast->type == ASTAlterCommand::MODIFY_TTL)
|
||||
{
|
||||
AlterCommand command;
|
||||
command.type = AlterCommand::MODIFY_TTL;
|
||||
command.ttl = command_ast->ttl;
|
||||
return command;
|
||||
}
|
||||
else
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const
|
||||
{
|
||||
if (type == ADD_COLUMN)
|
||||
{
|
||||
@ -175,6 +190,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
|
||||
}
|
||||
column.comment = comment;
|
||||
column.codec = codec;
|
||||
column.ttl = ttl;
|
||||
|
||||
columns_description.add(column, after_column);
|
||||
|
||||
@ -204,6 +220,9 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
|
||||
return;
|
||||
}
|
||||
|
||||
if (ttl)
|
||||
column.ttl = ttl;
|
||||
|
||||
column.type = data_type;
|
||||
|
||||
column.default_desc.kind = default_kind;
|
||||
@ -278,6 +297,10 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
|
||||
|
||||
indices_description.indices.erase(erase_it);
|
||||
}
|
||||
else if (type == MODIFY_TTL)
|
||||
{
|
||||
ttl_table_ast = ttl;
|
||||
}
|
||||
else
|
||||
throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
@ -293,20 +316,22 @@ bool AlterCommand::is_mutable() const
|
||||
}
|
||||
|
||||
void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const
|
||||
{
|
||||
auto new_columns_description = columns_description;
|
||||
auto new_indices_description = indices_description;
|
||||
auto new_order_by_ast = order_by_ast;
|
||||
auto new_primary_key_ast = primary_key_ast;
|
||||
auto new_ttl_table_ast = ttl_table_ast;
|
||||
|
||||
for (const AlterCommand & command : *this)
|
||||
if (!command.ignore)
|
||||
command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast);
|
||||
command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
|
||||
columns_description = std::move(new_columns_description);
|
||||
indices_description = std::move(new_indices_description);
|
||||
order_by_ast = std::move(new_order_by_ast);
|
||||
primary_key_ast = std::move(new_primary_key_ast);
|
||||
ttl_table_ast = std::move(new_ttl_table_ast);
|
||||
}
|
||||
|
||||
void AlterCommands::validate(const IStorage & table, const Context & context)
|
||||
@ -493,7 +518,8 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const
|
||||
IndicesDescription indices_description;
|
||||
ASTPtr out_order_by;
|
||||
ASTPtr out_primary_key;
|
||||
apply(out_columns_description, indices_description, out_order_by, out_primary_key);
|
||||
ASTPtr out_ttl_table;
|
||||
apply(out_columns_description, indices_description, out_order_by, out_primary_key, out_ttl_table);
|
||||
|
||||
if (out_order_by)
|
||||
throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED);
|
||||
@ -501,6 +527,8 @@ void AlterCommands::apply(ColumnsDescription & columns_description) const
|
||||
throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED);
|
||||
if (!indices_description.indices.empty())
|
||||
throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED);
|
||||
if (out_ttl_table)
|
||||
throw Exception("Storage doesn't support modifying TTL expression", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
columns_description = std::move(out_columns_description);
|
||||
}
|
||||
|
@ -24,6 +24,7 @@ struct AlterCommand
|
||||
MODIFY_ORDER_BY,
|
||||
ADD_INDEX,
|
||||
DROP_INDEX,
|
||||
MODIFY_TTL,
|
||||
UKNOWN_TYPE,
|
||||
};
|
||||
|
||||
@ -60,6 +61,9 @@ struct AlterCommand
|
||||
/// For ADD/DROP INDEX
|
||||
String index_name;
|
||||
|
||||
/// For MODIFY TTL
|
||||
ASTPtr ttl;
|
||||
|
||||
/// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist.
|
||||
bool ignore = false;
|
||||
|
||||
@ -79,7 +83,7 @@ struct AlterCommand
|
||||
static std::optional<AlterCommand> parse(const ASTAlterCommand * command);
|
||||
|
||||
void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const;
|
||||
ASTPtr & order_by_ast, ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const;
|
||||
/// Checks that not only metadata touched by that command
|
||||
bool is_mutable() const;
|
||||
};
|
||||
@ -91,7 +95,7 @@ class AlterCommands : public std::vector<AlterCommand>
|
||||
{
|
||||
public:
|
||||
void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast,
|
||||
ASTPtr & primary_key_ast) const;
|
||||
ASTPtr & primary_key_ast, ASTPtr & ttl_table_ast) const;
|
||||
|
||||
/// For storages that don't support MODIFY_ORDER_BY.
|
||||
void apply(ColumnsDescription & columns_description) const;
|
||||
|
@ -37,12 +37,14 @@ namespace ErrorCodes
|
||||
bool ColumnDescription::operator==(const ColumnDescription & other) const
|
||||
{
|
||||
auto codec_str = [](const CompressionCodecPtr & codec_ptr) { return codec_ptr ? codec_ptr->getCodecDesc() : String(); };
|
||||
auto ttl_str = [](const ASTPtr & ttl_ast) { return ttl_ast ? queryToString(ttl_ast) : String{}; };
|
||||
|
||||
return name == other.name
|
||||
&& type->equals(*other.type)
|
||||
&& default_desc == other.default_desc
|
||||
&& comment == other.comment
|
||||
&& codec_str(codec) == codec_str(other.codec);
|
||||
&& codec_str(codec) == codec_str(other.codec)
|
||||
&& ttl_str(ttl) == ttl_str(other.ttl);
|
||||
}
|
||||
|
||||
void ColumnDescription::writeText(WriteBuffer & buf) const
|
||||
@ -74,6 +76,13 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
|
||||
DB::writeText(")", buf);
|
||||
}
|
||||
|
||||
if (ttl)
|
||||
{
|
||||
writeChar('\t', buf);
|
||||
DB::writeText("TTL ", buf);
|
||||
DB::writeText(queryToString(ttl), buf);
|
||||
}
|
||||
|
||||
writeChar('\n', buf);
|
||||
}
|
||||
|
||||
@ -99,6 +108,9 @@ void ColumnDescription::readText(ReadBuffer & buf)
|
||||
|
||||
if (col_ast->codec)
|
||||
codec = CompressionCodecFactory::instance().get(col_ast->codec, type);
|
||||
|
||||
if (col_ast->ttl)
|
||||
ttl = col_ast->ttl;
|
||||
}
|
||||
else
|
||||
throw Exception("Cannot parse column description", ErrorCodes::CANNOT_PARSE_TEXT);
|
||||
@ -388,6 +400,18 @@ CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_
|
||||
return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
|
||||
}
|
||||
|
||||
ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const
|
||||
{
|
||||
ColumnTTLs ret;
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
if (column.ttl)
|
||||
ret.emplace(column.name, column.ttl);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
String ColumnsDescription::toString() const
|
||||
{
|
||||
|
@ -18,6 +18,7 @@ struct ColumnDescription
|
||||
ColumnDefault default_desc;
|
||||
String comment;
|
||||
CompressionCodecPtr codec;
|
||||
ASTPtr ttl;
|
||||
|
||||
ColumnDescription() = default;
|
||||
ColumnDescription(String name_, DataTypePtr type_) : name(std::move(name_)), type(std::move(type_)) {}
|
||||
@ -58,6 +59,9 @@ public:
|
||||
/// ordinary + materialized + aliases.
|
||||
NamesAndTypesList getAll() const;
|
||||
|
||||
using ColumnTTLs = std::unordered_map<String, ASTPtr>;
|
||||
ColumnTTLs getColumnTTLs() const;
|
||||
|
||||
bool has(const String & column_name) const;
|
||||
bool hasNested(const String & column_name) const;
|
||||
ColumnDescription & get(const String & column_name);
|
||||
|
@ -39,6 +39,9 @@ public:
|
||||
|
||||
/// Opaque pointer to avoid dependencies (it is not possible to do forward declaration of typedef).
|
||||
const void * data;
|
||||
|
||||
/// Minimal time, when we need to delete some data from this part
|
||||
time_t min_ttl;
|
||||
};
|
||||
|
||||
/// Parts are belong to partitions. Only parts within same partition could be merged.
|
||||
|
@ -84,6 +84,7 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_MUNMAP;
|
||||
extern const int CANNOT_MREMAP;
|
||||
extern const int BAD_TTL_EXPRESSION;
|
||||
}
|
||||
|
||||
|
||||
@ -97,6 +98,7 @@ MergeTreeData::MergeTreeData(
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_,
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool require_part_metadata_,
|
||||
@ -108,6 +110,7 @@ MergeTreeData::MergeTreeData(
|
||||
settings(settings_),
|
||||
partition_by_ast(partition_by_ast_),
|
||||
sample_by_ast(sample_by_ast_),
|
||||
ttl_table_ast(ttl_table_ast_),
|
||||
require_part_metadata(require_part_metadata_),
|
||||
database_name(database_), table_name(table_),
|
||||
full_path(full_path_),
|
||||
@ -159,6 +162,8 @@ MergeTreeData::MergeTreeData(
|
||||
min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING;
|
||||
}
|
||||
|
||||
setTTLExpressions(columns_.getColumnTTLs(), ttl_table_ast_);
|
||||
|
||||
auto path_exists = Poco::File(full_path).exists();
|
||||
/// Creating directories, if not exist.
|
||||
Poco::File(full_path).createDirectories();
|
||||
@ -490,6 +495,98 @@ void MergeTreeData::initPartitionKey()
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name)
|
||||
{
|
||||
for (const auto & action : ttl_expression->getActions())
|
||||
{
|
||||
if (action.type == ExpressionAction::APPLY_FUNCTION)
|
||||
{
|
||||
IFunctionBase & func = *action.function_base;
|
||||
if (!func.isDeterministic())
|
||||
throw Exception("TTL expression cannot contain non-deterministic functions, "
|
||||
"but contains function " + func.getName(), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
bool has_date_column = false;
|
||||
for (const auto & elem : ttl_expression->getRequiredColumnsWithTypes())
|
||||
{
|
||||
if (typeid_cast<const DataTypeDateTime *>(elem.type.get()) || typeid_cast<const DataTypeDate *>(elem.type.get()))
|
||||
{
|
||||
has_date_column = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_date_column)
|
||||
throw Exception("TTL expression should use at least one Date or DateTime column", ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
|
||||
const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name);
|
||||
|
||||
if (!typeid_cast<const DataTypeDateTime *>(result_column.type.get())
|
||||
&& !typeid_cast<const DataTypeDate *>(result_column.type.get()))
|
||||
{
|
||||
throw Exception("TTL expression result column should have DateTime or Date type, but has "
|
||||
+ result_column.type->getName(), ErrorCodes::BAD_TTL_EXPRESSION);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeData::setTTLExpressions(const ColumnsDescription::ColumnTTLs & new_column_ttls,
|
||||
const ASTPtr & new_ttl_table_ast, bool only_check)
|
||||
{
|
||||
auto create_ttl_entry = [this](ASTPtr ttl_ast) -> TTLEntry
|
||||
{
|
||||
auto syntax_result = SyntaxAnalyzer(global_context).analyze(ttl_ast, getColumns().getAllPhysical());
|
||||
auto expr = ExpressionAnalyzer(ttl_ast, syntax_result, global_context).getActions(false);
|
||||
|
||||
String result_column = ttl_ast->getColumnName();
|
||||
checkTTLExpression(expr, result_column);
|
||||
|
||||
return {expr, result_column};
|
||||
};
|
||||
|
||||
if (!new_column_ttls.empty())
|
||||
{
|
||||
NameSet columns_ttl_forbidden;
|
||||
|
||||
if (partition_key_expr)
|
||||
for (const auto & col : partition_key_expr->getRequiredColumns())
|
||||
columns_ttl_forbidden.insert(col);
|
||||
|
||||
if (sorting_key_expr)
|
||||
for (const auto & col : sorting_key_expr->getRequiredColumns())
|
||||
columns_ttl_forbidden.insert(col);
|
||||
|
||||
for (const auto & [name, ast] : new_column_ttls)
|
||||
{
|
||||
if (columns_ttl_forbidden.count(name))
|
||||
throw Exception("Trying to set ttl for key column " + name, ErrorCodes::ILLEGAL_COLUMN);
|
||||
else
|
||||
{
|
||||
auto new_ttl_entry = create_ttl_entry(ast);
|
||||
if (!only_check)
|
||||
ttl_entries_by_name.emplace(name, new_ttl_entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (new_ttl_table_ast)
|
||||
{
|
||||
auto new_ttl_table_entry = create_ttl_entry(new_ttl_table_ast);
|
||||
if (!only_check)
|
||||
{
|
||||
ttl_table_ast = new_ttl_table_ast;
|
||||
ttl_table_entry = new_ttl_table_entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MergeTreeData::MergingParams::check(const NamesAndTypesList & columns) const
|
||||
{
|
||||
@ -1059,7 +1156,8 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c
|
||||
auto new_indices = getIndicesDescription();
|
||||
ASTPtr new_order_by_ast = order_by_ast;
|
||||
ASTPtr new_primary_key_ast = primary_key_ast;
|
||||
commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
|
||||
ASTPtr new_ttl_table_ast = ttl_table_ast;
|
||||
commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
|
||||
|
||||
if (getIndicesDescription().empty() && !new_indices.empty() &&
|
||||
!context.getSettingsRef().allow_experimental_data_skipping_indices)
|
||||
@ -1145,6 +1243,8 @@ void MergeTreeData::checkAlter(const AlterCommands & commands, const Context & c
|
||||
setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast,
|
||||
new_columns, new_indices, /* only_check = */ true);
|
||||
|
||||
setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast, /* only_check = */ true);
|
||||
|
||||
/// Check that type conversions are possible.
|
||||
ExpressionActionsPtr unused_expression;
|
||||
NameToNameMap unused_map;
|
||||
@ -1446,6 +1546,32 @@ MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
|
||||
return transaction;
|
||||
}
|
||||
|
||||
void MergeTreeData::removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part)
|
||||
{
|
||||
auto & empty_columns = data_part->empty_columns;
|
||||
if (empty_columns.empty())
|
||||
return;
|
||||
|
||||
NamesAndTypesList new_columns;
|
||||
for (const auto & [name, type] : data_part->columns)
|
||||
if (!empty_columns.count(name))
|
||||
new_columns.emplace_back(name, type);
|
||||
|
||||
std::stringstream log_message;
|
||||
for (auto it = empty_columns.begin(); it != empty_columns.end(); ++it)
|
||||
{
|
||||
if (it != empty_columns.begin())
|
||||
log_message << ", ";
|
||||
log_message << *it;
|
||||
}
|
||||
|
||||
LOG_INFO(log, "Removing empty columns: " << log_message.str() << " from part " << data_part->name);
|
||||
|
||||
if (auto transaction = alterDataPart(data_part, new_columns, getIndicesDescription().indices, false))
|
||||
transaction->commit();
|
||||
empty_columns.clear();
|
||||
}
|
||||
|
||||
void MergeTreeData::freezeAll(const String & with_name, const Context & context)
|
||||
{
|
||||
freezePartitionsByMatcher([] (const DataPartPtr &){ return true; }, with_name, context);
|
||||
|
@ -312,6 +312,7 @@ public:
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool require_part_metadata_,
|
||||
@ -494,6 +495,9 @@ public:
|
||||
const IndicesASTs & new_indices,
|
||||
bool skip_sanity_checks);
|
||||
|
||||
/// Remove columns, that have been markedd as empty after zeroing values with expired ttl
|
||||
void removeEmptyColumnsFromPart(MergeTreeData::MutableDataPartPtr & data_part);
|
||||
|
||||
/// Freezes all parts.
|
||||
void freezeAll(const String & with_name, const Context & context);
|
||||
|
||||
@ -514,6 +518,7 @@ public:
|
||||
bool hasSortingKey() const { return !sorting_key_columns.empty(); }
|
||||
bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
|
||||
bool hasSkipIndices() const { return !skip_indices.empty(); }
|
||||
bool hasTableTTL() const { return ttl_table_ast != nullptr; }
|
||||
|
||||
ASTPtr getSortingKeyAST() const { return sorting_key_expr_ast; }
|
||||
ASTPtr getPrimaryKeyAST() const { return primary_key_expr_ast; }
|
||||
@ -601,6 +606,17 @@ public:
|
||||
Block primary_key_sample;
|
||||
DataTypes primary_key_data_types;
|
||||
|
||||
struct TTLEntry
|
||||
{
|
||||
ExpressionActionsPtr expression;
|
||||
String result_column;
|
||||
};
|
||||
|
||||
using TTLEntriesByName = std::unordered_map<String, TTLEntry>;
|
||||
TTLEntriesByName ttl_entries_by_name;
|
||||
|
||||
TTLEntry ttl_table_entry;
|
||||
|
||||
String sampling_expr_column_name;
|
||||
Names columns_required_for_sampling;
|
||||
|
||||
@ -625,6 +641,7 @@ private:
|
||||
ASTPtr order_by_ast;
|
||||
ASTPtr primary_key_ast;
|
||||
ASTPtr sample_by_ast;
|
||||
ASTPtr ttl_table_ast;
|
||||
|
||||
bool require_part_metadata;
|
||||
|
||||
@ -735,6 +752,9 @@ private:
|
||||
|
||||
void initPartitionKey();
|
||||
|
||||
void setTTLExpressions(const ColumnsDescription::ColumnTTLs & new_column_ttls,
|
||||
const ASTPtr & new_ttl_table_ast, bool only_check = false);
|
||||
|
||||
/// Expression for column type conversion.
|
||||
/// If no conversions are needed, out_expression=nullptr.
|
||||
/// out_rename_map maps column files for the out_expression onto new table files.
|
||||
|
@ -4,9 +4,11 @@
|
||||
#include <Storages/MergeTree/DiskSpaceMonitor.h>
|
||||
#include <Storages/MergeTree/SimpleMergeSelector.h>
|
||||
#include <Storages/MergeTree/AllMergeSelector.h>
|
||||
#include <Storages/MergeTree/TTLMergeSelector.h>
|
||||
#include <Storages/MergeTree/MergeList.h>
|
||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/BackgroundProcessingPool.h>
|
||||
#include <DataStreams/TTLBlockInputStream.h>
|
||||
#include <DataStreams/DistinctSortedBlockInputStream.h>
|
||||
#include <DataStreams/ExpressionBlockInputStream.h>
|
||||
#include <DataStreams/MergingSortedBlockInputStream.h>
|
||||
@ -176,6 +178,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
|
||||
const String * prev_partition_id = nullptr;
|
||||
const MergeTreeData::DataPartPtr * prev_part = nullptr;
|
||||
bool has_part_with_expired_ttl = false;
|
||||
for (const MergeTreeData::DataPartPtr & part : data_parts)
|
||||
{
|
||||
const String & partition_id = part->info.partition_id;
|
||||
@ -191,6 +194,10 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
part_info.age = current_time - part->modification_time;
|
||||
part_info.level = part->info.level;
|
||||
part_info.data = ∂
|
||||
part_info.min_ttl = part->ttl_infos.part_min_ttl;
|
||||
|
||||
if (part_info.min_ttl && part_info.min_ttl <= current_time)
|
||||
has_part_with_expired_ttl = true;
|
||||
|
||||
partitions.back().emplace_back(part_info);
|
||||
|
||||
@ -210,7 +217,16 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
if (aggressive)
|
||||
merge_settings.base = 1;
|
||||
|
||||
bool can_merge_with_ttl =
|
||||
(current_time - last_merge_with_ttl > data.settings.merge_with_ttl_timeout);
|
||||
|
||||
/// NOTE Could allow selection of different merge strategy.
|
||||
if (can_merge_with_ttl && has_part_with_expired_ttl)
|
||||
{
|
||||
merge_selector = std::make_unique<TTLMergeSelector>(current_time);
|
||||
last_merge_with_ttl = current_time;
|
||||
}
|
||||
else
|
||||
merge_selector = std::make_unique<SimpleMergeSelector>(merge_settings);
|
||||
|
||||
IMergeSelector::PartsInPartition parts_to_merge = merge_selector->select(
|
||||
@ -224,7 +240,8 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
|
||||
return false;
|
||||
}
|
||||
|
||||
if (parts_to_merge.size() == 1)
|
||||
/// Allow to "merge" part with itself if we need remove some values with expired ttl
|
||||
if (parts_to_merge.size() == 1 && !has_part_with_expired_ttl)
|
||||
throw Exception("Logical error: merge selector returned only one part to merge", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
MergeTreeData::DataPartsVector parts;
|
||||
@ -536,9 +553,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
new_data_part->relative_path = TMP_PREFIX + future_part.name;
|
||||
new_data_part->is_temp = true;
|
||||
|
||||
bool need_remove_expired_values = false;
|
||||
for (const MergeTreeData::DataPartPtr & part : parts)
|
||||
new_data_part->ttl_infos.update(part->ttl_infos);
|
||||
|
||||
const auto & part_min_ttl = new_data_part->ttl_infos.part_min_ttl;
|
||||
if (part_min_ttl && part_min_ttl <= time_of_merge)
|
||||
need_remove_expired_values = true;
|
||||
|
||||
size_t sum_input_rows_upper_bound = merge_entry->total_size_marks * data.index_granularity;
|
||||
|
||||
MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate);
|
||||
MergeAlgorithm merge_alg = chooseMergeAlgorithm(parts, sum_input_rows_upper_bound, gathering_columns, deduplicate, need_remove_expired_values);
|
||||
|
||||
LOG_DEBUG(log, "Selected MergeAlgorithm: " << ((merge_alg == MergeAlgorithm::Vertical) ? "Vertical" : "Horizontal"));
|
||||
|
||||
@ -599,6 +624,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
|
||||
MergeStageProgress horizontal_stage_progress(
|
||||
merge_alg == MergeAlgorithm::Horizontal ? 1.0 : column_sizes.keyColumnsWeight());
|
||||
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
auto input = std::make_unique<MergeTreeSequentialBlockInputStream>(
|
||||
@ -671,6 +697,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
if (deduplicate)
|
||||
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, SizeLimits(), 0 /*limit_hint*/, Names());
|
||||
|
||||
if (need_remove_expired_values)
|
||||
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, new_data_part, time_of_merge);
|
||||
|
||||
MergedBlockOutputStream to{
|
||||
data, new_part_tmp_path, merging_columns, compression_codec, merged_column_to_size, data.settings.min_merge_bytes_to_use_direct_io};
|
||||
|
||||
@ -684,6 +713,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
|
||||
while (!actions_blocker.isCancelled() && (block = merged_stream->read()))
|
||||
{
|
||||
rows_written += block.rows();
|
||||
|
||||
to.write(block);
|
||||
|
||||
merge_entry->rows_written = merged_stream->getProfileInfo().rows;
|
||||
@ -857,6 +887,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
|
||||
data, future_part.name, future_part.part_info);
|
||||
new_data_part->relative_path = "tmp_mut_" + future_part.name;
|
||||
new_data_part->is_temp = true;
|
||||
new_data_part->ttl_infos = source_part->ttl_infos;
|
||||
|
||||
String new_part_tmp_path = new_data_part->getFullPath();
|
||||
|
||||
@ -1016,12 +1047,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
|
||||
|
||||
MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMergeAlgorithm(
|
||||
const MergeTreeData::DataPartsVector & parts, size_t sum_rows_upper_bound,
|
||||
const NamesAndTypesList & gathering_columns, bool deduplicate) const
|
||||
const NamesAndTypesList & gathering_columns, bool deduplicate, bool need_remove_expired_values) const
|
||||
{
|
||||
if (deduplicate)
|
||||
return MergeAlgorithm::Horizontal;
|
||||
if (data.settings.enable_vertical_merge_algorithm == 0)
|
||||
return MergeAlgorithm::Horizontal;
|
||||
if (need_remove_expired_values)
|
||||
return MergeAlgorithm::Horizontal;
|
||||
|
||||
bool is_supported_storage =
|
||||
data.merging_params.mode == MergeTreeData::MergingParams::Ordinary ||
|
||||
@ -1093,7 +1126,6 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart
|
||||
return new_data_part;
|
||||
}
|
||||
|
||||
|
||||
size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts)
|
||||
{
|
||||
size_t res = 0;
|
||||
|
@ -127,7 +127,7 @@ private:
|
||||
|
||||
MergeAlgorithm chooseMergeAlgorithm(
|
||||
const MergeTreeData::DataPartsVector & parts,
|
||||
size_t rows_upper_bound, const NamesAndTypesList & gathering_columns, bool deduplicate) const;
|
||||
size_t rows_upper_bound, const NamesAndTypesList & gathering_columns, bool deduplicate, bool need_remove_expired_values) const;
|
||||
|
||||
private:
|
||||
MergeTreeData & data;
|
||||
@ -137,6 +137,9 @@ private:
|
||||
|
||||
/// When the last time you wrote to the log that the disk space was running out (not to write about this too often).
|
||||
time_t disk_space_warning_time = 0;
|
||||
|
||||
/// Last time when TTLMergeSelector has been used
|
||||
time_t last_merge_with_ttl = 0;
|
||||
};
|
||||
|
||||
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <Poco/DirectoryIterator.h>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
#include <common/JSON.h>
|
||||
|
||||
#define MERGE_TREE_MARK_SIZE (2 * sizeof(UInt64))
|
||||
|
||||
@ -37,6 +38,7 @@ namespace ErrorCodes
|
||||
extern const int CORRUPTED_DATA;
|
||||
extern const int NOT_FOUND_EXPECTED_DATA_PART;
|
||||
extern const int BAD_SIZE_OF_FILE_IN_DATA_PART;
|
||||
extern const int BAD_TTL_FILE;
|
||||
}
|
||||
|
||||
|
||||
@ -198,7 +200,6 @@ size_t MergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
|
||||
return checksum->second.file_size;
|
||||
}
|
||||
|
||||
|
||||
/** Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
|
||||
* If no checksums are present returns the name of the first physically existing column.
|
||||
*/
|
||||
@ -479,6 +480,7 @@ void MergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checksu
|
||||
loadIndex();
|
||||
loadRowsCount(); /// Must be called after loadIndex() as it uses the value of `marks_count`.
|
||||
loadPartitionAndMinMaxIndex();
|
||||
loadTTLInfos();
|
||||
if (check_consistency)
|
||||
checkConsistency(require_columns_checksums);
|
||||
}
|
||||
@ -637,6 +639,33 @@ void MergeTreeDataPart::loadRowsCount()
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeDataPart::loadTTLInfos()
|
||||
{
|
||||
String path = getFullPath() + "ttl.txt";
|
||||
if (Poco::File(path).exists())
|
||||
{
|
||||
ReadBufferFromFile in = openForReading(path);
|
||||
assertString("ttl format version: ", in);
|
||||
size_t format_version;
|
||||
readText(format_version, in);
|
||||
assertChar('\n', in);
|
||||
|
||||
if (format_version == 1)
|
||||
{
|
||||
try
|
||||
{
|
||||
ttl_infos.read(in);
|
||||
}
|
||||
catch (const JSONException &)
|
||||
{
|
||||
throw Exception("Error while parsing file ttl.txt in part: " + name, ErrorCodes::BAD_TTL_FILE);
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception("Unknown ttl format version: " + toString(format_version), ErrorCodes::BAD_TTL_FILE);
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const
|
||||
{
|
||||
std::shared_lock<std::shared_mutex> part_lock(columns_lock);
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Storages/MergeTree/MergeTreePartInfo.h>
|
||||
#include <Storages/MergeTree/MergeTreePartition.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
|
||||
#include <Storages/MergeTree/KeyCondition.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
@ -129,6 +130,11 @@ struct MergeTreeDataPart
|
||||
Deleting /// not active data part with identity refcounter, it is deleting right now by a cleaner
|
||||
};
|
||||
|
||||
using TTLInfo = MergeTreeDataPartTTLInfo;
|
||||
using TTLInfos = MergeTreeDataPartTTLInfos;
|
||||
|
||||
TTLInfos ttl_infos;
|
||||
|
||||
/// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
|
||||
mutable State state{State::Temporary};
|
||||
|
||||
@ -216,6 +222,9 @@ struct MergeTreeDataPart
|
||||
/// Columns description.
|
||||
NamesAndTypesList columns;
|
||||
|
||||
/// Columns with values, that all have been zeroed by expired ttl
|
||||
NameSet empty_columns;
|
||||
|
||||
using ColumnToSize = std::map<std::string, UInt64>;
|
||||
|
||||
/** It is blocked for writing when changing columns, checksums or any part files.
|
||||
@ -281,6 +290,9 @@ private:
|
||||
/// For the older format version calculates rows count from the size of a column with a fixed size.
|
||||
void loadRowsCount();
|
||||
|
||||
/// Loads ttl infos in json format from file ttl.txt. If file doesn`t exists assigns ttl infos with all zeros
|
||||
void loadTTLInfos();
|
||||
|
||||
void loadPartitionAndMinMaxIndex();
|
||||
|
||||
void checkConsistency(bool require_part_metadata);
|
||||
|
88
dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
Normal file
88
dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
Normal file
@ -0,0 +1,88 @@
|
||||
#include <Storages/MergeTree/MergeTreeDataPartTTLInfo.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <common/JSON.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void MergeTreeDataPartTTLInfos::update(const MergeTreeDataPartTTLInfos & other_infos)
|
||||
{
|
||||
for (const auto & [name, ttl_info] : other_infos.columns_ttl)
|
||||
{
|
||||
columns_ttl[name].update(ttl_info);
|
||||
updatePartMinTTL(ttl_info.min);
|
||||
}
|
||||
|
||||
table_ttl.update(other_infos.table_ttl);
|
||||
updatePartMinTTL(table_ttl.min);
|
||||
}
|
||||
|
||||
void MergeTreeDataPartTTLInfos::read(ReadBuffer & in)
|
||||
{
|
||||
String json_str;
|
||||
readString(json_str, in);
|
||||
assertEOF(in);
|
||||
|
||||
JSON json(json_str);
|
||||
if (json.has("columns"))
|
||||
{
|
||||
JSON columns = json["columns"];
|
||||
for (auto col : columns)
|
||||
{
|
||||
MergeTreeDataPartTTLInfo ttl_info;
|
||||
ttl_info.min = col["min"].getUInt();
|
||||
ttl_info.max = col["max"].getUInt();
|
||||
String name = col["name"].getString();
|
||||
columns_ttl.emplace(name, ttl_info);
|
||||
|
||||
updatePartMinTTL(ttl_info.min);
|
||||
}
|
||||
}
|
||||
if (json.has("table"))
|
||||
{
|
||||
JSON table = json["table"];
|
||||
table_ttl.min = table["min"].getUInt();
|
||||
table_ttl.max = table["max"].getUInt();
|
||||
|
||||
updatePartMinTTL(table_ttl.min);
|
||||
}
|
||||
}
|
||||
|
||||
void MergeTreeDataPartTTLInfos::write(WriteBuffer & out) const
|
||||
{
|
||||
writeString("ttl format version: 1\n", out);
|
||||
writeString("{", out);
|
||||
if (!columns_ttl.empty())
|
||||
{
|
||||
writeString("\"columns\":[", out);
|
||||
for (auto it = columns_ttl.begin(); it != columns_ttl.end(); ++it)
|
||||
{
|
||||
if (it != columns_ttl.begin())
|
||||
writeString(",", out);
|
||||
|
||||
writeString("{\"name\":\"", out);
|
||||
writeString(it->first, out);
|
||||
writeString("\",\"min\":", out);
|
||||
writeIntText(it->second.min, out);
|
||||
writeString(",\"max\":", out);
|
||||
writeIntText(it->second.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
writeString("]", out);
|
||||
}
|
||||
if (table_ttl.min)
|
||||
{
|
||||
if (!columns_ttl.empty())
|
||||
writeString(",", out);
|
||||
writeString("\"table\":{\"min\":", out);
|
||||
writeIntText(table_ttl.min, out);
|
||||
writeString(",\"max\":", out);
|
||||
writeIntText(table_ttl.max, out);
|
||||
writeString("}", out);
|
||||
}
|
||||
writeString("}", out);
|
||||
}
|
||||
|
||||
}
|
51
dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
Normal file
51
dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
Normal file
@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Minimal and maximal ttl for column or table
|
||||
struct MergeTreeDataPartTTLInfo
|
||||
{
|
||||
time_t min = 0;
|
||||
time_t max = 0;
|
||||
|
||||
void update(time_t time)
|
||||
{
|
||||
if (time && (!min || time < min))
|
||||
min = time;
|
||||
|
||||
max = std::max(time, max);
|
||||
}
|
||||
|
||||
void update(const MergeTreeDataPartTTLInfo & other_info)
|
||||
{
|
||||
if (other_info.min && (!min || other_info.min < min))
|
||||
min = other_info.min;
|
||||
|
||||
max = std::max(other_info.max, max);
|
||||
}
|
||||
};
|
||||
|
||||
/// PartTTLInfo for all columns and table with minimal ttl for whole part
|
||||
struct MergeTreeDataPartTTLInfos
|
||||
{
|
||||
std::unordered_map<String, MergeTreeDataPartTTLInfo> columns_ttl;
|
||||
MergeTreeDataPartTTLInfo table_ttl;
|
||||
time_t part_min_ttl = 0;
|
||||
|
||||
void read(ReadBuffer & in);
|
||||
void write(WriteBuffer & out) const;
|
||||
void update(const MergeTreeDataPartTTLInfos & other_infos);
|
||||
|
||||
void updatePartMinTTL(time_t time)
|
||||
{
|
||||
if (time && (!part_min_ttl || time < part_min_ttl))
|
||||
part_min_ttl = time;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -4,8 +4,11 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Interpreters/AggregationCommon.h>
|
||||
#include <IO/HashingWriteBuffer.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Poco/File.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -71,6 +74,34 @@ void buildScatterSelector(
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes ttls and updates ttl infos
|
||||
void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, MergeTreeDataPart::TTLInfos & ttl_infos, Block & block, const String & column_name)
|
||||
{
|
||||
if (!block.has(ttl_entry.result_column))
|
||||
ttl_entry.expression->execute(block);
|
||||
|
||||
auto & ttl_info = (column_name.empty() ? ttl_infos.table_ttl : ttl_infos.columns_ttl[column_name]);
|
||||
|
||||
const auto & current = block.getByName(ttl_entry.result_column);
|
||||
|
||||
const IColumn * column = current.column.get();
|
||||
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
|
||||
{
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
for (const auto & val : column_date->getData())
|
||||
ttl_info.update(date_lut.fromDayNum(DayNum(val)));
|
||||
}
|
||||
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
|
||||
{
|
||||
for (const auto & val : column_date_time->getData())
|
||||
ttl_info.update(val);
|
||||
}
|
||||
else
|
||||
throw Exception("Unexpected type of result ttl column", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
ttl_infos.updatePartMinTTL(ttl_info.min);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block, size_t max_parts)
|
||||
@ -213,6 +244,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
|
||||
}
|
||||
|
||||
if (data.hasTableTTL())
|
||||
updateTTL(data.ttl_table_entry, new_data_part->ttl_infos, block, "");
|
||||
|
||||
for (const auto & [name, ttl_entry] : data.ttl_entries_by_name)
|
||||
updateTTL(ttl_entry, new_data_part->ttl_infos, block, name);
|
||||
|
||||
/// This effectively chooses minimal compression method:
|
||||
/// either default lz4 or compression method with zero thresholds on absolute and relative part size.
|
||||
auto compression_codec = data.global_context.chooseCompressionCodec(0, 0);
|
||||
|
@ -160,7 +160,10 @@ struct MergeTreeSettings
|
||||
M(SettingUInt64, finished_mutations_to_keep, 100) \
|
||||
\
|
||||
/** Minimal amount of bytes to enable O_DIRECT in merge (0 - disabled) */ \
|
||||
M(SettingUInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024)
|
||||
M(SettingUInt64, min_merge_bytes_to_use_direct_io, 10ULL * 1024 * 1024 * 1024) \
|
||||
\
|
||||
/** Minimal time in seconds, when merge with TTL can be repeated */ \
|
||||
M(SettingInt64, merge_with_ttl_timeout, 3600 * 24)
|
||||
|
||||
/// Settings that should not change after the creation of a table.
|
||||
#define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \
|
||||
|
@ -392,6 +392,16 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
|
||||
checksums.files["count.txt"].file_hash = count_out_hashing.getHash();
|
||||
}
|
||||
|
||||
if (new_part->ttl_infos.part_min_ttl)
|
||||
{
|
||||
/// Write a file with ttl infos in json format.
|
||||
WriteBufferFromFile out(part_path + "ttl.txt", 4096);
|
||||
HashingWriteBuffer out_hashing(out);
|
||||
new_part->ttl_infos.write(out_hashing);
|
||||
checksums.files["ttl.txt"].file_size = out_hashing.count();
|
||||
checksums.files["ttl.txt"].file_hash = out_hashing.getHash();
|
||||
}
|
||||
|
||||
{
|
||||
/// Write a file with a description of columns.
|
||||
WriteBufferFromFile out(part_path + "columns.txt", 4096);
|
||||
|
@ -46,6 +46,8 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr
|
||||
partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast));
|
||||
|
||||
skip_indices = data.getIndicesDescription().toString();
|
||||
|
||||
ttl_table = formattedAST(data.ttl_table_ast);
|
||||
}
|
||||
|
||||
void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
|
||||
@ -67,6 +69,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
|
||||
if (!sorting_key.empty())
|
||||
out << "sorting key: " << sorting_key << "\n";
|
||||
|
||||
if (!ttl_table.empty())
|
||||
out << "ttl: " << ttl_table << "\n";
|
||||
|
||||
if (!skip_indices.empty())
|
||||
out << "indices: " << skip_indices << "\n";
|
||||
}
|
||||
@ -101,6 +106,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in)
|
||||
|
||||
if (checkString("indices: ", in))
|
||||
in >> skip_indices >> "\n";
|
||||
|
||||
if (checkString("ttl: ", in))
|
||||
in >> ttl_table >> "\n";
|
||||
}
|
||||
|
||||
ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s)
|
||||
@ -183,6 +191,21 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (ttl_table != from_zk.ttl_table)
|
||||
{
|
||||
if (allow_alter)
|
||||
{
|
||||
diff.ttl_table_changed = true;
|
||||
diff.new_ttl_table = from_zk.ttl_table;
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Existing table metadata in ZooKeeper differs in ttl."
|
||||
" Stored in ZooKeeper: " + from_zk.ttl_table +
|
||||
", local: " + ttl_table,
|
||||
ErrorCodes::METADATA_MISMATCH);
|
||||
}
|
||||
|
||||
if (skip_indices != from_zk.skip_indices)
|
||||
{
|
||||
if (allow_alter)
|
||||
|
@ -26,6 +26,7 @@ struct ReplicatedMergeTreeTableMetadata
|
||||
String partition_key;
|
||||
String sorting_key;
|
||||
String skip_indices;
|
||||
String ttl_table;
|
||||
|
||||
ReplicatedMergeTreeTableMetadata() = default;
|
||||
explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data);
|
||||
@ -44,6 +45,9 @@ struct ReplicatedMergeTreeTableMetadata
|
||||
bool skip_indices_changed = false;
|
||||
String new_skip_indices;
|
||||
|
||||
bool ttl_table_changed = false;
|
||||
String new_ttl_table;
|
||||
|
||||
bool empty() const { return !sorting_key_changed && !skip_indices_changed; }
|
||||
};
|
||||
|
||||
|
68
dbms/src/Storages/MergeTree/TTLMergeSelector.cpp
Normal file
68
dbms/src/Storages/MergeTree/TTLMergeSelector.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <Storages/MergeTree/TTLMergeSelector.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
IMergeSelector::PartsInPartition TTLMergeSelector::select(
|
||||
const Partitions & partitions,
|
||||
const size_t max_total_size_to_merge)
|
||||
{
|
||||
using Iterator = IMergeSelector::PartsInPartition::const_iterator;
|
||||
Iterator best_begin;
|
||||
ssize_t partition_to_merge_index = -1;
|
||||
time_t partition_to_merge_min_ttl = 0;
|
||||
|
||||
for (size_t i = 0; i < partitions.size(); ++i)
|
||||
{
|
||||
for (auto it = partitions[i].begin(); it != partitions[i].end(); ++it)
|
||||
{
|
||||
if (it->min_ttl && (partition_to_merge_index == -1 || it->min_ttl < partition_to_merge_min_ttl))
|
||||
{
|
||||
partition_to_merge_min_ttl = it->min_ttl;
|
||||
partition_to_merge_index = i;
|
||||
best_begin = it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (partition_to_merge_index == -1 || partition_to_merge_min_ttl > current_time)
|
||||
return {};
|
||||
|
||||
const auto & best_partition = partitions[partition_to_merge_index];
|
||||
Iterator best_end = best_begin + 1;
|
||||
size_t total_size = 0;
|
||||
|
||||
while (true)
|
||||
{
|
||||
if (!best_begin->min_ttl || best_begin->min_ttl > current_time
|
||||
|| (max_total_size_to_merge && total_size > max_total_size_to_merge))
|
||||
{
|
||||
++best_begin;
|
||||
break;
|
||||
}
|
||||
|
||||
total_size += best_begin->size;
|
||||
if (best_begin == best_partition.begin())
|
||||
break;
|
||||
|
||||
--best_begin;
|
||||
}
|
||||
|
||||
while (best_end != best_partition.end())
|
||||
{
|
||||
if (!best_end->min_ttl || best_end->min_ttl > current_time
|
||||
|| (max_total_size_to_merge && total_size > max_total_size_to_merge))
|
||||
break;
|
||||
|
||||
total_size += best_end->size;
|
||||
++best_end;
|
||||
}
|
||||
|
||||
return PartsInPartition(best_begin, best_end);
|
||||
}
|
||||
|
||||
}
|
26
dbms/src/Storages/MergeTree/TTLMergeSelector.h
Normal file
26
dbms/src/Storages/MergeTree/TTLMergeSelector.h
Normal file
@ -0,0 +1,26 @@
|
||||
#pragma once
|
||||
|
||||
#include <Storages/MergeTree/MergeSelector.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Merge selector, which is used to remove values with expired ttl.
|
||||
* It selects parts to merge by greedy algorithm:
|
||||
* 1. Finds part with the most earliest expired ttl and includes it to result.
|
||||
* 2. Tries to find the longest range of parts with expired ttl, that includes part from step 1.
|
||||
*/
|
||||
class TTLMergeSelector : public IMergeSelector
|
||||
{
|
||||
public:
|
||||
explicit TTLMergeSelector(time_t current_time_) : current_time(current_time_) {}
|
||||
|
||||
PartsInPartition select(
|
||||
const Partitions & partitions,
|
||||
const size_t max_total_size_to_merge) override;
|
||||
private:
|
||||
time_t current_time;
|
||||
};
|
||||
|
||||
}
|
@ -211,14 +211,15 @@ MergeTreeData::DataPart::Checksums checkDataPart(
|
||||
checksums_data.files["primary.idx"] = MergeTreeData::DataPart::Checksums::Checksum(primary_idx_size, hashing_buf.getHash());
|
||||
}
|
||||
|
||||
/// Optional files count.txt, partition.dat, minmax_*.idx. Just calculate checksums for existing files.
|
||||
/// Optional files count.txt, partition.dat, minmax_*.idx, ttl.txt. Just calculate checksums for existing files.
|
||||
Poco::DirectoryIterator dir_end;
|
||||
for (Poco::DirectoryIterator dir_it(path); dir_it != dir_end; ++dir_it)
|
||||
{
|
||||
const String & file_name = dir_it.name();
|
||||
if (file_name == "count.txt"
|
||||
|| file_name == "partition.dat"
|
||||
|| (startsWith(file_name, "minmax_") && endsWith(file_name, ".idx")))
|
||||
|| (startsWith(file_name, "minmax_") && endsWith(file_name, ".idx"))
|
||||
|| file_name == "ttl.txt")
|
||||
{
|
||||
ReadBufferFromFile file_buf(dir_it->path());
|
||||
HashingReadBuffer hashing_buf(file_buf);
|
||||
|
@ -572,6 +572,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
ASTPtr order_by_ast;
|
||||
ASTPtr primary_key_ast;
|
||||
ASTPtr sample_by_ast;
|
||||
ASTPtr ttl_table_ast;
|
||||
IndicesDescription indices_description;
|
||||
MergeTreeSettings storage_settings = args.context.getMergeTreeSettings();
|
||||
|
||||
@ -593,11 +594,15 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
if (args.storage_def->sample_by)
|
||||
sample_by_ast = args.storage_def->sample_by->ptr();
|
||||
|
||||
if (args.storage_def->ttl_table)
|
||||
ttl_table_ast = args.storage_def->ttl_table->ptr();
|
||||
|
||||
if (args.query.columns_list && args.query.columns_list->indices)
|
||||
for (const auto & index : args.query.columns_list->indices->children)
|
||||
indices_description.indices.push_back(
|
||||
std::dynamic_pointer_cast<ASTIndexDeclaration>(index->clone()));
|
||||
|
||||
|
||||
storage_settings.loadFromQuery(*args.storage_def);
|
||||
}
|
||||
else
|
||||
@ -636,12 +641,14 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
||||
zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name,
|
||||
args.columns, indices_description,
|
||||
args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast,
|
||||
sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag);
|
||||
sample_by_ast, ttl_table_ast, merging_params, storage_settings,
|
||||
args.has_force_restore_data_flag);
|
||||
else
|
||||
return StorageMergeTree::create(
|
||||
args.data_path, args.database_name, args.table_name, args.columns, indices_description,
|
||||
args.attach, args.context, date_column_name, partition_by_ast, order_by_ast,
|
||||
primary_key_ast, sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag);
|
||||
primary_key_ast, sample_by_ast, ttl_table_ast, merging_params, storage_settings,
|
||||
args.has_force_restore_data_flag);
|
||||
}
|
||||
|
||||
|
||||
|
@ -59,6 +59,7 @@ StorageMergeTree::StorageMergeTree(
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag)
|
||||
@ -67,7 +68,8 @@ StorageMergeTree::StorageMergeTree(
|
||||
data(database_name, table_name,
|
||||
full_path, columns_, indices_,
|
||||
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
|
||||
sample_by_ast_, merging_params_, settings_, false, attach),
|
||||
sample_by_ast_, ttl_table_ast_, merging_params_,
|
||||
settings_, false, attach),
|
||||
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()),
|
||||
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
|
||||
{
|
||||
@ -220,7 +222,8 @@ void StorageMergeTree::alter(
|
||||
auto new_indices = data.getIndicesDescription();
|
||||
ASTPtr new_order_by_ast = data.order_by_ast;
|
||||
ASTPtr new_primary_key_ast = data.primary_key_ast;
|
||||
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
|
||||
ASTPtr new_ttl_table_ast = data.ttl_table_ast;
|
||||
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
|
||||
|
||||
auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
|
||||
auto columns_for_parts = new_columns.getAllPhysical();
|
||||
@ -242,6 +245,9 @@ void StorageMergeTree::alter(
|
||||
|
||||
if (new_primary_key_ast.get() != data.primary_key_ast.get())
|
||||
storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
|
||||
|
||||
if (new_ttl_table_ast.get() != data.ttl_table_ast.get())
|
||||
storage_ast.set(storage_ast.ttl_table, new_ttl_table_ast);
|
||||
};
|
||||
|
||||
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier);
|
||||
@ -249,6 +255,8 @@ void StorageMergeTree::alter(
|
||||
/// Reinitialize primary key because primary key column types might have changed.
|
||||
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
|
||||
|
||||
data.setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
|
||||
|
||||
for (auto & transaction : transactions)
|
||||
transaction->commit();
|
||||
|
||||
@ -546,6 +554,7 @@ bool StorageMergeTree::merge(
|
||||
future_part, *merge_entry, time(nullptr),
|
||||
merging_tagger->reserved_space.get(), deduplicate);
|
||||
merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr);
|
||||
data.removeEmptyColumnsFromPart(new_part);
|
||||
|
||||
merging_tagger->is_successful = true;
|
||||
write_part_log({});
|
||||
@ -794,7 +803,8 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
|
||||
auto new_indices = getIndicesDescription();
|
||||
ASTPtr ignored_order_by_ast;
|
||||
ASTPtr ignored_primary_key_ast;
|
||||
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast);
|
||||
ASTPtr ignored_ttl_table_ast;
|
||||
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast);
|
||||
|
||||
auto columns_for_parts = new_columns.getAllPhysical();
|
||||
for (const auto & part : parts)
|
||||
|
@ -181,6 +181,7 @@ protected:
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_, /// nullptr, if sampling is not supported.
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag);
|
||||
|
@ -54,6 +54,7 @@
|
||||
#include <thread>
|
||||
#include <future>
|
||||
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -206,6 +207,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_,
|
||||
const ASTPtr & ttl_table_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag)
|
||||
@ -217,7 +219,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
|
||||
data(database_name, table_name,
|
||||
full_path, columns_, indices_,
|
||||
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
|
||||
sample_by_ast_, merging_params_, settings_, true, attach,
|
||||
sample_by_ast_, ttl_table_ast_, merging_params_,
|
||||
settings_, true, attach,
|
||||
[this] (const std::string & name) { enqueuePartForCheck(name); }),
|
||||
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), queue(*this),
|
||||
fetcher(data),
|
||||
@ -424,6 +427,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
|
||||
ASTPtr new_primary_key_ast = data.primary_key_ast;
|
||||
ASTPtr new_order_by_ast = data.order_by_ast;
|
||||
auto new_indices = data.getIndicesDescription();
|
||||
ASTPtr new_ttl_table_ast = data.ttl_table_ast;
|
||||
IDatabase::ASTModifier storage_modifier;
|
||||
if (!metadata_diff.empty())
|
||||
{
|
||||
@ -452,6 +456,12 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
|
||||
if (metadata_diff.skip_indices_changed)
|
||||
new_indices = IndicesDescription::parse(metadata_diff.new_skip_indices);
|
||||
|
||||
if (metadata_diff.ttl_table_changed)
|
||||
{
|
||||
ParserExpression parser;
|
||||
new_ttl_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0);
|
||||
}
|
||||
|
||||
storage_modifier = [&](IAST & ast)
|
||||
{
|
||||
auto & storage_ast = ast.as<ASTStorage &>();
|
||||
@ -464,6 +474,9 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
|
||||
if (new_primary_key_ast.get() != data.primary_key_ast.get())
|
||||
storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
|
||||
|
||||
if (new_ttl_table_ast.get() != data.ttl_table_ast.get())
|
||||
storage_ast.set(storage_ast.ttl_table, new_ttl_table_ast);
|
||||
|
||||
storage_ast.set(storage_ast.order_by, new_order_by_ast);
|
||||
};
|
||||
}
|
||||
@ -473,6 +486,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
|
||||
/// Even if the primary/sorting keys didn't change we must reinitialize it
|
||||
/// because primary key column types might have changed.
|
||||
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
|
||||
data.setTTLExpressions(new_columns.getColumnTTLs(), new_ttl_table_ast);
|
||||
}
|
||||
|
||||
|
||||
@ -1077,6 +1091,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
|
||||
future_merged_part, *merge_entry, entry.create_time, reserved_space.get(), entry.deduplicate);
|
||||
|
||||
merger_mutator.renameMergedTemporaryPart(part, parts, &transaction);
|
||||
data.removeEmptyColumnsFromPart(part);
|
||||
|
||||
try
|
||||
{
|
||||
@ -1499,7 +1514,8 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
|
||||
auto new_indices = getIndicesDescription();
|
||||
ASTPtr ignored_order_by_ast;
|
||||
ASTPtr ignored_primary_key_ast;
|
||||
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast);
|
||||
ASTPtr ignored_ttl_table_ast;
|
||||
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast, ignored_ttl_table_ast);
|
||||
|
||||
size_t modified_parts = 0;
|
||||
auto parts = data.getDataParts();
|
||||
@ -3104,7 +3120,8 @@ void StorageReplicatedMergeTree::alter(
|
||||
IndicesDescription new_indices = data.getIndicesDescription();
|
||||
ASTPtr new_order_by_ast = data.order_by_ast;
|
||||
ASTPtr new_primary_key_ast = data.primary_key_ast;
|
||||
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
|
||||
ASTPtr new_ttl_table_ast = data.ttl_table_ast;
|
||||
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast, new_ttl_table_ast);
|
||||
|
||||
String new_columns_str = new_columns.toString();
|
||||
if (new_columns_str != data.getColumns().toString())
|
||||
@ -3114,6 +3131,9 @@ void StorageReplicatedMergeTree::alter(
|
||||
if (new_order_by_ast.get() != data.order_by_ast.get())
|
||||
new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast));
|
||||
|
||||
if (new_ttl_table_ast.get() != data.ttl_table_ast.get())
|
||||
new_metadata.ttl_table = serializeAST(*new_ttl_table_ast);
|
||||
|
||||
String new_indices_str = new_indices.toString();
|
||||
if (new_indices_str != data.getIndicesDescription().toString())
|
||||
new_metadata.skip_indices = new_indices_str;
|
||||
|
@ -563,6 +563,7 @@ protected:
|
||||
const ASTPtr & order_by_ast_,
|
||||
const ASTPtr & primary_key_ast_,
|
||||
const ASTPtr & sample_by_ast_,
|
||||
const ASTPtr & table_ttl_ast_,
|
||||
const MergeTreeData::MergingParams & merging_params_,
|
||||
const MergeTreeSettings & settings_,
|
||||
bool has_force_restore_data_flag);
|
||||
|
65
dbms/tests/integration/test_ttl_replicated/test.py
Normal file
65
dbms/tests/integration/test_ttl_replicated/test.py
Normal file
@ -0,0 +1,65 @@
|
||||
import time
|
||||
import pytest
|
||||
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.test_tools import TSV
|
||||
|
||||
cluster = ClickHouseCluster(__file__)
|
||||
node1 = cluster.add_instance('node1', with_zookeeper=True)
|
||||
node2 = cluster.add_instance('node2', with_zookeeper=True)
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def start_cluster():
|
||||
try:
|
||||
cluster.start()
|
||||
|
||||
yield cluster
|
||||
|
||||
except Exception as ex:
|
||||
print ex
|
||||
|
||||
finally:
|
||||
cluster.shutdown()
|
||||
|
||||
def drop_table(nodes, table_name):
|
||||
for node in nodes:
|
||||
node.query("DROP TABLE IF EXISTS {}".format(table_name))
|
||||
|
||||
def test_ttl_columns(start_cluster):
|
||||
drop_table([node1, node2], "test_ttl")
|
||||
for node in [node1, node2]:
|
||||
node.query(
|
||||
'''
|
||||
CREATE TABLE test_ttl(date DateTime, id UInt32, a Int32 TTL date + INTERVAL 1 DAY, b Int32 TTL date + INTERVAL 1 MONTH)
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}')
|
||||
ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0;
|
||||
'''.format(replica=node.name))
|
||||
|
||||
node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-10 00:00:00'), 1, 1, 3)")
|
||||
node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-11 10:00:00'), 2, 2, 4)")
|
||||
time.sleep(1) # sleep to allow use ttl merge selector for second time
|
||||
node1.query("OPTIMIZE TABLE test_ttl FINAL")
|
||||
|
||||
expected = "1\t0\t0\n2\t0\t0\n"
|
||||
assert TSV(node1.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected)
|
||||
assert TSV(node2.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected)
|
||||
|
||||
def test_ttl_table(start_cluster):
|
||||
drop_table([node1, node2], "test_ttl")
|
||||
for node in [node1, node2]:
|
||||
node.query(
|
||||
'''
|
||||
CREATE TABLE test_ttl(date DateTime, id UInt32)
|
||||
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}')
|
||||
ORDER BY id PARTITION BY toDayOfMonth(date)
|
||||
TTL date + INTERVAL 1 DAY SETTINGS merge_with_ttl_timeout=0;
|
||||
'''.format(replica=node.name))
|
||||
|
||||
node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-10 00:00:00'), 1)")
|
||||
node1.query("INSERT INTO test_ttl VALUES (toDateTime('2000-10-11 10:00:00'), 2)")
|
||||
time.sleep(1) # sleep to allow use ttl merge selector for second time
|
||||
node1.query("OPTIMIZE TABLE test_ttl FINAL")
|
||||
|
||||
assert TSV(node1.query("SELECT * FROM test_ttl")) == TSV("")
|
||||
assert TSV(node2.query("SELECT * FROM test_ttl")) == TSV("")
|
||||
|
5
dbms/tests/queries/0_stateless/00933_alter_ttl.reference
Normal file
5
dbms/tests/queries/0_stateless/00933_alter_ttl.reference
Normal file
@ -0,0 +1,5 @@
|
||||
CREATE TABLE test.ttl (`d` Date, `a` Int32) ENGINE = MergeTree PARTITION BY toDayOfMonth(d) ORDER BY a TTL d + toIntervalDay(1) SETTINGS index_granularity = 8192
|
||||
2100-10-10 3
|
||||
2100-10-10 4
|
||||
d Date
|
||||
a Int32 d + toIntervalDay(1)
|
24
dbms/tests/queries/0_stateless/00933_alter_ttl.sql
Normal file
24
dbms/tests/queries/0_stateless/00933_alter_ttl.sql
Normal file
@ -0,0 +1,24 @@
|
||||
set send_logs_level = 'none';
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d);
|
||||
alter table test.ttl modify ttl d + interval 1 day;
|
||||
show create table test.ttl;
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 3);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 4);
|
||||
optimize table test.ttl partition 10 final;
|
||||
|
||||
select * from test.ttl order by d;
|
||||
|
||||
alter table test.ttl modify ttl a; -- { serverError 450 }
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d Date, a Int) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
|
||||
alter table test.ttl modify column a Int ttl d + interval 1 day;
|
||||
desc table test.ttl;
|
||||
alter table test.ttl modify column d Int ttl d + interval 1 day; -- { serverError 44}
|
||||
|
@ -0,0 +1,8 @@
|
||||
0
|
||||
0 0
|
||||
0 0
|
||||
2000-10-10 00:00:00 0
|
||||
2000-10-10 00:00:00 0
|
||||
2000-10-10 00:00:00 0
|
||||
2100-10-10 00:00:00 3
|
||||
2100-10-10 2
|
44
dbms/tests/queries/0_stateless/00933_ttl_simple.sql
Normal file
44
dbms/tests/queries/0_stateless/00933_ttl_simple.sql
Normal file
@ -0,0 +1,44 @@
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int ttl d + interval 1 second, b Int ttl d + interval 1 second) engine = MergeTree order by tuple() partition by toMinute(d);
|
||||
insert into test.ttl values (now(), 1, 2);
|
||||
insert into test.ttl values (now(), 3, 4);
|
||||
select sleep(1.1);
|
||||
optimize table test.ttl final;
|
||||
select a, b from test.ttl;
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 3);
|
||||
optimize table test.ttl final;
|
||||
select * from test.ttl order by d;
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int) engine = MergeTree order by tuple() partition by tuple() ttl d + interval 1 day;
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 3);
|
||||
optimize table test.ttl final;
|
||||
select * from test.ttl order by d;
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) ttl d + interval 1 day;
|
||||
insert into test.ttl values (toDate('2000-10-10'), 1);
|
||||
insert into test.ttl values (toDate('2100-10-10'), 2);
|
||||
optimize table test.ttl final;
|
||||
select * from test.ttl order by d;
|
||||
|
||||
set send_logs_level = 'none';
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime ttl d) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 44}
|
||||
create table test.ttl (d DateTime, a Int ttl d) engine = MergeTree order by a partition by toSecond(d); -- { serverError 44}
|
||||
create table test.ttl (d DateTime, a Int ttl 2 + 2) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 450 }
|
||||
create table test.ttl (d DateTime, a Int ttl toDateTime(1)) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 450 }
|
||||
create table test.ttl (d DateTime, a Int ttl d - d) engine = MergeTree order by tuple() partition by toSecond(d); -- { serverError 450 }
|
@ -0,0 +1,12 @@
|
||||
3
|
||||
4
|
||||
111
|
||||
111
|
||||
1 2
|
||||
2 4
|
||||
3 300
|
||||
4 400
|
||||
1 222
|
||||
2 222
|
||||
3 15
|
||||
4 20
|
30
dbms/tests/queries/0_stateless/00933_ttl_with_default.sql
Normal file
30
dbms/tests/queries/0_stateless/00933_ttl_with_default.sql
Normal file
@ -0,0 +1,30 @@
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int default 111 ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 3);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 4);
|
||||
optimize table test.ttl final;
|
||||
select a from test.ttl order by a;
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int, b default a * 2 ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1, 100);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2, 200);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 3, 300);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 4, 400);
|
||||
optimize table test.ttl final;
|
||||
select a, b from test.ttl order by a;
|
||||
|
||||
drop table if exists test.ttl;
|
||||
|
||||
create table test.ttl (d DateTime, a Int, b default 222 ttl d + interval 1 DAY) engine = MergeTree order by tuple() partition by toDayOfMonth(d);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 1, 5);
|
||||
insert into test.ttl values (toDateTime('2000-10-10 00:00:00'), 2, 10);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 3, 15);
|
||||
insert into test.ttl values (toDateTime('2100-10-10 00:00:00'), 4, 20);
|
||||
optimize table test.ttl final;
|
||||
select a, b from test.ttl order by a;
|
||||
|
@ -31,8 +31,8 @@ Main features:
|
||||
```
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
|
||||
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
|
||||
@ -41,6 +41,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
[ORDER BY expr]
|
||||
[PRIMARY KEY expr]
|
||||
[SAMPLE BY expr]
|
||||
[TTL expr]
|
||||
[SETTINGS name=value, ...]
|
||||
```
|
||||
|
||||
@ -66,10 +67,18 @@ For a description of request parameters, see [request description](../../query_l
|
||||
|
||||
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
|
||||
|
||||
- `TTL` - An expression for setting storage time for rows.
|
||||
|
||||
It must depends on `Date` or `DateTime` column and has one `Date` or `DateTime` column as a result. Example:
|
||||
`TTL date + INTERVAL 1 DAY`
|
||||
|
||||
For more details, see [TTL for columns and tables](mergetree.md)
|
||||
|
||||
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
|
||||
- `index_granularity` — The granularity of an index. The number of data rows between the "marks" of an index. By default, 8192. The list of all available parameters you can see in [MergeTreeSettings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Storages/MergeTree/MergeTreeSettings.h).
|
||||
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information refer the [setting description](../server_settings/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in the "Server configuration parameters" chapter.
|
||||
- `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation required for using of the direct I/O access to the storage disk. During the merging of the data parts, ClickHouse calculates summary storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, then ClickHouse reads and writes the data using direct I/O interface (`O_DIRECT` option) to the storage disk. If `min_merge_bytes_to_use_direct_io = 0`, then the direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
|
||||
- `merge_with_ttl_timeout` - Minimal time in seconds, when merge with TTL can be repeated. Default value: 86400 (1 day).
|
||||
|
||||
**Example of sections setting**
|
||||
|
||||
@ -298,4 +307,13 @@ For concurrent table access, we use multi-versioning. In other words, when a tab
|
||||
|
||||
Reading from a table is automatically parallelized.
|
||||
|
||||
|
||||
## TTL for columns and tables
|
||||
|
||||
Data with expired TTL is removed while executing merges.
|
||||
|
||||
If TTL is set for column, when it expires, value will be replaced by default. If all values in columns were zeroed in part, data for this column will be deleted from disk for part. You are not allowed to set TTL for all key columns. If TTL is set for table, when it expires, row will be deleted.
|
||||
|
||||
When TTL expires on some value or row in part, extra merge will be executed. To control frequency of merges with TTL you can set `merge_with_ttl_timeout`. If it is too low, many extra merges and lack of regular merges can reduce the perfomance.
|
||||
|
||||
[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/) <!--hide-->
|
||||
|
@ -17,8 +17,8 @@ The `CREATE TABLE` query can have several forms.
|
||||
```sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec],
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2],
|
||||
...
|
||||
) ENGINE = engine
|
||||
```
|
||||
@ -80,6 +80,13 @@ If you add a new column to a table but later change its default expression, the
|
||||
|
||||
It is not possible to set default values for elements in nested data structures.
|
||||
|
||||
### TTL expression
|
||||
|
||||
Can be specified only for MergeTree-family tables. An expression for setting storage time for values. It must depends on `Date` or `DateTime` column and has one `Date` or `DateTime` column as a result. Example:
|
||||
`TTL date + INTERVAL 1 DAY`
|
||||
|
||||
You are not allowed to set TTL for key columns. For more details, see [TTL for columns and tables](../operations/table_engines/mergetree.md)
|
||||
|
||||
## Column Compression Codecs
|
||||
|
||||
Besides default data compression, defined in [server settings](../operations/server_settings/settings.md#compression), per-column specification is also available.
|
||||
|
@ -30,8 +30,8 @@
|
||||
```sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
|
||||
...
|
||||
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
|
||||
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
|
||||
@ -66,10 +66,18 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример:
|
||||
`SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
|
||||
|
||||
- `TTL` - выражение для задания времени хранения строк.
|
||||
|
||||
Оно должно зависеть от стобца типа `Date` или `DateTime` и в качестве результата вычислять столбец типа `Date` или `DateTime`. Пример:
|
||||
`TTL date + INTERVAL 1 DAY`
|
||||
|
||||
Подробнее смотрите в [TTL для стоблцов и таблиц](mergetree.md)
|
||||
|
||||
- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`:
|
||||
|
||||
- `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. По умолчанию — 8192. Список всех доступных параметров можно посмотреть в [MergeTreeSettings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Storages/MergeTree/MergeTreeSettings.h).
|
||||
- `min_merge_bytes_to_use_direct_io` — минимальный объем данных, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объем хранения всех данных, подлежащих слиянию. Если общий объем хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байт.
|
||||
- `merge_with_ttl_timeout` - Минимальное время в секундах для повторного выполнения слияний с TTL. По умолчанию - 86400 (1 день).
|
||||
|
||||
**Пример задания секций**
|
||||
|
||||
@ -282,4 +290,13 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
|
||||
|
||||
Чтения из таблицы автоматически распараллеливаются.
|
||||
|
||||
|
||||
## TTL для столбцов и таблиц
|
||||
|
||||
Данные с истекшим TTL удаляются во время слияний.
|
||||
|
||||
Если TTL указан для столбца, то когда он истекает, значение заменяется на значение по умолчанию. Если все значения столбца обнулены в куске, то данные этого столбца удаляются с диска в куске. Если TTL указан для таблицы, то когда он истекает, удаляется строка.
|
||||
|
||||
Когда истекает TTL для какого-нибудь значения или строки в куске, назначается внеочередное слияние. Чтобы контролировать частоту слияний с TTL, вы можете задать настройку `merge_with_ttl_timeout`. Если ее значение слишком мало, то будет происходить слишком много внеочередных слияний и мало обычных, вследствие чего может ухудшиться производительность.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/mergetree/) <!--hide-->
|
||||
|
@ -16,8 +16,8 @@ CREATE DATABASE [IF NOT EXISTS] db_name
|
||||
```sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec],
|
||||
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [compression_codec] [TTL expr2],
|
||||
...
|
||||
) ENGINE = engine
|
||||
```
|
||||
@ -80,6 +80,14 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = engine AS SELECT ...
|
||||
|
||||
Отсутствует возможность задать значения по умолчанию для элементов вложенных структур данных.
|
||||
|
||||
### Выражение для TTL
|
||||
|
||||
Может быть указано только для таблиц семейства MergeTree. Выражение для указания времени хранения значений. Оно должно зависеть от стобца типа `Date` или `DateTime` и в качестве результата вычислять столбец типа `Date` или `DateTime`. Пример:
|
||||
`TTL date + INTERVAL 1 DAY`
|
||||
|
||||
Нельзя указывать TTL для ключевых столбцов. Подробнее смотрите в [TTL для стоблцов и таблиц](../operations/table_engines/mergetree.md)
|
||||
|
||||
|
||||
## Форматы сжатия для колонок
|
||||
|
||||
Помимо сжатия для колонок по умолчанию, определяемого в [настройках сервера](../operations/server_settings/settings.md#compression),
|
||||
|
Loading…
Reference in New Issue
Block a user