Merge branch 'master' of github.com:yandex/ClickHouse

Ivan Blinkov 2019-02-05 18:30:25 +03:00
commit 61eeb98ffc
90 changed files with 3264 additions and 385 deletions

View File

@ -1179,7 +1179,7 @@ protected:
/// Removes MATERIALIZED and ALIAS columns from create table query
static ASTPtr removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast)
{
const ASTs & column_asts = typeid_cast<ASTCreateQuery &>(*query_ast).columns->children;
const ASTs & column_asts = typeid_cast<ASTCreateQuery &>(*query_ast).columns_list->columns->children;
auto new_columns = std::make_shared<ASTExpressionList>();
for (const ASTPtr & column_ast : column_asts)
@ -1198,8 +1198,13 @@ protected:
ASTPtr new_query_ast = query_ast->clone();
ASTCreateQuery & new_query = typeid_cast<ASTCreateQuery &>(*new_query_ast);
new_query.columns = new_columns.get();
new_query.children.at(0) = std::move(new_columns);
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, new_columns);
new_columns_list->set(
new_columns_list->indices, typeid_cast<ASTCreateQuery &>(*query_ast).columns_list->indices->clone());
new_query.replace(new_query.columns_list, new_columns_list);
return new_query_ast;
}
@ -1217,7 +1222,7 @@ protected:
res->table = new_table.second;
res->children.clear();
res->set(res->columns, create.columns->clone());
res->set(res->columns_list, create.columns_list->clone());
res->set(res->storage, new_storage_ast->clone());
return res;

View File

@ -157,7 +157,7 @@ protected:
using QueueWithCollation = std::priority_queue<SortCursorWithCollation>;
QueueWithCollation queue_with_collation;
/// Used in Vertical merge algorithm to gather non-PK columns (on next step)
/// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step)
/// If it is not nullptr then it should be populated during execution
WriteBuffer * out_row_sources_buf;

View File

@ -138,6 +138,7 @@ void DatabaseDictionary::alterTable(
const Context &,
const String &,
const ColumnsDescription &,
const IndicesDescription &,
const ASTModifier &)
{
throw Exception("DatabaseDictionary: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -71,6 +71,7 @@ public:
const Context & context,
const String & name,
const ColumnsDescription & columns,
const IndicesDescription & indices,
const ASTModifier & engine_modifier) override;
time_t getTableMetadataModificationTime(

View File

@ -53,6 +53,7 @@ void DatabaseMemory::alterTable(
const Context &,
const String &,
const ColumnsDescription &,
const IndicesDescription &,
const ASTModifier &)
{
throw Exception("DatabaseMemory: alterTable() is not supported", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -48,6 +48,7 @@ public:
const Context & context,
const String & name,
const ColumnsDescription & columns,
const IndicesDescription & indices,
const ASTModifier & engine_modifier) override;
time_t getTableMetadataModificationTime(

View File

@ -510,6 +510,7 @@ void DatabaseOrdinary::alterTable(
const Context & context,
const String & table_name,
const ColumnsDescription & columns,
const IndicesDescription & indices,
const ASTModifier & storage_modifier)
{
/// Read the definition of the table and replace the necessary parts with new ones.
@ -531,7 +532,14 @@ void DatabaseOrdinary::alterTable(
ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast);
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns);
ast_create_query.replace(ast_create_query.columns, new_columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(indices);
ast_create_query.columns_list->replace(ast_create_query.columns_list->columns, new_columns);
if (ast_create_query.columns_list->indices)
ast_create_query.columns_list->replace(ast_create_query.columns_list->indices, new_indices);
else
ast_create_query.columns_list->set(ast_create_query.columns_list->indices, new_indices);
if (storage_modifier)
storage_modifier(*ast_create_query.storage);

View File

@ -42,6 +42,7 @@ public:
const Context & context,
const String & name,
const ColumnsDescription & columns,
const IndicesDescription & indices,
const ASTModifier & engine_modifier) override;
time_t getTableMetadataModificationTime(

View File

@ -68,10 +68,10 @@ std::pair<String, StoragePtr> createTableFromDefinition(
/// We do not directly use `InterpreterCreateQuery::execute`, because
/// - the database has not been created yet;
/// - the code is simpler, since the query is already brought to a suitable form.
if (!ast_create_query.columns)
if (!ast_create_query.columns_list || !ast_create_query.columns_list->columns)
throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);
ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns, context);
ColumnsDescription columns = InterpreterCreateQuery::getColumnsDescription(*ast_create_query.columns_list->columns, context);
return
{

View File

@ -3,6 +3,7 @@
#include <Core/Types.h>
#include <Core/NamesAndTypes.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IndicesDescription.h>
#include <ctime>
#include <memory>
#include <functional>
@ -115,6 +116,7 @@ public:
const Context & context,
const String & name,
const ColumnsDescription & columns,
const IndicesDescription & indices,
const ASTModifier & engine_modifier) = 0;
/// Returns time of table's metadata change, 0 if there is no corresponding metadata file.

View File

@ -0,0 +1,51 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionUnaryArithmetic.h>
#include <DataTypes/NumberTraits.h>
namespace DB
{
template <typename A>
struct BitSwapLastTwoImpl
{
using ResultType = UInt8;
static inline ResultType apply(A a)
{
return static_cast<ResultType>(
((static_cast<ResultType>(a) & 1) << 1) | ((static_cast<ResultType>(a) >> 1) & 1));
}
#if USE_EMBEDDED_COMPILER
static constexpr bool compilable = true;
static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool)
{
if (!arg->getType()->isIntegerTy())
throw Exception("__bitSwapLastTwo expected an integral type", ErrorCodes::LOGICAL_ERROR);
return b.CreateOr(
b.CreateShl(b.CreateAnd(arg, 1), 1),
b.CreateAnd(b.CreateLShr(arg, 1), 1)
);
}
#endif
};
struct NameBitSwapLastTwo { static constexpr auto name = "__bitSwapLastTwo"; };
using FunctionBitSwapLastTwo = FunctionUnaryArithmetic<BitSwapLastTwoImpl, NameBitSwapLastTwo, true>;
template <> struct FunctionUnaryArithmeticMonotonicity<NameBitSwapLastTwo>
{
static bool has() { return false; }
static IFunction::Monotonicity get(const Field &, const Field &)
{
return {};
}
};
void registerFunctionBitSwapLastTwo(FunctionFactory & factory)
{
factory.registerFunction<FunctionBitSwapLastTwo>();
}
}
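For reference, a minimal standalone sketch of the same bit manipulation outside ClickHouse's function framework (plain C++, nothing from the diff required); it mirrors BitSwapLastTwoImpl::apply and shows that only the two low bits survive:

#include <cstdint>
#include <cstdio>

// Swap the two least significant bits, as BitSwapLastTwoImpl::apply does; higher bits are dropped.
static uint8_t bit_swap_last_two(uint8_t a)
{
    return static_cast<uint8_t>(((a & 1) << 1) | ((a >> 1) & 1));
}

int main()
{
    // 1 -> 2, 2 -> 1, 3 -> 3, 4 -> 0
    std::printf("%u %u %u %u\n", bit_swap_last_two(1), bit_swap_last_two(2), bit_swap_last_two(3), bit_swap_last_two(4));
    return 0;
}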

View File

@ -33,6 +33,8 @@ void registerFunctionRoundToExp2(FunctionFactory & factory);
void registerFunctionRoundDuration(FunctionFactory & factory);
void registerFunctionRoundAge(FunctionFactory & factory);
void registerFunctionBitSwapLastTwo(FunctionFactory & factory);
void registerFunctionsArithmetic(FunctionFactory & factory)
{
registerFunctionPlus(factory);
@ -64,6 +66,9 @@ void registerFunctionsArithmetic(FunctionFactory & factory)
registerFunctionRoundToExp2(factory);
registerFunctionRoundDuration(factory);
registerFunctionRoundAge(factory);
/// Not for external use.
registerFunctionBitSwapLastTwo(factory);
}
}

View File

@ -105,7 +105,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
const ASTStorage & storage = *create.storage;
const ASTFunction & engine = *storage.engine;
/// Currently there are no database engines that support any arguments.
if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key || storage.order_by || storage.sample_by || storage.settings)
if (engine.arguments || engine.parameters || storage.partition_by || storage.primary_key
|| storage.order_by || storage.sample_by || storage.settings ||
(create.columns_list && create.columns_list->indices && !create.columns_list->indices->children.empty()))
{
std::stringstream ostr;
formatAST(storage, ostr, false, false);
@ -397,6 +399,16 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns)
return columns_list;
}
ASTPtr InterpreterCreateQuery::formatIndices(const IndicesDescription & indices)
{
auto res = std::make_shared<ASTExpressionList>();
for (const auto & index : indices.indices)
res->children.push_back(index->clone());
return res;
}
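The two formatters are normally combined into a single ASTColumns node before being attached to a CREATE query, the same pattern this commit applies in DatabaseOrdinary::alterTable above and SystemLog::prepareTable below. A minimal sketch of that pattern, assuming a ColumnsDescription and an IndicesDescription are already at hand (the helper name is hypothetical, not part of this commit):

#include <Interpreters/InterpreterCreateQuery.h>
#include <Parsers/ASTCreateQuery.h>

namespace DB
{

/// Hypothetical helper: wrap existing column and index descriptions into the ASTColumns
/// shape that ASTCreateQuery::columns_list expects.
ASTPtr makeColumnsList(const ColumnsDescription & columns, const IndicesDescription & indices)
{
    auto columns_list = std::make_shared<ASTColumns>();
    columns_list->set(columns_list->columns, InterpreterCreateQuery::formatColumns(columns));
    columns_list->set(columns_list->indices, InterpreterCreateQuery::formatIndices(indices));
    return columns_list;
}

}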
ColumnsDescription InterpreterCreateQuery::getColumnsDescription(const ASTExpressionList & columns, const Context & context)
{
ColumnsDescription res;
@ -449,9 +461,9 @@ ColumnsDescription InterpreterCreateQuery::setColumns(
{
ColumnsDescription res;
if (create.columns)
if (create.columns_list && create.columns_list->columns)
{
res = getColumnsDescription(*create.columns, context);
res = getColumnsDescription(*create.columns_list->columns, context);
}
else if (!create.as_table.empty())
{
@ -467,10 +479,16 @@ ColumnsDescription InterpreterCreateQuery::setColumns(
/// Even if query has list of columns, canonicalize it (unfold Nested columns).
ASTPtr new_columns = formatColumns(res);
if (create.columns)
create.replace(create.columns, new_columns);
if (!create.columns_list)
{
auto new_columns_list = std::make_shared<ASTColumns>();
create.set(create.columns_list, new_columns_list);
}
if (create.columns_list->columns)
create.columns_list->replace(create.columns_list->columns, new_columns);
else
create.set(create.columns, new_columns);
create.columns_list->set(create.columns_list->columns, new_columns);
/// Check for duplicates
std::set<String> all_columns;
@ -550,7 +568,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
String table_name_escaped = escapeForFileName(table_name);
// If this is a stub ATTACH query, read the query definition from the database
if (create.attach && !create.storage && !create.columns)
if (create.attach && !create.storage && !create.columns_list)
{
// Table SQL definition is available even if the table is detached
auto query = context.getCreateTableQuery(database_name, table_name);
@ -569,7 +587,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
}
Block as_select_sample;
if (create.select && (!create.attach || !create.columns))
if (create.select && (!create.attach || !create.columns_list))
as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context);
String as_database_name = create.as_database.empty() ? current_database : create.as_database;

View File

@ -2,6 +2,7 @@
#include <Interpreters/IInterpreter.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IndicesDescription.h>
#include <Common/ThreadPool.h>
@ -29,6 +30,8 @@ public:
static ASTPtr formatColumns(const NamesAndTypesList & columns);
static ASTPtr formatColumns(const ColumnsDescription & columns);
static ASTPtr formatIndices(const IndicesDescription & indices);
void setDatabaseLoadingThreadpool(ThreadPool & thread_pool_)
{
thread_pool = &thread_pool_;

View File

@ -252,7 +252,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam
create.attach = true;
std::string data_path = database->getDataPath();
auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, system_context);
auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns_list->columns, system_context);
StoragePtr table = StorageFactory::instance().get(create,
data_path,

View File

@ -358,7 +358,10 @@ void SystemLog<LogElement>::prepareTable()
create->table = table_name;
Block sample = LogElement::createBlock();
create->set(create->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList()));
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(sample.getNamesAndTypesList()));
create->set(create->columns_list, new_columns_list);
ParserStorage storage_parser;
ASTPtr storage_ast = parseQuery(

View File

@ -82,6 +82,24 @@ void ASTAlterCommand::formatImpl(
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY ORDER BY " << (settings.hilite ? hilite_none : "");
order_by->formatImpl(settings, state, frame);
}
else if (type == ASTAlterCommand::ADD_INDEX)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "");
index_decl->formatImpl(settings, state, frame);
/// AFTER
if (index)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : "");
index->formatImpl(settings, state, frame);
}
}
else if (type == ASTAlterCommand::DROP_INDEX)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str
<< "DROP INDEX " << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "");
index->formatImpl(settings, state, frame);
}
else if (type == ASTAlterCommand::DROP_PARTITION)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (detach ? "DETACH" : "DROP") << " PARTITION "
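A hedged sketch of how the new branches are exercised end to end: parse an ALTER statement with the ADD INDEX / DROP INDEX clauses and print it back through this formatImpl. It relies only on ParserAlterQuery plus the parseQuery/serializeAST helpers already used elsewhere in this commit; the table name, index names and the minmax type are purely illustrative:

#include <Parsers/ParserAlterQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <iostream>

int main()
{
    using namespace DB;

    ParserAlterQuery parser;
    /// Example statements using the new clauses.
    ASTPtr add = parseQuery(parser, "ALTER TABLE t ADD INDEX idx u64 * 3 TYPE minmax GRANULARITY 4 AFTER other_idx", 0);
    ASTPtr drop = parseQuery(parser, "ALTER TABLE t DROP INDEX IF EXISTS idx", 0);

    /// The ADD_INDEX / DROP_INDEX branches above render them back.
    std::cout << serializeAST(*add, true) << '\n' << serializeAST(*drop, true) << '\n';
    return 0;
}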

View File

@ -28,6 +28,9 @@ public:
COMMENT_COLUMN,
MODIFY_ORDER_BY,
ADD_INDEX,
DROP_INDEX,
DROP_PARTITION,
ATTACH_PARTITION,
REPLACE_PARTITION,
@ -58,6 +61,15 @@ public:
*/
ASTPtr order_by;
/** The ADD INDEX query stores the IndexDeclaration here.
*/
ASTPtr index_decl;
/** The ADD INDEX query stores the name of the index following AFTER.
* The DROP INDEX query stores the name for deletion.
*/
ASTPtr index;
/** Used in DROP PARTITION and ATTACH PARTITION FROM queries.
* The value or ID of the partition is stored here.
*/

View File

@ -38,6 +38,7 @@ public:
res->set(res->order_by, order_by->clone());
if (sample_by)
res->set(res->sample_by, sample_by->clone());
if (settings)
res->set(res->settings, settings->clone());
@ -81,6 +82,95 @@ public:
};
class ASTColumns : public IAST
{
private:
class ASTColumnsElement : public IAST
{
public:
String prefix;
IAST * elem;
String getID(char c) const override { return "ASTColumnsElement for " + elem->getID(c); }
ASTPtr clone() const override
{
auto res = std::make_shared<ASTColumnsElement>();
res->prefix = prefix;
if (elem)
res->set(res->elem, elem->clone());
return res;
}
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
{
if (!elem)
return;
if (prefix.empty())
{
elem->formatImpl(s, state, frame);
return;
}
frame.need_parens = false;
std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
s.ostr << s.nl_or_ws << indent_str;
s.ostr << (s.hilite ? hilite_keyword : "") << prefix << (s.hilite ? hilite_none : "");
FormatSettings nested_settings = s;
nested_settings.one_line = true;
nested_settings.nl_or_ws = ' ';
elem->formatImpl(nested_settings, state, frame);
}
};
public:
ASTExpressionList * columns = nullptr;
ASTExpressionList * indices = nullptr;
String getID(char) const override { return "Columns definition"; }
ASTPtr clone() const override
{
auto res = std::make_shared<ASTColumns>();
if (columns)
res->set(res->columns, columns->clone());
if (indices)
res->set(res->indices, indices->clone());
return res;
}
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
{
ASTExpressionList list;
if (columns)
for (const auto & column : columns->children)
{
auto elem = std::make_shared<ASTColumnsElement>();
elem->prefix = "";
elem->set(elem->elem, column->clone());
list.children.push_back(elem);
}
if (indices)
for (const auto & index : indices->children)
{
auto elem = std::make_shared<ASTColumnsElement>();
elem->prefix = "INDEX";
elem->set(elem->elem, index->clone());
list.children.push_back(elem);
}
if (!list.children.empty())
list.formatImpl(s, state, frame);
}
};
/// CREATE TABLE or ATTACH TABLE query
class ASTCreateQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
{
@ -90,7 +180,7 @@ public:
bool is_view{false};
bool is_materialized_view{false};
bool is_populate{false};
ASTExpressionList * columns = nullptr;
ASTColumns * columns_list = nullptr;
String to_database; /// For CREATE MATERIALIZED VIEW mv TO table.
String to_table;
ASTStorage * storage = nullptr;
@ -106,8 +196,8 @@ public:
auto res = std::make_shared<ASTCreateQuery>(*this);
res->children.clear();
if (columns)
res->set(res->columns, columns->clone());
if (columns_list)
res->set(res->columns_list, columns_list->clone());
if (storage)
res->set(res->storage, storage->clone());
if (select)
@ -175,12 +265,12 @@ protected:
<< (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table);
}
if (columns)
if (columns_list)
{
settings.ostr << (settings.one_line ? " (" : "\n(");
FormatStateStacked frame_nested = frame;
++frame_nested.indent;
columns->formatImpl(settings, state, frame_nested);
columns_list->formatImpl(settings, state, frame_nested);
settings.ostr << (settings.one_line ? ")" : "\n)");
}

View File

@ -0,0 +1,59 @@
#pragma once
#include <Core/Field.h>
#include <Core/Types.h>
#include <Common/FieldVisitors.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/IAST.h>
#include <vector>
namespace DB
{
/** name expr TYPE typename(args) GRANULARITY int in create query
*/
class ASTIndexDeclaration : public IAST
{
public:
String name;
IAST * expr;
ASTFunction * type;
Field granularity;
/** Get the text that identifies this element. */
String getID(char) const override { return "Index"; }
ASTPtr clone() const override
{
auto res = std::make_shared<ASTIndexDeclaration>();
res->name = name;
res->granularity = granularity;
if (expr)
res->set(res->expr, expr->clone());
if (type)
res->set(res->type, type->clone());
return res;
}
void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override
{
frame.need_parens = false;
std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' ');
s.ostr << s.nl_or_ws << indent_str;
s.ostr << backQuoteIfNeed(name);
s.ostr << " ";
expr->formatImpl(s, state, frame);
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
type->formatImpl(s, state, frame);
s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
s.ostr << applyVisitor(FieldVisitorToString(), granularity);
}
};
}

View File

@ -6,6 +6,7 @@
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ParserPartition.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <Parsers/ASTAlterQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTAssignment.h>
@ -27,6 +28,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserKeyword s_comment_column("COMMENT COLUMN");
ParserKeyword s_modify_order_by("MODIFY ORDER BY");
ParserKeyword s_add_index("ADD INDEX");
ParserKeyword s_drop_index("DROP INDEX");
ParserKeyword s_attach_partition("ATTACH PARTITION");
ParserKeyword s_detach_partition("DETACH PARTITION");
ParserKeyword s_drop_partition("DROP PARTITION");
@ -51,6 +55,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
ParserCompoundIdentifier parser_name;
ParserStringLiteral parser_string_literal;
ParserCompoundColumnDeclaration parser_col_decl;
ParserIndexDeclaration parser_idx_decl;
ParserCompoundColumnDeclaration parser_modify_col_decl(false);
ParserPartition parser_partition;
ParserExpression parser_exp_elem;
@ -92,6 +97,33 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
command->type = ASTAlterCommand::DROP_COLUMN;
command->detach = false;
}
else if (s_add_index.ignore(pos, expected))
{
if (s_if_not_exists.ignore(pos, expected))
command->if_not_exists = true;
if (!parser_idx_decl.parse(pos, command->index_decl, expected))
return false;
if (s_after.ignore(pos, expected))
{
if (!parser_name.parse(pos, command->index, expected))
return false;
}
command->type = ASTAlterCommand::ADD_INDEX;
}
else if (s_drop_index.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))
command->if_exists = true;
if (!parser_name.parse(pos, command->index, expected))
return false;
command->type = ASTAlterCommand::DROP_INDEX;
command->detach = false;
}
else if (s_clear_column.ignore(pos, expected))
{
if (s_if_exists.ignore(pos, expected))

View File

@ -1,5 +1,7 @@
#include <Common/typeid_cast.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ExpressionListParsers.h>
@ -90,6 +92,113 @@ bool ParserColumnDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected &
.parse(pos, node, expected);
}
bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_type("TYPE");
ParserKeyword s_granularity("GRANULARITY");
ParserIdentifier name_p;
ParserIdentifierWithOptionalParameters ident_with_optional_params_p;
ParserExpression expression_p;
ParserUnsignedInteger granularity_p;
ASTPtr name;
ASTPtr expr;
ASTPtr type;
ASTPtr granularity;
if (!name_p.parse(pos, name, expected))
return false;
if (!expression_p.parse(pos, expr, expected))
return false;
if (!s_type.ignore(pos, expected))
return false;
if (!ident_with_optional_params_p.parse(pos, type, expected))
return false;
if (!s_granularity.ignore(pos, expected))
return false;
if (!granularity_p.parse(pos, granularity, expected))
return false;
auto index = std::make_shared<ASTIndexDeclaration>();
index->name = typeid_cast<const ASTIdentifier &>(*name).name;
index->granularity = typeid_cast<const ASTLiteral &>(*granularity).value;
index->set(index->expr, expr);
index->set(index->type, type);
node = index;
return true;
}
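A small sketch of the grammar this parser accepts, round-tripped through ASTIndexDeclaration::formatImpl from the header above; parseQuery and serializeAST are used exactly as elsewhere in this commit, and the declaration text is only an example:

#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <iostream>

int main()
{
    using namespace DB;

    ParserIndexDeclaration parser;
    /// name, expression, TYPE type(args), GRANULARITY n -- the sequence parsed above.
    ASTPtr index = parseQuery(parser, "idx_sum (a + b) TYPE minmax GRANULARITY 10", 0);

    /// Prints the declaration as re-rendered by ASTIndexDeclaration::formatImpl.
    std::cout << serializeAST(*index, true) << '\n';
    return 0;
}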
bool ParserColumnAndIndexDeclaraion::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKeyword s_index("INDEX");
ParserIndexDeclaration index_p;
ParserColumnDeclaration column_p;
ASTPtr new_node = nullptr;
if (s_index.ignore(pos, expected))
{
if (!index_p.parse(pos, new_node, expected))
return false;
}
else
{
if (!column_p.parse(pos, new_node, expected))
return false;
}
node = new_node;
return true;
}
bool ParserIndexDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
return ParserList(std::make_unique<ParserIndexDeclaration>(), std::make_unique<ParserToken>(TokenType::Comma), false)
.parse(pos, node, expected);
}
bool ParserColumnsOrIndicesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr list;
if (!ParserList(std::make_unique<ParserColumnAndIndexDeclaraion>(), std::make_unique<ParserToken>(TokenType::Comma), false)
.parse(pos, list, expected))
return false;
ASTPtr columns = std::make_shared<ASTExpressionList>();
ASTPtr indices = std::make_shared<ASTExpressionList>();
for (const auto & elem : list->children)
{
if (typeid_cast<const ASTColumnDeclaration *>(elem.get()))
columns->children.push_back(elem);
else if (typeid_cast<const ASTIndexDeclaration *>(elem.get()))
indices->children.push_back(elem);
else
return false;
}
auto res = std::make_shared<ASTColumns>();
if (!columns->children.empty())
res->set(res->columns, columns);
if (!indices->children.empty())
res->set(res->indices, indices);
node = res;
return true;
}
bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
@ -169,6 +278,7 @@ bool ParserStorage::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
storage->set(storage->primary_key, primary_key);
storage->set(storage->order_by, order_by);
storage->set(storage->sample_by, sample_by);
storage->set(storage->settings, settings);
node = storage;
@ -193,12 +303,12 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserToken s_rparen(TokenType::ClosingRoundBracket);
ParserStorage storage_p;
ParserIdentifier name_p;
ParserColumnDeclarationList columns_p;
ParserColumnsOrIndicesDeclarationList columns_or_indices_p;
ParserSelectWithUnionQuery select_p;
ASTPtr database;
ASTPtr table;
ASTPtr columns;
ASTPtr columns_list;
ASTPtr to_database;
ASTPtr to_table;
ASTPtr storage;
@ -266,7 +376,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// List of columns.
if (s_lparen.ignore(pos, expected))
{
if (!columns_p.parse(pos, columns, expected))
if (!columns_or_indices_p.parse(pos, columns_list, expected))
return false;
if (!s_rparen.ignore(pos, expected))
@ -368,7 +478,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// Optional - a list of columns can be specified. It must fully comply with SELECT.
if (s_lparen.ignore(pos, expected))
{
if (!columns_p.parse(pos, columns, expected))
if (!columns_or_indices_p.parse(pos, columns_list, expected))
return false;
if (!s_rparen.ignore(pos, expected))
@ -410,7 +520,7 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
getIdentifierName(to_database, query->to_database);
getIdentifierName(to_table, query->to_table);
query->set(query->columns, columns);
query->set(query->columns_list, columns_list);
query->set(query->storage, storage);
getIdentifierName(as_database, query->as_database);
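Putting the pieces together: the new grammar lets a CREATE statement mix column and INDEX declarations inside one parenthesized list, and ParserColumnsOrIndicesDeclarationList splits them into the columns and indices children of ASTColumns. A hedged sketch, with illustrative table, column and index names:

#include <Parsers/ParserCreateQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Common/typeid_cast.h>
#include <iostream>

int main()
{
    using namespace DB;

    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser,
        "CREATE TABLE t (a UInt32, b String, INDEX idx_a a TYPE minmax GRANULARITY 1) ENGINE = MergeTree ORDER BY a",
        0);

    const auto & create = typeid_cast<const ASTCreateQuery &>(*ast);
    /// columns_list->columns holds the two column declarations, columns_list->indices the single INDEX entry.
    std::cout << create.columns_list->columns->children.size() << " columns, "
              << create.columns_list->indices->children.size() << " indices\n";
    return 0;
}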

View File

@ -218,7 +218,45 @@ protected:
};
/** ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...] */
/** name expr TYPE typename(arg1, arg2, ...) GRANULARITY value */
class ParserIndexDeclaration : public IParserBase
{
public:
ParserIndexDeclaration() {}
protected:
const char * getName() const override { return "index declaration"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserColumnAndIndexDeclaraion : public IParserBase
{
protected:
const char * getName() const override { return "column or index declaration"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserIndexDeclarationList : public IParserBase
{
protected:
const char * getName() const override { return "index declaration list"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserColumnsOrIndicesDeclarationList : public IParserBase
{
protected:
const char * getName() const override { return "columns or indices declaration list"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/**
* ENGINE = name [PARTITION BY expr] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [SETTINGS name = value, ...]
*/
class ParserStorage : public IParserBase
{
protected:
@ -233,6 +271,8 @@ protected:
* name1 type1,
* name2 type2,
* ...
* INDEX name1 expr TYPE type1(args) GRANULARITY value,
* ...
* ) ENGINE = engine
*
* Or:

View File

@ -8,6 +8,7 @@
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
@ -120,6 +121,35 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
command.order_by = command_ast->order_by;
return command;
}
else if (command_ast->type == ASTAlterCommand::ADD_INDEX)
{
AlterCommand command;
command.index_decl = command_ast->index_decl;
command.type = AlterCommand::ADD_INDEX;
const auto & ast_index_decl = typeid_cast<const ASTIndexDeclaration &>(*command_ast->index_decl);
command.index_name = ast_index_decl.name;
if (command_ast->index)
command.after_index_name = typeid_cast<const ASTIdentifier &>(*command_ast->index).name;
command.if_not_exists = command_ast->if_not_exists;
return command;
}
else if (command_ast->type == ASTAlterCommand::DROP_INDEX)
{
if (command_ast->clear_column)
throw Exception("\"ALTER TABLE table CLEAR COLUMN column\" queries are not supported yet. Use \"CLEAR COLUMN column IN PARTITION\".", ErrorCodes::NOT_IMPLEMENTED);
AlterCommand command;
command.type = AlterCommand::DROP_INDEX;
command.index_name = typeid_cast<const ASTIdentifier &>(*(command_ast->index)).name;
command.if_exists = command_ast->if_exists;
return command;
}
else
return {};
}
@ -132,7 +162,8 @@ static bool namesEqual(const String & name_without_dot, const DB::NameAndTypePai
return (name_with_dot == name_type.name.substr(0, name_without_dot.length() + 1) || name_without_dot == name_type.name);
}
void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
{
if (type == ADD_COLUMN)
{
@ -297,6 +328,60 @@ void AlterCommand::apply(ColumnsDescription & columns_description, ASTPtr & orde
{
columns_description.comments[column_name] = comment;
}
else if (type == ADD_INDEX)
{
if (std::any_of(
indices_description.indices.cbegin(),
indices_description.indices.cend(),
[this](const ASTPtr & index_ast)
{
return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == index_name;
}))
{
if (if_not_exists)
return;
else
throw Exception{"Cannot add index " + index_name + ": index with this name already exists",
ErrorCodes::ILLEGAL_COLUMN};
}
auto insert_it = indices_description.indices.end();
if (!after_index_name.empty())
{
insert_it = std::find_if(
indices_description.indices.begin(),
indices_description.indices.end(),
[this](const ASTPtr & index_ast)
{
return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == after_index_name;
});
if (insert_it == indices_description.indices.end())
throw Exception("Wrong index name. Cannot find index `" + after_index_name + "` to insert after.",
ErrorCodes::LOGICAL_ERROR);
++insert_it;
}
indices_description.indices.emplace(insert_it, std::dynamic_pointer_cast<ASTIndexDeclaration>(index_decl));
}
else if (type == DROP_INDEX)
{
auto erase_it = std::find_if(
indices_description.indices.begin(),
indices_description.indices.end(),
[this](const ASTPtr & index_ast)
{
return typeid_cast<const ASTIndexDeclaration &>(*index_ast).name == index_name;
});
if (erase_it == indices_description.indices.end())
throw Exception("Wrong index name. Cannot find index `" + index_name + "` to drop.",
ErrorCodes::LOGICAL_ERROR);
indices_description.indices.erase(erase_it);
}
else
throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR);
}
@ -311,17 +396,19 @@ bool AlterCommand::is_mutable() const
return true;
}
void AlterCommands::apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
void AlterCommands::apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const
{
auto new_columns_description = columns_description;
auto new_indices_description = indices_description;
auto new_order_by_ast = order_by_ast;
auto new_primary_key_ast = primary_key_ast;
for (const AlterCommand & command : *this)
if (!command.ignore)
command.apply(new_columns_description, new_order_by_ast, new_primary_key_ast);
command.apply(new_columns_description, new_indices_description, new_order_by_ast, new_primary_key_ast);
columns_description = std::move(new_columns_description);
indices_description = std::move(new_indices_description);
order_by_ast = std::move(new_order_by_ast);
primary_key_ast = std::move(new_primary_key_ast);
}
@ -538,14 +625,17 @@ void AlterCommands::validate(const IStorage & table, const Context & context)
void AlterCommands::apply(ColumnsDescription & columns_description) const
{
auto out_columns_description = columns_description;
IndicesDescription indices_description;
ASTPtr out_order_by;
ASTPtr out_primary_key;
apply(out_columns_description, out_order_by, out_primary_key);
apply(out_columns_description, indices_description, out_order_by, out_primary_key);
if (out_order_by)
throw Exception("Storage doesn't support modifying ORDER BY expression", ErrorCodes::NOT_IMPLEMENTED);
if (out_primary_key)
throw Exception("Storage doesn't support modifying PRIMARY KEY expression", ErrorCodes::NOT_IMPLEMENTED);
if (!indices_description.indices.empty())
throw Exception("Storage doesn't support modifying indices", ErrorCodes::NOT_IMPLEMENTED);
columns_description = std::move(out_columns_description);
}
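The ADD INDEX placement above is plain vector surgery: find the element named in AFTER, step one past it, insert there. A self-contained illustration of the same idea on a vector of names, with no ClickHouse types involved:

#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> indices = {"idx_a", "idx_b", "idx_d"};

    // Insert "idx_c" AFTER "idx_b": find it, advance past it, insert there.
    const std::string after = "idx_b";
    auto it = std::find(indices.begin(), indices.end(), after);
    if (it == indices.end())
        throw std::runtime_error("Cannot find index `" + after + "` to insert after.");
    indices.insert(++it, "idx_c");

    for (const auto & name : indices)
        std::cout << name << ' ';   // idx_a idx_b idx_c idx_d
    std::cout << '\n';
    return 0;
}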

View File

@ -3,6 +3,7 @@
#include <optional>
#include <Core/NamesAndTypes.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/IndicesDescription.h>
#include <optional>
@ -23,6 +24,8 @@ struct AlterCommand
MODIFY_COLUMN,
COMMENT_COLUMN,
MODIFY_ORDER_BY,
ADD_INDEX,
DROP_INDEX,
UKNOWN_TYPE,
};
@ -52,6 +55,13 @@ struct AlterCommand
/// For MODIFY_ORDER_BY
ASTPtr order_by;
/// For ADD INDEX
ASTPtr index_decl;
String after_index_name;
/// For ADD/DROP INDEX
String index_name;
/// indicates that this command should not be applied, for example in case of if_exists=true and column doesn't exist.
bool ignore = false;
@ -70,7 +80,8 @@ struct AlterCommand
static std::optional<AlterCommand> parse(const ASTAlterCommand * command);
void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const;
void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description,
ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const;
/// Checks that the command touches more than just metadata
bool is_mutable() const;
};
@ -81,7 +92,8 @@ class Context;
class AlterCommands : public std::vector<AlterCommand>
{
public:
void apply(ColumnsDescription & columns_description, ASTPtr & order_by_ast, ASTPtr & primary_key_ast) const;
void apply(ColumnsDescription & columns_description, IndicesDescription & indices_description, ASTPtr & order_by_ast,
ASTPtr & primary_key_ast) const;
/// For storages that don't support MODIFY_ORDER_BY.
void apply(ColumnsDescription & columns_description) const;

View File

@ -24,8 +24,9 @@ void IStorage::alter(const AlterCommands & params, const String & database_name,
auto lock = lockStructureForAlter();
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {});
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {});
setColumns(std::move(new_columns));
}

View File

@ -31,6 +31,11 @@ void ITableDeclaration::setColumns(ColumnsDescription columns_)
columns = std::move(columns_);
}
void ITableDeclaration::setIndicesDescription(IndicesDescription indices_)
{
indices = std::move(indices_);
}
bool ITableDeclaration::hasColumn(const String & column_name) const
{

View File

@ -1,6 +1,7 @@
#pragma once
#include <Storages/ColumnsDescription.h>
#include <Storages/IndicesDescription.h>
namespace DB
@ -15,6 +16,9 @@ public:
virtual const ColumnsDescription & getColumns() const { return columns; }
virtual void setColumns(ColumnsDescription columns_);
virtual const IndicesDescription & getIndicesDescription() const { return indices; }
virtual void setIndicesDescription(IndicesDescription indices_);
/// NOTE: These methods should include virtual columns, but should NOT include ALIAS columns
/// (they are treated separately).
virtual NameAndTypePair getColumn(const String & column_name) const;
@ -52,6 +56,7 @@ public:
private:
ColumnsDescription columns;
IndicesDescription indices;
};
}

View File

@ -0,0 +1,38 @@
#include <Storages/IndicesDescription.h>
#include <Parsers/formatAST.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
namespace DB
{
String IndicesDescription::toString() const
{
if (indices.empty())
return {};
ASTExpressionList list;
for (const auto & index : indices)
list.children.push_back(index);
return serializeAST(list, true);
}
IndicesDescription IndicesDescription::parse(const String & str)
{
if (str.empty())
return {};
IndicesDescription res;
ParserIndexDeclarationList parser;
ASTPtr list = parseQuery(parser, str, 0);
for (const auto & index : list->children)
res.indices.push_back(std::dynamic_pointer_cast<ASTIndexDeclaration>(index));
return res;
}
}
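A short sketch of the intended round trip: toString() produces the text that ends up in table metadata, and parse() restores an equivalent description; the declaration text below is illustrative only:

#include <Storages/IndicesDescription.h>
#include <iostream>

int main()
{
    using namespace DB;

    /// Two comma-separated declarations in the format ParserIndexDeclarationList accepts.
    auto indices = IndicesDescription::parse("idx_a a TYPE minmax GRANULARITY 1, idx_b b TYPE minmax GRANULARITY 4");

    std::cout << indices.toString() << '\n';
    std::cout << IndicesDescription::parse(indices.toString()).indices.size() << " indices\n";
    return 0;
}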

View File

@ -0,0 +1,22 @@
#pragma once
#include <Parsers/ASTIndexDeclaration.h>
namespace DB
{
using IndicesAsts = std::vector<std::shared_ptr<ASTIndexDeclaration>>;
struct IndicesDescription
{
IndicesAsts indices;
IndicesDescription() = default;
String toString() const;
static IndicesDescription parse(const String & str);
};
}

View File

@ -47,6 +47,7 @@
#include <algorithm>
#include <iomanip>
#include <set>
#include <thread>
#include <typeinfo>
#include <typeindex>
@ -88,6 +89,7 @@ namespace ErrorCodes
MergeTreeData::MergeTreeData(
const String & database_, const String & table_,
const String & full_path_, const ColumnsDescription & columns_,
const IndicesDescription & indices_,
Context & context_,
const String & date_column_name,
const ASTPtr & partition_by_ast_,
@ -113,7 +115,7 @@ MergeTreeData::MergeTreeData(
data_parts_by_info(data_parts_indexes.get<TagByInfo>()),
data_parts_by_state_and_info(data_parts_indexes.get<TagByStateAndInfo>())
{
setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_);
setPrimaryKeyIndicesAndColumns(order_by_ast_, primary_key_ast_, columns_, indices_);
/// NOTE: using the same columns list as is read when performing actual merges.
merging_params.check(getColumns().getAllPhysical());
@ -219,8 +221,9 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam
}
void MergeTreeData::setPrimaryKeyAndColumns(
const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check)
void MergeTreeData::setPrimaryKeyIndicesAndColumns(
const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast,
const ColumnsDescription & new_columns, const IndicesDescription & indices_description, bool only_check)
{
if (!new_order_by_ast)
throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
@ -327,6 +330,50 @@ void MergeTreeData::setPrimaryKeyAndColumns(
new_primary_key_data_types.push_back(elem.type);
}
ASTPtr skip_indices_with_primary_key_expr_list = new_primary_key_expr_list->clone();
ASTPtr skip_indices_with_sorting_key_expr_list = new_sorting_key_expr_list->clone();
MergeTreeIndices new_indices;
if (!indices_description.indices.empty())
{
std::set<String> indices_names;
for (const auto & index_ast : indices_description.indices)
{
const auto & index_decl = std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast);
new_indices.push_back(
MergeTreeIndexFactory::instance().get(
all_columns,
std::dynamic_pointer_cast<ASTIndexDeclaration>(index_decl->clone()),
global_context));
if (indices_names.find(new_indices.back()->name) != indices_names.end())
throw Exception(
"Index with name `" + new_indices.back()->name + "` already exsists",
ErrorCodes::LOGICAL_ERROR);
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(index_decl->expr->clone());
for (const auto & expr : expr_list->children)
{
skip_indices_with_primary_key_expr_list->children.push_back(expr->clone());
skip_indices_with_sorting_key_expr_list->children.push_back(expr->clone());
}
indices_names.insert(new_indices.back()->name);
}
}
auto syntax_primary = SyntaxAnalyzer(global_context, {}).analyze(
skip_indices_with_primary_key_expr_list, all_columns);
auto new_indices_with_primary_key_expr = ExpressionAnalyzer(
skip_indices_with_primary_key_expr_list, syntax_primary, global_context).getActions(false);
auto syntax_sorting = SyntaxAnalyzer(global_context, {}).analyze(
skip_indices_with_sorting_key_expr_list, all_columns);
auto new_indices_with_sorting_key_expr = ExpressionAnalyzer(
skip_indices_with_sorting_key_expr_list, syntax_sorting, global_context).getActions(false);
if (!only_check)
{
setColumns(new_columns);
@ -342,6 +389,12 @@ void MergeTreeData::setPrimaryKeyAndColumns(
primary_key_expr = std::move(new_primary_key_expr);
primary_key_sample = std::move(new_primary_key_sample);
primary_key_data_types = std::move(new_primary_key_data_types);
setIndicesDescription(indices_description);
skip_indices = std::move(new_indices);
primary_key_and_skip_indices_expr = new_indices_with_primary_key_expr;
sorting_key_and_skip_indices_expr = new_indices_with_sorting_key_expr;
}
}
@ -1001,9 +1054,10 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
{
/// Check that needed transformations can be applied to the list of columns without considering type conversions.
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
ASTPtr new_order_by_ast = order_by_ast;
ASTPtr new_primary_key_ast = primary_key_ast;
commands.apply(new_columns, new_order_by_ast, new_primary_key_ast);
commands.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
/// Set of columns that shouldn't be altered.
NameSet columns_alter_forbidden;
@ -1021,6 +1075,12 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
columns_alter_forbidden.insert(col);
}
for (const auto & index : skip_indices)
{
for (const String & col : index->expr->getRequiredColumns())
columns_alter_forbidden.insert(col);
}
if (sorting_key_expr)
{
for (const ExpressionAction & action : sorting_key_expr->getActions())
@ -1075,18 +1135,21 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
}
}
setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true);
setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast,
new_columns, new_indices, /* only_check = */ true);
/// Check that type conversions are possible.
ExpressionActionsPtr unused_expression;
NameToNameMap unused_map;
bool unused_bool;
createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(), unused_expression, unused_map, unused_bool);
createConvertExpression(nullptr, getColumns().getAllPhysical(), new_columns.getAllPhysical(),
getIndicesDescription().indices, new_indices.indices, unused_expression, unused_map, unused_bool);
}
void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
const IndicesAsts & old_indices, const IndicesAsts & new_indices, ExpressionActionsPtr & out_expression,
NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
{
out_expression = nullptr;
out_rename_map = {};
@ -1100,6 +1163,21 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
/// For every column that needs to be converted: source column name, column name of calculated expression for conversion.
std::vector<std::pair<String, String>> conversions;
/// Remove old indices
std::set<String> new_indices_set;
for (const auto & index_decl : new_indices)
new_indices_set.emplace(dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get()).name);
for (const auto & index_decl : old_indices)
{
const auto & index = dynamic_cast<const ASTIndexDeclaration &>(*index_decl.get());
if (!new_indices_set.count(index.name))
{
out_rename_map["skp_idx_" + index.name + ".idx"] = "";
out_rename_map["skp_idx_" + index.name + ".mrk"] = "";
}
}
/// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes.
std::map<String, size_t> stream_counts;
for (const NameAndTypePair & column : old_columns)
@ -1230,12 +1308,15 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
const DataPartPtr & part,
const NamesAndTypesList & new_columns,
const IndicesAsts & new_indices,
bool skip_sanity_checks)
{
ExpressionActionsPtr expression;
AlterDataPartTransactionPtr transaction(new AlterDataPartTransaction(part)); /// Blocks changes to the part.
bool force_update_metadata;
createConvertExpression(part, part->columns, new_columns, expression, transaction->rename_map, force_update_metadata);
createConvertExpression(part, part->columns, new_columns,
getIndicesDescription().indices, new_indices,
expression, transaction->rename_map, force_update_metadata);
size_t num_files_to_modify = transaction->rename_map.size();
size_t num_files_to_remove = 0;
@ -2062,7 +2143,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const St
/// Check the data while we are at it.
if (part->checksums.empty())
{
part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types);
part->checksums = checkDataPart(full_part_path, index_granularity, false, primary_key_data_types, skip_indices);
{
WriteBufferFromFile out(full_part_path + "checksums.txt.tmp", 4096);

View File

@ -4,6 +4,7 @@
#include <Interpreters/Context.h>
#include <Interpreters/ExpressionActions.h>
#include <Storages/ITableDeclaration.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <IO/ReadBufferFromString.h>
@ -13,6 +14,7 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/GraphiteRollupSortedBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeDataPart.h>
#include <Storages/IndicesDescription.h>
#include <boost/multi_index_container.hpp>
#include <boost/multi_index/ordered_index.hpp>
@ -303,6 +305,7 @@ public:
MergeTreeData(const String & database_, const String & table_,
const String & full_path_,
const ColumnsDescription & columns_,
const IndicesDescription & indices_,
Context & context_,
const String & date_column_name,
const ASTPtr & partition_by_ast_,
@ -476,7 +479,7 @@ public:
/// Check if the ALTER can be performed:
/// - all needed columns are present.
/// - all type conversions can be done.
/// - columns corresponding to primary key, sign, sampling expression and date are not affected.
/// - columns corresponding to primary key, indices, sign, sampling expression and date are not affected.
/// If something is wrong, throws an exception.
void checkAlter(const AlterCommands & commands);
@ -487,6 +490,7 @@ public:
AlterDataPartTransactionPtr alterDataPart(
const DataPartPtr & part,
const NamesAndTypesList & new_columns,
const IndicesAsts & new_indices,
bool skip_sanity_checks);
/// Freezes all parts.
@ -508,6 +512,7 @@ public:
bool hasSortingKey() const { return !sorting_key_columns.empty(); }
bool hasPrimaryKey() const { return !primary_key_columns.empty(); }
bool hasSkipIndices() const { return !skip_indices.empty(); }
ASTPtr getSortingKeyAST() const { return sorting_key_expr_ast; }
ASTPtr getPrimaryKeyAST() const { return primary_key_expr_ast; }
@ -581,6 +586,12 @@ public:
Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column.
Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column.
/// Secondary (data skipping) indices for MergeTree
MergeTreeIndices skip_indices;
ExpressionActionsPtr primary_key_and_skip_indices_expr;
ExpressionActionsPtr sorting_key_and_skip_indices_expr;
/// Names of columns for primary key + secondary sorting columns.
Names sorting_key_columns;
ASTPtr sorting_key_expr_ast;
@ -721,7 +732,9 @@ private:
/// The same for clearOldTemporaryDirectories.
std::mutex clear_old_temporary_directories_mutex;
void setPrimaryKeyAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast, const ColumnsDescription & new_columns, bool only_check = false);
void setPrimaryKeyIndicesAndColumns(const ASTPtr & new_order_by_ast, ASTPtr new_primary_key_ast,
const ColumnsDescription & new_columns,
const IndicesDescription & indices_description, bool only_check = false);
void initPartitionKey();
@ -733,6 +746,7 @@ private:
/// Files to be deleted are mapped to an empty string in out_rename_map.
/// If part == nullptr, just checks that all type conversions are possible.
void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
const IndicesAsts & old_indices, const IndicesAsts & new_indices,
ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const;
/// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked.

View File

@ -334,12 +334,19 @@ MergeTreeData::DataPartsVector MergeTreeDataMergerMutator::selectAllPartsFromPar
static void extractMergingAndGatheringColumns(
const NamesAndTypesList & all_columns,
const ExpressionActionsPtr & sorting_key_expr,
const MergeTreeIndices & indexes,
const MergeTreeData::MergingParams & merging_params,
NamesAndTypesList & gathering_columns, Names & gathering_column_names,
NamesAndTypesList & merging_columns, Names & merging_column_names)
{
Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns();
std::set<String> key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend());
for (const auto & index : indexes)
{
Names index_columns_vec = index->expr->getRequiredColumns();
std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(),
std::inserter(key_columns, key_columns.end()));
}
/// Force sign column for Collapsing mode
if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
@ -550,7 +557,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
NamesAndTypesList gathering_columns, merging_columns;
Names gathering_column_names, merging_column_names;
extractMergingAndGatheringColumns(
all_columns, data.sorting_key_expr,
all_columns, data.sorting_key_expr, data.skip_indices,
data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names);
MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(
@ -629,11 +636,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
input->setProgressCallback(MergeProgressCallback(
merge_entry, sum_input_rows_upper_bound, column_sizes, watch_prev_elapsed, merge_alg));
if (data.hasPrimaryKey())
src_streams.emplace_back(std::make_shared<MaterializingBlockInputStream>(
std::make_shared<ExpressionBlockInputStream>(BlockInputStreamPtr(std::move(input)), data.sorting_key_expr)));
else
src_streams.emplace_back(std::move(input));
BlockInputStreamPtr stream = std::move(input);
if (data.hasPrimaryKey() || data.hasSkipIndices())
stream = std::make_shared<MaterializingBlockInputStream>(
std::make_shared<ExpressionBlockInputStream>(stream, data.sorting_key_and_skip_indices_expr));
src_streams.emplace_back(stream);
}
Names sort_columns = data.sorting_key_columns;
@ -897,10 +905,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
if (in_header.columns() == all_columns.size())
{
/// All columns are modified, proceed to write a new part from scratch.
if (data.hasPrimaryKey())
if (data.hasPrimaryKey() || data.hasSkipIndices())
in = std::make_shared<MaterializingBlockInputStream>(
std::make_shared<ExpressionBlockInputStream>(in, data.primary_key_expr));
std::make_shared<ExpressionBlockInputStream>(in, data.primary_key_and_skip_indices_expr));
MergeTreeDataPart::MinMaxIndex minmax_idx;
@ -927,6 +934,20 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor
/// We will modify only some of the columns. Other columns and key values can be copied as-is.
/// TODO: check that we modify only non-key columns in this case.
/// Check whether the mutation modifies any column used by a skipping index.
for (const auto & col : in_header.getNames())
{
for (const auto & index : data.skip_indices)
{
const auto & index_cols = index->expr->getRequiredColumns();
auto it = find(cbegin(index_cols), cend(index_cols), col);
if (it != cend(index_cols))
throw Exception("You can not modify columns used in index. Index name: '"
+ index->name
+ "' bad column: '" + *it + "'", ErrorCodes::ILLEGAL_COLUMN);
}
}
NameSet files_to_skip = {"checksums.txt", "columns.txt"};
for (const auto & entry : in_header)
{

View File

@ -120,7 +120,7 @@ public:
enum class MergeAlgorithm
{
Horizontal, /// per-row merge of all columns
Vertical /// per-row merge of PK columns, per-column gather for non-PK columns
Vertical /// per-row merge of PK and secondary indices columns, per-column gather for non-PK columns
};
private:

View File

@ -4,6 +4,7 @@
#include <Core/Block.h>
#include <Core/Types.h>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreePartInfo.h>
#include <Storages/MergeTree/MergeTreePartition.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>

View File

@ -1,11 +1,15 @@
#include <boost/rational.hpp> /// For calculations related to sampling coefficients.
#include <optional>
#include <Poco/File.h>
#include <Common/FieldVisitors.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeSelectBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeReadPool.h>
#include <Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeIndexReader.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
@ -528,6 +532,17 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
else
ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}};
/// It can be done in multiple threads (one thread for each part).
/// Maybe it should be moved to BlockInputStream, but it can cause some problems.
for (const auto & index : data.skip_indices)
{
auto condition = index->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
{
ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings);
}
}
if (!ranges.ranges.empty())
{
parts_with_ranges.push_back(ranges);
@ -942,4 +957,70 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
return res;
}
MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
MergeTreeIndexPtr index,
IndexConditionPtr condition,
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings) const
{
if (!Poco::File(part->getFullPath() + index->getFileName() + ".idx").exists())
{
LOG_DEBUG(log, "File for index `" << index->name << "` does not exist. Skipping it.");
return ranges;
}
const size_t min_marks_for_seek = (settings.merge_tree_min_rows_for_seek + data.index_granularity - 1) / data.index_granularity;
size_t granules_dropped = 0;
MergeTreeIndexReader reader(
index, part,
((part->marks_count + index->granularity - 1) / index->granularity),
ranges);
MarkRanges res;
/// Some granules can cover two or more ranges,
/// so the last read granule is kept to avoid reading the same granule twice.
MergeTreeIndexGranulePtr granule = nullptr;
size_t last_index_mark = 0;
for (const auto & range : ranges)
{
MarkRange index_range(
range.begin / index->granularity,
(range.end + index->granularity - 1) / index->granularity);
if (last_index_mark != index_range.begin || !granule)
reader.seek(index_range.begin);
for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
{
if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin)
granule = reader.read();
MarkRange data_range(
std::max(range.begin, index_mark * index->granularity),
std::min(range.end, (index_mark + 1) * index->granularity));
if (!condition->mayBeTrueOnGranule(granule))
{
++granules_dropped;
continue;
}
if (res.empty() || res.back().end - data_range.begin >= min_marks_for_seek)
res.push_back(data_range);
else
res.back().end = data_range.end;
}
last_index_mark = index_range.end - 1;
}
LOG_DEBUG(log, "Index `" << index->name << "` has dropped " << granules_dropped << " granules.");
return res;
}
}
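The mark arithmetic above is worth spelling out: one index granule covers `granularity` data marks, so a data mark range [begin, end) maps to index marks [begin / granularity, ceil(end / granularity)), and each index mark maps back to a data range clamped against the original one. A standalone sketch of that arithmetic with made-up numbers:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const size_t granularity = 4;                    // one index granule covers 4 data marks
    const size_t range_begin = 6, range_end = 15;    // data mark range [6, 15)

    // Index mark range covering the data range: [6 / 4, ceil(15 / 4)) = [1, 4)
    const size_t index_begin = range_begin / granularity;
    const size_t index_end = (range_end + granularity - 1) / granularity;

    for (size_t index_mark = index_begin; index_mark < index_end; ++index_mark)
    {
        // Data range of this index granule, clamped to the original range.
        const size_t data_begin = std::max(range_begin, index_mark * granularity);
        const size_t data_end = std::min(range_end, (index_mark + 1) * granularity);
        std::cout << "index mark " << index_mark << " -> data marks [" << data_begin << ", " << data_end << ")\n";
    }
    return 0;
}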

View File

@ -81,6 +81,13 @@ private:
const MergeTreeData::DataPart::Index & index,
const KeyCondition & key_condition,
const Settings & settings) const;
MarkRanges filterMarksUsingIndex(
MergeTreeIndexPtr index,
IndexConditionPtr condition,
MergeTreeData::DataPartPtr part,
const MarkRanges & ranges,
const Settings & settings) const;
};
}

View File

@ -180,8 +180,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
dir.createDirectories();
/// If we need to calculate some columns to sort.
if (data.hasSortingKey())
data.sorting_key_expr->execute(block);
if (data.hasSortingKey() || data.hasSkipIndices())
data.sorting_key_and_skip_indices_expr->execute(block);
Names sort_columns = data.sorting_key_columns;
SortDescription sort_description;

View File

@ -0,0 +1,29 @@
#include <Storages/MergeTree/MergeTreeIndexReader.h>
namespace DB
{
MergeTreeIndexReader::MergeTreeIndexReader(
MergeTreeIndexPtr index, MergeTreeData::DataPartPtr part, size_t marks_count, const MarkRanges & all_mark_ranges)
: index(index), stream(
part->getFullPath() + index->getFileName(), ".idx", marks_count,
all_mark_ranges, nullptr, false, nullptr, 0, DBMS_DEFAULT_BUFFER_SIZE,
ReadBufferFromFileBase::ProfileCallback{}, CLOCK_MONOTONIC_COARSE)
{
stream.seekToStart();
}
void MergeTreeIndexReader::seek(size_t mark)
{
stream.seekToMark(mark);
}
MergeTreeIndexGranulePtr MergeTreeIndexReader::read()
{
auto granule = index->createIndexGranule();
granule->deserializeBinary(*stream.data_buffer);
return granule;
}
}
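
The marks_count passed to the reader above is the number of index marks, obtained by ceil-dividing the part's data mark count by the index granularity. A minimal sketch with a hypothetical helper (not this commit's code):

#include <cstddef>
#include <iostream>

/// Number of index marks for a part with `marks_count` data marks
/// and an index that aggregates `granularity` data marks per granule.
size_t indexMarksCount(size_t marks_count, size_t granularity)
{
    return (marks_count + granularity - 1) / granularity;
}

int main()
{
    std::cout << indexMarksCount(10, 4) << "\n";   /// 3: granules cover marks [0,4), [4,8), [8,10)
    std::cout << indexMarksCount(8, 4) << "\n";    /// 2
}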

View File

@ -0,0 +1,28 @@
#pragma once
#include <Storages/MergeTree/MergeTreeReaderStream.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
namespace DB
{
class MergeTreeIndexReader
{
public:
MergeTreeIndexReader(
MergeTreeIndexPtr index,
MergeTreeData::DataPartPtr part,
size_t marks_count,
const MarkRanges & all_mark_ranges);
void seek(size_t mark);
MergeTreeIndexGranulePtr read();
private:
MergeTreeIndexPtr index;
MergeTreeReaderStream stream;
};
}

View File

@ -0,0 +1,57 @@
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <numeric>
#include <boost/algorithm/string.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
extern const int UNKNOWN_EXCEPTION;
}
void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creator)
{
if (!indexes.emplace(name, std::move(creator)).second)
throw Exception("MergeTreeIndexFactory: the Index creator name '" + name + "' is not unique",
ErrorCodes::LOGICAL_ERROR);
}
std::unique_ptr<MergeTreeIndex> MergeTreeIndexFactory::get(
const NamesAndTypesList & columns,
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context) const
{
if (!node->type)
throw Exception(
"for index TYPE is required", ErrorCodes::INCORRECT_QUERY);
if (node->type->parameters && !node->type->parameters->children.empty())
throw Exception(
"Index type can not have parameters", ErrorCodes::INCORRECT_QUERY);
boost::algorithm::to_lower(node->type->name);
auto it = indexes.find(node->type->name);
if (it == indexes.end())
throw Exception(
"Unknown Index type '" + node->type->name + "'. Available index types: " +
std::accumulate(indexes.cbegin(), indexes.cend(), std::string{},
[] (auto && lft, const auto & rht) -> std::string {
if (lft == "")
return rht.first;
else
return lft + ", " + rht.first;
}),
ErrorCodes::INCORRECT_QUERY);
return it->second(columns, node, context);
}
}
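
A minimal sketch of the registration/lookup pattern used by MergeTreeIndexFactory above: creators keyed by the lower-cased type name, duplicate registration rejected, and an error listing the known types on an unknown name. IndexFactory, Index and the lambdas are hypothetical stand-ins, not the real API.

#include <cctype>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>

struct Index { std::string type; };
using Creator = std::function<std::unique_ptr<Index>()>;

class IndexFactory
{
public:
    void registerIndex(const std::string & name, Creator creator)
    {
        if (!creators.emplace(name, std::move(creator)).second)
            throw std::logic_error("index creator name '" + name + "' is not unique");
    }

    std::unique_ptr<Index> get(std::string name) const
    {
        for (auto & c : name)
            c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        auto it = creators.find(name);
        if (it == creators.end())
        {
            std::string known;
            for (const auto & entry : creators)
            {
                if (!known.empty())
                    known += ", ";
                known += entry.first;
            }
            throw std::runtime_error("unknown index type '" + name + "'. Available index types: " + known);
        }
        return it->second();
    }

private:
    std::unordered_map<std::string, Creator> creators;
};

int main()
{
    IndexFactory factory;
    factory.registerIndex("minmax", [] { return std::make_unique<Index>(Index{"minmax"}); });
    factory.registerIndex("unique", [] { return std::make_unique<Index>(Index{"unique"}); });
    std::cout << factory.get("MinMax")->type << "\n";   /// prints "minmax"
}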

View File

@ -0,0 +1,126 @@
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
#include <memory>
#include <Core/Block.h>
#include <ext/singleton.h>
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/MarkRange.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/ASTIndexDeclaration.h>
constexpr auto INDEX_FILE_PREFIX = "skp_idx_";
namespace DB
{
class MergeTreeData;
class MergeTreeIndex;
using MergeTreeIndexPtr = std::shared_ptr<const MergeTreeIndex>;
using MutableMergeTreeIndexPtr = std::shared_ptr<MergeTreeIndex>;
struct MergeTreeIndexGranule
{
virtual ~MergeTreeIndexGranule() = default;
virtual void serializeBinary(WriteBuffer & ostr) const = 0;
virtual void deserializeBinary(ReadBuffer & istr) = 0;
virtual String toString() const = 0;
virtual bool empty() const = 0;
virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
};
using MergeTreeIndexGranulePtr = std::shared_ptr<MergeTreeIndexGranule>;
using MergeTreeIndexGranules = std::vector<MergeTreeIndexGranulePtr>;
/// Condition on the index.
class IndexCondition
{
public:
virtual ~IndexCondition() = default;
/// Checks if this index is useful for query.
virtual bool alwaysUnknownOrTrue() const = 0;
virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0;
};
using IndexConditionPtr = std::shared_ptr<IndexCondition>;
/// Structure for storing basic index info like columns, expression, arguments, ...
class MergeTreeIndex
{
public:
MergeTreeIndex(
String name,
ExpressionActionsPtr expr,
const Names & columns,
const DataTypes & data_types,
const Block & header,
size_t granularity)
: name(name)
, expr(expr)
, columns(columns)
, data_types(data_types)
, header(header)
, granularity(granularity) {}
virtual ~MergeTreeIndex() = default;
/// Returns the index file name without extension.
String getFileName() const { return INDEX_FILE_PREFIX + name; }
virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0;
virtual IndexConditionPtr createIndexCondition(
const SelectQueryInfo & query_info, const Context & context) const = 0;
String name;
ExpressionActionsPtr expr;
Names columns;
DataTypes data_types;
Block header;
size_t granularity;
};
using MergeTreeIndices = std::vector<MutableMergeTreeIndexPtr>;
class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
{
friend class ext::singleton<MergeTreeIndexFactory>;
public:
using Creator = std::function<
std::unique_ptr<MergeTreeIndex>(
const NamesAndTypesList & columns,
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context)>;
std::unique_ptr<MergeTreeIndex> get(
const NamesAndTypesList & columns,
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context) const;
void registerIndex(const std::string & name, Creator creator);
const auto & getAllIndexes() const { return indexes; }
protected:
MergeTreeIndexFactory() = default;
private:
using Indexes = std::unordered_map<std::string, Creator>;
Indexes indexes;
};
}

View File

@ -0,0 +1,164 @@
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int INCORRECT_QUERY;
}
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index)
: MergeTreeIndexGranule(), index(index), parallelogram()
{
}
void MergeTreeMinMaxGranule::serializeBinary(WriteBuffer & ostr) const
{
if (empty())
throw Exception(
"Attempt to write empty minmax index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < index.columns.size(); ++i)
{
const DataTypePtr & type = index.data_types[i];
type->serializeBinary(parallelogram[i].left, ostr);
type->serializeBinary(parallelogram[i].right, ostr);
}
}
void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr)
{
parallelogram.clear();
for (size_t i = 0; i < index.columns.size(); ++i)
{
const DataTypePtr & type = index.data_types[i];
Field min_val;
type->deserializeBinary(min_val, istr);
Field max_val;
type->deserializeBinary(max_val, istr);
parallelogram.emplace_back(min_val, true, max_val, true);
}
}
String MergeTreeMinMaxGranule::toString() const
{
String res = "";
for (size_t i = 0; i < parallelogram.size(); ++i)
{
res += "["
+ applyVisitor(FieldVisitorToString(), parallelogram[i].left) + ", "
+ applyVisitor(FieldVisitorToString(), parallelogram[i].right) + "]";
}
return res;
}
void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit)
{
size_t rows_read = std::min(limit, block.rows() - *pos);
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & column = block.getByName(index.columns[i]).column;
Field field_min, field_max;
column->cut(*pos, rows_read)->getExtremes(field_min, field_max);
if (parallelogram.size() <= i)
{
parallelogram.emplace_back(field_min, true, field_max, true);
}
else
{
parallelogram[i].left = std::min(parallelogram[i].left, field_min);
parallelogram[i].right = std::max(parallelogram[i].right, field_max);
}
}
*pos += rows_read;
}
MinMaxCondition::MinMaxCondition(
const SelectQueryInfo &query,
const Context &context,
const MergeTreeMinMaxIndex &index)
: IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {}
bool MinMaxCondition::alwaysUnknownOrTrue() const
{
return condition.alwaysUnknownOrTrue();
}
bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
std::shared_ptr<MergeTreeMinMaxGranule> granule
= std::dynamic_pointer_cast<MergeTreeMinMaxGranule>(idx_granule);
if (!granule)
throw Exception(
"Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR);
return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types);
}
MergeTreeIndexGranulePtr MergeTreeMinMaxIndex::createIndexGranule() const
{
return std::make_shared<MergeTreeMinMaxGranule>(*this);
}
IndexConditionPtr MergeTreeMinMaxIndex::createIndexCondition(
const SelectQueryInfo & query, const Context & context) const
{
return std::make_shared<MinMaxCondition>(query, context, *this);
}
std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
const NamesAndTypesList & new_columns,
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context)
{
if (node->name.empty())
throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY);
if (node->type->arguments)
throw Exception("Minmax index have not any arguments", ErrorCodes::INCORRECT_QUERY);
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
auto syntax = SyntaxAnalyzer(context, {}).analyze(
expr_list, new_columns);
auto minmax_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
auto sample = ExpressionAnalyzer(expr_list, syntax, context)
.getActions(true)->getSampleBlock();
Names columns;
DataTypes data_types;
for (size_t i = 0; i < expr_list->children.size(); ++i)
{
const auto & column = sample.getByPosition(i);
columns.emplace_back(column.name);
data_types.emplace_back(column.type);
}
return std::make_unique<MergeTreeMinMaxIndex>(
node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get<size_t>());
}
}
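
A standalone sketch of the granule update logic above: each chunk of rows can only widen the per-column [min, max] interval, never shrink it. The types and the update function below are hypothetical simplifications (plain integers instead of Fields and IColumn), not this commit's code.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <limits>
#include <vector>

struct Range { long long left; long long right; };

void update(std::vector<Range> & parallelogram,
            const std::vector<std::vector<long long>> & columns,
            size_t pos, size_t limit)
{
    for (size_t i = 0; i < columns.size(); ++i)
    {
        const auto & col = columns[i];
        size_t end = std::min(col.size(), pos + limit);
        if (end <= pos)
            continue;
        long long mn = std::numeric_limits<long long>::max();
        long long mx = std::numeric_limits<long long>::min();
        for (size_t row = pos; row < end; ++row)
        {
            mn = std::min(mn, col[row]);
            mx = std::max(mx, col[row]);
        }
        if (parallelogram.size() <= i)
            parallelogram.push_back({mn, mx});   /// first chunk of this column
        else
        {
            parallelogram[i].left = std::min(parallelogram[i].left, mn);
            parallelogram[i].right = std::max(parallelogram[i].right, mx);
        }
    }
}

int main()
{
    std::vector<std::vector<long long>> columns{{5, 1, 9, 7, 3, 8}};
    std::vector<Range> parallelogram;
    update(parallelogram, columns, 0, 3);   /// rows 0..2 -> [1, 9]
    update(parallelogram, columns, 3, 3);   /// rows 3..5 -> [3, 8], interval stays [1, 9]
    std::cout << "[" << parallelogram[0].left << ", " << parallelogram[0].right << "]\n";
}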

View File

@ -0,0 +1,78 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <memory>
namespace DB
{
class MergeTreeMinMaxIndex;
struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule
{
explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index);
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr) override;
String toString() const override;
bool empty() const override { return parallelogram.empty(); }
void update(const Block & block, size_t * pos, size_t limit) override;
~MergeTreeMinMaxGranule() override = default;
const MergeTreeMinMaxIndex & index;
std::vector<Range> parallelogram;
};
class MinMaxCondition : public IndexCondition
{
public:
MinMaxCondition(
const SelectQueryInfo & query,
const Context & context,
const MergeTreeMinMaxIndex & index);
bool alwaysUnknownOrTrue() const override;
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
~MinMaxCondition() override = default;
private:
const MergeTreeMinMaxIndex & index;
KeyCondition condition;
};
class MergeTreeMinMaxIndex : public MergeTreeIndex
{
public:
MergeTreeMinMaxIndex(
String name,
ExpressionActionsPtr expr,
const Names & columns,
const DataTypes & data_types,
const Block & header,
size_t granularity)
: MergeTreeIndex(name, expr, columns, data_types, header, granularity) {}
~MergeTreeMinMaxIndex() override = default;
MergeTreeIndexGranulePtr createIndexGranule() const override;
IndexConditionPtr createIndexCondition(
const SelectQueryInfo & query, const Context & context) const override;
};
std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context);
}

View File

@ -154,205 +154,6 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t
return read_rows;
}
MergeTreeReader::Stream::Stream(
const String & path_prefix_, const String & extension_, size_t marks_count_,
const MarkRanges & all_mark_ranges,
MarkCache * mark_cache_, bool save_marks_in_cache_,
UncompressedCache * uncompressed_cache,
size_t aio_threshold, size_t max_read_buffer_size,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
: path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_)
, mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_)
{
/// Compute the size of the buffer.
size_t max_mark_range = 0;
for (size_t i = 0; i < all_mark_ranges.size(); ++i)
{
size_t right = all_mark_ranges[i].end;
/// NOTE: if we are reading the whole file, then right == marks_count
/// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
/// If the end of range is inside the block, we will need to read it too.
if (right < marks_count && getMark(right).offset_in_decompressed_block > 0)
{
while (right < marks_count
&& getMark(right).offset_in_compressed_file
== getMark(all_mark_ranges[i].end).offset_in_compressed_file)
{
++right;
}
}
/// If there are no marks after the end of range, just use max_read_buffer_size
if (right >= marks_count
|| (right + 1 == marks_count
&& getMark(right).offset_in_compressed_file
== getMark(all_mark_ranges[i].end).offset_in_compressed_file))
{
max_mark_range = max_read_buffer_size;
break;
}
max_mark_range = std::max(max_mark_range,
getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file);
}
/// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality.
/// For example: part has single dictionary and all marks point to the same position.
if (max_mark_range == 0)
max_mark_range = max_read_buffer_size;
size_t buffer_size = std::min(max_read_buffer_size, max_mark_range);
/// Estimate size of the data to be read.
size_t estimated_size = 0;
if (aio_threshold > 0)
{
for (const auto & mark_range : all_mark_ranges)
{
size_t offset_begin = (mark_range.begin > 0)
? getMark(mark_range.begin).offset_in_compressed_file
: 0;
size_t offset_end = (mark_range.end < marks_count)
? getMark(mark_range.end).offset_in_compressed_file
: Poco::File(path_prefix + extension).getSize();
if (offset_end > offset_begin)
estimated_size += offset_end - offset_begin;
}
}
/// Initialize the objects that shall be used to perform read operations.
if (uncompressed_cache)
{
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size);
if (profile_callback)
buffer->setProfileCallback(profile_callback, clock_type);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer = std::make_unique<CompressedReadBufferFromFile>(
path_prefix + extension, estimated_size, aio_threshold, buffer_size);
if (profile_callback)
buffer->setProfileCallback(profile_callback, clock_type);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
}
}
const MarkInCompressedFile & MergeTreeReader::Stream::getMark(size_t index)
{
if (!marks)
loadMarks();
return (*marks)[index];
}
void MergeTreeReader::Stream::loadMarks()
{
std::string mrk_path = path_prefix + ".mrk";
auto load = [&]() -> MarkCache::MappedPtr
{
/// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
size_t file_size = Poco::File(mrk_path).getSize();
size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count;
if (expected_file_size != file_size)
throw Exception(
"bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),
ErrorCodes::CORRUPTED_DATA);
auto res = std::make_shared<MarksInCompressedFile>(marks_count);
/// Read directly to marks.
ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast<char *>(res->data()));
if (buffer.eof() || buffer.buffer().size() != file_size)
throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);
return res;
};
if (mark_cache)
{
auto key = mark_cache->hash(mrk_path);
if (save_marks_in_cache)
{
marks = mark_cache->getOrSet(key, load);
}
else
{
marks = mark_cache->get(key);
if (!marks)
marks = load();
}
}
else
marks = load();
if (!marks)
throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
}
void MergeTreeReader::Stream::seekToMark(size_t index)
{
MarkInCompressedFile mark = getMark(index);
try
{
if (cached_buffer)
cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
if (non_cached_buffer)
non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
}
catch (Exception & e)
{
/// Better diagnostics.
if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
e.addMessage("(while seeking to mark " + toString(index)
+ " of column " + path_prefix + "; offsets are: "
+ toString(mark.offset_in_compressed_file) + " "
+ toString(mark.offset_in_decompressed_block) + ")");
throw;
}
}
void MergeTreeReader::Stream::seekToStart()
{
try
{
if (cached_buffer)
cached_buffer->seek(0, 0);
if (non_cached_buffer)
non_cached_buffer->seek(0, 0);
}
catch (Exception & e)
{
/// Better diagnostics.
if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
e.addMessage("(while seeking to start of column " + path_prefix + ")");
throw;
}
}
void MergeTreeReader::addStreams(const String & name, const IDataType & type,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
{
@ -371,7 +172,7 @@ void MergeTreeReader::addStreams(const String & name, const IDataType & type,
if (!data_file_exists)
return;
streams.emplace(stream_name, std::make_unique<Stream>(
streams.emplace(stream_name, std::make_unique<MergeTreeReaderStream>(
path + stream_name, DATA_FILE_EXTENSION, data_part->marks_count,
all_mark_ranges, mark_cache, save_marks_in_cache,
uncompressed_cache, aio_threshold, max_read_buffer_size, profile_callback, clock_type));
@ -401,7 +202,7 @@ void MergeTreeReader::readData(
if (it == streams.end())
return nullptr;
Stream & stream = *it->second;
MergeTreeReaderStream & stream = *it->second;
if (stream_for_prefix)
{

View File

@ -1,11 +1,7 @@
#pragma once
#include <Storages/MarkCache.h>
#include <Storages/MergeTree/MarkRange.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeRangeReader.h>
#include <Compression/CompressedReadBufferFromFile.h>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreeReaderStream.h>
#include <port/clock.h>
@ -13,7 +9,6 @@ namespace DB
{
class IDataType;
class CachedCompressedReadBuffer;
/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
@ -57,44 +52,7 @@ public:
size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res);
private:
class Stream
{
public:
Stream(
const String & path_prefix_, const String & extension_, size_t marks_count_,
const MarkRanges & all_mark_ranges,
MarkCache * mark_cache, bool save_marks_in_cache,
UncompressedCache * uncompressed_cache,
size_t aio_threshold, size_t max_read_buffer_size,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
void seekToMark(size_t index);
void seekToStart();
ReadBuffer * data_buffer;
private:
Stream() = default;
/// NOTE: lazily loads marks from the marks cache.
const MarkInCompressedFile & getMark(size_t index);
void loadMarks();
std::string path_prefix;
std::string extension;
size_t marks_count;
MarkCache * mark_cache;
bool save_marks_in_cache;
MarkCache::MappedPtr marks;
std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
};
using FileStreams = std::map<std::string, std::unique_ptr<Stream>>;
using FileStreams = std::map<std::string, std::unique_ptr<MergeTreeReaderStream>>;
/// avg_value_size_hints are used to reduce the number of reallocations when creating columns of variable size.
ValueSizeMap avg_value_size_hints;

View File

@ -0,0 +1,215 @@
#include <Common/MemoryTracker.h>
#include <Storages/MergeTree/MergeTreeReaderStream.h>
#include <Poco/File.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CORRUPTED_DATA;
extern const int CANNOT_READ_ALL_DATA;
extern const int ARGUMENT_OUT_OF_BOUND;
}
MergeTreeReaderStream::MergeTreeReaderStream(
const String & path_prefix_, const String & extension_, size_t marks_count_,
const MarkRanges & all_mark_ranges,
MarkCache * mark_cache_, bool save_marks_in_cache_,
UncompressedCache * uncompressed_cache,
size_t aio_threshold, size_t max_read_buffer_size,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type)
: path_prefix(path_prefix_), extension(extension_), marks_count(marks_count_)
, mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_)
{
/// Compute the size of the buffer.
size_t max_mark_range = 0;
for (size_t i = 0; i < all_mark_ranges.size(); ++i)
{
size_t right = all_mark_ranges[i].end;
/// NOTE: if we are reading the whole file, then right == marks_count
/// and we will use max_read_buffer_size for buffer size, thus avoiding the need to load marks.
/// If the end of range is inside the block, we will need to read it too.
if (right < marks_count && getMark(right).offset_in_decompressed_block > 0)
{
while (right < marks_count
&& getMark(right).offset_in_compressed_file
== getMark(all_mark_ranges[i].end).offset_in_compressed_file)
{
++right;
}
}
/// If there are no marks after the end of range, just use max_read_buffer_size
if (right >= marks_count
|| (right + 1 == marks_count
&& getMark(right).offset_in_compressed_file
== getMark(all_mark_ranges[i].end).offset_in_compressed_file))
{
max_mark_range = max_read_buffer_size;
break;
}
max_mark_range = std::max(max_mark_range,
getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file);
}
/// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality.
/// For example: part has single dictionary and all marks point to the same position.
if (max_mark_range == 0)
max_mark_range = max_read_buffer_size;
size_t buffer_size = std::min(max_read_buffer_size, max_mark_range);
/// Estimate size of the data to be read.
size_t estimated_size = 0;
if (aio_threshold > 0)
{
for (const auto & mark_range : all_mark_ranges)
{
size_t offset_begin = (mark_range.begin > 0)
? getMark(mark_range.begin).offset_in_compressed_file
: 0;
size_t offset_end = (mark_range.end < marks_count)
? getMark(mark_range.end).offset_in_compressed_file
: Poco::File(path_prefix + extension).getSize();
if (offset_end > offset_begin)
estimated_size += offset_end - offset_begin;
}
}
/// Initialize the objects that shall be used to perform read operations.
if (uncompressed_cache)
{
auto buffer = std::make_unique<CachedCompressedReadBuffer>(
path_prefix + extension, uncompressed_cache, estimated_size, aio_threshold, buffer_size);
if (profile_callback)
buffer->setProfileCallback(profile_callback, clock_type);
cached_buffer = std::move(buffer);
data_buffer = cached_buffer.get();
}
else
{
auto buffer = std::make_unique<CompressedReadBufferFromFile>(
path_prefix + extension, estimated_size, aio_threshold, buffer_size);
if (profile_callback)
buffer->setProfileCallback(profile_callback, clock_type);
non_cached_buffer = std::move(buffer);
data_buffer = non_cached_buffer.get();
}
}
const MarkInCompressedFile & MergeTreeReaderStream::getMark(size_t index)
{
if (!marks)
loadMarks();
return (*marks)[index];
}
void MergeTreeReaderStream::loadMarks()
{
std::string mrk_path = path_prefix + ".mrk";
auto load = [&]() -> MarkCache::MappedPtr
{
/// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache.
auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock();
size_t file_size = Poco::File(mrk_path).getSize();
size_t expected_file_size = sizeof(MarkInCompressedFile) * marks_count;
if (expected_file_size != file_size)
throw Exception(
"bad size of marks file `" + mrk_path + "':" + std::to_string(file_size) + ", must be: " + std::to_string(expected_file_size),
ErrorCodes::CORRUPTED_DATA);
auto res = std::make_shared<MarksInCompressedFile>(marks_count);
/// Read directly to marks.
ReadBufferFromFile buffer(mrk_path, file_size, -1, reinterpret_cast<char *>(res->data()));
if (buffer.eof() || buffer.buffer().size() != file_size)
throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);
return res;
};
if (mark_cache)
{
auto key = mark_cache->hash(mrk_path);
if (save_marks_in_cache)
{
marks = mark_cache->getOrSet(key, load);
}
else
{
marks = mark_cache->get(key);
if (!marks)
marks = load();
}
}
else
marks = load();
if (!marks)
throw Exception("Failed to load marks: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
}
void MergeTreeReaderStream::seekToMark(size_t index)
{
MarkInCompressedFile mark = getMark(index);
try
{
if (cached_buffer)
cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
if (non_cached_buffer)
non_cached_buffer->seek(mark.offset_in_compressed_file, mark.offset_in_decompressed_block);
}
catch (Exception & e)
{
/// Better diagnostics.
if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
e.addMessage("(while seeking to mark " + toString(index)
+ " of column " + path_prefix + "; offsets are: "
+ toString(mark.offset_in_compressed_file) + " "
+ toString(mark.offset_in_decompressed_block) + ")");
throw;
}
}
void MergeTreeReaderStream::seekToStart()
{
try
{
if (cached_buffer)
cached_buffer->seek(0, 0);
if (non_cached_buffer)
non_cached_buffer->seek(0, 0);
}
catch (Exception & e)
{
/// Better diagnostics.
if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
e.addMessage("(while seeking to start of column " + path_prefix + ")");
throw;
}
}
}
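
A simplified sketch of the buffer sizing done in the constructor above. It keeps only the core idea: size the buffer to the largest compressed span a requested mark range covers, capped by max_read_buffer_size, and fall back to the cap when the range reaches the end of the file or all marks coincide. The block-boundary extension and the AIO size estimate are omitted, and all names below are hypothetical.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

struct Mark { size_t offset_in_compressed_file; };
struct MarkRange { size_t begin; size_t end; };

size_t chooseBufferSize(const std::vector<Mark> & marks,
                        const std::vector<MarkRange> & ranges,
                        size_t max_read_buffer_size)
{
    size_t max_mark_range = 0;
    for (const auto & range : ranges)
    {
        /// Reading up to the end of the file: no tighter bound is known.
        if (range.end >= marks.size())
            return max_read_buffer_size;
        max_mark_range = std::max(max_mark_range,
            marks[range.end].offset_in_compressed_file - marks[range.begin].offset_in_compressed_file);
    }
    /// Avoid an empty buffer when all marks point to the same position.
    if (max_mark_range == 0)
        max_mark_range = max_read_buffer_size;
    return std::min(max_read_buffer_size, max_mark_range);
}

int main()
{
    std::vector<Mark> marks{{0}, {4096}, {8192}, {12288}};
    std::cout << chooseBufferSize(marks, {{0, 2}}, 1 << 20) << "\n";   /// 8192
    std::cout << chooseBufferSize(marks, {{1, 4}}, 1 << 20) << "\n";   /// 1048576: range reaches end of file
}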

View File

@ -0,0 +1,49 @@
#pragma once

#include <Storages/MarkCache.h>
#include <Storages/MergeTree/MarkRange.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeRangeReader.h>
#include <Compression/CachedCompressedReadBuffer.h>
#include <Compression/CompressedReadBufferFromFile.h>
namespace DB
{
class MergeTreeReaderStream
{
public:
MergeTreeReaderStream(
const String & path_prefix_, const String & extension_, size_t marks_count_,
const MarkRanges & all_mark_ranges,
MarkCache * mark_cache, bool save_marks_in_cache,
UncompressedCache * uncompressed_cache,
size_t aio_threshold, size_t max_read_buffer_size,
const ReadBufferFromFileBase::ProfileCallback & profile_callback, clockid_t clock_type);
void seekToMark(size_t index);
void seekToStart();
ReadBuffer * data_buffer;
private:
MergeTreeReaderStream() = default;
/// NOTE: lazily loads marks from the marks cache.
const MarkInCompressedFile & getMark(size_t index);
void loadMarks();
std::string path_prefix;
std::string extension;
size_t marks_count;
MarkCache * mark_cache;
bool save_marks_in_cache;
MarkCache::MappedPtr marks;
std::unique_ptr<CachedCompressedReadBuffer> cached_buffer;
std::unique_ptr<CompressedReadBufferFromFile> non_cached_buffer;
};
}

View File

@ -0,0 +1,393 @@
#include <Storages/MergeTree/MergeTreeUniqueIndex.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
}
MergeTreeUniqueGranule::MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index)
: MergeTreeIndexGranule(), index(index), set(new Set(SizeLimits{}, true))
{
set->setHeader(index.header);
}
void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const
{
if (empty())
throw Exception(
"Attempt to write empty unique index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR);
const auto & columns = set->getSetElements();
const auto & size_type = DataTypePtr(std::make_shared<DataTypeUInt64>());
if (index.max_rows && size() > index.max_rows)
{
size_type->serializeBinary(0, ostr);
return;
}
size_type->serializeBinary(size(), ostr);
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & type = index.data_types[i];
type->serializeBinaryBulk(*columns[i], ostr, 0, size());
}
}
void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr)
{
if (!set->empty())
{
auto new_set = std::make_unique<Set>(SizeLimits{}, true);
new_set->setHeader(index.header);
set.swap(new_set);
}
Block block;
Field field_rows;
const auto & size_type = DataTypePtr(std::make_shared<DataTypeUInt64>());
size_type->deserializeBinary(field_rows, istr);
size_t rows_to_read = field_rows.get<size_t>();
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & type = index.data_types[i];
auto new_column = type->createColumn();
type->deserializeBinaryBulk(*new_column, istr, rows_to_read, 0);
block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i]));
}
set->insertFromBlock(block);
}
String MergeTreeUniqueGranule::toString() const
{
String res = "";
const auto & columns = set->getSetElements();
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & column = columns[i];
res += " [";
for (size_t j = 0; j < column->size(); ++j)
{
if (j != 0)
res += ", ";
Field field;
column->get(j, field);
res += applyVisitor(FieldVisitorToString(), field);
}
res += "]\n";
}
return res;
}
void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_t limit)
{
size_t rows_read = std::min(limit, new_block.rows() - *pos);
if (index.max_rows && size() > index.max_rows)
{
*pos += rows_read;
return;
}
Block key_block;
for (size_t i = 0; i < index.columns.size(); ++i)
{
const auto & name = index.columns[i];
const auto & type = index.data_types[i];
key_block.insert(
ColumnWithTypeAndName(
new_block.getByName(name).column->cut(*pos, rows_read),
type,
name));
}
set->insertFromBlock(key_block);
*pos += rows_read;
}
Block MergeTreeUniqueGranule::getElementsBlock() const
{
if (index.max_rows && size() > index.max_rows)
return index.header;
return index.header.cloneWithColumns(set->getSetElements());
}
UniqueCondition::UniqueCondition(
const SelectQueryInfo & query,
const Context & context,
const MergeTreeUniqueIndex &index)
: IndexCondition(), index(index)
{
for (size_t i = 0, size = index.columns.size(); i < size; ++i)
{
std::string name = index.columns[i];
if (!key_columns.count(name))
key_columns.insert(name);
}
const ASTSelectQuery & select = typeid_cast<const ASTSelectQuery &>(*query.query);
/// Replace logical functions with bit functions.
/// Working with UInt8: last bit = can be true, previous = can be false.
ASTPtr new_expression;
if (select.where_expression && select.prewhere_expression)
new_expression = makeASTFunction(
"and",
select.where_expression->clone(),
select.prewhere_expression->clone());
else if (select.where_expression)
new_expression = select.where_expression->clone();
else if (select.prewhere_expression)
new_expression = select.prewhere_expression->clone();
else
/// 0b11 -- can be true and false at the same time
new_expression = std::make_shared<ASTLiteral>(Field(3));
useless = checkASTAlwaysUnknownOrTrue(new_expression);
/// Do not proceed if index is useless for this query.
if (useless)
return;
expression_ast = makeASTFunction(
"bitAnd",
new_expression,
std::make_shared<ASTLiteral>(Field(1)));
traverseAST(expression_ast);
auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(
expression_ast, index.header.getNamesAndTypesList());
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
}
bool UniqueCondition::alwaysUnknownOrTrue() const
{
return useless;
}
bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
auto granule = std::dynamic_pointer_cast<MergeTreeUniqueGranule>(idx_granule);
if (!granule)
throw Exception(
"Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR);
if (useless)
return true;
if (index.max_rows && granule->size() > index.max_rows)
return true;
Block result = granule->getElementsBlock();
actions->execute(result);
const auto & column = result.getByName(expression_ast->getColumnName()).column;
for (size_t i = 0; i < column->size(); ++i)
if (column->getBool(i))
return true;
return false;
}
void UniqueCondition::traverseAST(ASTPtr & node) const
{
if (operatorFromAST(node))
{
auto * func = typeid_cast<ASTFunction *>(&*node);
auto & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
for (auto & arg : args)
traverseAST(arg);
return;
}
if (!atomFromAST(node))
node = std::make_shared<ASTLiteral>(Field(3)); /// can_be_true=1 can_be_false=1
}
bool UniqueCondition::atomFromAST(ASTPtr & node) const
{
/// Function, literal or column
if (typeid_cast<const ASTLiteral *>(node.get()))
return true;
if (const auto * identifier = typeid_cast<const ASTIdentifier *>(node.get()))
return key_columns.count(identifier->getColumnName()) != 0;
if (auto * func = typeid_cast<ASTFunction *>(node.get()))
{
if (key_columns.count(func->getColumnName()))
{
/// Function is already calculated.
node = std::make_shared<ASTIdentifier>(func->getColumnName());
return true;
}
ASTs & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
for (auto & arg : args)
if (!atomFromAST(arg))
return false;
return true;
}
return false;
}
bool UniqueCondition::operatorFromAST(ASTPtr & node) const
{
/// Functions AND, OR, NOT. Replace with bit*.
auto * func = typeid_cast<ASTFunction *>(&*node);
if (!func)
return false;
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
if (func->name == "not")
{
if (args.size() != 1)
return false;
func->name = "__bitSwapLastTwo";
}
else if (func->name == "and" || func->name == "indexHint")
func->name = "bitAnd";
else if (func->name == "or")
func->name = "bitOr";
else
return false;
return true;
}
bool checkAtomName(const String & name)
{
static std::set<String> atoms = {
"notEquals",
"equals",
"less",
"greater",
"lessOrEquals",
"greaterOrEquals",
"in",
"notIn",
"like"
};
return atoms.find(name) != atoms.end();
}
bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic) const
{
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
{
if (key_columns.count(func->getColumnName()))
return false;
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
if (func->name == "and" || func->name == "indexHint")
return checkASTAlwaysUnknownOrTrue(args[0], atomic) && checkASTAlwaysUnknownOrTrue(args[1], atomic);
else if (func->name == "or")
return checkASTAlwaysUnknownOrTrue(args[0], atomic) || checkASTAlwaysUnknownOrTrue(args[1], atomic);
else if (func->name == "not")
return checkASTAlwaysUnknownOrTrue(args[0], atomic);
else if (!atomic && checkAtomName(func->name))
return checkASTAlwaysUnknownOrTrue(node, true);
else
return std::any_of(args.begin(), args.end(),
[this, &atomic](const auto & arg) { return checkASTAlwaysUnknownOrTrue(arg, atomic); });
}
else if (const auto * literal = typeid_cast<const ASTLiteral *>(node.get()))
return !atomic && literal->value.get<bool>();
else if (const auto * identifier = typeid_cast<const ASTIdentifier *>(node.get()))
return key_columns.find(identifier->getColumnName()) == key_columns.end();
else
return true;
}
MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const
{
return std::make_shared<MergeTreeUniqueGranule>(*this);
}
IndexConditionPtr MergeTreeUniqueIndex::createIndexCondition(
const SelectQueryInfo & query, const Context & context) const
{
return std::make_shared<UniqueCondition>(query, context, *this);
}
std::unique_ptr<MergeTreeIndex> MergeTreeUniqueIndexCreator(
const NamesAndTypesList & new_columns,
std::shared_ptr<ASTIndexDeclaration> node,
const Context & context)
{
if (node->name.empty())
throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY);
size_t max_rows = 0;
if (node->type->arguments)
{
if (node->type->arguments->children.size() > 1)
throw Exception("Unique index cannot have only 0 or 1 argument", ErrorCodes::INCORRECT_QUERY);
else if (node->type->arguments->children.size() == 1)
max_rows = typeid_cast<const ASTLiteral &>(
*node->type->arguments->children[0]).value.get<size_t>();
}
ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone());
auto syntax = SyntaxAnalyzer(context, {}).analyze(
expr_list, new_columns);
auto unique_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false);
auto sample = ExpressionAnalyzer(expr_list, syntax, context)
.getActions(true)->getSampleBlock();
Block header;
Names columns;
DataTypes data_types;
for (size_t i = 0; i < expr_list->children.size(); ++i)
{
const auto & column = sample.getByPosition(i);
columns.emplace_back(column.name);
data_types.emplace_back(column.type);
header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name));
}
return std::make_unique<MergeTreeUniqueIndex>(
node->name, std::move(unique_expr), columns, data_types, header, node->granularity.get<size_t>(), max_rows);
}
}
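
A standalone sketch of the two-bit truth encoding that UniqueCondition relies on: bit 0 = "can be true", bit 1 = "can be false", and 3 = unknown. AND/OR map to bitwise and/or, NOT swaps the two bits (what __bitSwapLastTwo stands for), and the final bitAnd(expression, 1) asks whether the granule can still match. The helper names below are hypothetical, not this commit's code.

#include <cstdint>
#include <iostream>

constexpr uint8_t CAN_BE_TRUE = 0b01;
constexpr uint8_t CAN_BE_FALSE = 0b10;
constexpr uint8_t UNKNOWN = CAN_BE_TRUE | CAN_BE_FALSE;   /// 3: can be both

uint8_t bitSwapLastTwo(uint8_t x)   /// NOT: swap "can be true" and "can be false"
{
    return static_cast<uint8_t>(((x & CAN_BE_TRUE) << 1) | ((x & CAN_BE_FALSE) >> 1));
}

bool mayBeTrue(uint8_t x) { return x & CAN_BE_TRUE; }

int main()
{
    uint8_t a = CAN_BE_TRUE;     /// atom that can match the granule
    uint8_t b = CAN_BE_FALSE;    /// atom known not to match
    uint8_t c = UNKNOWN;         /// atom the index knows nothing about

    std::cout << mayBeTrue(a & b) << "\n";               /// 0: "a AND b" cannot be true
    std::cout << mayBeTrue(a | b) << "\n";               /// 1: "a OR b" can be true
    std::cout << mayBeTrue(bitSwapLastTwo(b)) << "\n";   /// 1: "NOT b" can be true
    std::cout << mayBeTrue(c & a) << "\n";               /// 1: unknown AND possible -> keep the granule
}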

View File

@ -0,0 +1,93 @@
#pragma once
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/Set.h>
#include <memory>
#include <set>
namespace DB
{
class MergeTreeUniqueIndex;
struct MergeTreeUniqueGranule : public MergeTreeIndexGranule
{
explicit MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index);
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr) override;
String toString() const override;
size_t size() const { return set->getTotalRowCount(); }
bool empty() const override { return !size(); }
void update(const Block & block, size_t * pos, size_t limit) override;
Block getElementsBlock() const;
~MergeTreeUniqueGranule() override = default;
const MergeTreeUniqueIndex & index;
std::unique_ptr<Set> set;
};
class UniqueCondition : public IndexCondition
{
public:
UniqueCondition(
const SelectQueryInfo & query,
const Context & context,
const MergeTreeUniqueIndex & index);
bool alwaysUnknownOrTrue() const override;
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
~UniqueCondition() override = default;
private:
void traverseAST(ASTPtr & node) const;
bool atomFromAST(ASTPtr & node) const;
bool operatorFromAST(ASTPtr & node) const;
bool checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic = false) const;
const MergeTreeUniqueIndex & index;
bool useless;
std::set<String> key_columns;
ASTPtr expression_ast;
ExpressionActionsPtr actions;
};
class MergeTreeUniqueIndex : public MergeTreeIndex
{
public:
MergeTreeUniqueIndex(
String name,
ExpressionActionsPtr expr,
const Names & columns,
const DataTypes & data_types,
const Block & header,
size_t granularity,
size_t _max_rows)
: MergeTreeIndex(std::move(name), std::move(expr), columns, data_types, header, granularity), max_rows(_max_rows) {}
~MergeTreeUniqueIndex() override = default;
MergeTreeIndexGranulePtr createIndexGranule() const override;
IndexConditionPtr createIndexCondition(
const SelectQueryInfo & query, const Context & context) const override;
size_t max_rows = 0;
};
std::unique_ptr<MergeTreeIndex> MergeTreeUniqueIndexCreator(
const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context);
}

View File

@ -16,6 +16,7 @@ namespace
constexpr auto DATA_FILE_EXTENSION = ".bin";
constexpr auto MARKS_FILE_EXTENSION = ".mrk";
constexpr auto INDEX_FILE_EXTENSION = ".idx";
}
@ -325,6 +326,18 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
}
}
/// Finish skip index serialization
for (size_t i = 0; i < storage.skip_indices.size(); ++i)
{
auto & stream = *skip_indices_streams[i];
if (skip_indices_granules[i] && !skip_indices_granules[i]->empty())
{
skip_indices_granules[i]->serializeBinary(stream.compressed);
skip_indices_granules[i].reset();
}
}
if (!total_column_list)
total_column_list = &columns_list;
@ -342,6 +355,16 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart(
index_stream = nullptr;
}
for (auto & stream : skip_indices_streams)
{
stream->finalize();
stream->addToChecksums(checksums);
}
skip_indices_streams.clear();
skip_indices_granules.clear();
skip_index_filling.clear();
for (ColumnStreams::iterator it = column_streams.begin(); it != column_streams.end(); ++it)
{
it->second->finalize();
@ -398,6 +421,21 @@ void MergedBlockOutputStream::init()
part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, O_TRUNC | O_CREAT | O_WRONLY);
index_stream = std::make_unique<HashingWriteBuffer>(*index_file_stream);
}
for (const auto & index : storage.skip_indices)
{
String stream_name = index->getFileName();
skip_indices_streams.emplace_back(
std::make_unique<ColumnStream>(
stream_name,
part_path + stream_name, INDEX_FILE_EXTENSION,
part_path + stream_name, MARKS_FILE_EXTENSION,
codec, max_compress_block_size,
0, aio_threshold));
skip_indices_granules.emplace_back(nullptr);
skip_index_filling.push_back(0);
}
}
@ -410,6 +448,9 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
WrittenOffsetColumns offset_columns;
auto primary_key_column_names = storage.primary_key_columns;
Names skip_indexes_column_names;
for (const auto & index : storage.skip_indices)
std::copy(index->columns.cbegin(), index->columns.cend(), std::back_inserter(skip_indexes_column_names));
/// Here we will add the columns related to the Primary Key, then write the index.
std::vector<ColumnWithTypeAndName> primary_key_columns(primary_key_column_names.size());
@ -429,6 +470,21 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
primary_key_columns[i].column = primary_key_columns[i].column->permute(*permutation, 0);
}
/// The same for skip indexes columns
std::vector<ColumnWithTypeAndName> skip_indexes_columns(skip_indexes_column_names.size());
std::map<String, size_t> skip_indexes_column_name_to_position;
for (size_t i = 0, size = skip_indexes_column_names.size(); i < size; ++i)
{
const auto & name = skip_indexes_column_names[i];
skip_indexes_column_name_to_position.emplace(name, i);
skip_indexes_columns[i] = block.getByName(name);
/// Reorder index columns in advance.
if (permutation)
skip_indexes_columns[i].column = skip_indexes_columns[i].column->permute(*permutation, 0);
}
if (index_columns.empty())
{
index_columns.resize(primary_key_column_names.size());
@ -459,11 +515,17 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
if (permutation)
{
auto primary_column_it = primary_key_column_name_to_position.find(it->name);
auto skip_index_column_it = skip_indexes_column_name_to_position.find(it->name);
if (primary_key_column_name_to_position.end() != primary_column_it)
{
auto & primary_column = *primary_key_columns[primary_column_it->second].column;
const auto & primary_column = *primary_key_columns[primary_column_it->second].column;
writeData(column.name, *column.type, primary_column, offset_columns, false, serialization_states[i]);
}
else if (skip_indexes_column_name_to_position.end() != skip_index_column_it)
{
const auto & index_column = *skip_indexes_columns[skip_index_column_it->second].column;
writeData(column.name, *column.type, index_column, offset_columns, false, serialization_states[i]);
}
else
{
/// We rearrange the columns that are not included in the primary key here; Then the result is released - to save RAM.
@ -479,6 +541,57 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm
rows_count += rows;
{
/// Filling and writing skip indices like in IMergedBlockOutputStream::writeData
for (size_t i = 0; i < storage.skip_indices.size(); ++i)
{
const auto index = storage.skip_indices[i];
auto & stream = *skip_indices_streams[i];
size_t prev_pos = 0;
while (prev_pos < rows)
{
size_t limit = 0;
if (prev_pos == 0 && index_offset != 0)
{
limit = index_offset;
}
else
{
limit = storage.index_granularity;
if (!skip_indices_granules[i])
{
skip_indices_granules[i] = index->createIndexGranule();
skip_index_filling[i] = 0;
if (stream.compressed.offset() >= min_compress_block_size)
stream.compressed.next();
writeIntBinary(stream.plain_hashing.count(), stream.marks);
writeIntBinary(stream.compressed.offset(), stream.marks);
}
}
size_t pos = prev_pos;
skip_indices_granules[i]->update(block, &pos, limit);
if (pos == prev_pos + limit)
{
++skip_index_filling[i];
/// write index if it is filled
if (skip_index_filling[i] == index->granularity)
{
skip_indices_granules[i]->serializeBinary(stream.compressed);
skip_indices_granules[i].reset();
skip_index_filling[i] = 0;
}
}
prev_pos = pos;
}
}
}
{
/** While filling index (index_columns), disable memory tracker.
* Because memory is allocated here (maybe in context of INSERT query),

View File

@ -149,6 +149,10 @@ private:
std::unique_ptr<WriteBufferFromFile> index_file_stream;
std::unique_ptr<HashingWriteBuffer> index_stream;
MutableColumns index_columns;
std::vector<std::unique_ptr<ColumnStream>> skip_indices_streams;
MergeTreeIndexGranules skip_indices_granules;
std::vector<size_t> skip_index_filling;
};
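
A standalone sketch of the filling logic driven by these members: writeImpl above consumes rows in steps of index_granularity, counts complete data granules in skip_index_filling, and serializes the accumulated index granule once index->granularity data granules have been seen; a partial granule left at the end is flushed in writeSuffixAndFinalizePart. The constants and counters below are hypothetical illustration, not this commit's code.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const size_t index_granularity = 3;   /// rows per data granule (per mark)
    const size_t index_gran = 2;          /// data granules per skip index granule
    const size_t rows = 14;

    size_t filling = 0;                   /// data granules accumulated in the current index granule
    size_t rows_in_current = 0;
    std::vector<size_t> flushed;          /// rows covered by each serialized index granule

    size_t pos = 0;
    while (pos < rows)
    {
        size_t limit = std::min(index_granularity, rows - pos);
        rows_in_current += limit;
        pos += limit;
        if (limit == index_granularity)   /// a whole data granule was consumed
        {
            ++filling;
            if (filling == index_gran)    /// the index granule is full -> serialize it
            {
                flushed.push_back(rows_in_current);
                rows_in_current = 0;
                filling = 0;
            }
        }
    }
    if (rows_in_current)                  /// leftover, written on finalize
        flushed.push_back(rows_in_current);

    for (size_t n : flushed)
        std::cout << n << " ";            /// prints: 6 6 2
    std::cout << "\n";
}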

View File

@ -145,14 +145,14 @@ void ReplicatedMergeTreeAlterThread::run()
parts = storage.data.getDataParts();
const auto columns_for_parts = storage.getColumns().getAllPhysical();
const auto indices_for_parts = storage.getIndicesDescription();
for (const MergeTreeData::DataPartPtr & part : parts)
{
/// Update the part and write result to temporary files.
/// TODO: You can skip checking for too large changes if ZooKeeper has, for example,
/// node /flags/force_alter.
auto transaction = storage.data.alterDataPart(part, columns_for_parts, false);
auto transaction = storage.data.alterDataPart(part, columns_for_parts, indices_for_parts.indices, false);
if (!transaction)
continue;

View File

@ -238,6 +238,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name)
storage.data.index_granularity,
true,
storage.data.primary_key_data_types,
storage.data.skip_indices,
[this] { return need_stop.load(); });
if (need_stop)

View File

@ -44,6 +44,8 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr
if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast));
skip_indices = data.getIndicesDescription().toString();
}
void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
@ -64,6 +66,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
if (!sorting_key.empty())
out << "sorting key: " << sorting_key << "\n";
if (!skip_indices.empty())
out << "indices: " << skip_indices << "\n";
}
String ReplicatedMergeTreeTableMetadata::toString() const
@ -93,6 +98,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in)
if (checkString("sorting key: ", in))
in >> sorting_key >> "\n";
if (checkString("indices: ", in))
in >> skip_indices >> "\n";
}
ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s)
@ -175,6 +183,21 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
ErrorCodes::METADATA_MISMATCH);
}
if (skip_indices != from_zk.skip_indices)
{
if (allow_alter)
{
diff.skip_indices_changed = true;
diff.new_skip_indices = from_zk.skip_indices;
}
else
throw Exception(
"Existing table metadata in ZooKeeper differs in skip indexes."
" Stored in ZooKeeper: " + from_zk.skip_indices +
", local: " + skip_indices,
ErrorCodes::METADATA_MISMATCH);
}
return diff;
}
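
A standalone sketch of the metadata handling above: skip indices become an extra "indices: ..." line of the ZooKeeper metadata text, and a mismatch with the local value either ends up in the diff (when ALTER is allowed) or raises METADATA_MISMATCH. The Metadata struct and the sample index string are hypothetical; the real serialization comes from IndicesDescription::toString(), which is not shown here.

#include <iostream>
#include <sstream>
#include <string>

struct Metadata
{
    std::string sorting_key;
    std::string skip_indices;

    std::string toString() const
    {
        std::ostringstream out;
        if (!sorting_key.empty())
            out << "sorting key: " << sorting_key << "\n";
        if (!skip_indices.empty())
            out << "indices: " << skip_indices << "\n";
        return out.str();
    }
};

int main()
{
    Metadata local{"a, b", "<serialized indices>"};
    Metadata from_zk{"a, b", ""};

    std::cout << local.toString();

    bool allow_alter = true;
    if (local.skip_indices != from_zk.skip_indices)
    {
        if (allow_alter)
            std::cout << "skip_indices_changed = true\n";   /// recorded in the diff
        else
            std::cout << "METADATA_MISMATCH\n";
    }
}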

View File

@ -25,6 +25,7 @@ struct ReplicatedMergeTreeTableMetadata
MergeTreeDataFormatVersion data_format_version;
String partition_key;
String sorting_key;
String skip_indices;
ReplicatedMergeTreeTableMetadata() = default;
explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data);
@ -40,7 +41,10 @@ struct ReplicatedMergeTreeTableMetadata
bool sorting_key_changed = false;
String new_sorting_key;
bool empty() const { return !sorting_key_changed; }
bool skip_indices_changed = false;
String new_skip_indices;
bool empty() const { return !sorting_key_changed && !skip_indices_changed; }
};
Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, bool allow_alter) const;

View File

@ -30,12 +30,13 @@ namespace ErrorCodes
namespace
{
/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column.
/** To read and checksum single stream (a pair of .bin, .mrk files) for a single column or secondary index.
*/
class Stream
{
public:
String base_name;
String bin_file_ext;
String bin_file_path;
String mrk_file_path;
private:
@ -50,10 +51,11 @@ private:
public:
HashingReadBuffer mrk_hashing_buf;
Stream(const String & path, const String & base_name)
Stream(const String & path, const String & base_name, const String & bin_file_ext = ".bin")
:
base_name(base_name),
bin_file_path(path + base_name + ".bin"),
bin_file_ext(bin_file_ext),
bin_file_path(path + base_name + bin_file_ext),
mrk_file_path(path + base_name + ".mrk"),
file_buf(bin_file_path),
compressed_hashing_buf(file_buf),
@ -118,7 +120,7 @@ public:
void saveChecksums(MergeTreeData::DataPart::Checksums & checksums)
{
checksums.files[base_name + ".bin"] = MergeTreeData::DataPart::Checksums::Checksum(
checksums.files[base_name + bin_file_ext] = MergeTreeData::DataPart::Checksums::Checksum(
compressed_hashing_buf.count(), compressed_hashing_buf.getHash(),
uncompressed_hashing_buf.count(), uncompressed_hashing_buf.getHash());
@ -135,6 +137,7 @@ MergeTreeData::DataPart::Checksums checkDataPart(
size_t index_granularity,
bool require_checksums,
const DataTypes & primary_key_data_types,
const MergeTreeIndices & indices,
std::function<bool()> is_cancelled)
{
Logger * log = &Logger::get("checkDataPart");
@ -239,6 +242,48 @@ MergeTreeData::DataPart::Checksums checkDataPart(
rows = count;
}
/// Read and check skip indices.
for (const auto & index : indices)
{
Stream stream(path, index->getFileName(), ".idx");
size_t mark_num = 0;
while (!stream.uncompressed_hashing_buf.eof())
{
if (stream.mrk_hashing_buf.eof())
throw Exception("Unexpected end of mrk file while reading index " + index->name,
ErrorCodes::CORRUPTED_DATA);
try
{
stream.assertMark();
}
catch (Exception &e)
{
e.addMessage("Cannot read mark " + toString(mark_num)
+ " in file " + stream.mrk_file_path
+ ", mrk file offset: " + toString(stream.mrk_hashing_buf.count()));
throw;
}
try
{
index->createIndexGranule()->deserializeBinary(stream.uncompressed_hashing_buf);
}
catch (Exception &e)
{
e.addMessage("Cannot read granule " + toString(mark_num)
+ " in file " + stream.bin_file_path
+ ", mrk file offset: " + toString(stream.mrk_hashing_buf.count()));
throw;
}
++mark_num;
if (is_cancelled())
return {};
}
stream.assertEnd();
stream.saveChecksums(checksums_data);
}
/// Read all columns, calculate checksums and validate marks.
for (const NameAndTypePair & name_type : columns)
{

View File

@ -17,6 +17,7 @@ MergeTreeData::DataPart::Checksums checkDataPart(
size_t index_granularity,
bool require_checksums,
const DataTypes & primary_key_data_types, /// Check the primary key. If it is not necessary, pass an empty array.
const MergeTreeIndices & indices = {}, /// Check skip indices
std::function<bool()> is_cancelled = []{ return false; });
}
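
A standalone sketch of the invariant the verification loop in checkDataPart enforces for each skip index: the .idx stream must contain exactly one serialized granule per entry in the .mrk file, and both files must end together. checkIndexFile and its arguments are hypothetical; the counts stand in for "read one mark, then deserialize one granule" until the .idx stream is exhausted.

#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>

void checkIndexFile(size_t granules_in_idx, size_t marks_in_mrk)
{
    size_t mark_num = 0;
    while (mark_num < granules_in_idx)      /// "while not eof of the .idx stream"
    {
        if (mark_num >= marks_in_mrk)
            throw std::runtime_error("Unexpected end of mrk file while reading granule " + std::to_string(mark_num));
        ++mark_num;                         /// one mark consumed, one granule deserialized
    }
    if (mark_num != marks_in_mrk)
        throw std::runtime_error("mrk file contains marks after the last granule");
}

int main()
{
    checkIndexFile(4, 4);                   /// consistent files: no exception
    try
    {
        checkIndexFile(4, 3);               /// truncated .mrk file
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << "\n";
    }
}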

View File

@ -1,6 +1,9 @@
#include <Storages/StorageFactory.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
#include <Storages/MergeTree/MergeTreeUniqueIndex.h>
#include <Common/typeid_cast.h>
#include <Common/OptimizedRegularExpression.h>
@ -336,7 +339,8 @@ static StoragePtr create(const StorageFactory::Arguments & args)
*/
bool is_extended_storage_def =
args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by || args.storage_def->sample_by || args.storage_def->settings;
args.storage_def->partition_by || args.storage_def->primary_key || args.storage_def->order_by
|| args.storage_def->sample_by || (args.query.columns_list->indices && !args.query.columns_list->indices->children.empty()) || args.storage_def->settings;
String name_part = args.engine_name.substr(0, args.engine_name.size() - strlen("MergeTree"));
@ -551,6 +555,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
ASTPtr order_by_ast;
ASTPtr primary_key_ast;
ASTPtr sample_by_ast;
IndicesDescription indices_description;
MergeTreeSettings storage_settings = args.context.getMergeTreeSettings();
if (is_extended_storage_def)
@ -571,6 +576,11 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (args.storage_def->sample_by)
sample_by_ast = args.storage_def->sample_by->ptr();
if (args.query.columns_list && args.query.columns_list->indices)
for (const auto & index : args.query.columns_list->indices->children)
indices_description.indices.push_back(
std::dynamic_pointer_cast<ASTIndexDeclaration>(index->ptr()));
storage_settings.loadFromQuery(*args.storage_def);
}
else
@ -603,16 +613,22 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (replicated)
return StorageReplicatedMergeTree::create(
zookeeper_path, replica_name, args.attach, args.data_path, args.database_name, args.table_name,
args.columns,
args.columns, indices_description,
args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast,
sample_by_ast, merging_params, storage_settings,
args.has_force_restore_data_flag);
sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag);
else
return StorageMergeTree::create(
args.data_path, args.database_name, args.table_name, args.columns, args.attach,
args.context, date_column_name, partition_by_ast, order_by_ast, primary_key_ast,
sample_by_ast, merging_params, storage_settings,
args.has_force_restore_data_flag);
args.data_path, args.database_name, args.table_name, args.columns, indices_description,
args.attach, args.context, date_column_name, partition_by_ast, order_by_ast,
primary_key_ast, sample_by_ast, merging_params, storage_settings, args.has_force_restore_data_flag);
}
static void registerMergeTreeSkipIndices()
{
auto & factory = MergeTreeIndexFactory::instance();
factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator);
factory.registerIndex("unique", MergeTreeUniqueIndexCreator);
}
@ -633,6 +649,8 @@ void registerStorageMergeTree(StorageFactory & factory)
factory.registerStorage("ReplicatedSummingMergeTree", create);
factory.registerStorage("ReplicatedGraphiteMergeTree", create);
factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create);
registerMergeTreeSkipIndices();
}
}

View File

@ -684,9 +684,10 @@ void StorageBuffer::alter(const AlterCommands & params, const String & database_
/// So that no blocks of the old structure remain.
optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context);
ColumnsDescription new_columns = getColumns();
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {});
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {});
setColumns(std::move(new_columns));
}

View File

@ -339,9 +339,10 @@ void StorageDistributed::alter(const AlterCommands & params, const String & data
{
auto lock = lockStructureForAlter();
ColumnsDescription new_columns = getColumns();
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, {});
context.getDatabase(database_name)->alterTable(context, current_table_name, new_columns, new_indices, {});
setColumns(std::move(new_columns));
}

View File

@ -94,11 +94,12 @@ StoragePtr StorageFactory::get(
ErrorCodes::BAD_ARGUMENTS);
}
if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by)
if ((storage_def->partition_by || storage_def->primary_key || storage_def->order_by || storage_def->sample_by ||
(query.columns_list && query.columns_list->indices && !query.columns_list->indices->children.empty()))
&& !endsWith(name, "MergeTree"))
{
throw Exception(
"Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses. "
"Engine " + name + " doesn't support PARTITION BY, PRIMARY KEY, ORDER BY or SAMPLE BY clauses and skipping indices. "
"Currently only the MergeTree family of engines supports them", ErrorCodes::BAD_ARGUMENTS);
}

View File

@ -139,7 +139,11 @@ StorageMaterializedView::StorageMaterializedView(
auto manual_create_query = std::make_shared<ASTCreateQuery>();
manual_create_query->database = target_database_name;
manual_create_query->table = target_table_name;
manual_create_query->set(manual_create_query->columns, query.columns->ptr());
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, query.columns_list->columns->ptr());
manual_create_query->set(manual_create_query->columns_list, new_columns_list);
manual_create_query->set(manual_create_query->storage, query.storage->ptr());
/// Execute the query.

View File

@ -398,9 +398,10 @@ void StorageMerge::alter(const AlterCommands & params, const String & database_n
{
auto lock = lockStructureForAlter();
ColumnsDescription new_columns = getColumns();
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, {});
context.getDatabase(database_name)->alterTable(context, table_name, new_columns, new_indices, {});
setColumns(new_columns);
}

View File

@ -51,6 +51,7 @@ StorageMergeTree::StorageMergeTree(
const String & database_name_,
const String & table_name_,
const ColumnsDescription & columns_,
const IndicesDescription & indices_,
bool attach,
Context & context_,
const String & date_column_name,
@ -64,10 +65,9 @@ StorageMergeTree::StorageMergeTree(
: path(path_), database_name(database_name_), table_name(table_name_), full_path(path + escapeForFileName(table_name) + '/'),
global_context(context_), background_pool(context_.getBackgroundPool()),
data(database_name, table_name,
full_path, columns_,
full_path, columns_, indices_,
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
sample_by_ast_, merging_params_,
settings_, false, attach),
sample_by_ast_, merging_params_, settings_, false, attach),
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()),
log(&Logger::get(database_name_ + "." + table_name + " (StorageMergeTree)"))
{
@ -199,8 +199,9 @@ void StorageMergeTree::alter(
{
auto table_soft_lock = lockStructureForAlter();
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {});
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {});
setColumns(std::move(new_columns));
return;
}
@ -213,16 +214,17 @@ void StorageMergeTree::alter(
data.checkAlter(params);
auto new_columns = data.getColumns();
auto new_indices = data.getIndicesDescription();
ASTPtr new_order_by_ast = data.order_by_ast;
ASTPtr new_primary_key_ast = data.primary_key_ast;
params.apply(new_columns, new_order_by_ast, new_primary_key_ast);
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
auto parts = data.getDataParts({MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated});
auto columns_for_parts = new_columns.getAllPhysical();
std::vector<MergeTreeData::AlterDataPartTransactionPtr> transactions;
for (const MergeTreeData::DataPartPtr & part : parts)
{
if (auto transaction = data.alterDataPart(part, columns_for_parts, false))
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
transactions.push_back(std::move(transaction));
}
@ -239,10 +241,10 @@ void StorageMergeTree::alter(
storage_ast.set(storage_ast.primary_key, new_primary_key_ast);
};
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, storage_modifier);
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, storage_modifier);
/// Reinitialize primary key because primary key column types might have changed.
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns);
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
for (auto & transaction : transactions)
transaction->commit();
@ -698,9 +700,10 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
alter_command.column_name = get<String>(column_name);
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
ASTPtr ignored_order_by_ast;
ASTPtr ignored_primary_key_ast;
alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast);
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast);
auto columns_for_parts = new_columns.getAllPhysical();
for (const auto & part : parts)
@ -708,7 +711,7 @@ void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Fi
if (part->info.partition_id != partition_id)
throw Exception("Unexpected partition ID " + part->info.partition_id + ". This is a bug.", ErrorCodes::LOGICAL_ERROR);
if (auto transaction = data.alterDataPart(part, columns_for_parts, false))
if (auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false))
transactions.push_back(std::move(transaction));
LOG_DEBUG(log, "Removing column " << get<String>(column_name) << " from part " << part->name);

View File

@ -43,6 +43,9 @@ public:
const ColumnsDescription & getColumns() const override { return data.getColumns(); }
void setColumns(ColumnsDescription columns_) override { return data.setColumns(std::move(columns_)); }
virtual const IndicesDescription & getIndicesDescription() const override { return data.getIndicesDescription(); }
virtual void setIndicesDescription(IndicesDescription indices_) override { data.setIndicesDescription(std::move(indices_)); }
NameAndTypePair getColumn(const String & column_name) const override { return data.getColumn(column_name); }
bool hasColumn(const String & column_name) const override { return data.hasColumn(column_name); }
@ -167,6 +170,7 @@ protected:
const String & database_name_,
const String & table_name_,
const ColumnsDescription & columns_,
const IndicesDescription & indices_,
bool attach,
Context & context_,
const String & date_column_name,

View File

@ -35,8 +35,9 @@ void StorageNull::alter(const AlterCommands & params, const String & current_dat
auto lock = lockStructureForAlter();
ColumnsDescription new_columns = getColumns();
IndicesDescription new_indices = getIndicesDescription();
params.apply(new_columns);
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, {});
context.getDatabase(current_database_name)->alterTable(context, current_table_name, new_columns, new_indices, {});
setColumns(std::move(new_columns));
}

View File

@ -113,6 +113,7 @@ namespace ErrorCodes
extern const int KEEPER_EXCEPTION;
extern const int ALL_REPLICAS_LOST;
extern const int REPLICA_STATUS_CHANGED;
extern const int INCORRECT_QUERY;
}
namespace ActionLocks
@ -200,6 +201,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
bool attach,
const String & path_, const String & database_name_, const String & name_,
const ColumnsDescription & columns_,
const IndicesDescription & indices_,
Context & context_,
const String & date_column_name,
const ASTPtr & partition_by_ast_,
@ -215,10 +217,9 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
zookeeper_path(global_context.getMacros()->expand(zookeeper_path_, database_name, table_name)),
replica_name(global_context.getMacros()->expand(replica_name_, database_name, table_name)),
data(database_name, table_name,
full_path, columns_,
full_path, columns_, indices_,
context_, date_column_name, partition_by_ast_, order_by_ast_, primary_key_ast_,
sample_by_ast_, merging_params_,
settings_, true, attach,
sample_by_ast_, merging_params_, settings_, true, attach,
[this] (const std::string & name) { enqueuePartForCheck(name); }),
reader(data), writer(data), merger_mutator(data, global_context.getBackgroundPool()), queue(*this),
fetcher(data),
@ -417,8 +418,11 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
{
ASTPtr new_primary_key_ast = data.primary_key_ast;
ASTPtr new_order_by_ast = data.order_by_ast;
auto new_indices = data.getIndicesDescription();
IDatabase::ASTModifier storage_modifier;
if (!metadata_diff.empty())
{
if (metadata_diff.sorting_key_changed)
{
ParserNotEmptyExpressionList parser(false);
auto new_sorting_key_expr_list = parseQuery(parser, metadata_diff.new_sorting_key, 0);
@ -438,6 +442,10 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
/// save the old ORDER BY expression as the new primary key.
new_primary_key_ast = data.order_by_ast->clone();
}
}
if (metadata_diff.skip_indices_changed)
new_indices = IndicesDescription::parse(metadata_diff.new_skip_indices);
storage_modifier = [&](IAST & ast)
{
@ -455,11 +463,11 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
};
}
global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, storage_modifier);
global_context.getDatabase(database_name)->alterTable(global_context, table_name, new_columns, new_indices, storage_modifier);
/// Even if the primary/sorting keys didn't change we must reinitialize it
/// because primary key column types might have changed.
data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns);
data.setPrimaryKeyIndicesAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, new_indices);
}
@ -1527,9 +1535,10 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
alter_command.column_name = entry.column_name;
auto new_columns = getColumns();
auto new_indices = getIndicesDescription();
ASTPtr ignored_order_by_ast;
ASTPtr ignored_primary_key_ast;
alter_command.apply(new_columns, ignored_order_by_ast, ignored_primary_key_ast);
alter_command.apply(new_columns, new_indices, ignored_order_by_ast, ignored_primary_key_ast);
size_t modified_parts = 0;
auto parts = data.getDataParts();
@ -1549,7 +1558,7 @@ void StorageReplicatedMergeTree::executeClearColumnInPartition(const LogEntry &
LOG_DEBUG(log, "Clearing column " << entry.column_name << " in part " << part->name);
auto transaction = data.alterDataPart(part, columns_for_parts, false);
auto transaction = data.alterDataPart(part, columns_for_parts, new_indices.indices, false);
if (!transaction)
continue;
@ -3117,9 +3126,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params,
data.checkAlter(params);
ColumnsDescription new_columns = data.getColumns();
IndicesDescription new_indices = data.getIndicesDescription();
ASTPtr new_order_by_ast = data.order_by_ast;
ASTPtr new_primary_key_ast = data.primary_key_ast;
params.apply(new_columns, new_order_by_ast, new_primary_key_ast);
params.apply(new_columns, new_indices, new_order_by_ast, new_primary_key_ast);
String new_columns_str = new_columns.toString();
if (new_columns_str != data.getColumns().toString())
@ -3129,6 +3139,10 @@ void StorageReplicatedMergeTree::alter(const AlterCommands & params,
if (new_order_by_ast.get() != data.order_by_ast.get())
new_metadata.sorting_key = serializeAST(*MergeTreeData::extractKeyExpressionList(new_order_by_ast));
String new_indices_str = new_indices.toString();
if (new_indices_str != data.getIndicesDescription().toString())
new_metadata.skip_indices = new_indices_str;
String new_metadata_str = new_metadata.toString();
if (new_metadata_str != ReplicatedMergeTreeTableMetadata(data).toString())
changed_nodes.emplace_back(zookeeper_path, "metadata", new_metadata_str);

View File

@ -552,6 +552,7 @@ protected:
bool attach,
const String & path_, const String & database_name_, const String & name_,
const ColumnsDescription & columns_,
const IndicesDescription & indices_,
Context & context_,
const String & date_column_name,
const ASTPtr & partition_by_ast_,

View File

@ -0,0 +1,8 @@
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01

View File

@ -0,0 +1,41 @@
DROP TABLE IF EXISTS test.minmax_idx;
CREATE TABLE test.minmax_idx
(
u64 UInt64,
i32 Int32,
f64 Float64,
d Decimal(10, 2),
s String,
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
dt Date,
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4,
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
) ENGINE = MergeTree()
ORDER BY u64
SETTINGS index_granularity = 2;
/* many small inserts => table will make merges */
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
/* simple select */
SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
/* select with hole made by primary key */
SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt;
DROP TABLE test.minmax_idx;

View File

@ -0,0 +1,16 @@
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01

View File

@ -0,0 +1,69 @@
DROP TABLE IF EXISTS test.minmax_idx1;
DROP TABLE IF EXISTS test.minmax_idx2;
CREATE TABLE test.minmax_idx1
(
u64 UInt64,
i32 Int32,
f64 Float64,
d Decimal(10, 2),
s String,
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
dt Date,
INDEX
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
INDEX
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1')
ORDER BY u64
SETTINGS index_granularity = 2;
CREATE TABLE test.minmax_idx2
(
u64 UInt64,
i32 Int32,
f64 Float64,
d Decimal(10, 2),
s String,
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
dt Date,
INDEX
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
INDEX
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2')
ORDER BY u64
SETTINGS index_granularity = 2;
/* many small inserts => table will make merges */
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
INSERT INTO test.minmax_idx2 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
SYSTEM SYNC REPLICA test.minmax_idx1;
SYSTEM SYNC REPLICA test.minmax_idx2;
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
SYSTEM SYNC REPLICA test.minmax_idx1;
SYSTEM SYNC REPLICA test.minmax_idx2;
/* simple select */
SELECT * FROM test.minmax_idx1 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
SELECT * FROM test.minmax_idx2 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
/* select with hole made by primary key */
SELECT * FROM test.minmax_idx1 WHERE u64 != 1 AND e = 'b' ORDER BY dt;
SELECT * FROM test.minmax_idx2 WHERE u64 != 1 AND e = 'b' ORDER BY dt;
DROP TABLE test.minmax_idx1;
DROP TABLE test.minmax_idx2;

View File

@ -0,0 +1,28 @@
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 2
1 2
1 2
1 2
1 2
1 2
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 2
1 2
1 2
1 2
1 2
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 2
1 2
1 2
1 2
1 2
1 2
1 2
CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 2

View File

@ -0,0 +1,68 @@
DROP TABLE IF EXISTS test.minmax_idx;
DROP TABLE IF EXISTS test.minmax_idx2;
CREATE TABLE test.minmax_idx
(
u64 UInt64,
i32 Int32
) ENGINE = MergeTree()
ORDER BY u64;
INSERT INTO test.minmax_idx VALUES (1, 2);
ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx ADD INDEX idx3 (u64 - i32) TYPE minmax GRANULARITY 10 AFTER idx1;
SHOW CREATE TABLE test.minmax_idx;
SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2;
INSERT INTO test.minmax_idx VALUES (1, 2);
INSERT INTO test.minmax_idx VALUES (1, 2);
INSERT INTO test.minmax_idx VALUES (1, 2);
INSERT INTO test.minmax_idx VALUES (1, 2);
INSERT INTO test.minmax_idx VALUES (1, 2);
SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2;
ALTER TABLE test.minmax_idx DROP INDEX idx1;
SHOW CREATE TABLE test.minmax_idx;
SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2;
ALTER TABLE test.minmax_idx DROP INDEX idx2;
ALTER TABLE test.minmax_idx DROP INDEX idx3;
SHOW CREATE TABLE test.minmax_idx;
ALTER TABLE test.minmax_idx ADD INDEX idx1 (u64 * i32) TYPE minmax GRANULARITY 10;
SHOW CREATE TABLE test.minmax_idx;
SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2;
CREATE TABLE test.minmax_idx2
(
u64 UInt64,
i32 Int32,
INDEX idx1 (u64 + i32) TYPE minmax GRANULARITY 10,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10
) ENGINE = MergeTree()
ORDER BY u64;
INSERT INTO test.minmax_idx2 VALUES (1, 2);
INSERT INTO test.minmax_idx2 VALUES (1, 2);
SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2;
ALTER TABLE test.minmax_idx2 DROP INDEX idx1, DROP INDEX idx2;
SHOW CREATE TABLE test.minmax_idx2;
SELECT * FROM test.minmax_idx2 WHERE u64 * i32 = 2;
DROP TABLE test.minmax_idx;
DROP TABLE test.minmax_idx2;

View File

@ -0,0 +1,58 @@
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 2
1 2
1 4
1 5
3 2
19 9
65 75
1 2
1 4
1 5
3 2
19 9
65 75
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 4
1 5
3 2
19 9
65 75
1 2
1 4
1 5
3 2
19 9
65 75
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 4
1 5
3 2
19 9
65 75
1 2
1 4
1 5
3 2
19 9
65 75
CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 3
1 2
1 3
CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192
CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192
1 2
1 3
1 2
1 3

View File

@ -0,0 +1,111 @@
DROP TABLE IF EXISTS test.minmax_idx;
DROP TABLE IF EXISTS test.minmax_idx_r;
DROP TABLE IF EXISTS test.minmax_idx2;
DROP TABLE IF EXISTS test.minmax_idx2_r;
CREATE TABLE test.minmax_idx
(
u64 UInt64,
i32 Int32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r1')
ORDER BY u64;
CREATE TABLE test.minmax_idx_r
(
u64 UInt64,
i32 Int32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter1', 'r2')
ORDER BY u64;
INSERT INTO test.minmax_idx VALUES (1, 2);
SYSTEM SYNC REPLICA test.minmax_idx_r;
ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx_r ADD INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx ADD INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10 AFTER idx1;
SHOW CREATE TABLE test.minmax_idx;
SHOW CREATE TABLE test.minmax_idx_r;
SELECT * FROM test.minmax_idx WHERE u64 * i32 = 2 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx_r WHERE u64 * i32 = 2 ORDER BY (u64, i32);
INSERT INTO test.minmax_idx VALUES (1, 4);
INSERT INTO test.minmax_idx_r VALUES (3, 2);
INSERT INTO test.minmax_idx VALUES (1, 5);
INSERT INTO test.minmax_idx_r VALUES (65, 75);
INSERT INTO test.minmax_idx VALUES (19, 9);
SYSTEM SYNC REPLICA test.minmax_idx;
SYSTEM SYNC REPLICA test.minmax_idx_r;
SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32);
ALTER TABLE test.minmax_idx DROP INDEX idx1;
SHOW CREATE TABLE test.minmax_idx;
SHOW CREATE TABLE test.minmax_idx_r;
SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32);
ALTER TABLE test.minmax_idx DROP INDEX idx2;
ALTER TABLE test.minmax_idx_r DROP INDEX idx3;
SHOW CREATE TABLE test.minmax_idx;
SHOW CREATE TABLE test.minmax_idx_r;
ALTER TABLE test.minmax_idx ADD INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10;
SHOW CREATE TABLE test.minmax_idx;
SHOW CREATE TABLE test.minmax_idx_r;
SELECT * FROM test.minmax_idx WHERE u64 * i32 > 1 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx_r WHERE u64 * i32 > 1 ORDER BY (u64, i32);
CREATE TABLE test.minmax_idx2
(
u64 UInt64,
i32 Int32,
INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r1')
ORDER BY u64;
CREATE TABLE test.minmax_idx2_r
(
u64 UInt64,
i32 Int32,
INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10,
INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/indices_alter2', 'r2')
ORDER BY u64;
SHOW CREATE TABLE test.minmax_idx2;
SHOW CREATE TABLE test.minmax_idx2_r;
INSERT INTO test.minmax_idx2 VALUES (1, 2);
INSERT INTO test.minmax_idx2_r VALUES (1, 3);
SYSTEM SYNC REPLICA test.minmax_idx2;
SYSTEM SYNC REPLICA test.minmax_idx2_r;
SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32);
ALTER TABLE test.minmax_idx2_r DROP INDEX idx1, DROP INDEX idx2;
SHOW CREATE TABLE test.minmax_idx2;
SHOW CREATE TABLE test.minmax_idx2_r;
SELECT * FROM test.minmax_idx2 WHERE u64 * i32 >= 2 ORDER BY (u64, i32);
SELECT * FROM test.minmax_idx2_r WHERE u64 * i32 >= 2 ORDER BY (u64, i32);
DROP TABLE test.minmax_idx;
DROP TABLE test.minmax_idx_r;
DROP TABLE test.minmax_idx2;
DROP TABLE test.minmax_idx2_r;

View File

@ -0,0 +1,8 @@
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01
0 5 4.7 6.50 cba b 2014-01-04
0 5 4.7 6.50 cba b 2014-03-11
2 5 4.7 6.50 cba b 2014-06-11
2 5 4.7 6.50 cba b 2015-01-01

View File

@ -0,0 +1,41 @@
DROP TABLE IF EXISTS test.unique_idx;
CREATE TABLE test.unique_idx
(
u64 UInt64,
i32 Int32,
f64 Float64,
d Decimal(10, 2),
s String,
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
dt Date,
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 4,
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 2,
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique GRANULARITY 3
) ENGINE = MergeTree()
ORDER BY u64
SETTINGS index_granularity = 2;
/* many small inserts => table will make merges */
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
/* simple select */
SELECT * FROM test.unique_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
/* select with hole made by primary key */
SELECT * FROM test.unique_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt;
DROP TABLE test.unique_idx;

debian/control
View File

@ -26,6 +26,7 @@ Description: Client binary for ClickHouse
Package: clickhouse-common-static
Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata
Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-server-base
Provides: clickhouse-server-base
Description: Common files for ClickHouse

View File

@ -21,9 +21,14 @@ It is highly recommended to set up monitoring for:
ClickHouse server has embedded instruments for self-state monitoring.
To monitor server events use server logs. See the [logger](#server_settings-logger) section of the configuration file.
To track server events use server logs. See the [logger](#server_settings-logger) section of the configuration file.
ClickHouse collects different metrics of computational resources usage and common statistics of queries processing. You can find metrics in tables [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) и [system.asynchronous_metrics](#system_tables-asynchronous_metrics).
ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics of query processing.
You can find metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables.
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [Graphite section](server_settings/settings.md#server_settings-graphite) of ClickHouse server configuration file. Before configuring metrics export, you should set up Graphite by following their official guide https://graphite.readthedocs.io/en/latest/install.html.
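As a quick check that metrics are being collected, you can query these system tables directly. A minimal sketch (the exact set of columns depends on the server version):
```sql
SELECT * FROM system.metrics LIMIT 10;
SELECT event, value FROM system.events ORDER BY value DESC LIMIT 10;
SELECT * FROM system.asynchronous_metrics LIMIT 10;
```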

View File

@ -2,7 +2,7 @@
## CPU
In case of installation from prebuilt deb-packages use CPU with x86/64 architecture and SSE 4.2 instructions support. If you build ClickHouse from sources, you can use other processors.
For installation from prebuilt deb packages, use a CPU with the x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse on processors that do not support SSE 4.2, or that have the AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently in configurations with a large number of cores but a lower clock rate than in configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz are preferable to 8 cores with 3600 MHz.
@ -15,7 +15,7 @@ We recommend to use 4GB of RAM as minimum to be able to perform non-trivial quer
The required volume of RAM depends on:
- The complexity of queries.
- Amount of the data in queries.
- The amount of data processed in queries.
To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and other operations you use.
@ -33,20 +33,22 @@ The volume of storage required for your data should be calculated separately. As
- Estimation of a data volume.
You can take the sample of the data and get the size of a row from it. Then multiply the size of the row with a number of rows you plan to store.
You can take a sample of the data and get the average size of a row from it. Then multiply this value by the number of rows you plan to store.
- Data compression coefficient.
To estimate the data compression coefficient, load some sample of your data into ClickHouse and compare the actual size of the data with the size of the table stored. For example, the typical compression coefficient for clickstream data lays in a range of 6-10 times.
To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the stored table. For example, clickstream data is usually compressed by a factor of 6-10.
To calculate the final volume of data to be stored, divide the estimated data volume by the compression coefficient.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, multiply the estimated volume by the number of replicas.
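As an illustration, a back-of-the-envelope estimate can be computed right in ClickHouse (all figures below are hypothetical):
```sql
-- Hypothetical inputs: 10 billion rows, 200 bytes per uncompressed row,
-- compression factor of 7, two replicas.
SELECT formatReadableSize(10000000000 * 200 / 7 * 2) AS estimated_storage;
```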
## Network
If possible, use a 10G network.
If possible, use networks of 10G or higher class.
Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Network speed also affects replication processes.
## Software
ClickHouse is developed for Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. Name and version of an operating system where ClickHouse runs depend on the method of installation. See details in [Getting started](../getting_started/index.md) section of the documentation.
ClickHouse is developed for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
ClickHouse can also work in other operating system families. See details in the [Getting started](../getting_started/index.md) section of the documentation.

View File

@ -34,6 +34,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
[PARTITION BY expr]
[ORDER BY expr]
@ -225,6 +227,56 @@ To check whether ClickHouse can use the index when running a query, use the sett
The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
### Data Skipping Indices
The index declaration goes in the columns section of the `CREATE` query:
```sql
INDEX index_name expr TYPE type(...) GRANULARITY granularity_value
```
Data skipping indices can be specified for tables from the `*MergeTree` family.
These indices aggregate some information about the specified expression on blocks, which consist of `granularity_value` granules.
These aggregates are then used in `SELECT` queries to reduce the amount of data read from disk by skipping big blocks of data for which the `WHERE` condition cannot be satisfied.
Example
```sql
CREATE TABLE table_name
(
u64 UInt64,
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s)) TYPE unique GRANULARITY 4
) ENGINE = MergeTree()
...
```
ClickHouse can use the indices from the example to reduce the amount of data read from disk in the following queries:
```sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
```
#### Available Types of Indices
* `minmax`
Stores the extremes of the specified expression (if the expression is a `tuple`, then the extremes of each element of the `tuple`), and uses the stored info to skip blocks of data in the same way as the primary key.
* `unique(max_rows)`
Stores unique values of the specified expression (no more than `max_rows` rows), and uses them to check whether the `WHERE` expression is satisfiable on a block of data.
If `max_rows=0`, there is no limit on the number of stored values. `unique` without parameters is equal to `unique(0)`.
```sql
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4
```
## Concurrent Data Access
For concurrent table access, we use multi-versioning. In other words, when a table is simultaneously read and updated, data is read from a set of parts that is current at the time of the query. There are no lengthy locks. Inserts do not get in the way of read operations.

View File

@ -1,13 +1,11 @@
# Troubleshooting
Known issues:
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Queries processing](#troubleshooting-does-not-process-queries)
- [Efficiency of queries processing](#troubleshooting-too-slow)
- [Installation errors](#troubleshooting-installation-errors).
- [The server does not accept the connections](#troubleshooting-accepts-no-connections).
- [ClickHouse does not process queries](#troubleshooting-does-not-process-queries).
- [ClickHouse processes queries too slow](#troubleshooting-too-slow).
## Installation Errors {#troubleshooting-installation-errors}
## Installation {#troubleshooting-installation-errors}
### You Cannot Get Deb Packages from the ClickHouse Repository With apt-get
@ -15,9 +13,9 @@ Known issues:
- If you cannot access the repository for any reason, download the packages as described in the [Getting started](../getting_started/index.md) article and install them manually with the `sudo dpkg -i <packages>` command. You also need the `tzdata` package.
## Server Does Not Accept the Connections {#troubleshooting-accepts-no-connections}
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible reasons:
Possible issues:
- The server is not running.
- Unexpected or wrong configuration parameters.
@ -122,7 +120,7 @@ Check:
You may use the wrong user name or password for it.
## ClickHouse Does Not Process Queries {#troubleshooting-does-not-process-queries}
## Queries Processing {#troubleshooting-does-not-process-queries}
If ClickHouse cannot process the query, it sends an error description to the client. In `clickhouse-client` you get the error description in the console. If you use the HTTP interface, ClickHouse sends the error description in the response body. For example,
@ -135,7 +133,7 @@ If you start `clickhouse-client` with `stack-trace` parameter, ClickHouse return
You might see a message about a broken connection. In this case, you can repeat the query. If the connection breaks every time you perform the query, check the server logs for errors.
## ClickHouse Processes Queries Not Fast Enough {#troubleshooting-too-slow}
## Efficiency of Queries Processing {#troubleshooting-too-slow}
If you see that ClickHouse is working too slowly, you need to profile the load on server resources and the network for your queries.
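If query logging is enabled (the `log_queries` setting), the `system.query_log` table is one place to look for the heaviest queries. A sketch, assuming a reasonably recent server version:
```sql
SELECT query, query_duration_ms, read_rows, memory_usage
FROM system.query_log
WHERE type = 2 -- QueryFinish
ORDER BY query_duration_ms DESC
LIMIT 10;
```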

View File

@ -83,6 +83,19 @@ rows are ordered by the sorting key expression you cannot add expressions contai
to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query).
### Manipulations With Data Skipping Indices
It only works for tables in the [`*MergeTree`](../operations/table_engines/mergetree.md) family (including
[replicated](../operations/table_engines/replication.md) tables). The following operations
are available:
* `ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]` - Adds the index description to the table metadata.
* `ALTER DROP INDEX name` - Removes the index description from the table metadata and deletes the index files from disk.
These commands are lightweight in the sense that they only change metadata or remove files.
They are also replicated (syncing indices metadata through ZooKeeper).
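For example, mirroring the syntax used by the functional tests in this changeset (the table and index names are illustrative):
```sql
ALTER TABLE test.minmax_idx ADD INDEX idx1 (u64 * i32) TYPE minmax GRANULARITY 10;
ALTER TABLE test.minmax_idx DROP INDEX idx1;
```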
### Manipulations With Partitions and Parts
It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including

View File

@ -0,0 +1,37 @@
# Monitoring
You can monitor:
- Utilization of hardware resources.
- ClickHouse server metrics.
## Resource Utilization
ClickHouse does not monitor the state of hardware resources by itself.
It is highly recommended to monitor:
- Load and temperature of processors.
You can use [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) or other tools.
- Utilization of the storage system, RAM and network.
## ClickHouse Server Metrics
The ClickHouse server has embedded instruments for self-state monitoring.
To track server events, use server logs. See the [logger](#server_settings-logger) section of the configuration file.
ClickHouse collects:
- Different metrics of how the server uses computational resources.
- Common statistics of query processing.
You can find metrics in the [system.metrics](#system_tables-metrics), [system.events](#system_tables-events) and [system.asynchronous_metrics](#system_tables-asynchronous_metrics) tables.
You can configure ClickHouse to export metrics to [Graphite](https://github.com/graphite-project). See the [graphite](server_settings/settings.md#server_settings-graphite) section of the ClickHouse server configuration file. Before configuring metrics export, set up Graphite as described in the [official guide](https://graphite.readthedocs.io/en/latest/install.html).
You can also monitor server availability through the HTTP API. Send an `HTTP GET` request to the `/` resource. If the server is available, it responds with `200 OK`.
To monitor servers in a cluster configuration, set the [max_replica_delay_for_distributed_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries) parameter and use the HTTP resource `/replicas-delay`. A request to `/replicas-delay` returns `200 OK` if the replica is available and is not lagging behind the other replicas. If the replica is lagging, it returns information about the size of the lag.
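Replica lag can also be inspected from inside the server via the `system.replicas` table. A sketch; the column names assume a reasonably recent server version:
```sql
SELECT database, table, is_readonly, absolute_delay
FROM system.replicas
WHERE absolute_delay > 0;
```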

View File

@ -0,0 +1,54 @@
# Requirements
## CPU
For installation from prebuilt deb packages, use a CPU with the x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse on processors that do not support SSE 4.2, or that have the AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently in configurations with a large number of cores but a lower clock rate than in configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz are preferable to 8 cores with 3600 MHz.
Use of **Turbo Boost** and **hyper-threading** technologies is recommended. They significantly improve performance with a typical workload.
## RAM
We recommend using a minimum of 4 GB of RAM in order to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but memory is required for processing queries.
The required volume of RAM depends on:
- The complexity of queries.
- The amount of data processed in queries.
To calculate the required volume of RAM, estimate the size of temporary data for [GROUP BY](../query_language/select.md#select-group-by-clause), [DISTINCT](../query_language/select.md#select-distinct), [JOIN](../query_language/select.md#select-join) and the other operations you use.
ClickHouse can use external memory for temporary data. For details, see the [GROUP BY in External Memory](../query_language/select.md#select-group-by-in-external-memory) section.
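As an illustration, spilling of `GROUP BY` intermediate data to disk is controlled by a setting such as the one below (the threshold and the table name are hypothetical):
```sql
SET max_bytes_before_external_group_by = 10000000000; -- ~10 GB before spilling to disk
SELECT key, count() FROM big_table GROUP BY key;
```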
## Swap File
Disable the swap file in production environments.
## Storage Subsystem
Installing ClickHouse requires 2 GB of free disk space.
The volume of storage required for your data should be calculated separately. The calculation should include:
- An estimation of the data volume.
You can take a sample of the data and get the average size of a row from it. Then multiply this value by the number of rows you plan to store.
- An estimation of the data compression coefficient.
To estimate the data compression coefficient, load a sample of your data into ClickHouse and compare the actual size of the data with the size of the stored table. For example, clickstream data is usually compressed by a factor of 6-10.
To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, multiply the estimated volume by the number of replicas.
## Network
If possible, use networks of 10G or higher class.
Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Network speed also affects replication latency.
## Software
ClickHouse is developed for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
ClickHouse can also work in other operating system families. For details, see the [Getting started](../getting_started/index.md) section of the documentation.

View File

@ -32,6 +32,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
[PARTITION BY expr]
[ORDER BY expr]
@ -224,6 +226,50 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%'
The partitioning key by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date.
### Data Skipping Indices
For tables from the `*MergeTree` family, additional data skipping indices can be specified in the columns section.
These indices aggregate some information about the specified expression, and then during a `SELECT` query they are used to skip blocks of data (a skipped block consists of a number of granules equal to the granularity of the index) on which the `WHERE` condition cannot be satisfied, thereby reducing the amount of data read from disk.
Example
```sql
CREATE TABLE table_name
(
u64 UInt64,
i32 Int32,
s String,
...
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
INDEX b (u64 * length(s), i32) TYPE unique GRANULARITY 4
) ENGINE = MergeTree()
...
```
ClickHouse can use these indices to optimize the following queries:
```sql
SELECT count() FROM table WHERE s < 'z'
SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
```
#### Available Types of Indices
* `minmax`
Stores the minimum and maximum of the specified expression (if the expression is a `tuple`, then for each element of the `tuple`), and uses them to skip blocks of data in the same way as the primary key.
* `unique(max_rows)`
Stores no more than `max_rows` unique values of the expression per block, and uses them to skip blocks by evaluating whether the `WHERE` expression is satisfiable on the stored data.
If `max_rows=0`, the expression values are stored without limits. If no parameters are passed, `max_rows=0` is assumed.
Examples
```sql
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4
```
## Concurrent Data Access
For concurrent table access, multi-versioning is used. In other words, when a table is simultaneously read and updated, data is read from a set of parts that is current at the time of the query. There are no lengthy locks. Inserts do not get in the way of read operations.

View File

@ -0,0 +1,139 @@
# Troubleshooting
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
- [Query processing](#troubleshooting-does-not-process-queries)
- [Query processing speed](#troubleshooting-too-slow)
## Installation {#troubleshooting-installation-errors}
### You Cannot Get Deb Packages from the ClickHouse Repository With apt-get
- Check the firewall settings.
- If you cannot access the repository for any reason, download the packages as described in the [Getting started](../getting_started/index.md) section and install them manually with the `sudo dpkg -i <packages>` command. You also need the `tzdata` package.
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:
- The server is not running.
- Unexpected or wrong configuration parameters.
### The Server Is Not Running
**Check whether the server is running**
Command:
```
sudo service clickhouse-server status
```
If the server is not running, start it with the command:
```
sudo service clickhouse-server start
```
**Check the logs**
The main log of `clickhouse-server` is located at `/var/log/clickhouse-server/clickhouse-server.log` by default.
If the server started successfully, you should see lines containing:
- `<Information> Application: starting up.` — the server is starting.
- `<Information> Application: Ready for connections.` — the server is running and ready to accept connections.
If `clickhouse-server` failed to start because of a configuration error, you will see an `<Error>` line with the error description. For example:
```
2019.01.11 15:23:25.549505 [ 45 ] {} <Error> ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused
```
If you do not see an error at the end of the file, look through the entire file starting from the line:
```
<Information> Application: starting up.
```
When trying to start a second instance of `clickhouse-server`, the log looks as follows:
```
2019.01.11 15:25:11.151730 [ 1 ] {} <Information> : Starting ClickHouse 19.1.0 with revision 54413
2019.01.11 15:25:11.154578 [ 1 ] {} <Information> Application: starting up
2019.01.11 15:25:11.156361 [ 1 ] {} <Information> StatusFile: Status file ./status already exists - unclean restart. Contents:
PID: 8510
Started at: 2019-01-11 15:24:23
Revision: 54413
2019.01.11 15:25:11.156673 [ 1 ] {} <Error> Application: DB::Exception: Cannot lock file ./status. Another server instance in same directory is already running.
2019.01.11 15:25:11.156682 [ 1 ] {} <Information> Application: shutting down
2019.01.11 15:25:11.156686 [ 1 ] {} <Debug> Application: Uninitializing subsystem: Logging Subsystem
2019.01.11 15:25:11.156716 [ 2 ] {} <Information> BaseDaemon: Stop SignalListener thread
```
**Check the system.d logs**
If you did not get the necessary information from the `clickhouse-server` logs, or there are no logs, you can view the `system.d` logs with the command:
```
sudo journalctl -u clickhouse-server
```
**Start clickhouse-server in interactive mode**
```
sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-server/config.xml
```
This command starts the server as an interactive application with the standard parameters of the autostart script. In this mode, `clickhouse-server` prints messages to the console.
### Configuration Parameters
Check:
- Docker settings.
When running ClickHouse in Docker on an IPv6 network, make sure that `network=host` is set.
- Endpoint settings.
Check the [listen_host](server_settings/settings.md#server_settings-listen_host) and [tcp_port](server_settings/settings.md#server_settings-tcp_port) settings.
By default, the ClickHouse server accepts only local connections.
- HTTP protocol settings.
Check the protocol settings for the HTTP API.
- Secure connection settings.
Check:
- The `tcp_port_secure` setting.
- Settings for SSL certificates.
Use the proper parameters when connecting. For example, use the `port_secure` parameter with `clickhouse_client`.
- User settings.
You may be using the wrong user name or password.
## Query Processing {#troubleshooting-does-not-process-queries}
If ClickHouse cannot process a query, it sends an error description to the client. In `clickhouse-client` you get the error description in the console. If you use the HTTP interface, ClickHouse sends the error description in the response body. For example:
```bash
$ curl 'http://localhost:8123/' --data-binary "SELECT a"
Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there is no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception
```
If you start `clickhouse-client` with the `stack-trace` parameter, ClickHouse returns the error description together with the corresponding stack trace of function calls on the server.
You might see a message about a broken connection. In this case, repeat the query. If the connection breaks every time you perform the query, check the server logs for errors.
## Query Processing Speed {#troubleshooting-too-slow}
If you see that ClickHouse is working too slowly, you need to profile the load on server resources and the network for your queries.
You can use the clickhouse-benchmark utility to profile queries. It shows the number of queries processed per second, the number of rows processed per second, and the percentiles of query processing times.
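To see what the server is busy with right now, you can also inspect the currently running queries. A sketch; the column names assume a reasonably recent server version:
```sql
SELECT elapsed, memory_usage, query
FROM system.processes
ORDER BY elapsed DESC;
```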

View File

@ -79,6 +79,20 @@ MODIFY ORDER BY new_expression
sorting key expression, you can add only new columns to the key (i.e. columns added by the `ADD COLUMN` command
in the same `ALTER` query) that have no default expression.
### Manipulations With Indices
You can add or remove an index using the following operations:
```
ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
ALTER DROP INDEX name
```
Only tables of the `*MergeTree` family support these operations.
The `ALTER ADD INDEX` command adds the index description to the table metadata, and `ALTER DROP INDEX` removes the index from the metadata and deletes the index files from disk, so these commands are lightweight and work instantly.
Once an index appears in the metadata, it is taken into account in subsequent merges and inserts into the table, not immediately after the `ALTER` operation is executed.
The index modification query is replicated, saving the new metadata in ZooKeeper and applying the changes on all replicas.
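If the index should also cover data written before the `ALTER`, one option (assuming the table is small enough for a full rewrite) is to force a merge that rewrites the existing parts, for example:
```sql
OPTIMIZE TABLE test.minmax_idx FINAL;
```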
### Manipulations With Partitions and Parts