mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Analyzer support Set index
This commit is contained in:
parent
b2cce09004
commit
ddad879cb4
@ -9,6 +9,10 @@
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Planner/PlannerActionsVisitor.h>
|
||||
|
||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -242,67 +246,78 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
|
||||
|
||||
MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
|
||||
const String & index_name_,
|
||||
const Block & index_sample_block_,
|
||||
const Block & index_sample_block,
|
||||
size_t max_rows_,
|
||||
const SelectQueryInfo & query,
|
||||
const SelectQueryInfo & query_info,
|
||||
ContextPtr context)
|
||||
: index_name(index_name_)
|
||||
, max_rows(max_rows_)
|
||||
, index_sample_block(index_sample_block_)
|
||||
{
|
||||
for (const auto & name : index_sample_block.getNames())
|
||||
if (!key_columns.contains(name))
|
||||
key_columns.insert(name);
|
||||
|
||||
const auto & select = query.query->as<ASTSelectQuery &>();
|
||||
|
||||
if (select.where() && select.prewhere())
|
||||
expression_ast = makeASTFunction(
|
||||
"and",
|
||||
select.where()->clone(),
|
||||
select.prewhere()->clone());
|
||||
else if (select.where())
|
||||
expression_ast = select.where()->clone();
|
||||
else if (select.prewhere())
|
||||
expression_ast = select.prewhere()->clone();
|
||||
|
||||
useless = checkASTUseless(expression_ast);
|
||||
/// Do not proceed if index is useless for this query.
|
||||
if (useless)
|
||||
ASTPtr ast_filter_node = buildFilterNode(query_info.query);
|
||||
if (!ast_filter_node)
|
||||
return;
|
||||
|
||||
/// Replace logical functions with bit functions.
|
||||
/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
|
||||
traverseAST(expression_ast);
|
||||
if (context->getSettingsRef().allow_experimental_analyzer)
|
||||
{
|
||||
if (!query_info.filter_actions_dag)
|
||||
return;
|
||||
|
||||
auto syntax_analyzer_result = TreeRewriter(context).analyze(
|
||||
expression_ast, index_sample_block.getNamesAndTypesList());
|
||||
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
|
||||
if (checkDAGUseless(*query_info.filter_actions_dag->getOutputs().at(0), context))
|
||||
return;
|
||||
|
||||
const auto * filter_node = query_info.filter_actions_dag->getOutputs().at(0);
|
||||
auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG({filter_node}, {}, context);
|
||||
const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0);
|
||||
|
||||
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> node_to_result_node;
|
||||
filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node);
|
||||
|
||||
filter_actions_dag->removeUnusedActions();
|
||||
actions = std::make_shared<ExpressionActions>(filter_actions_dag);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (checkASTUseless(ast_filter_node))
|
||||
return;
|
||||
|
||||
auto expression_ast = ast_filter_node->clone();
|
||||
auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList());
|
||||
|
||||
/// Replace logical functions with bit functions.
|
||||
/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
|
||||
traverseAST(expression_ast);
|
||||
|
||||
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
|
||||
}
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const
|
||||
{
|
||||
return useless;
|
||||
return isUseless();
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
|
||||
{
|
||||
if (useless)
|
||||
if (isUseless())
|
||||
return true;
|
||||
|
||||
auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleSet>(idx_granule);
|
||||
if (!granule)
|
||||
throw Exception(
|
||||
"Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Set index condition got a granule with the wrong type");
|
||||
|
||||
if (useless || granule->empty() || (max_rows != 0 && granule->size() > max_rows))
|
||||
if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows))
|
||||
return true;
|
||||
|
||||
Block result = granule->block;
|
||||
actions->execute(result);
|
||||
|
||||
auto column
|
||||
= result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
|
||||
const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name;
|
||||
auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
|
||||
|
||||
if (column->onlyNull())
|
||||
return false;
|
||||
@ -318,17 +333,214 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
||||
}
|
||||
|
||||
if (!col_uint8)
|
||||
throw Exception("ColumnUInt8 expected as Set index condition result.", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"ColumnUInt8 expected as Set index condition result");
|
||||
|
||||
const auto & condition = col_uint8->getData();
|
||||
size_t column_size = column->size();
|
||||
|
||||
for (size_t i = 0; i < column->size(); ++i)
|
||||
for (size_t i = 0; i < column_size; ++i)
|
||||
if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Rewrites `node` into `result_dag` and returns the rewritten node.
  *
  * The rewrite is tried in this order:
  *  1. logical operator (see operatorFromDAG);
  *  2. atom (see atomFromDAG); atoms that come back as INPUT or FUNCTION nodes are
  *     additionally wrapped into `__bitWrapperFunc`;
  *  3. otherwise a constant UInt8 column holding UNKNOWN_FIELD is added instead of the node.
  *
  * Results are remembered in `node_to_result_node`, so a node that was already rewritten
  * is returned from the map without being processed again.
  */
const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node,
    ActionsDAGPtr & result_dag,
    const ContextPtr & context,
    std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const
{
    if (auto cached_it = node_to_result_node.find(&node); cached_it != node_to_result_node.end())
        return *cached_it->second;

    const ActionsDAG::Node * rewritten = operatorFromDAG(node, result_dag, context, node_to_result_node);

    if (!rewritten)
    {
        rewritten = atomFromDAG(node, result_dag, context);

        if (rewritten)
        {
            const bool needs_bit_wrapper = rewritten->type == ActionsDAG::ActionType::INPUT
                || rewritten->type == ActionsDAG::ActionType::FUNCTION;

            if (needs_bit_wrapper)
            {
                auto bit_wrapper_function = FunctionFactory::instance().get("__bitWrapperFunc", context);
                rewritten = &result_dag->addFunction(bit_wrapper_function, {rewritten}, {});
            }
        }
        else
        {
            /// Neither an operator nor an atom: substitute the "unknown" constant.
            ColumnWithTypeAndName unknown_column;
            unknown_column.type = std::make_shared<DataTypeUInt8>();
            unknown_column.column = unknown_column.type->createColumnConst(1, UNKNOWN_FIELD);
            unknown_column.name = calculateConstantActionNodeName(UNKNOWN_FIELD);

            rewritten = &result_dag->addColumn(unknown_column);
        }
    }

    node_to_result_node.emplace(&node, rewritten);
    return *rewritten;
}
|
||||
|
||||
/** Tries to express `node` as an atom: a function, a literal or a column.
  *
  *  - A node that (after skipping aliases) carries a constant column is returned as is.
  *  - A node whose column name is one of the index key columns is returned as is when it is
  *    an INPUT node; otherwise a new input with that name is added to `result_dag`.
  *  - A FUNCTION node is rebuilt in `result_dag` from its recursively converted arguments.
  *
  * Returns nullptr if the node is none of the above, or if any function argument
  * could not be converted.
  */
const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context) const
{
    /// Skip any chain of aliases to reach the underlying node.
    const ActionsDAG::Node * unaliased = &node;
    while (unaliased->type == ActionsDAG::ActionType::ALIAS)
        unaliased = unaliased->children[0];

    if (unaliased->column && isColumnConst(*unaliased->column))
        return &node;

    RPNBuilderTreeContext tree_context(context);
    RPNBuilderTreeNode tree_node(unaliased, tree_context);
    const auto column_name = tree_node.getColumnName();

    if (key_columns.contains(column_name))
    {
        if (node.type == ActionsDAG::ActionType::INPUT)
            return unaliased;

        return &result_dag->addInput(column_name, node.result_type);
    }

    if (node.type != ActionsDAG::ActionType::FUNCTION)
        return nullptr;

    ActionsDAG::NodeRawConstPtrs converted_arguments;
    converted_arguments.reserve(node.children.size());

    for (const auto * argument : node.children)
    {
        const auto * converted = atomFromDAG(*argument, result_dag, context);

        /// A single argument that cannot be converted makes the whole function unusable.
        if (!converted)
            return nullptr;

        converted_arguments.push_back(converted);
    }

    return &result_dag->addFunction(node.function_builder, converted_arguments, {});
}
|
||||
|
||||
/** Tries to rewrite a logical operator node into its bit-mask counterpart inside `result_dag`:
  * `not` -> `__bitSwapLastTwo`, `and` / `indexHint` -> `__bitBoolMaskAnd`, `or` -> `__bitBoolMaskOr`.
  *
  * Aliases are skipped before the node is inspected. Returns nullptr when the node carries
  * a constant column, is not a function, is some other function, or has an unexpected number of
  * arguments (`not` needs exactly one, the others need at least two).
  *
  * Every argument is first rewritten with traverseDAG, so the bit functions only ever receive
  * operands that went through the same rewrite (operators, wrapped atoms or the unknown constant).
  */
const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node,
    ActionsDAGPtr & result_dag,
    const ContextPtr & context,
    std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const
{
    /// Functions AND, OR, NOT. Replace with bit*.

    const auto * node_to_check = &node;
    while (node_to_check->type == ActionsDAG::ActionType::ALIAS)
        node_to_check = node_to_check->children[0];

    if (node_to_check->column && isColumnConst(*node_to_check->column))
        return nullptr;

    if (node_to_check->type != ActionsDAG::ActionType::FUNCTION)
        return nullptr;

    auto function_name = node_to_check->function->getName();
    const auto & arguments = node_to_check->children;
    size_t arguments_size = arguments.size();

    if (function_name == "not")
    {
        if (arguments_size != 1)
            return nullptr;

        /// The operand has to be rewritten exactly like the operands of `and` / `or`.
        /// Passing the original argument through would apply the bit swap to a value that
        /// was never wrapped by traverseDAG.
        const ActionsDAG::Node * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node);

        auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context);
        return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {});
    }

    const bool is_and = function_name == "and" || function_name == "indexHint";
    const bool is_or = function_name == "or";

    if (!is_and && !is_or)
        return nullptr;

    if (arguments_size < 2)
        return nullptr;

    ActionsDAG::NodeRawConstPtrs children;
    children.reserve(arguments_size);

    for (const auto * argument : arguments)
        children.push_back(&traverseDAG(*argument, result_dag, context, node_to_result_node));

    FunctionOverloadResolverPtr function
        = FunctionFactory::instance().get(is_and ? "__bitBoolMaskAnd" : "__bitBoolMaskOr", context);

    /// The replacement function is applied pair-wise, folding from the right:
    /// f(c0, f(c1, ... f(c[n-2], c[n-1]))).
    const auto * folded = children.back();
    for (size_t i = arguments_size - 1; i > 0; --i)
        folded = &result_dag->addFunction(function, {children[i - 1], folded}, {});

    return folded;
}
|
||||
|
||||
/** Returns true if the condition rooted at `node` is useless for this index.
  *
  *  - Constant: useless if it evaluates to true, but never useless in `atomic` mode.
  *  - Function whose column name is a key column: not useless.
  *  - `and` / `indexHint`: useless only if all arguments are useless.
  *  - `or`: useless if any argument is useless.
  *  - `not`: same as its first argument.
  *  - Any other function: useless if any argument is useless, checked in `atomic` mode.
  *  - Anything else: useless unless its column name is a key column.
  *
  * The column name is taken from the node reached after skipping aliases.
  */
bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic) const
{
    const auto * unaliased = &node;
    while (unaliased->type == ActionsDAG::ActionType::ALIAS)
        unaliased = unaliased->children[0];

    RPNBuilderTreeContext tree_context(context);
    RPNBuilderTreeNode tree_node(unaliased, tree_context);

    if (node.column && isColumnConst(*node.column))
    {
        Field value;
        node.column->get(0, value);

        if (atomic)
            return false;

        return value.safeGet<bool>();
    }

    const auto column_name = tree_node.getColumnName();
    const bool is_key_column = key_columns.contains(column_name);

    if (node.type != ActionsDAG::ActionType::FUNCTION)
        return !is_key_column;

    if (is_key_column)
        return false;

    const auto function_name = node.function_builder->getName();
    const auto & arguments = node.children;

    const auto is_useless_argument = [&](const ActionsDAG::Node * argument, bool argument_atomic)
    {
        return checkDAGUseless(*argument, context, argument_atomic);
    };

    if (function_name == "and" || function_name == "indexHint")
        return std::all_of(arguments.begin(), arguments.end(),
            [&](const auto * argument) { return is_useless_argument(argument, atomic); });

    if (function_name == "or")
        return std::any_of(arguments.begin(), arguments.end(),
            [&](const auto * argument) { return is_useless_argument(argument, atomic); });

    if (function_name == "not")
        return is_useless_argument(arguments.at(0), atomic);

    /// Arguments of an ordinary function are atoms.
    return std::any_of(arguments.begin(), arguments.end(),
        [&](const auto * argument) { return is_useless_argument(argument, true /*atomic*/); });
}
|
||||
|
||||
void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const
|
||||
{
|
||||
if (operatorFromAST(node))
|
||||
@ -465,7 +677,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi
|
||||
else if (const auto * literal = node->as<ASTLiteral>())
|
||||
return !atomic && literal->value.safeGet<bool>();
|
||||
else if (const auto * identifier = node->as<ASTIdentifier>())
|
||||
return key_columns.find(identifier->getColumnName()) == std::end(key_columns);
|
||||
return !key_columns.contains(identifier->getColumnName());
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
@ -84,9 +84,9 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition
|
||||
public:
|
||||
MergeTreeIndexConditionSet(
|
||||
const String & index_name_,
|
||||
const Block & index_sample_block_,
|
||||
const Block & index_sample_block,
|
||||
size_t max_rows_,
|
||||
const SelectQueryInfo & query,
|
||||
const SelectQueryInfo & query_info,
|
||||
ContextPtr context);
|
||||
|
||||
bool alwaysUnknownOrTrue() const override;
|
||||
@ -95,20 +95,39 @@ public:
|
||||
|
||||
~MergeTreeIndexConditionSet() override = default;
|
||||
private:
|
||||
const ActionsDAG::Node & traverseDAG(const ActionsDAG::Node & node,
|
||||
ActionsDAGPtr & result_dag,
|
||||
const ContextPtr & context,
|
||||
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const;
|
||||
|
||||
const ActionsDAG::Node * atomFromDAG(const ActionsDAG::Node & node,
|
||||
ActionsDAGPtr & result_dag,
|
||||
const ContextPtr & context) const;
|
||||
|
||||
const ActionsDAG::Node * operatorFromDAG(const ActionsDAG::Node & node,
|
||||
ActionsDAGPtr & result_dag,
|
||||
const ContextPtr & context,
|
||||
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const;
|
||||
|
||||
bool checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic = false) const;
|
||||
|
||||
void traverseAST(ASTPtr & node) const;
|
||||
|
||||
bool atomFromAST(ASTPtr & node) const;
|
||||
|
||||
static bool operatorFromAST(ASTPtr & node);
|
||||
|
||||
bool checkASTUseless(const ASTPtr & node, bool atomic = false) const;
|
||||
|
||||
|
||||
String index_name;
|
||||
size_t max_rows;
|
||||
Block index_sample_block;
|
||||
|
||||
bool useless;
|
||||
std::set<String> key_columns;
|
||||
ASTPtr expression_ast;
|
||||
bool isUseless() const
|
||||
{
|
||||
return actions == nullptr;
|
||||
}
|
||||
|
||||
std::unordered_set<String> key_columns;
|
||||
ExpressionActionsPtr actions;
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
1
|
||||
1
|
18
tests/queries/0_stateless/02499_analyzer_set_index.sql
Normal file
18
tests/queries/0_stateless/02499_analyzer_set_index.sql
Normal file
@ -0,0 +1,18 @@
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
DROP TABLE IF EXISTS test_table;
|
||||
CREATE TABLE test_table
|
||||
(
|
||||
id UInt64,
|
||||
value String,
|
||||
INDEX value_idx (value) TYPE set(1000) GRANULARITY 1
|
||||
) ENGINE=MergeTree ORDER BY id;
|
||||
|
||||
INSERT INTO test_table SELECT number, toString(number) FROM numbers(10);
|
||||
|
||||
SELECT count() FROM test_table WHERE value = '1' SETTINGS force_data_skipping_indices = 'value_idx';
|
||||
|
||||
SELECT count() FROM test_table AS t1 INNER JOIN (SELECT number AS id FROM numbers(10)) AS t2 ON t1.id = t2.id
|
||||
WHERE t1.value = '1' SETTINGS force_data_skipping_indices = 'value_idx';
|
||||
|
||||
DROP TABLE test_table;
|
Loading…
Reference in New Issue
Block a user