Analyzer support Set index

This commit is contained in:
Maksim Kita 2022-12-09 18:15:39 +01:00
parent b2cce09004
commit ddad879cb4
4 changed files with 292 additions and 41 deletions

View File

@ -9,6 +9,10 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Functions/FunctionFactory.h>
#include <Planner/PlannerActionsVisitor.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
namespace DB
{
@ -242,67 +246,78 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorSet::getGranuleAndReset()
/// Builds the set index condition for a query.
/// Collects the index sample block column names into `key_columns`, then tries to build `actions`
/// from the query filter. Every early `return` below leaves `actions` unset.
/// NOTE(review): this span comes from a rendered diff; removed and added lines appear interleaved
/// without +/- markers (e.g. both `query` and `query_info` parameters) - confirm against the real file.
MergeTreeIndexConditionSet::MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block_,
const Block & index_sample_block,
size_t max_rows_,
const SelectQueryInfo & query,
const SelectQueryInfo & query_info,
ContextPtr context)
: index_name(index_name_)
, max_rows(max_rows_)
, index_sample_block(index_sample_block_)
{
for (const auto & name : index_sample_block.getNames())
if (!key_columns.contains(name))
key_columns.insert(name);
const auto & select = query.query->as<ASTSelectQuery &>();
if (select.where() && select.prewhere())
expression_ast = makeASTFunction(
"and",
select.where()->clone(),
select.prewhere()->clone());
else if (select.where())
expression_ast = select.where()->clone();
else if (select.prewhere())
expression_ast = select.prewhere()->clone();
useless = checkASTUseless(expression_ast);
/// Do not proceed if index is useless for this query.
if (useless)
ASTPtr ast_filter_node = buildFilterNode(query_info.query);
if (!ast_filter_node)
return;
/// Replace logical functions with bit functions.
/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
traverseAST(expression_ast);
/// With the `allow_experimental_analyzer` setting the condition is derived from
/// `query_info.filter_actions_dag`; otherwise it is derived from the filter AST.
if (context->getSettingsRef().allow_experimental_analyzer)
{
if (!query_info.filter_actions_dag)
return;
auto syntax_analyzer_result = TreeRewriter(context).analyze(
expression_ast, index_sample_block.getNamesAndTypesList());
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
/// Only the first output of the filter DAG is inspected.
if (checkDAGUseless(*query_info.filter_actions_dag->getOutputs().at(0), context))
return;
/// Build a separate filter DAG from that output, replace its first output with the node
/// returned by traverseDAG, and drop the actions that are no longer referenced.
const auto * filter_node = query_info.filter_actions_dag->getOutputs().at(0);
auto filter_actions_dag = ActionsDAG::buildFilterActionsDAG({filter_node}, {}, context);
const auto * filter_actions_dag_node = filter_actions_dag->getOutputs().at(0);
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> node_to_result_node;
filter_actions_dag->getOutputs()[0] = &traverseDAG(*filter_actions_dag_node, filter_actions_dag, context, node_to_result_node);
filter_actions_dag->removeUnusedActions();
actions = std::make_shared<ExpressionActions>(filter_actions_dag);
}
else
{
if (checkASTUseless(ast_filter_node))
return;
/// Work on a clone so that the filter AST obtained above is not modified by traverseAST.
auto expression_ast = ast_filter_node->clone();
auto syntax_analyzer_result = TreeRewriter(context).analyze(expression_ast, index_sample_block.getNamesAndTypesList());
/// Replace logical functions with bit functions.
/// Working with UInt8: last bit = can be true, previous = can be false (Like src/Storages/MergeTree/BoolMask.h).
traverseAST(expression_ast);
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
}
}
/// Returns the uselessness flag of this condition. isUseless() holds exactly when `actions` is null.
/// NOTE(review): two return statements are visible because this span is a rendered diff with the
/// removed and the added line interleaved - confirm which one the real file keeps.
bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const
{
return useless;
return isUseless();
}
bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
{
if (useless)
if (isUseless())
return true;
auto granule = std::dynamic_pointer_cast<MergeTreeIndexGranuleSet>(idx_granule);
if (!granule)
throw Exception(
"Set index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Set index condition got a granule with the wrong type");
if (useless || granule->empty() || (max_rows != 0 && granule->size() > max_rows))
if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows))
return true;
Block result = granule->block;
actions->execute(result);
auto column
= result.getByName(expression_ast->getColumnName()).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name;
auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
if (column->onlyNull())
return false;
@ -318,17 +333,214 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
}
if (!col_uint8)
throw Exception("ColumnUInt8 expected as Set index condition result.", ErrorCodes::LOGICAL_ERROR);
throw Exception(ErrorCodes::LOGICAL_ERROR,
"ColumnUInt8 expected as Set index condition result");
const auto & condition = col_uint8->getData();
size_t column_size = column->size();
for (size_t i = 0; i < column->size(); ++i)
for (size_t i = 0; i < column_size; ++i)
if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1)
return true;
return false;
}
/// Converts `node` into the node that represents it in `result_dag` and returns that node.
///
/// Conversion order:
///  1. a logical operator recognised by operatorFromDAG;
///  2. an atom recognised by atomFromDAG (INPUT and FUNCTION atoms are additionally wrapped
///     into `__bitWrapperFunc`);
///  3. otherwise a constant UInt8 column holding UNKNOWN_FIELD.
///
/// Converted nodes are remembered in `node_to_result_node`, so a source node is converted once.
const ActionsDAG::Node & MergeTreeIndexConditionSet::traverseDAG(const ActionsDAG::Node & node,
    ActionsDAGPtr & result_dag,
    const ContextPtr & context,
    std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const
{
    if (auto cached = node_to_result_node.find(&node); cached != node_to_result_node.end())
        return *cached->second;

    const ActionsDAG::Node * converted = operatorFromDAG(node, result_dag, context, node_to_result_node);

    if (!converted)
    {
        if (const auto * atom = atomFromDAG(node, result_dag, context))
        {
            const bool needs_bit_wrapper = atom->type == ActionsDAG::ActionType::INPUT
                || atom->type == ActionsDAG::ActionType::FUNCTION;

            if (needs_bit_wrapper)
            {
                auto wrapper = FunctionFactory::instance().get("__bitWrapperFunc", context);
                converted = &result_dag->addFunction(wrapper, {atom}, {});
            }
            else
            {
                converted = atom;
            }
        }
        else
        {
            /// Neither an operator nor an atom: substitute the UNKNOWN_FIELD constant.
            auto unknown_type = std::make_shared<DataTypeUInt8>();

            ColumnWithTypeAndName unknown_column;
            unknown_column.name = calculateConstantActionNodeName(UNKNOWN_FIELD);
            unknown_column.type = unknown_type;
            unknown_column.column = unknown_type->createColumnConst(1, UNKNOWN_FIELD);

            converted = &result_dag->addColumn(unknown_column);
        }
    }

    node_to_result_node.emplace(&node, converted);
    return *converted;
}
/// Tries to represent `node` as an atom of the index condition: a constant, an index key column,
/// or a function whose arguments are all atoms themselves.
///
/// Aliases are looked through first, and every later decision (node type, children, function,
/// result type) is taken from the aliased node. Previously the alias node itself was inspected
/// after the key-column lookup, so an aliased function was always rejected and an aliased
/// key-column input produced an extra input with the same name.
///
/// Returns the node to use inside `result_dag`, or nullptr when `node` is not an atom.
const ActionsDAG::Node * MergeTreeIndexConditionSet::atomFromDAG(const ActionsDAG::Node & node, ActionsDAGPtr & result_dag, const ContextPtr & context) const
{
    /// Function, literal or column

    const auto * node_to_check = &node;
    while (node_to_check->type == ActionsDAG::ActionType::ALIAS)
        node_to_check = node_to_check->children[0];

    /// Constants are used as is.
    if (node_to_check->column && isColumnConst(*node_to_check->column))
        return &node;

    RPNBuilderTreeContext tree_context(context);
    RPNBuilderTreeNode tree_node(node_to_check, tree_context);

    auto column_name = tree_node.getColumnName();
    if (key_columns.contains(column_name))
    {
        /// An input named like a key column is returned as is; any other node with that name
        /// is replaced by an input of the same name and type.
        if (node_to_check->type == ActionsDAG::ActionType::INPUT)
            return node_to_check;

        return &result_dag->addInput(column_name, node_to_check->result_type);
    }

    if (node_to_check->type != ActionsDAG::ActionType::FUNCTION)
        return nullptr;

    const auto & arguments = node_to_check->children;
    size_t arguments_size = arguments.size();

    ActionsDAG::NodeRawConstPtrs children(arguments_size);

    for (size_t i = 0; i < arguments_size; ++i)
    {
        children[i] = atomFromDAG(*arguments[i], result_dag, context);

        /// A single non-atom argument makes the whole function a non-atom.
        if (!children[i])
            return nullptr;
    }

    return &result_dag->addFunction(node_to_check->function_builder, children, {});
}
/// Rewrites a logical operator node into its bool-mask counterpart inside `result_dag`:
///   not             -> __bitSwapLastTwo
///   and, indexHint  -> __bitBoolMaskAnd
///   or              -> __bitBoolMaskOr
/// The masks are UInt8 values: last bit = can be true, previous = can be false.
///
/// Every operand is converted through traverseDAG before the bit function is applied. This now
/// also holds for `not`, which previously passed its argument through unconverted, so the swap
/// was applied to a value that is not a bool mask.
///
/// Returns nullptr when `node` (after looking through aliases) is a constant, is not a function,
/// is not one of the functions above, or has an unexpected number of arguments.
const ActionsDAG::Node * MergeTreeIndexConditionSet::operatorFromDAG(const ActionsDAG::Node & node,
    ActionsDAGPtr & result_dag,
    const ContextPtr & context,
    std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const
{
    /// Functions AND, OR, NOT. Replace with bit*.

    const auto * node_to_check = &node;
    while (node_to_check->type == ActionsDAG::ActionType::ALIAS)
        node_to_check = node_to_check->children[0];

    if (node_to_check->column && isColumnConst(*node_to_check->column))
        return nullptr;

    if (node_to_check->type != ActionsDAG::ActionType::FUNCTION)
        return nullptr;

    auto function_name = node_to_check->function->getName();
    const auto & arguments = node_to_check->children;
    size_t arguments_size = arguments.size();

    if (function_name == "not")
    {
        if (arguments_size != 1)
            return nullptr;

        /// Convert the operand first, exactly as the and/or branch does for its operands.
        const auto * argument = &traverseDAG(*arguments[0], result_dag, context, node_to_result_node);

        auto bit_swap_last_two_function = FunctionFactory::instance().get("__bitSwapLastTwo", context);
        return &result_dag->addFunction(bit_swap_last_two_function, {argument}, {});
    }

    if (function_name == "and" || function_name == "indexHint" || function_name == "or")
    {
        if (arguments_size < 2)
            return nullptr;

        ActionsDAG::NodeRawConstPtrs children(arguments_size);
        for (size_t i = 0; i < arguments_size; ++i)
            children[i] = &traverseDAG(*arguments[i], result_dag, context, node_to_result_node);

        FunctionOverloadResolverPtr function;
        if (function_name == "or")
            function = FunctionFactory::instance().get("__bitBoolMaskOr", context);
        else
            function = FunctionFactory::instance().get("__bitBoolMaskAnd", context);

        /// The bit functions are applied pairwise, so fold the operands from the right:
        /// f(a, f(b, c)) for the operands a, b, c.
        const auto * folded = children.back();
        for (size_t i = arguments_size - 1; i-- > 0;)
            folded = &result_dag->addFunction(function, {children[i], folded}, {});

        return folded;
    }

    return nullptr;
}
/// Returns true when the condition rooted at `node` cannot make use of the index key columns.
///
/// Aliases are looked through first and all checks are made on the aliased node. Previously only
/// the column name was taken from the aliased node while the constant/type/function checks read
/// the alias itself, so an aliased function was never examined.
///
/// Rules:
///  - constant: useless only when not `atomic` and the value converts to true;
///  - function named like a key column: not useless;
///  - and, indexHint: useless when every argument is useless;
///  - or: useless when any argument is useless;
///  - not: same as its argument;
///  - any other function: useless when any argument is useless, checking arguments as atomic;
///  - anything else: useless unless its name is a key column.
bool MergeTreeIndexConditionSet::checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic) const
{
    const auto * node_to_check = &node;
    while (node_to_check->type == ActionsDAG::ActionType::ALIAS)
        node_to_check = node_to_check->children[0];

    RPNBuilderTreeContext tree_context(context);
    RPNBuilderTreeNode tree_node(node_to_check, tree_context);

    if (node_to_check->column && isColumnConst(*node_to_check->column))
    {
        Field literal;
        node_to_check->column->get(0, literal);
        return !atomic && literal.safeGet<bool>();
    }

    auto column_name = tree_node.getColumnName();
    const bool is_key_column = key_columns.contains(column_name);

    if (node_to_check->type != ActionsDAG::ActionType::FUNCTION)
        return !is_key_column;

    if (is_key_column)
        return false;

    auto function_name = node_to_check->function_builder->getName();
    const auto & arguments = node_to_check->children;

    if (function_name == "and" || function_name == "indexHint")
        return std::all_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); });

    if (function_name == "or")
        return std::any_of(arguments.begin(), arguments.end(), [&, atomic](const auto & arg) { return checkDAGUseless(*arg, context, atomic); });

    if (function_name == "not")
        return checkDAGUseless(*arguments.at(0), context, atomic);

    return std::any_of(arguments.begin(), arguments.end(),
        [&](const auto & arg) { return checkDAGUseless(*arg, context, true /*atomic*/); });
}
void MergeTreeIndexConditionSet::traverseAST(ASTPtr & node) const
{
if (operatorFromAST(node))
@ -465,7 +677,7 @@ bool MergeTreeIndexConditionSet::checkASTUseless(const ASTPtr & node, bool atomi
else if (const auto * literal = node->as<ASTLiteral>())
return !atomic && literal->value.safeGet<bool>();
else if (const auto * identifier = node->as<ASTIdentifier>())
return key_columns.find(identifier->getColumnName()) == std::end(key_columns);
return !key_columns.contains(identifier->getColumnName());
else
return true;
}

View File

@ -84,9 +84,9 @@ class MergeTreeIndexConditionSet final : public IMergeTreeIndexCondition
public:
MergeTreeIndexConditionSet(
const String & index_name_,
const Block & index_sample_block_,
const Block & index_sample_block,
size_t max_rows_,
const SelectQueryInfo & query,
const SelectQueryInfo & query_info,
ContextPtr context);
bool alwaysUnknownOrTrue() const override;
@ -95,20 +95,39 @@ public:
~MergeTreeIndexConditionSet() override = default;
private:
/// Converts `node` into a node of `result_dag`: a logical operator (operatorFromDAG), an atom
/// (atomFromDAG, INPUT/FUNCTION atoms wrapped into `__bitWrapperFunc`) or, failing both, the
/// UNKNOWN_FIELD constant. `node_to_result_node` caches the conversions already made.
const ActionsDAG::Node & traverseDAG(const ActionsDAG::Node & node,
ActionsDAGPtr & result_dag,
const ContextPtr & context,
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const;
/// Returns the node to use for a constant, an index key column, or a function whose arguments
/// are all atoms; returns nullptr otherwise.
const ActionsDAG::Node * atomFromDAG(const ActionsDAG::Node & node,
ActionsDAGPtr & result_dag,
const ContextPtr & context) const;
/// Rewrites `not` into `__bitSwapLastTwo`, `and`/`indexHint` into `__bitBoolMaskAnd` and `or`
/// into `__bitBoolMaskOr`; the and/or/indexHint operands are converted through traverseDAG.
/// Returns nullptr for any other node.
const ActionsDAG::Node * operatorFromDAG(const ActionsDAG::Node & node,
ActionsDAGPtr & result_dag,
const ContextPtr & context,
std::unordered_map<const ActionsDAG::Node *, const ActionsDAG::Node *> & node_to_result_node) const;
/// Returns true when the condition rooted at `node` cannot use the index key columns.
/// `atomic` is set for arguments of functions other than and/or/not/indexHint.
bool checkDAGUseless(const ActionsDAG::Node & node, const ContextPtr & context, bool atomic = false) const;
void traverseAST(ASTPtr & node) const;
bool atomFromAST(ASTPtr & node) const;
static bool operatorFromAST(ASTPtr & node);
bool checkASTUseless(const ASTPtr & node, bool atomic = false) const;
String index_name;
size_t max_rows;
Block index_sample_block;
bool useless;
std::set<String> key_columns;
ASTPtr expression_ast;
/// The condition is useless while no `actions` have been set.
bool isUseless() const
{
    return !actions;
}
std::unordered_set<String> key_columns;
ExpressionActionsPtr actions;
};

View File

@ -0,0 +1,2 @@
1
1

View File

@ -0,0 +1,18 @@
-- Regression test: a `set` data skipping index must be usable when the analyzer is enabled.
SET allow_experimental_analyzer = 1;
DROP TABLE IF EXISTS test_table;
-- `value_idx` is a set index over the `value` column.
CREATE TABLE test_table
(
id UInt64,
value String,
INDEX value_idx (value) TYPE set(1000) GRANULARITY 1
) ENGINE=MergeTree ORDER BY id;
INSERT INTO test_table SELECT number, toString(number) FROM numbers(10);
-- Plain filter on the indexed column.
-- NOTE(review): per the ClickHouse settings documentation, force_data_skipping_indices makes the
-- query fail when a listed index cannot be used - confirm.
SELECT count() FROM test_table WHERE value = '1' SETTINGS force_data_skipping_indices = 'value_idx';
-- Same filter written through a table alias (t1.value) in a query with a JOIN.
SELECT count() FROM test_table AS t1 INNER JOIN (SELECT number AS id FROM numbers(10)) AS t2 ON t1.id = t2.id
WHERE t1.value = '1' SETTINGS force_data_skipping_indices = 'value_idx';
DROP TABLE test_table;