Revert "Revert "Support optimize_or_like_chain in QueryTreePassManager""

This commit is contained in:
Dmitry Novik 2023-01-18 18:14:03 +01:00 committed by GitHub
parent 821c55463f
commit df26f4fc37
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 299 additions and 41 deletions

View File

@ -145,6 +145,11 @@ public:
*/
void resolveAsFunction(FunctionBasePtr function_value);
/// Resolve function node through an overload resolver: the resolver builds the
/// function from this node's argument columns and the result is passed on to
/// resolveAsFunction(FunctionBasePtr).
void resolveAsFunction(const FunctionOverloadResolverPtr & resolver)
{
    auto built_function = resolver->build(getArgumentColumns());
    resolveAsFunction(built_function);
}
/** Resolve function node as aggregate function.
* It is important that function name is updated with resolved function name.
* Main motivation for this is query tree optimizations.

View File

@ -1,8 +1,13 @@
#pragma once
#include <optional>
#include <utility>
#include <Common/SettingsChanges.h>
#include <Common/Exception.h>
#include <Core/Settings.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/QueryNode.h>
namespace DB

View File

@ -16,6 +16,8 @@ using ListNodePtr = std::shared_ptr<ListNode>;
class ListNode final : public IQueryTreeNode
{
public:
/// Iterator over the nodes of the list; same type as the iterator of QueryTreeNodes.
using iterator = QueryTreeNodes::iterator;
/// Initialize list node with empty nodes
ListNode();
@ -41,6 +43,9 @@ public:
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
/// Iterator to the first child node, which makes the list usable in range-based for loops.
iterator begin() { return children.begin(); }
/// Iterator past the last child node.
iterator end() { return children.end(); }
protected:
bool isEqualImpl(const IQueryTreeNode & rhs) const override;

View File

@ -0,0 +1,131 @@
#include <memory>
#include <unordered_map>
#include <vector>
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/UnionNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/HashUtils.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Core/Field.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/likePatternToRegexp.h>
#include <Interpreters/Context.h>
namespace DB
{
namespace
{

/** Visitor that rewrites arguments of `or` function nodes.
  *
  * Every argument of the form `expr LIKE 'pattern'` or `expr ILIKE 'pattern'`
  * (exactly two arguments, the second one a String constant) is removed from the `or`.
  * For every distinct `expr` a single `multiMatchAny(expr, [regexp, ...])` node is added instead,
  * where each regexp is produced by likePatternToRegexp (prefixed with "(?i)" for ILIKE).
  * All other arguments of the `or` are kept as is.
  */
class ConvertOrLikeChainVisitor : public InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>
{
    using FunctionNodes = std::vector<std::shared_ptr<FunctionNode>>;

    /// Resolvers are created once by the pass and reused for every rewritten node.
    const FunctionOverloadResolverPtr or_function_ref;
    const FunctionOverloadResolverPtr match_function_ref;

public:
    /** @param _or_function_ref    resolver used to re-resolve the `or` node after its arguments were replaced.
      * @param _match_function_ref resolver used to resolve the newly created `multiMatchAny` nodes.
      */
    ConvertOrLikeChainVisitor(FunctionOverloadResolverPtr _or_function_ref, FunctionOverloadResolverPtr _match_function_ref)
        : InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>()
        , or_function_ref(std::move(_or_function_ref))
        , match_function_ref(std::move(_match_function_ref))
    {}

    /** Decide whether children of `parent` have to be visited.
      *
      * If `parent` is a query or union node that has a context, its children are visited only when
      * that context's settings have optimize_or_like_chain and allow_hyperscan enabled and both
      * max_hyperscan_regexp_length and max_hyperscan_regexp_total_length equal to 0.
      * NOTE(review): presumably 0 means "no limit" for the two length settings - confirm against the settings documentation.
      * For any other parent (or a parent without context) children are always visited.
      */
    static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType &)
    {
        ContextPtr context;
        if (auto * query = parent->as<QueryNode>())
            context = query->getContext();
        else if (auto * union_node = parent->as<UnionNode>())
            context = union_node->getContext();

        if (context)
        {
            const auto & settings = context->getSettingsRef();
            return settings.optimize_or_like_chain
                && settings.allow_hyperscan
                && settings.max_hyperscan_regexp_length == 0
                && settings.max_hyperscan_regexp_total_length == 0;
        }

        return true;
    }

    /// Rewrite `node` in place if it is an `or` function with at least one suitable {i}like argument.
    void visitImpl(QueryTreeNodePtr & node)
    {
        auto * function_node = node->as<FunctionNode>();
        if (!function_node || function_node->getFunctionName() != "or")
            return;

        /// New argument list of the `or`: untouched arguments plus one multiMatchAny per distinct expression.
        QueryTreeNodes unique_elems;
        unique_elems.reserve(function_node->getArguments().getNodes().size());

        /// Patterns collected for every distinct first argument of {i}like.
        QueryTreeNodePtrWithHashMap<Array> node_to_patterns;
        FunctionNodes match_functions;

        for (auto & arg : function_node->getArguments())
        {
            /// Keep the argument by default; it is removed again below only if it can be folded.
            unique_elems.push_back(arg);

            auto * arg_func = arg->as<FunctionNode>();
            if (!arg_func)
                continue;

            const auto & arg_func_name = arg_func->getFunctionName();
            const bool is_like = arg_func_name == "like";
            const bool is_ilike = arg_func_name == "ilike";

            /// Not {i}like -> bail out.
            if (!is_like && !is_ilike)
                continue;

            const auto & like_arguments = arg_func->getArguments().getNodes();
            if (like_arguments.size() != 2)
                continue;

            /// The reference stays valid: the {i}like node is still owned by the original argument list.
            const auto & identifier = like_arguments[0];
            auto * pattern = like_arguments[1]->as<ConstantNode>();
            if (!pattern || !isString(pattern->getResultType()))
                continue;

            auto regexp = likePatternToRegexp(pattern->getValue().get<String>());
            /// Case insensitive. Works with UTF-8 as well.
            if (is_ilike)
                regexp = "(?i)" + regexp;

            /// The {i}like argument is folded into multiMatchAny, so drop it from the new argument list.
            unique_elems.pop_back();

            auto it = node_to_patterns.find(identifier);
            if (it == node_to_patterns.end())
            {
                it = node_to_patterns.insert({identifier, Array{}}).first;

                /// The second argument will be added when all patterns are known.
                auto match_function = std::make_shared<FunctionNode>("multiMatchAny");
                match_function->getArguments().getNodes().push_back(identifier);
                match_functions.push_back(match_function);

                unique_elems.push_back(std::move(match_function));
            }

            it->second.push_back(regexp);
        }

        /// Nothing was folded: the argument list is unchanged, so leave the node untouched.
        if (match_functions.empty())
            return;

        /// Add all the patterns into the function arguments lists.
        for (auto & match_function : match_functions)
        {
            auto & arguments = match_function->getArguments().getNodes();
            auto & patterns = node_to_patterns.at(arguments[0]);
            arguments.push_back(std::make_shared<ConstantNode>(Field{std::move(patterns)}));
            match_function->resolveAsFunction(match_function_ref);
        }

        /// OR must have at least two arguments.
        if (unique_elems.size() == 1)
            unique_elems.push_back(std::make_shared<ConstantNode>(false));

        function_node->getArguments().getNodes() = std::move(unique_elems);

        /// The `or` node was resolved for the old argument list; resolve it again
        /// so that the stored function matches the new arguments.
        function_node->resolveAsFunction(or_function_ref);
    }
};

}

/// Runs the visitor over the whole query tree.
/// Both function resolvers are obtained once from the context passed to the pass.
void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
    auto or_function_resolver = FunctionFactory::instance().get("or", context);
    auto match_function_resolver = FunctionFactory::instance().get("multiMatchAny", context);

    ConvertOrLikeChainVisitor visitor(std::move(or_function_resolver), std::move(match_function_resolver));
    visitor.visit(query_tree_node);
}
}

View File

@ -0,0 +1,20 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Replaces all the "or"'s with {i}like to multiMatchAny
*/
/** Query tree pass that replaces {i}like arguments of `or` functions with multiMatchAny.
  *
  * Example: (s LIKE 'hell%') OR (s ILIKE 'world%')
  * becomes: multiMatchAny(s, ['^hell', '(?i)^world']) OR false
  */
class ConvertOrLikeChainPass final : public IQueryTreePass
{
public:
    /// Name of the pass.
    String getName() override
    {
        return "ConvertOrLikeChain";
    }

    /// Description of what the pass does.
    String getDescription() override
    {
        return "Replaces all the 'or's with {i}like to multiMatchAny";
    }

    /// Apply the pass to the given query tree.
    void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}

View File

@ -17,6 +17,7 @@
#include <Parsers/ASTSetQuery.h>
#include <Analyzer/Utils.h>
#include <fmt/core.h>
namespace DB
{
@ -179,6 +180,16 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
buffer << '\n' << std::string(indent + 2, ' ') << "OFFSET\n";
getOffset()->dumpTreeImpl(buffer, format_state, indent + 4);
}
/// Dump the settings changes of the query as one line: "SETTINGS name=value name=value ...".
if (hasSettingsChanges())
{
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
for (const auto & change : settings_changes)
{
/// Each change is written as " name=value"; the value is converted with toString.
buffer << fmt::format(" {}={}", change.name, toString(change.value));
}
buffer << '\n';
}
}
bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const

View File

@ -1,3 +1,4 @@
#include <memory>
#include <Analyzer/QueryTreePassManager.h>
#include <Common/Exception.h>
@ -29,6 +30,7 @@
#include <Analyzer/Passes/FuseFunctionsPass.h>
#include <Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h>
#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
namespace DB
@ -254,6 +256,8 @@ void addQueryTreePasses(QueryTreePassManager & manager)
if (settings.optimize_if_transform_strings_to_enum)
manager.addPass(std::make_unique<IfTransformStringsToEnumPass>());
/// Added unconditionally: the pass itself checks optimize_or_like_chain and the hyperscan settings
/// of every query / union node before visiting its children.
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
}
}

View File

@ -0,0 +1,109 @@
SELECT materialize(\'Привет, World\') AS s
WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 0
QUERY id: 0
PROJECTION COLUMNS
s String
PROJECTION
LIST id: 1, nodes: 1
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
JOIN TREE
TABLE id: 5, table_name: system.one
WHERE
FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 7, nodes: 3
FUNCTION id: 8, function_name: like, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 9, nodes: 2
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
CONSTANT id: 10, constant_value: \'hell%\', constant_value_type: String
FUNCTION id: 11, function_name: ilike, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 12, nodes: 2
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
CONSTANT id: 13, constant_value: \'%привет%\', constant_value_type: String
FUNCTION id: 14, function_name: ilike, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 15, nodes: 2
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
CONSTANT id: 16, constant_value: \'world%\', constant_value_type: String
SETTINGS optimize_or_like_chain=0 allow_experimental_analyzer=1
SELECT materialize(\'Привет, World\') AS s
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
SETTINGS optimize_or_like_chain = 1
QUERY id: 0
PROJECTION COLUMNS
s String
PROJECTION
LIST id: 1, nodes: 1
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
JOIN TREE
TABLE id: 5, table_name: system.one
WHERE
FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 7, nodes: 2
FUNCTION id: 8, function_name: multiMatchAny, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 9, nodes: 2
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
ARGUMENTS
LIST id: 3, nodes: 1
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
CONSTANT id: 10, constant_value: Array_[\'^hell\', \'(?i)привет\', \'(?i)^world\'], constant_value_type: Array(String)
CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: Bool
SETTINGS optimize_or_like_chain=1 allow_experimental_analyzer=1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\'])
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\')
SETTINGS optimize_or_like_chain = 1
Привет, optimized World
Привет, optimized World
Привет, World
Привет, World
Привет, optimized World
Привет, optimized World
Привет, World
Привет, World
SELECT
(materialize(\'Привет, World\') AS s) LIKE \'hell%\' AS test,
s
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
SETTINGS optimize_or_like_chain = 1

View File

@ -1,6 +1,7 @@
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0;
EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS allow_hyperscan = 0;
@ -10,9 +11,16 @@ EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('П
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1;
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1;
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
-- Aliases

View File

@ -1,40 +0,0 @@
SELECT materialize(\'Привет, World\') AS s
WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 0
SELECT materialize(\'Привет, World\') AS s
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\'])
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\')
SETTINGS optimize_or_like_chain = 1
Привет, optimized World
Привет, World
Привет, optimized World
Привет, World
SELECT
(materialize(\'Привет, World\') AS s) LIKE \'hell%\' AS test,
s
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
SETTINGS optimize_or_like_chain = 1