mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Revert "Revert "Support optimize_or_like_chain in QueryTreePassManager""
This commit is contained in:
parent
821c55463f
commit
df26f4fc37
@ -145,6 +145,11 @@ public:
|
||||
*/
|
||||
void resolveAsFunction(FunctionBasePtr function_value);
|
||||
|
||||
void resolveAsFunction(const FunctionOverloadResolverPtr & resolver)
|
||||
{
|
||||
resolveAsFunction(resolver->build(getArgumentColumns()));
|
||||
}
|
||||
|
||||
/** Resolve function node as aggregate function.
|
||||
* It is important that function name is updated with resolved function name.
|
||||
* Main motivation for this is query tree optimizations.
|
||||
|
@ -1,8 +1,13 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <Common/SettingsChanges.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Settings.h>
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -16,6 +16,8 @@ using ListNodePtr = std::shared_ptr<ListNode>;
|
||||
class ListNode final : public IQueryTreeNode
|
||||
{
|
||||
public:
|
||||
using iterator = QueryTreeNodes::iterator;
|
||||
|
||||
/// Initialize list node with empty nodes
|
||||
ListNode();
|
||||
|
||||
@ -41,6 +43,9 @@ public:
|
||||
|
||||
void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override;
|
||||
|
||||
/// Iterator to the first child of this node.
iterator begin()
{
    return children.begin();
}

/// Iterator one past the last child of this node.
iterator end()
{
    return children.end();
}
|
||||
|
||||
protected:
|
||||
bool isEqualImpl(const IQueryTreeNode & rhs) const override;
|
||||
|
||||
|
131
src/Analyzer/Passes/ConvertOrLikeChainPass.cpp
Normal file
131
src/Analyzer/Passes/ConvertOrLikeChainPass.cpp
Normal file
@ -0,0 +1,131 @@
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/UnionNode.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/likePatternToRegexp.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class ConvertOrLikeChainVisitor : public InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>
|
||||
{
|
||||
using FunctionNodes = std::vector<std::shared_ptr<FunctionNode>>;
|
||||
|
||||
const FunctionOverloadResolverPtr match_function_ref;
|
||||
public:
|
||||
|
||||
explicit ConvertOrLikeChainVisitor(FunctionOverloadResolverPtr _match_function_ref)
|
||||
: InDepthQueryTreeVisitor<ConvertOrLikeChainVisitor>()
|
||||
, match_function_ref(_match_function_ref)
|
||||
{}
|
||||
|
||||
static bool needChildVisit(VisitQueryTreeNodeType & parent, VisitQueryTreeNodeType &)
|
||||
{
|
||||
ContextPtr context;
|
||||
if (auto * query = parent->as<QueryNode>())
|
||||
context = query->getContext();
|
||||
else if (auto * union_node = parent->as<UnionNode>())
|
||||
context = union_node->getContext();
|
||||
if (context)
|
||||
{
|
||||
const auto & settings = context->getSettingsRef();
|
||||
return settings.optimize_or_like_chain
|
||||
&& settings.allow_hyperscan
|
||||
&& settings.max_hyperscan_regexp_length == 0
|
||||
&& settings.max_hyperscan_regexp_total_length == 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void visitImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
auto * function_node = node->as<FunctionNode>();
|
||||
if (!function_node || function_node->getFunctionName() != "or")
|
||||
return;
|
||||
|
||||
QueryTreeNodes unique_elems;
|
||||
|
||||
QueryTreeNodePtrWithHashMap<Array> node_to_patterns;
|
||||
FunctionNodes match_functions;
|
||||
for (auto & arg : function_node->getArguments())
|
||||
{
|
||||
unique_elems.push_back(arg);
|
||||
|
||||
auto * arg_func = arg->as<FunctionNode>();
|
||||
if (!arg_func)
|
||||
continue;
|
||||
|
||||
const bool is_like = arg_func->getFunctionName() == "like";
|
||||
const bool is_ilike = arg_func->getFunctionName() == "ilike";
|
||||
|
||||
/// Not {i}like -> bail out.
|
||||
if (!is_like && !is_ilike)
|
||||
continue;
|
||||
|
||||
const auto & like_arguments = arg_func->getArguments().getNodes();
|
||||
if (like_arguments.size() != 2)
|
||||
continue;
|
||||
|
||||
auto identifier = like_arguments[0];
|
||||
auto * pattern = like_arguments[1]->as<ConstantNode>();
|
||||
if (!pattern || !isString(pattern->getResultType()))
|
||||
continue;
|
||||
|
||||
auto regexp = likePatternToRegexp(pattern->getValue().get<String>());
|
||||
/// Case insensitive. Works with UTF-8 as well.
|
||||
if (is_ilike)
|
||||
regexp = "(?i)" + regexp;
|
||||
|
||||
unique_elems.pop_back();
|
||||
auto it = node_to_patterns.find(identifier);
|
||||
if (it == node_to_patterns.end())
|
||||
{
|
||||
it = node_to_patterns.insert({identifier, Array{}}).first;
|
||||
/// The second argument will be added when all patterns are known.
|
||||
auto match_function = std::make_shared<FunctionNode>("multiMatchAny");
|
||||
match_function->getArguments().getNodes().push_back(identifier);
|
||||
|
||||
match_functions.push_back(match_function);
|
||||
unique_elems.push_back(std::move(match_function));
|
||||
}
|
||||
it->second.push_back(regexp);
|
||||
}
|
||||
|
||||
/// Add all the patterns into the function arguments lists.
|
||||
for (auto & match_function : match_functions)
|
||||
{
|
||||
auto & arguments = match_function->getArguments().getNodes();
|
||||
auto & patterns = node_to_patterns.at(arguments[0]);
|
||||
arguments.push_back(std::make_shared<ConstantNode>(Field{std::move(patterns)}));
|
||||
match_function->resolveAsFunction(match_function_ref);
|
||||
}
|
||||
|
||||
/// OR must have at least two arguments.
|
||||
if (unique_elems.size() == 1)
|
||||
unique_elems.push_back(std::make_shared<ConstantNode>(false));
|
||||
|
||||
function_node->getArguments().getNodes() = std::move(unique_elems);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void ConvertOrLikeChainPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
ConvertOrLikeChainVisitor visitor(FunctionFactory::instance().get("multiMatchAny", context));
|
||||
visitor.visit(query_tree_node);
|
||||
}
|
||||
|
||||
}
|
20
src/Analyzer/Passes/ConvertOrLikeChainPass.h
Normal file
20
src/Analyzer/Passes/ConvertOrLikeChainPass.h
Normal file
@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreePass.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** Replaces all the "or"'s with {i}like to multiMatchAny
|
||||
*/
|
||||
class ConvertOrLikeChainPass final : public IQueryTreePass
|
||||
{
|
||||
public:
|
||||
String getName() override { return "ConvertOrLikeChain"; }
|
||||
|
||||
String getDescription() override { return "Replaces all the 'or's with {i}like to multiMatchAny"; }
|
||||
|
||||
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
|
||||
};
|
||||
|
||||
}
|
@ -17,6 +17,7 @@
|
||||
#include <Parsers/ASTSetQuery.h>
|
||||
|
||||
#include <Analyzer/Utils.h>
|
||||
#include <fmt/core.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -179,6 +180,16 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "OFFSET\n";
|
||||
getOffset()->dumpTreeImpl(buffer, format_state, indent + 4);
|
||||
}
|
||||
|
||||
if (hasSettingsChanges())
|
||||
{
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
|
||||
for (const auto & change : settings_changes)
|
||||
{
|
||||
buffer << fmt::format(" {}={}", change.name, toString(change.value));
|
||||
}
|
||||
buffer << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <memory>
|
||||
#include <Analyzer/QueryTreePassManager.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
@ -29,6 +30,7 @@
|
||||
#include <Analyzer/Passes/FuseFunctionsPass.h>
|
||||
#include <Analyzer/Passes/OptimizeGroupByFunctionKeysPass.h>
|
||||
#include <Analyzer/Passes/IfTransformStringsToEnumPass.h>
|
||||
#include <Analyzer/Passes/ConvertOrLikeChainPass.h>
|
||||
#include <Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h>
|
||||
|
||||
namespace DB
|
||||
@ -254,6 +256,8 @@ void addQueryTreePasses(QueryTreePassManager & manager)
|
||||
|
||||
if (settings.optimize_if_transform_strings_to_enum)
|
||||
manager.addPass(std::make_unique<IfTransformStringsToEnumPass>());
|
||||
|
||||
manager.addPass(std::make_unique<ConvertOrLikeChainPass>());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,109 @@
|
||||
SELECT materialize(\'Привет, World\') AS s
|
||||
WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 0
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
s String
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 1
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
JOIN TREE
|
||||
TABLE id: 5, table_name: system.one
|
||||
WHERE
|
||||
FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 7, nodes: 3
|
||||
FUNCTION id: 8, function_name: like, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
CONSTANT id: 10, constant_value: \'hell%\', constant_value_type: String
|
||||
FUNCTION id: 11, function_name: ilike, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 12, nodes: 2
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
CONSTANT id: 13, constant_value: \'%привет%\', constant_value_type: String
|
||||
FUNCTION id: 14, function_name: ilike, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 15, nodes: 2
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
CONSTANT id: 16, constant_value: \'world%\', constant_value_type: String
|
||||
SETTINGS optimize_or_like_chain=0 allow_experimental_analyzer=1
|
||||
SELECT materialize(\'Привет, World\') AS s
|
||||
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
s String
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 1
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
JOIN TREE
|
||||
TABLE id: 5, table_name: system.one
|
||||
WHERE
|
||||
FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 7, nodes: 2
|
||||
FUNCTION id: 8, function_name: multiMatchAny, function_type: ordinary, result_type: UInt8
|
||||
ARGUMENTS
|
||||
LIST id: 9, nodes: 2
|
||||
FUNCTION id: 2, function_name: materialize, function_type: ordinary, result_type: String
|
||||
ARGUMENTS
|
||||
LIST id: 3, nodes: 1
|
||||
CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String
|
||||
CONSTANT id: 10, constant_value: Array_[\'^hell\', \'(?i)привет\', \'(?i)^world\'], constant_value_type: Array(String)
|
||||
CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: Bool
|
||||
SETTINGS optimize_or_like_chain=1 allow_experimental_analyzer=1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\'])
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
Привет, optimized World
|
||||
Привет, optimized World
|
||||
Привет, World
|
||||
Привет, World
|
||||
Привет, optimized World
|
||||
Привет, optimized World
|
||||
Привет, World
|
||||
Привет, World
|
||||
SELECT
|
||||
(materialize(\'Привет, World\') AS s) LIKE \'hell%\' AS test,
|
||||
s
|
||||
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
|
||||
SETTINGS optimize_or_like_chain = 1
|
@ -1,6 +1,7 @@
|
||||
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0;
|
||||
EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
|
||||
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
|
||||
|
||||
EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
|
||||
|
||||
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
|
||||
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS allow_hyperscan = 0;
|
||||
@ -10,9 +11,16 @@ EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('П
|
||||
|
||||
|
||||
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1;
|
||||
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0;
|
||||
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1;
|
||||
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1, allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0;
|
||||
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1;
|
||||
|
||||
-- Aliases
|
||||
|
@ -1,40 +0,0 @@
|
||||
SELECT materialize(\'Привет, World\') AS s
|
||||
WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 0
|
||||
SELECT materialize(\'Привет, World\') AS s
|
||||
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\'])
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
SELECT
|
||||
materialize(\'Привет, World\') AS s1,
|
||||
materialize(\'Привет, World\') AS s2
|
||||
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\')
|
||||
SETTINGS optimize_or_like_chain = 1
|
||||
Привет, optimized World
|
||||
Привет, World
|
||||
Привет, optimized World
|
||||
Привет, World
|
||||
SELECT
|
||||
(materialize(\'Привет, World\') AS s) LIKE \'hell%\' AS test,
|
||||
s
|
||||
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
|
||||
SETTINGS optimize_or_like_chain = 1
|
Loading…
Reference in New Issue
Block a user