Merge branch 'chained_or' of github.com:danlark1/ClickHouse into chained-or

This commit is contained in:
Alexey Milovidov 2022-05-08 23:37:09 +02:00
commit 56abce6a75
6 changed files with 163 additions and 0 deletions

View File

@ -443,6 +443,7 @@ class IColumn;
M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that semantically equals to count() as count().", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
M(Bool, optimize_or_like_chain, true, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \

View File

@ -0,0 +1,76 @@
#include <Functions/likePatternToRegexp.h>
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/IAST.h>
#include <Common/typeid_cast.h>
namespace DB
{
void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &)
{
if (function.name != "or")
return;
std::unordered_map<ASTPtr, std::shared_ptr<ASTLiteral>> identifier_to_literals;
for (auto & child : function.children)
{
if (auto expr_list_fn = child->as<ASTExpressionList>())
{
ASTs unique_elems;
for (auto child_expr_fn : expr_list_fn->children)
{
unique_elems.push_back(child_expr_fn);
if (const auto * child_fn = child_expr_fn->as<ASTFunction>())
{
const bool is_like = child_fn->name == "like";
const bool is_ilike = child_fn->name == "ilike";
/// Not {i}like -> bail out.
if (!is_like && !is_ilike)
continue;
const auto & arguments = child_fn->arguments->children;
/// They should have 2 arguments.
if (arguments.size() != 2)
continue;
/// Second one is string literal.
auto identifier = arguments[0];
auto literal = arguments[1]->as<ASTLiteral>();
if (!identifier || !literal || literal->value.getType() != Field::Types::String)
continue;
String regexp = likePatternToRegexp(literal->value.get<String>());
/// Case insensitive. Works with UTF-8 as well.
if (is_ilike)
regexp = "(?i)" + regexp;
unique_elems.pop_back();
auto it = identifier_to_literals.find(identifier);
if (it == identifier_to_literals.end())
{
it = identifier_to_literals.insert({identifier, std::make_shared<ASTLiteral>(Field{Array{}})}).first;
auto match = makeASTFunction("multiMatchAny");
match->arguments->children.push_back(std::move(arguments[0]));
match->arguments->children.push_back(it->second);
unique_elems.push_back(std::move(match));
}
it->second->value.get<Array>().push_back(regexp);
}
}
/// OR must have at least two arguments.
if (unique_elems.size() == 1)
unique_elems.push_back(std::make_shared<ASTLiteral>(Field(false)));
expr_list_fn->children = std::move(unique_elems);
}
}
}
}

View File

@ -0,0 +1,22 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class ASTFunction;
/// Replaces all the "or"'s with {i}like to multiMatchAny
class ConvertFunctionOrLikeData
{
public:
using TypeToVisit = ASTFunction;
void visit(ASTFunction & function, ASTPtr & ast);
};
using ConvertFunctionOrLikeVisitor = InDepthNodeVisitor<OneTypeMatcher<ConvertFunctionOrLikeData>, true>;
}

View File

@ -17,6 +17,7 @@
#include <Interpreters/RewriteCountVariantsVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/ConvertStringsToEnumVisitor.h>
#include <Interpreters/ConvertFunctionOrLikeVisitor.h>
#include <Interpreters/RewriteFunctionToSubcolumnVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/ExternalDictionariesLoader.h>
@ -729,6 +730,12 @@ void optimizeFuseQuantileFunctions(ASTPtr & query)
}
}
void optimizeOrLikeChain(ASTPtr & query)
{
ConvertFunctionOrLikeVisitor::Data data = {};
ConvertFunctionOrLikeVisitor(data).visit(query);
}
}
void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif)
@ -836,6 +843,14 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
if (settings.optimize_syntax_fuse_functions)
optimizeFuseQuantileFunctions(query);
if (settings.optimize_or_like_chain
&& settings.allow_hyperscan
&& settings.max_hyperscan_regexp_length == 0
&& settings.max_hyperscan_regexp_total_length == 0)
{
optimizeOrLikeChain(query);
}
}
}

View File

@ -0,0 +1,34 @@
SELECT materialize(\'Привет, World\') AS s
WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\')
SELECT materialize(\'Привет, World\') AS s
WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\'])
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\')
SETTINGS optimize_or_like_chain = 1
SELECT
materialize(\'Привет, World\') AS s1,
materialize(\'Привет, World\') AS s2
WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\')
SETTINGS optimize_or_like_chain = 1
Привет, optimized World
Привет, World
Привет, optimized World
Привет, World

View File

@ -0,0 +1,15 @@
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS allow_hyperscan = 0;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_length = 10;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_total_length = 10;
EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') OR s1 == 'Привет' SETTINGS optimize_or_like_chain = 1;
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0;
SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1;
SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0;