mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge pull request #67423 from ClickHouse/bff
Fix bloom filter index breaking some queries
This commit is contained in:
commit
2df8d6acde
@ -371,67 +371,78 @@ bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTre
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent)
|
||||
{
|
||||
bool maybe_useful = false;
|
||||
if (!node.isFunction())
|
||||
return false;
|
||||
|
||||
if (node.isFunction())
|
||||
const auto function = node.toFunctionNode();
|
||||
auto arguments_size = function.getArgumentsSize();
|
||||
auto function_name = function.getFunctionName();
|
||||
|
||||
if (parent == nullptr)
|
||||
{
|
||||
const auto function = node.toFunctionNode();
|
||||
auto arguments_size = function.getArgumentsSize();
|
||||
auto function_name = function.getFunctionName();
|
||||
|
||||
/// Recurse a little bit for indexOf().
|
||||
for (size_t i = 0; i < arguments_size; ++i)
|
||||
{
|
||||
auto argument = function.getArgumentAt(i);
|
||||
if (traverseFunction(argument, out, &node))
|
||||
maybe_useful = true;
|
||||
}
|
||||
|
||||
if (arguments_size != 2)
|
||||
return false;
|
||||
|
||||
auto lhs_argument = function.getArgumentAt(0);
|
||||
auto rhs_argument = function.getArgumentAt(1);
|
||||
|
||||
if (functionIsInOrGlobalInOperator(function_name))
|
||||
{
|
||||
if (auto future_set = rhs_argument.tryGetPreparedSet(); future_set)
|
||||
{
|
||||
if (auto prepared_set = future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); prepared_set)
|
||||
{
|
||||
if (prepared_set->hasExplicitSetElements())
|
||||
{
|
||||
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
||||
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
|
||||
maybe_useful = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (function_name == "equals" ||
|
||||
function_name == "notEquals" ||
|
||||
function_name == "has" ||
|
||||
function_name == "mapContains" ||
|
||||
function_name == "indexOf" ||
|
||||
function_name == "hasAny" ||
|
||||
function_name == "hasAll")
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
|
||||
if (rhs_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
|
||||
maybe_useful = true;
|
||||
}
|
||||
else if (lhs_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
|
||||
maybe_useful = true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return maybe_useful;
|
||||
if (arguments_size != 2)
|
||||
return false;
|
||||
|
||||
/// indexOf() should be inside comparison function, e.g. greater(indexOf(key, 42), 0).
|
||||
/// Other conditions should be at top level, e.g. equals(key, 42), not equals(equals(key, 42), 1).
|
||||
if ((function_name == "indexOf") != (parent != nullptr))
|
||||
return false;
|
||||
|
||||
auto lhs_argument = function.getArgumentAt(0);
|
||||
auto rhs_argument = function.getArgumentAt(1);
|
||||
|
||||
if (functionIsInOrGlobalInOperator(function_name))
|
||||
{
|
||||
if (auto future_set = rhs_argument.tryGetPreparedSet(); future_set)
|
||||
{
|
||||
if (auto prepared_set = future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); prepared_set)
|
||||
{
|
||||
if (prepared_set->hasExplicitSetElements())
|
||||
{
|
||||
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
||||
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (function_name == "equals" ||
|
||||
function_name == "notEquals" ||
|
||||
function_name == "has" ||
|
||||
function_name == "mapContains" ||
|
||||
function_name == "indexOf" ||
|
||||
function_name == "hasAny" ||
|
||||
function_name == "hasAll")
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
|
||||
if (rhs_argument.tryGetConstant(const_value, const_type))
|
||||
{
|
||||
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
|
||||
return true;
|
||||
}
|
||||
else if (lhs_argument.tryGetConstant(const_value, const_type) && (function_name == "equals" || function_name == "notEquals"))
|
||||
{
|
||||
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
|
||||
|
@ -28,6 +28,8 @@
|
||||
"rows_read": 3,
|
||||
8 aбвгдеёж
|
||||
"rows_read": 2,
|
||||
13
|
||||
1
|
||||
1 column-oriented
|
||||
2 column-oriented
|
||||
"rows_read": 4,
|
||||
|
@ -103,6 +103,10 @@ $CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filte
|
||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k"
|
||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||
|
||||
# Weird conditions not supported by the index.
|
||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom_filter_idx WHERE (s = 'asd') = (s = 'asd')"
|
||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom_filter_idx WHERE has(['asd', 'some string'], s)"
|
||||
|
||||
|
||||
# TOKEN BF
|
||||
$CLICKHOUSE_CLIENT -n --query="
|
||||
|
@ -14,6 +14,11 @@
|
||||
0
|
||||
2
|
||||
2
|
||||
18
|
||||
100
|
||||
100
|
||||
3
|
||||
100
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -25,6 +25,15 @@ WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM single_column_bl
|
||||
WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
||||
WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
||||
|
||||
-- Check that indexHint() works (but it doesn't work with COUNT()).
|
||||
SELECT SUM(ignore(*) + 1) FROM single_column_bloom_filter WHERE indexHint(i32 in (3, 15, 50));
|
||||
|
||||
-- The index doesn't understand expressions like these, but it shouldn't break the query.
|
||||
SELECT COUNT() FROM single_column_bloom_filter WHERE (i32 = 200) = (i32 = 200);
|
||||
SELECT SUM(ignore(*) + 1) FROM single_column_bloom_filter WHERE indexHint((i32 = 200) != (i32 = 200));
|
||||
SELECT COUNT() FROM single_column_bloom_filter WHERE indexOf([10, 20, 30], i32) != 0;
|
||||
SELECT COUNT() FROM single_column_bloom_filter WHERE has([100, 200, 300], 200);
|
||||
|
||||
DROP TABLE IF EXISTS single_column_bloom_filter;
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user