mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #67423 from ClickHouse/bff
Fix bloom filter index breaking some queries
This commit is contained in:
commit
2df8d6acde
@ -371,67 +371,78 @@ bool MergeTreeIndexConditionBloomFilter::extractAtomFromTree(const RPNBuilderTre
|
|||||||
|
|
||||||
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent)
|
bool MergeTreeIndexConditionBloomFilter::traverseFunction(const RPNBuilderTreeNode & node, RPNElement & out, const RPNBuilderTreeNode * parent)
|
||||||
{
|
{
|
||||||
bool maybe_useful = false;
|
if (!node.isFunction())
|
||||||
|
return false;
|
||||||
|
|
||||||
if (node.isFunction())
|
const auto function = node.toFunctionNode();
|
||||||
|
auto arguments_size = function.getArgumentsSize();
|
||||||
|
auto function_name = function.getFunctionName();
|
||||||
|
|
||||||
|
if (parent == nullptr)
|
||||||
{
|
{
|
||||||
const auto function = node.toFunctionNode();
|
/// Recurse a little bit for indexOf().
|
||||||
auto arguments_size = function.getArgumentsSize();
|
|
||||||
auto function_name = function.getFunctionName();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < arguments_size; ++i)
|
for (size_t i = 0; i < arguments_size; ++i)
|
||||||
{
|
{
|
||||||
auto argument = function.getArgumentAt(i);
|
auto argument = function.getArgumentAt(i);
|
||||||
if (traverseFunction(argument, out, &node))
|
if (traverseFunction(argument, out, &node))
|
||||||
maybe_useful = true;
|
return true;
|
||||||
}
|
|
||||||
|
|
||||||
if (arguments_size != 2)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
auto lhs_argument = function.getArgumentAt(0);
|
|
||||||
auto rhs_argument = function.getArgumentAt(1);
|
|
||||||
|
|
||||||
if (functionIsInOrGlobalInOperator(function_name))
|
|
||||||
{
|
|
||||||
if (auto future_set = rhs_argument.tryGetPreparedSet(); future_set)
|
|
||||||
{
|
|
||||||
if (auto prepared_set = future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); prepared_set)
|
|
||||||
{
|
|
||||||
if (prepared_set->hasExplicitSetElements())
|
|
||||||
{
|
|
||||||
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
|
||||||
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
|
|
||||||
maybe_useful = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (function_name == "equals" ||
|
|
||||||
function_name == "notEquals" ||
|
|
||||||
function_name == "has" ||
|
|
||||||
function_name == "mapContains" ||
|
|
||||||
function_name == "indexOf" ||
|
|
||||||
function_name == "hasAny" ||
|
|
||||||
function_name == "hasAll")
|
|
||||||
{
|
|
||||||
Field const_value;
|
|
||||||
DataTypePtr const_type;
|
|
||||||
|
|
||||||
if (rhs_argument.tryGetConstant(const_value, const_type))
|
|
||||||
{
|
|
||||||
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
|
|
||||||
maybe_useful = true;
|
|
||||||
}
|
|
||||||
else if (lhs_argument.tryGetConstant(const_value, const_type))
|
|
||||||
{
|
|
||||||
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
|
|
||||||
maybe_useful = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return maybe_useful;
|
if (arguments_size != 2)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/// indexOf() should be inside comparison function, e.g. greater(indexOf(key, 42), 0).
|
||||||
|
/// Other conditions should be at top level, e.g. equals(key, 42), not equals(equals(key, 42), 1).
|
||||||
|
if ((function_name == "indexOf") != (parent != nullptr))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auto lhs_argument = function.getArgumentAt(0);
|
||||||
|
auto rhs_argument = function.getArgumentAt(1);
|
||||||
|
|
||||||
|
if (functionIsInOrGlobalInOperator(function_name))
|
||||||
|
{
|
||||||
|
if (auto future_set = rhs_argument.tryGetPreparedSet(); future_set)
|
||||||
|
{
|
||||||
|
if (auto prepared_set = future_set->buildOrderedSetInplace(rhs_argument.getTreeContext().getQueryContext()); prepared_set)
|
||||||
|
{
|
||||||
|
if (prepared_set->hasExplicitSetElements())
|
||||||
|
{
|
||||||
|
const auto prepared_info = getPreparedSetInfo(prepared_set);
|
||||||
|
if (traverseTreeIn(function_name, lhs_argument, prepared_set, prepared_info.type, prepared_info.column, out))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (function_name == "equals" ||
|
||||||
|
function_name == "notEquals" ||
|
||||||
|
function_name == "has" ||
|
||||||
|
function_name == "mapContains" ||
|
||||||
|
function_name == "indexOf" ||
|
||||||
|
function_name == "hasAny" ||
|
||||||
|
function_name == "hasAll")
|
||||||
|
{
|
||||||
|
Field const_value;
|
||||||
|
DataTypePtr const_type;
|
||||||
|
|
||||||
|
if (rhs_argument.tryGetConstant(const_value, const_type))
|
||||||
|
{
|
||||||
|
if (traverseTreeEquals(function_name, lhs_argument, const_type, const_value, out, parent))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if (lhs_argument.tryGetConstant(const_value, const_type) && (function_name == "equals" || function_name == "notEquals"))
|
||||||
|
{
|
||||||
|
if (traverseTreeEquals(function_name, rhs_argument, const_type, const_value, out, parent))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
|
bool MergeTreeIndexConditionBloomFilter::traverseTreeIn(
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
"rows_read": 3,
|
"rows_read": 3,
|
||||||
8 aбвгдеёж
|
8 aбвгдеёж
|
||||||
"rows_read": 2,
|
"rows_read": 2,
|
||||||
|
13
|
||||||
|
1
|
||||||
1 column-oriented
|
1 column-oriented
|
||||||
2 column-oriented
|
2 column-oriented
|
||||||
"rows_read": 4,
|
"rows_read": 4,
|
||||||
|
@ -103,6 +103,10 @@ $CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filte
|
|||||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k"
|
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k"
|
||||||
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read"
|
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# Weird conditions not supported by the index.
|
||||||
|
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom_filter_idx WHERE (s = 'asd') = (s = 'asd')"
|
||||||
|
$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom_filter_idx WHERE has(['asd', 'some string'], s)"
|
||||||
|
|
||||||
|
|
||||||
# TOKEN BF
|
# TOKEN BF
|
||||||
$CLICKHOUSE_CLIENT -n --query="
|
$CLICKHOUSE_CLIENT -n --query="
|
||||||
|
@ -14,6 +14,11 @@
|
|||||||
0
|
0
|
||||||
2
|
2
|
||||||
2
|
2
|
||||||
|
18
|
||||||
|
100
|
||||||
|
100
|
||||||
|
3
|
||||||
|
100
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
1
|
1
|
||||||
|
@ -25,6 +25,15 @@ WITH ((1, 2), (2, 3)) AS liter_prepared_set SELECT COUNT() FROM single_column_bl
|
|||||||
WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
WITH ((1, 1), (2, 2)) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i32, i64) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
||||||
WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
WITH ((1, (1, 1)), (2, (2, 2))) AS liter_prepared_set SELECT COUNT() FROM single_column_bloom_filter WHERE (i64, (i64, i32)) IN liter_prepared_set SETTINGS max_rows_to_read = 6;
|
||||||
|
|
||||||
|
-- Check that indexHint() works (but it doesn't work with COUNT()).
|
||||||
|
SELECT SUM(ignore(*) + 1) FROM single_column_bloom_filter WHERE indexHint(i32 in (3, 15, 50));
|
||||||
|
|
||||||
|
-- The index doesn't understand expressions like these, but it shouldn't break the query.
|
||||||
|
SELECT COUNT() FROM single_column_bloom_filter WHERE (i32 = 200) = (i32 = 200);
|
||||||
|
SELECT SUM(ignore(*) + 1) FROM single_column_bloom_filter WHERE indexHint((i32 = 200) != (i32 = 200));
|
||||||
|
SELECT COUNT() FROM single_column_bloom_filter WHERE indexOf([10, 20, 30], i32) != 0;
|
||||||
|
SELECT COUNT() FROM single_column_bloom_filter WHERE has([100, 200, 300], 200);
|
||||||
|
|
||||||
DROP TABLE IF EXISTS single_column_bloom_filter;
|
DROP TABLE IF EXISTS single_column_bloom_filter;
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user