mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #5965 from dimarub2000/master
Primary key, MergeTreeIndexFullText and MergeTreeIndexSet support for string functions
This commit is contained in:
commit
ceffbf39d6
@ -178,6 +178,24 @@ const KeyCondition::AtomMap KeyCondition::atom_map
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"empty",
|
||||||
|
[] (RPNElement & out, const Field &)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_IN_RANGE;
|
||||||
|
out.range = Range("");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"notEmpty",
|
||||||
|
[] (RPNElement & out, const Field &)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
|
||||||
|
out.range = Range("");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"like",
|
"like",
|
||||||
[] (RPNElement & out, const Field & value)
|
[] (RPNElement & out, const Field & value)
|
||||||
@ -199,6 +217,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"notLike",
|
||||||
|
[] (RPNElement & out, const Field & value)
|
||||||
|
{
|
||||||
|
if (value.getType() != Field::Types::String)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>());
|
||||||
|
if (prefix.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
|
||||||
|
|
||||||
|
out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
|
||||||
|
out.range = !right_bound.empty()
|
||||||
|
? Range(prefix, true, right_bound, false)
|
||||||
|
: Range::createLeftBounded(prefix, true);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"startsWith",
|
"startsWith",
|
||||||
[] (RPNElement & out, const Field & value)
|
[] (RPNElement & out, const Field & value)
|
||||||
@ -645,92 +684,102 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
{
|
{
|
||||||
const ASTs & args = func->arguments->children;
|
const ASTs & args = func->arguments->children;
|
||||||
|
|
||||||
if (args.size() != 2)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
DataTypePtr key_expr_type; /// Type of expression containing key column
|
DataTypePtr key_expr_type; /// Type of expression containing key column
|
||||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
|
||||||
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
||||||
MonotonicFunctionsChain chain;
|
MonotonicFunctionsChain chain;
|
||||||
bool is_set_const = false;
|
std::string func_name = func->name;
|
||||||
bool is_constant_transformed = false;
|
|
||||||
|
|
||||||
if (functionIsInOrGlobalInOperator(func->name)
|
if (args.size() == 1)
|
||||||
&& tryPrepareSetIndex(args, context, out, key_column_num))
|
|
||||||
{
|
{
|
||||||
key_arg_pos = 0;
|
if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)))
|
||||||
is_set_const = true;
|
return false;
|
||||||
|
|
||||||
|
if (key_column_num == static_cast<size_t>(-1))
|
||||||
|
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||||
}
|
}
|
||||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
else if (args.size() == 2)
|
||||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
|
|
||||||
{
|
{
|
||||||
key_arg_pos = 0;
|
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||||
}
|
bool is_set_const = false;
|
||||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
bool is_constant_transformed = false;
|
||||||
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
|
|
||||||
{
|
if (functionIsInOrGlobalInOperator(func_name)
|
||||||
key_arg_pos = 0;
|
&& tryPrepareSetIndex(args, context, out, key_column_num))
|
||||||
is_constant_transformed = true;
|
{
|
||||||
}
|
key_arg_pos = 0;
|
||||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
is_set_const = true;
|
||||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
|
}
|
||||||
{
|
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||||
key_arg_pos = 1;
|
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
|
||||||
}
|
{
|
||||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
key_arg_pos = 0;
|
||||||
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
|
}
|
||||||
{
|
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||||
key_arg_pos = 1;
|
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
|
||||||
is_constant_transformed = true;
|
{
|
||||||
|
key_arg_pos = 0;
|
||||||
|
is_constant_transformed = true;
|
||||||
|
}
|
||||||
|
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||||
|
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
|
||||||
|
{
|
||||||
|
key_arg_pos = 1;
|
||||||
|
}
|
||||||
|
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||||
|
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
|
||||||
|
{
|
||||||
|
key_arg_pos = 1;
|
||||||
|
is_constant_transformed = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (key_column_num == static_cast<size_t>(-1))
|
||||||
|
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
|
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
|
||||||
|
if (is_constant_transformed)
|
||||||
|
{
|
||||||
|
if (func_name == "less")
|
||||||
|
func_name = "lessOrEquals";
|
||||||
|
else if (func_name == "greater")
|
||||||
|
func_name = "greaterOrEquals";
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replace <const> <sign> <data> with <data> <-sign> <const>
|
||||||
|
if (key_arg_pos == 1)
|
||||||
|
{
|
||||||
|
if (func_name == "less")
|
||||||
|
func_name = "greater";
|
||||||
|
else if (func_name == "greater")
|
||||||
|
func_name = "less";
|
||||||
|
else if (func_name == "greaterOrEquals")
|
||||||
|
func_name = "lessOrEquals";
|
||||||
|
else if (func_name == "lessOrEquals")
|
||||||
|
func_name = "greaterOrEquals";
|
||||||
|
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
|
||||||
|
{
|
||||||
|
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool cast_not_needed =
|
||||||
|
is_set_const /// Set args are already casted inside Set::createFromAST
|
||||||
|
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
|
||||||
|
|
||||||
|
if (!cast_not_needed)
|
||||||
|
castValueToType(key_expr_type, const_value, const_type, node);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (key_column_num == static_cast<size_t>(-1))
|
|
||||||
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
|
||||||
|
|
||||||
std::string func_name = func->name;
|
|
||||||
|
|
||||||
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
|
|
||||||
if (is_constant_transformed)
|
|
||||||
{
|
|
||||||
if (func_name == "less")
|
|
||||||
func_name = "lessOrEquals";
|
|
||||||
else if (func_name == "greater")
|
|
||||||
func_name = "greaterOrEquals";
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Replace <const> <sign> <data> with <data> <-sign> <const>
|
|
||||||
if (key_arg_pos == 1)
|
|
||||||
{
|
|
||||||
if (func_name == "less")
|
|
||||||
func_name = "greater";
|
|
||||||
else if (func_name == "greater")
|
|
||||||
func_name = "less";
|
|
||||||
else if (func_name == "greaterOrEquals")
|
|
||||||
func_name = "lessOrEquals";
|
|
||||||
else if (func_name == "lessOrEquals")
|
|
||||||
func_name = "greaterOrEquals";
|
|
||||||
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
|
|
||||||
{
|
|
||||||
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out.key_column = key_column_num;
|
|
||||||
out.monotonic_functions_chain = std::move(chain);
|
|
||||||
|
|
||||||
const auto atom_it = atom_map.find(func_name);
|
const auto atom_it = atom_map.find(func_name);
|
||||||
if (atom_it == std::end(atom_map))
|
if (atom_it == std::end(atom_map))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
bool cast_not_needed =
|
out.key_column = key_column_num;
|
||||||
is_set_const /// Set args are already casted inside Set::createFromAST
|
out.monotonic_functions_chain = std::move(chain);
|
||||||
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
|
|
||||||
|
|
||||||
if (!cast_not_needed)
|
|
||||||
castValueToType(key_expr_type, const_value, const_type, node);
|
|
||||||
|
|
||||||
return atom_it->second(out, const_value);
|
return atom_it->second(out, const_value);
|
||||||
}
|
}
|
||||||
@ -748,7 +797,6 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,7 +142,7 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
|
|||||||
"like",
|
"like",
|
||||||
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||||
{
|
{
|
||||||
out.function = RPNElement::FUNCTION_LIKE;
|
out.function = RPNElement::FUNCTION_EQUALS;
|
||||||
out.bloom_filter = std::make_unique<BloomFilter>(
|
out.bloom_filter = std::make_unique<BloomFilter>(
|
||||||
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||||
|
|
||||||
@ -151,6 +151,66 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"notLike",
|
||||||
|
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_NOT_EQUALS;
|
||||||
|
out.bloom_filter = std::make_unique<BloomFilter>(
|
||||||
|
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||||
|
|
||||||
|
const auto & str = value.get<String>();
|
||||||
|
likeStringToBloomFilter(str, idx.token_extractor_func, *out.bloom_filter);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"startsWith",
|
||||||
|
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_EQUALS;
|
||||||
|
out.bloom_filter = std::make_unique<BloomFilter>(
|
||||||
|
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||||
|
|
||||||
|
const auto & prefix = value.get<String>();
|
||||||
|
stringToBloomFilter(prefix.c_str(), prefix.size(), idx.token_extractor_func, *out.bloom_filter);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"endsWith",
|
||||||
|
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_EQUALS;
|
||||||
|
out.bloom_filter = std::make_unique<BloomFilter>(
|
||||||
|
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||||
|
|
||||||
|
const auto & suffix = value.get<String>();
|
||||||
|
stringToBloomFilter(suffix.c_str(), suffix.size(), idx.token_extractor_func, *out.bloom_filter);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"multiSearchAny",
|
||||||
|
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||||
|
{
|
||||||
|
out.function = RPNElement::FUNCTION_MULTI_SEARCH;
|
||||||
|
|
||||||
|
std::vector<std::vector<BloomFilter>> bloom_filters;
|
||||||
|
bloom_filters.emplace_back();
|
||||||
|
for (const auto & element : value.get<Array>())
|
||||||
|
{
|
||||||
|
if (element.getType() != Field::Types::String)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
bloom_filters.back().emplace_back(idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||||
|
const auto & str = element.get<String>();
|
||||||
|
stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, bloom_filters.back().back());
|
||||||
|
}
|
||||||
|
out.set_bloom_filters = std::move(bloom_filters);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"notIn",
|
"notIn",
|
||||||
[] (RPNElement & out, const Field &, const MergeTreeIndexFullText &)
|
[] (RPNElement & out, const Field &, const MergeTreeIndexFullText &)
|
||||||
@ -197,10 +257,9 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
|
|||||||
}
|
}
|
||||||
else if (element.function == RPNElement::FUNCTION_EQUALS
|
else if (element.function == RPNElement::FUNCTION_EQUALS
|
||||||
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|
||||||
|| element.function == RPNElement::FUNCTION_LIKE
|
|
||||||
|| element.function == RPNElement::FUNCTION_NOT_LIKE
|
|
||||||
|| element.function == RPNElement::FUNCTION_IN
|
|| element.function == RPNElement::FUNCTION_IN
|
||||||
|| element.function == RPNElement::FUNCTION_NOT_IN
|
|| element.function == RPNElement::FUNCTION_NOT_IN
|
||||||
|
|| element.function == RPNElement::FUNCTION_MULTI_SEARCH
|
||||||
|| element.function == RPNElement::ALWAYS_FALSE)
|
|| element.function == RPNElement::ALWAYS_FALSE)
|
||||||
{
|
{
|
||||||
rpn_stack.push_back(false);
|
rpn_stack.push_back(false);
|
||||||
@ -255,17 +314,8 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
|||||||
if (element.function == RPNElement::FUNCTION_NOT_EQUALS)
|
if (element.function == RPNElement::FUNCTION_NOT_EQUALS)
|
||||||
rpn_stack.back() = !rpn_stack.back();
|
rpn_stack.back() = !rpn_stack.back();
|
||||||
}
|
}
|
||||||
else if (element.function == RPNElement::FUNCTION_LIKE
|
|
||||||
|| element.function == RPNElement::FUNCTION_NOT_LIKE)
|
|
||||||
{
|
|
||||||
rpn_stack.emplace_back(
|
|
||||||
granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
|
|
||||||
|
|
||||||
if (element.function == RPNElement::FUNCTION_NOT_LIKE)
|
|
||||||
rpn_stack.back() = !rpn_stack.back();
|
|
||||||
}
|
|
||||||
else if (element.function == RPNElement::FUNCTION_IN
|
else if (element.function == RPNElement::FUNCTION_IN
|
||||||
|| element.function == RPNElement::FUNCTION_NOT_IN)
|
|| element.function == RPNElement::FUNCTION_NOT_IN)
|
||||||
{
|
{
|
||||||
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
||||||
|
|
||||||
@ -283,6 +333,18 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
|||||||
if (element.function == RPNElement::FUNCTION_NOT_IN)
|
if (element.function == RPNElement::FUNCTION_NOT_IN)
|
||||||
rpn_stack.back() = !rpn_stack.back();
|
rpn_stack.back() = !rpn_stack.back();
|
||||||
}
|
}
|
||||||
|
else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH)
|
||||||
|
{
|
||||||
|
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
||||||
|
|
||||||
|
const auto & bloom_filters = element.set_bloom_filters[0];
|
||||||
|
|
||||||
|
for (size_t row = 0; row < bloom_filters.size(); ++row)
|
||||||
|
result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
|
||||||
|
|
||||||
|
rpn_stack.emplace_back(
|
||||||
|
std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
|
||||||
|
}
|
||||||
else if (element.function == RPNElement::FUNCTION_NOT)
|
else if (element.function == RPNElement::FUNCTION_NOT)
|
||||||
{
|
{
|
||||||
rpn_stack.back() = !rpn_stack.back();
|
rpn_stack.back() = !rpn_stack.back();
|
||||||
@ -343,8 +405,9 @@ bool MergeTreeConditionFullText::atomFromAST(
|
|||||||
|
|
||||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||||
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
||||||
|
std::string func_name = func->name;
|
||||||
|
|
||||||
if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out))
|
if (functionIsInOrGlobalInOperator(func_name) && tryPrepareSetBloomFilter(args, out))
|
||||||
{
|
{
|
||||||
key_arg_pos = 0;
|
key_arg_pos = 0;
|
||||||
}
|
}
|
||||||
@ -359,17 +422,17 @@ bool MergeTreeConditionFullText::atomFromAST(
|
|||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (const_type && const_type->getTypeId() != TypeIndex::String && const_type->getTypeId() != TypeIndex::FixedString)
|
if (const_type && const_type->getTypeId() != TypeIndex::String
|
||||||
|
&& const_type->getTypeId() != TypeIndex::FixedString
|
||||||
|
&& const_type->getTypeId() != TypeIndex::Array)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (key_arg_pos == 1 && (func->name != "equals" || func->name != "notEquals"))
|
if (key_arg_pos == 1 && (func_name != "equals" || func_name != "notEquals"))
|
||||||
return false;
|
return false;
|
||||||
else if (!index.token_extractor_func->supportLike() && (func->name == "like" || func->name == "notLike"))
|
else if (!index.token_extractor_func->supportLike() && (func_name == "like" || func_name == "notLike"))
|
||||||
return false;
|
return false;
|
||||||
else
|
|
||||||
key_arg_pos = 0;
|
|
||||||
|
|
||||||
const auto atom_it = atom_map.find(func->name);
|
const auto atom_it = atom_map.find(func_name);
|
||||||
if (atom_it == std::end(atom_map))
|
if (atom_it == std::end(atom_map))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -380,8 +443,8 @@ bool MergeTreeConditionFullText::atomFromAST(
|
|||||||
{
|
{
|
||||||
/// Check constant like in KeyCondition
|
/// Check constant like in KeyCondition
|
||||||
if (const_value.getType() == Field::Types::UInt64
|
if (const_value.getType() == Field::Types::UInt64
|
||||||
|| const_value.getType() == Field::Types::Int64
|
|| const_value.getType() == Field::Types::Int64
|
||||||
|| const_value.getType() == Field::Types::Float64)
|
|| const_value.getType() == Field::Types::Float64)
|
||||||
{
|
{
|
||||||
/// Zero in all types is represented in memory the same way as in UInt64.
|
/// Zero in all types is represented in memory the same way as in UInt64.
|
||||||
out.function = const_value.get<UInt64>()
|
out.function = const_value.get<UInt64>()
|
||||||
@ -475,7 +538,6 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const
|
MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const
|
||||||
{
|
{
|
||||||
return std::make_shared<MergeTreeIndexGranuleFullText>(*this);
|
return std::make_shared<MergeTreeIndexGranuleFullText>(*this);
|
||||||
|
@ -78,10 +78,9 @@ private:
|
|||||||
/// Atoms of a Boolean expression.
|
/// Atoms of a Boolean expression.
|
||||||
FUNCTION_EQUALS,
|
FUNCTION_EQUALS,
|
||||||
FUNCTION_NOT_EQUALS,
|
FUNCTION_NOT_EQUALS,
|
||||||
FUNCTION_LIKE,
|
|
||||||
FUNCTION_NOT_LIKE,
|
|
||||||
FUNCTION_IN,
|
FUNCTION_IN,
|
||||||
FUNCTION_NOT_IN,
|
FUNCTION_NOT_IN,
|
||||||
|
FUNCTION_MULTI_SEARCH,
|
||||||
FUNCTION_UNKNOWN, /// Can take any value.
|
FUNCTION_UNKNOWN, /// Can take any value.
|
||||||
/// Operators of the logical expression.
|
/// Operators of the logical expression.
|
||||||
FUNCTION_NOT,
|
FUNCTION_NOT,
|
||||||
@ -93,15 +92,20 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
RPNElement(
|
RPNElement(
|
||||||
Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr<BloomFilter> && const_bloom_filter_ = nullptr)
|
Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr<BloomFilter> && const_bloom_filter_ = nullptr)
|
||||||
: function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
|
: function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
|
||||||
|
|
||||||
Function function = FUNCTION_UNKNOWN;
|
Function function = FUNCTION_UNKNOWN;
|
||||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE.
|
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH
|
||||||
size_t key_column;
|
size_t key_column;
|
||||||
|
|
||||||
|
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
|
||||||
std::unique_ptr<BloomFilter> bloom_filter;
|
std::unique_ptr<BloomFilter> bloom_filter;
|
||||||
/// For FUNCTION_IN and FUNCTION_NOT_IN
|
|
||||||
|
/// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH
|
||||||
std::vector<std::vector<BloomFilter>> set_bloom_filters;
|
std::vector<std::vector<BloomFilter>> set_bloom_filters;
|
||||||
|
|
||||||
|
/// For FUNCTION_IN and FUNCTION_NOT_IN
|
||||||
std::vector<size_t> set_key_position;
|
std::vector<size_t> set_key_position;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -411,7 +411,10 @@ static bool checkAtomName(const String & name)
|
|||||||
"greaterOrEquals",
|
"greaterOrEquals",
|
||||||
"in",
|
"in",
|
||||||
"notIn",
|
"notIn",
|
||||||
"like"
|
"like",
|
||||||
|
"startsWith",
|
||||||
|
"endsWith",
|
||||||
|
"multiSearchAny"
|
||||||
};
|
};
|
||||||
return atoms.find(name) != atoms.end();
|
return atoms.find(name) != atoms.end();
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,53 @@
|
|||||||
|
9 abra
|
||||||
|
14 abracadabra
|
||||||
|
"rows_read": 6,
|
||||||
|
8 computer science
|
||||||
|
"rows_read": 2,
|
||||||
|
9 abra
|
||||||
|
10 cadabra
|
||||||
|
11 crabacadabra
|
||||||
|
14 abracadabra
|
||||||
|
15 cadabraabra
|
||||||
|
"rows_read": 6,
|
||||||
|
6 some string
|
||||||
|
7 another string
|
||||||
|
"rows_read": 2,
|
||||||
|
9 abra
|
||||||
|
14 abracadabra
|
||||||
|
"rows_read": 6,
|
||||||
|
8 computer science
|
||||||
|
"rows_read": 2,
|
||||||
|
1 ClickHouse is a column-oriented database management system (DBMS)
|
||||||
|
2 column-oriented database management system
|
||||||
|
13 basement
|
||||||
|
"rows_read": 6,
|
||||||
|
6 some string
|
||||||
|
7 another string
|
||||||
|
"rows_read": 2,
|
||||||
|
6 some string
|
||||||
|
7 another string
|
||||||
|
8 computer science
|
||||||
|
"rows_read": 4,
|
||||||
|
1 ClickHouse is a column-oriented database management system (DBMS)
|
||||||
|
2 column-oriented database management system
|
||||||
|
13 basement
|
||||||
|
"rows_read": 6,
|
||||||
|
9 abra
|
||||||
|
10 cadabra
|
||||||
|
11 crabacadabra
|
||||||
|
14 abracadabra
|
||||||
|
15 cadabraabra
|
||||||
|
"rows_read": 6,
|
||||||
|
4 какая-то строка
|
||||||
|
5 еще строка
|
||||||
|
6 some string
|
||||||
|
7 another string
|
||||||
|
"rows_read": 4,
|
||||||
|
14 abracadabra
|
||||||
|
"rows_read": 4,
|
||||||
|
1 ClickHouse is a column-oriented database management system (DBMS)
|
||||||
|
2 column-oriented database management system
|
||||||
|
10 cadabra
|
||||||
|
11 crabacadabra
|
||||||
|
15 cadabraabra
|
||||||
|
"rows_read": 8,
|
86
dbms/tests/queries/0_stateless/00964_bloom_index_string_functions.sh
Executable file
86
dbms/tests/queries/0_stateless/00964_bloom_index_string_functions.sh
Executable file
@ -0,0 +1,86 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
. $CURDIR/../shell_config.sh
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;"
|
||||||
|
|
||||||
|
# NGRAM BF
|
||||||
|
$CLICKHOUSE_CLIENT -n --query="
|
||||||
|
SET allow_experimental_data_skipping_indices = 1;
|
||||||
|
CREATE TABLE bloom_filter_idx
|
||||||
|
(
|
||||||
|
k UInt64,
|
||||||
|
s String,
|
||||||
|
INDEX bf (s, lower(s)) TYPE ngrambf_v1(3, 512, 2, 0) GRANULARITY 1
|
||||||
|
) ENGINE = MergeTree()
|
||||||
|
ORDER BY k
|
||||||
|
SETTINGS index_granularity = 2;"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx VALUES
|
||||||
|
(0, 'ClickHouse - столбцовая система управления базами данных (СУБД)'),
|
||||||
|
(1, 'ClickHouse is a column-oriented database management system (DBMS)'),
|
||||||
|
(2, 'column-oriented database management system'),
|
||||||
|
(3, 'columns'),
|
||||||
|
(4, 'какая-то строка'),
|
||||||
|
(5, 'еще строка'),
|
||||||
|
(6, 'some string'),
|
||||||
|
(7, 'another string'),
|
||||||
|
(8, 'computer science'),
|
||||||
|
(9, 'abra'),
|
||||||
|
(10, 'cadabra'),
|
||||||
|
(11, 'crabacadabra'),
|
||||||
|
(12, 'crab'),
|
||||||
|
(13, 'basement'),
|
||||||
|
(14, 'abracadabra'),
|
||||||
|
(15, 'cadabraabra')"
|
||||||
|
|
||||||
|
# STARTS_WITH
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') ORDER BY k"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'computer') ORDER BY k"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'computer') ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# ENDS_WITH
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'abra') ORDER BY k"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'abra') ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'ring') ORDER BY k"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'ring') ORDER BY k FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# COMBINED
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'c') AND endsWith(s, 'science')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'c') AND endsWith(s, 'science') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# MULTI_SEARCH_ANY
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['data', 'base'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['data', 'base']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string', 'computer'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string', 'computer']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['base', 'seme', 'gement'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['base', 'seme', 'gement']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['abra', 'cadabra', 'cab', 'extra'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['abra', 'cadabra', 'cab', 'extra']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['строка', 'string'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['строка', 'string']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# MULTI_SEARCH_ANY + OTHER
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND startsWith(s, 'abra')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND startsWith(s, 'abra') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND (startsWith(s, 'c') OR startsWith(s, 'C'))"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND (startsWith(s, 'c') OR startsWith(s, 'C')) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx;"
|
@ -0,0 +1,16 @@
|
|||||||
|
9 abra
|
||||||
|
14 abracadabra
|
||||||
|
"rows_read": 4,
|
||||||
|
9 abra
|
||||||
|
10 cadabra
|
||||||
|
11 crabacadabra
|
||||||
|
14 abracadabra
|
||||||
|
15 cadabraabra
|
||||||
|
"rows_read": 6,
|
||||||
|
9 abra
|
||||||
|
14 abracadabra
|
||||||
|
"rows_read": 4,
|
||||||
|
1 ClickHouse is a column-oriented database management system (DBMS)
|
||||||
|
2 column-oriented database management system
|
||||||
|
13 basement
|
||||||
|
"rows_read": 6,
|
53
dbms/tests/queries/0_stateless/00965_set_index_string_functions.sh
Executable file
53
dbms/tests/queries/0_stateless/00965_set_index_string_functions.sh
Executable file
@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
. $CURDIR/../shell_config.sh
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT -n --query="
|
||||||
|
SET allow_experimental_data_skipping_indices = 1;
|
||||||
|
CREATE TABLE set_idx
|
||||||
|
(
|
||||||
|
k UInt64,
|
||||||
|
s String,
|
||||||
|
INDEX idx (s) TYPE set(2) GRANULARITY 1
|
||||||
|
) ENGINE = MergeTree()
|
||||||
|
ORDER BY k
|
||||||
|
SETTINGS index_granularity = 2;"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="INSERT INTO set_idx VALUES
|
||||||
|
(0, 'ClickHouse - столбцовая система управления базами данных (СУБД)'),
|
||||||
|
(1, 'ClickHouse is a column-oriented database management system (DBMS)'),
|
||||||
|
(2, 'column-oriented database management system'),
|
||||||
|
(3, 'columns'),
|
||||||
|
(4, 'какая-то строка'),
|
||||||
|
(5, 'еще строка'),
|
||||||
|
(6, 'some string'),
|
||||||
|
(7, 'another string'),
|
||||||
|
(8, 'computer science'),
|
||||||
|
(9, 'abra'),
|
||||||
|
(10, 'cadabra'),
|
||||||
|
(11, 'crabacadabra'),
|
||||||
|
(12, 'crab'),
|
||||||
|
(13, 'basement'),
|
||||||
|
(14, 'abracadabra'),
|
||||||
|
(15, 'cadabraabra')"
|
||||||
|
|
||||||
|
# STARTS_WITH
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# ENDS_WITH
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE endsWith(s, 'abra')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# COMBINED
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra')"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# MULTI_SEARCH_ANY
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE multiSearchAny(s, ['data', 'base'])"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE multiSearchAny(s, ['data', 'base']) FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE set_idx;"
|
Loading…
Reference in New Issue
Block a user