mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
primary key and MergeTreeIndexFullText support for string functions
This commit is contained in:
parent
0b756c48a8
commit
c9a5b3c9ab
@ -178,6 +178,24 @@ const KeyCondition::AtomMap KeyCondition::atom_map
|
||||
return true;
|
||||
}
|
||||
},
|
||||
{
    "empty",
    /// Atom for `empty(key)`: the key has to lie in the range built from the empty string literal.
    /// The constant argument is unused because the function is unary.
    [] (RPNElement & out, const Field &)
    {
        out.range = Range("");
        out.function = RPNElement::FUNCTION_IN_RANGE;
        return true;
    }
},
|
||||
{
    "notEmpty",
    /// Atom for `notEmpty(key)`: the key has to lie outside the range built from the empty string literal.
    /// The constant argument is unused because the function is unary.
    [] (RPNElement & out, const Field &)
    {
        out.range = Range("");
        out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
        return true;
    }
},
|
||||
{
|
||||
"like",
|
||||
[] (RPNElement & out, const Field & value)
|
||||
@ -199,6 +217,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
|
||||
return true;
|
||||
}
|
||||
},
|
||||
{
    "notLike",
    /// Atom for `key NOT LIKE pattern`.
    /// Returns false (the condition is not usable for key analysis) unless the constant is a String
    /// whose pattern is exactly `<fixed prefix>%`; otherwise fills `out` with the complement of the
    /// range of strings starting with that prefix and returns true.
    [] (RPNElement & out, const Field & value)
    {
        if (value.getType() != Field::Types::String)
            return false;

        const String & pattern = value.get<const String &>();

        String prefix = extractFixedPrefixFromLikePattern(pattern);
        if (prefix.empty())
            return false;

        /// "Not in the prefix range" is equivalent to NOT LIKE only when every string with the prefix
        /// matches the pattern, i.e. the pattern is the prefix followed by a single '%'.
        /// For patterns such as 'abc%def' or 'abc', strings that start with the prefix may still
        /// satisfy NOT LIKE, so excluding the whole prefix range could drop matching rows.
        /// The literal comparison also rejects patterns with escape sequences, which is merely conservative.
        const bool is_pure_prefix_pattern =
            pattern.size() == prefix.size() + 1
            && pattern.back() == '%'
            && pattern.compare(0, prefix.size(), prefix) == 0;

        if (!is_pure_prefix_pattern)
            return false;

        String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);

        out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
        /// An empty right bound means the helper produced no upper bound, so the range is left-bounded only.
        out.range = !right_bound.empty()
            ? Range(prefix, true, right_bound, false)
            : Range::createLeftBounded(prefix, true);

        return true;
    }
},
|
||||
{
|
||||
"startsWith",
|
||||
[] (RPNElement & out, const Field & value)
|
||||
@ -645,92 +684,102 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
||||
{
|
||||
const ASTs & args = func->arguments->children;
|
||||
|
||||
if (args.size() != 2)
|
||||
return false;
|
||||
|
||||
DataTypePtr key_expr_type; /// Type of expression containing key column
|
||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
||||
MonotonicFunctionsChain chain;
|
||||
bool is_set_const = false;
|
||||
bool is_constant_transformed = false;
|
||||
std::string func_name = func->name;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(func->name)
|
||||
&& tryPrepareSetIndex(args, context, out, key_column_num))
|
||||
if (args.size() == 1)
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
is_set_const = true;
|
||||
if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)))
|
||||
return false;
|
||||
|
||||
if (key_column_num == static_cast<size_t>(-1))
|
||||
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
|
||||
else if (args.size() == 2)
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
}
|
||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
is_constant_transformed = true;
|
||||
}
|
||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
}
|
||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
is_constant_transformed = true;
|
||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||
bool is_set_const = false;
|
||||
bool is_constant_transformed = false;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(func_name)
|
||||
&& tryPrepareSetIndex(args, context, out, key_column_num))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
is_set_const = true;
|
||||
}
|
||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
}
|
||||
else if (getConstant(args[1], block_with_constants, const_value, const_type)
|
||||
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
is_constant_transformed = true;
|
||||
}
|
||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
}
|
||||
else if (getConstant(args[0], block_with_constants, const_value, const_type)
|
||||
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
is_constant_transformed = true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
if (key_column_num == static_cast<size_t>(-1))
|
||||
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
|
||||
if (is_constant_transformed)
|
||||
{
|
||||
if (func_name == "less")
|
||||
func_name = "lessOrEquals";
|
||||
else if (func_name == "greater")
|
||||
func_name = "greaterOrEquals";
|
||||
}
|
||||
|
||||
/// Replace <const> <sign> <data> on to <data> <-sign> <const>
|
||||
if (key_arg_pos == 1)
|
||||
{
|
||||
if (func_name == "less")
|
||||
func_name = "greater";
|
||||
else if (func_name == "greater")
|
||||
func_name = "less";
|
||||
else if (func_name == "greaterOrEquals")
|
||||
func_name = "lessOrEquals";
|
||||
else if (func_name == "lessOrEquals")
|
||||
func_name = "greaterOrEquals";
|
||||
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
|
||||
{
|
||||
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool cast_not_needed =
|
||||
is_set_const /// Set args are already casted inside Set::createFromAST
|
||||
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
|
||||
|
||||
if (!cast_not_needed)
|
||||
castValueToType(key_expr_type, const_value, const_type, node);
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
if (key_column_num == static_cast<size_t>(-1))
|
||||
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
std::string func_name = func->name;
|
||||
|
||||
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
|
||||
if (is_constant_transformed)
|
||||
{
|
||||
if (func_name == "less")
|
||||
func_name = "lessOrEquals";
|
||||
else if (func_name == "greater")
|
||||
func_name = "greaterOrEquals";
|
||||
}
|
||||
|
||||
/// Replace <const> <sign> <data> on to <data> <-sign> <const>
|
||||
if (key_arg_pos == 1)
|
||||
{
|
||||
if (func_name == "less")
|
||||
func_name = "greater";
|
||||
else if (func_name == "greater")
|
||||
func_name = "less";
|
||||
else if (func_name == "greaterOrEquals")
|
||||
func_name = "lessOrEquals";
|
||||
else if (func_name == "lessOrEquals")
|
||||
func_name = "greaterOrEquals";
|
||||
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
|
||||
{
|
||||
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
out.key_column = key_column_num;
|
||||
out.monotonic_functions_chain = std::move(chain);
|
||||
|
||||
const auto atom_it = atom_map.find(func_name);
|
||||
if (atom_it == std::end(atom_map))
|
||||
return false;
|
||||
|
||||
bool cast_not_needed =
|
||||
is_set_const /// Set args are already casted inside Set::createFromAST
|
||||
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
|
||||
|
||||
if (!cast_not_needed)
|
||||
castValueToType(key_expr_type, const_value, const_type, node);
|
||||
out.key_column = key_column_num;
|
||||
out.monotonic_functions_chain = std::move(chain);
|
||||
|
||||
return atom_it->second(out, const_value);
|
||||
}
|
||||
@ -748,7 +797,6 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -142,7 +142,8 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
|
||||
"like",
|
||||
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
|
||||
{
|
||||
out.function = RPNElement::FUNCTION_LIKE;
|
||||
std::cerr << "FULLTEXT INDEX IS USED FOR LIKE FUNCTION" << '\n';
|
||||
out.function = RPNElement::FUNCTION_EQUALS;
|
||||
out.bloom_filter = std::make_unique<BloomFilter>(
|
||||
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
|
||||
|
||||
@ -151,6 +152,92 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
|
||||
return true;
|
||||
}
|
||||
},
|
||||
{
    "notLike",
    /// Atom for `key NOT LIKE pattern`: builds a bloom filter from the pattern via
    /// likeStringToBloomFilter and marks the element as FUNCTION_NOT_EQUALS.
    /// Returns false when the constant is not a String.
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so check before get<String>().
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_NOT_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        const auto & str = value.get<String>();
        likeStringToBloomFilter(str, idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
|
||||
{
    "startsWith",
    /// Atom for `startsWith(key, prefix)`: builds a bloom filter from the tokens of the prefix
    /// and marks the element as FUNCTION_EQUALS. Returns false when the constant is not a String.
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so check before get<String>().
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        /// NOTE(review): a prefix may end in the middle of a token of the stored value;
        /// confirm that the token extractor does not emit such a partial trailing token.
        const auto & prefix = value.get<String>();
        stringToBloomFilter(prefix.c_str(), prefix.size(), idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
|
||||
{
    "endsWith",
    /// Atom for `endsWith(key, suffix)`: builds a bloom filter from the tokens of the suffix
    /// and marks the element as FUNCTION_EQUALS. Returns false when the constant is not a String.
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so check before get<String>().
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        /// NOTE(review): a suffix may start in the middle of a token of the stored value;
        /// confirm that the token extractor does not emit such a partial leading token.
        const auto & suffix = value.get<String>();
        stringToBloomFilter(suffix.c_str(), suffix.size(), idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
|
||||
{
    "multiSearchAny",
    /// Atom for `multiSearchAny(key, [needle, ...])`: adds the tokens of every needle to a single
    /// bloom filter and marks the element as FUNCTION_EQUALS.
    /// Returns false when the constant is not an Array or any of its elements is not a String.
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets String constants through, so check before get<Array>().
        if (value.getType() != Field::Types::Array)
            return false;

        /// Validate all needles first so that `out` is left untouched when the atom is rejected.
        const auto & needles = value.get<Array>();
        for (const auto & element : needles)
            if (element.getType() != Field::Types::String)
                return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        /// NOTE(review): all needles share one filter. If the granule check requires every token
        /// of this filter to be present, a granule matching only one needle would be skipped,
        /// which contradicts the "any" semantics - confirm against BloomFilter::contains.
        for (const auto & element : needles)
        {
            const auto & str = element.get<String>();
            stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, *out.bloom_filter);
        }
        return true;
    }
},
|
||||
{
    "empty",
    /// Atom for `empty(key)`: FUNCTION_EQUALS against a bloom filter fed with the empty string.
    /// The constant argument is unused because the function is unary.
    [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx)
    {
        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        const std::string no_text;
        BloomFilter & filter = *out.bloom_filter;
        stringToBloomFilter(no_text.c_str(), no_text.size(), idx.token_extractor_func, filter);
        return true;
    }
},
|
||||
{
    "notEmpty",
    /// Atom for `notEmpty(key)`: FUNCTION_NOT_EQUALS against a bloom filter fed with the empty string.
    /// The constant argument is unused because the function is unary.
    [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx)
    {
        out.function = RPNElement::FUNCTION_NOT_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        const std::string no_text;
        BloomFilter & filter = *out.bloom_filter;
        stringToBloomFilter(no_text.c_str(), no_text.size(), idx.token_extractor_func, filter);
        return true;
    }
},
|
||||
{
|
||||
"notIn",
|
||||
[] (RPNElement & out, const Field &, const MergeTreeIndexFullText &)
|
||||
@ -197,8 +284,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
|
||||
}
|
||||
else if (element.function == RPNElement::FUNCTION_EQUALS
|
||||
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|
||||
|| element.function == RPNElement::FUNCTION_LIKE
|
||||
|| element.function == RPNElement::FUNCTION_NOT_LIKE
|
||||
|| element.function == RPNElement::FUNCTION_IN
|
||||
|| element.function == RPNElement::FUNCTION_NOT_IN
|
||||
|| element.function == RPNElement::ALWAYS_FALSE)
|
||||
@ -255,15 +340,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
||||
if (element.function == RPNElement::FUNCTION_NOT_EQUALS)
|
||||
rpn_stack.back() = !rpn_stack.back();
|
||||
}
|
||||
else if (element.function == RPNElement::FUNCTION_LIKE
|
||||
|| element.function == RPNElement::FUNCTION_NOT_LIKE)
|
||||
{
|
||||
rpn_stack.emplace_back(
|
||||
granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
|
||||
|
||||
if (element.function == RPNElement::FUNCTION_NOT_LIKE)
|
||||
rpn_stack.back() = !rpn_stack.back();
|
||||
}
|
||||
else if (element.function == RPNElement::FUNCTION_IN
|
||||
|| element.function == RPNElement::FUNCTION_NOT_IN)
|
||||
{
|
||||
@ -338,42 +414,50 @@ bool MergeTreeConditionFullText::atomFromAST(
|
||||
{
|
||||
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
|
||||
|
||||
if (args.size() != 2)
|
||||
return false;
|
||||
|
||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
|
||||
std::string func_name = func->name;
|
||||
|
||||
if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out))
|
||||
if (args.size() == 1)
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
if (!getKey(args[0], key_column_num))
|
||||
return false;
|
||||
}
|
||||
else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0], key_column_num))
|
||||
else if (args.size() == 2)
|
||||
{
|
||||
key_arg_pos = 0;
|
||||
}
|
||||
else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1], key_column_num))
|
||||
{
|
||||
key_arg_pos = 1;
|
||||
|
||||
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
|
||||
|
||||
if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out)) {
|
||||
key_arg_pos = 0;
|
||||
} else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) &&
|
||||
getKey(args[0], key_column_num)) {
|
||||
key_arg_pos = 0;
|
||||
} else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) &&
|
||||
getKey(args[1], key_column_num)) {
|
||||
key_arg_pos = 1;
|
||||
} else
|
||||
return false;
|
||||
|
||||
if (const_type && const_type->getTypeId() != TypeIndex::String &&
|
||||
const_type->getTypeId() != TypeIndex::FixedString &&
|
||||
const_type->getTypeId() != TypeIndex::Array)
|
||||
return false;
|
||||
|
||||
/// With the constant on the left (`<const> <op> <key>`) only the symmetric comparisons are accepted.
/// The two inequalities must be joined with &&: joined with || the test is true for every
/// function name, which rejected all constant-on-the-left atoms including equals/notEquals.
if (key_arg_pos == 1 && func_name != "equals" && func_name != "notEquals")
    return false;

/// LIKE-style atoms need a token extractor that reports LIKE support.
if (!index.token_extractor_func->supportLike() && (func_name == "like" || func_name == "notLike"))
    return false;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
|
||||
if (const_type && const_type->getTypeId() != TypeIndex::String && const_type->getTypeId() != TypeIndex::FixedString)
|
||||
return false;
|
||||
|
||||
if (key_arg_pos == 1 && (func->name != "equals" || func->name != "notEquals"))
|
||||
return false;
|
||||
else if (!index.token_extractor_func->supportLike() && (func->name == "like" || func->name == "notLike"))
|
||||
return false;
|
||||
else
|
||||
key_arg_pos = 0;
|
||||
|
||||
const auto atom_it = atom_map.find(func->name);
|
||||
const auto atom_it = atom_map.find(func_name);
|
||||
if (atom_it == std::end(atom_map))
|
||||
return false;
|
||||
|
||||
out.key_column = key_column_num;
|
||||
|
||||
return atom_it->second(out, const_value, index);
|
||||
}
|
||||
else if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))
|
||||
|
@ -78,8 +78,6 @@ private:
|
||||
/// Atoms of a Boolean expression.
|
||||
FUNCTION_EQUALS,
|
||||
FUNCTION_NOT_EQUALS,
|
||||
FUNCTION_LIKE,
|
||||
FUNCTION_NOT_LIKE,
|
||||
FUNCTION_IN,
|
||||
FUNCTION_NOT_IN,
|
||||
FUNCTION_UNKNOWN, /// Can take any value.
|
||||
@ -97,7 +95,7 @@ private:
|
||||
: function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
|
||||
|
||||
Function function = FUNCTION_UNKNOWN;
|
||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE.
|
||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
|
||||
size_t key_column;
|
||||
std::unique_ptr<BloomFilter> bloom_filter;
|
||||
/// For FUNCTION_IN and FUNCTION_NOT_IN
|
||||
|
Loading…
Reference in New Issue
Block a user