Add primary key and MergeTreeIndexFullText support for string functions

This commit is contained in:
dimarub2000 2019-07-10 17:53:57 +03:00
parent 0b756c48a8
commit c9a5b3c9ab
3 changed files with 240 additions and 110 deletions

View File

@ -178,6 +178,24 @@ const KeyCondition::AtomMap KeyCondition::atom_map
return true;
}
},
{
    /// empty(key): the atom takes no constant argument; the key is matched
    /// against the range built from the empty string literal.
    "empty",
    [] (RPNElement & out, const Field &)
    {
        const Range empty_string_range("");
        out.range = empty_string_range;
        out.function = RPNElement::FUNCTION_IN_RANGE;
        return true;
    }
},
{
    /// notEmpty(key): negation of "empty" - the key must lie outside the range
    /// built from the empty string literal. The constant argument is unused.
    "notEmpty",
    [] (RPNElement & out, const Field &)
    {
        const Range empty_string_range("");
        out.range = empty_string_range;
        out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
        return true;
    }
},
{
"like",
[] (RPNElement & out, const Field & value)
@ -199,6 +217,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
return true;
}
},
{
    /// key NOT LIKE 'pattern'.
    /// The condition is turned into "key is outside [prefix, next_prefix)", where prefix is
    /// the fixed prefix of the pattern. Returns false (atom not usable) when the constant is
    /// not a String, when there is no fixed prefix, or when the pattern is anything other
    /// than exactly "<prefix>%".
    "notLike",
    [] (RPNElement & out, const Field & value)
    {
        if (value.getType() != Field::Types::String)
            return false;

        const String & pattern = value.get<const String &>();
        String prefix = extractFixedPrefixFromLikePattern(pattern);
        if (prefix.empty())
            return false;

        /// Excluding the whole prefix range is only sound when every string with that prefix
        /// matches the pattern, i.e. the pattern is the literal prefix followed by one '%'.
        /// For a pattern such as 'abc%def' the key 'abcxyz' lies inside the prefix range but
        /// still satisfies NOT LIKE, so the range must not be excluded.
        const bool is_pure_prefix_pattern =
            pattern.size() == prefix.size() + 1
            && pattern.back() == '%'
            && pattern.compare(0, prefix.size(), prefix) == 0
            && prefix.find_first_of("%_\\") == String::npos;
        if (!is_pure_prefix_pattern)
            return false;

        String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);

        out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
        /// An empty right bound means there is no upper limit for the prefix range.
        out.range = !right_bound.empty()
            ? Range(prefix, true, right_bound, false)
            : Range::createLeftBounded(prefix, true);

        return true;
    }
},
{
"startsWith",
[] (RPNElement & out, const Field & value)
@ -645,92 +684,102 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
{
const ASTs & args = func->arguments->children;
if (args.size() != 2)
return false;
DataTypePtr key_expr_type; /// Type of expression containing key column
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
MonotonicFunctionsChain chain;
bool is_set_const = false;
bool is_constant_transformed = false;
std::string func_name = func->name;
if (functionIsInOrGlobalInOperator(func->name)
&& tryPrepareSetIndex(args, context, out, key_column_num))
if (args.size() == 1)
{
key_arg_pos = 0;
is_set_const = true;
if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)))
return false;
if (key_column_num == static_cast<size_t>(-1))
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
else if (args.size() == 2)
{
key_arg_pos = 0;
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
{
key_arg_pos = 0;
is_constant_transformed = true;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 1;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
{
key_arg_pos = 1;
is_constant_transformed = true;
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
bool is_set_const = false;
bool is_constant_transformed = false;
if (functionIsInOrGlobalInOperator(func_name)
&& tryPrepareSetIndex(args, context, out, key_column_num))
{
key_arg_pos = 0;
is_set_const = true;
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 0;
}
else if (getConstant(args[1], block_with_constants, const_value, const_type)
&& canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
{
key_arg_pos = 0;
is_constant_transformed = true;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
{
key_arg_pos = 1;
}
else if (getConstant(args[0], block_with_constants, const_value, const_type)
&& canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
{
key_arg_pos = 1;
is_constant_transformed = true;
}
else
return false;
if (key_column_num == static_cast<size_t>(-1))
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
if (is_constant_transformed)
{
if (func_name == "less")
func_name = "lessOrEquals";
else if (func_name == "greater")
func_name = "greaterOrEquals";
}
/// Replace <const> <sign> <data> on to <data> <-sign> <const>
if (key_arg_pos == 1)
{
if (func_name == "less")
func_name = "greater";
else if (func_name == "greater")
func_name = "less";
else if (func_name == "greaterOrEquals")
func_name = "lessOrEquals";
else if (func_name == "lessOrEquals")
func_name = "greaterOrEquals";
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
{
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
return false;
}
}
bool cast_not_needed =
is_set_const /// Set args are already casted inside Set::createFromAST
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
if (!cast_not_needed)
castValueToType(key_expr_type, const_value, const_type, node);
}
else
return false;
if (key_column_num == static_cast<size_t>(-1))
throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
std::string func_name = func->name;
/// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
if (is_constant_transformed)
{
if (func_name == "less")
func_name = "lessOrEquals";
else if (func_name == "greater")
func_name = "greaterOrEquals";
}
/// Replace <const> <sign> <data> on to <data> <-sign> <const>
if (key_arg_pos == 1)
{
if (func_name == "less")
func_name = "greater";
else if (func_name == "greater")
func_name = "less";
else if (func_name == "greaterOrEquals")
func_name = "lessOrEquals";
else if (func_name == "lessOrEquals")
func_name = "greaterOrEquals";
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
{
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
return false;
}
}
out.key_column = key_column_num;
out.monotonic_functions_chain = std::move(chain);
const auto atom_it = atom_map.find(func_name);
if (atom_it == std::end(atom_map))
return false;
bool cast_not_needed =
is_set_const /// Set args are already casted inside Set::createFromAST
|| (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
if (!cast_not_needed)
castValueToType(key_expr_type, const_value, const_type, node);
out.key_column = key_column_num;
out.monotonic_functions_chain = std::move(chain);
return atom_it->second(out, const_value);
}
@ -748,7 +797,6 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
return true;
}
}
return false;
}

View File

@ -142,7 +142,8 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
"like",
[] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
{
out.function = RPNElement::FUNCTION_LIKE;
std::cerr << "FULLTEXT INDEX IS USED FOR LIKE FUNCTION" << '\n';
out.function = RPNElement::FUNCTION_EQUALS;
out.bloom_filter = std::make_unique<BloomFilter>(
idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
@ -151,6 +152,92 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
return true;
}
},
{
    /// column NOT LIKE 'pattern' for the full-text index.
    /// Builds a bloom filter from the LIKE pattern and marks the atom as FUNCTION_NOT_EQUALS.
    /// Returns false when the constant is not a String.
    "notLike",
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so the type must be checked here
        /// before the value is read as a String.
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_NOT_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        const auto & str = value.get<String>();
        likeStringToBloomFilter(str, idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
{
    /// startsWith(column, 'prefix') for the full-text index.
    /// Builds a bloom filter from the tokens of the prefix and marks the atom as
    /// FUNCTION_EQUALS. Returns false when the constant is not a String.
    "startsWith",
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so the type must be checked here
        /// before the value is read as a String.
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        /// NOTE(review): the prefix is tokenized as if it were a complete string. If the token
        /// extractor splits on separators, the last token of the prefix may be only the
        /// beginning of a token stored in the column - confirm this cannot skip matching granules.
        const auto & prefix = value.get<String>();
        stringToBloomFilter(prefix.c_str(), prefix.size(), idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
{
    /// endsWith(column, 'suffix') for the full-text index.
    /// Builds a bloom filter from the tokens of the suffix and marks the atom as
    /// FUNCTION_EQUALS. Returns false when the constant is not a String.
    "endsWith",
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// The caller also lets Array constants through, so the type must be checked here
        /// before the value is read as a String.
        if (value.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        /// NOTE(review): the suffix is tokenized as if it were a complete string. If the token
        /// extractor splits on separators, the first token of the suffix may be only the
        /// tail of a token stored in the column - confirm this cannot skip matching granules.
        const auto & suffix = value.get<String>();
        stringToBloomFilter(suffix.c_str(), suffix.size(), idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
{
    /// multiSearchAny(column, ['needle', ...]) for the full-text index.
    /// Only a constant array with exactly one String needle is accepted; in every other
    /// case the atom returns false and the index is not used for this condition.
    "multiSearchAny",
    [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
    {
        /// Validate the constant before touching `out`, so a rejected atom leaves it unmodified.
        if (value.getType() != Field::Types::Array)
            return false;

        const auto & needles = value.get<Array>();

        /// A single bloom filter evaluated as FUNCTION_EQUALS requires the granule to contain
        /// the tokens of everything that was added to it. Adding several needles would therefore
        /// express "all needles", not "any needle", and granules that contain only some of the
        /// needles would be skipped. With one needle both meanings coincide.
        if (needles.size() != 1)
            return false;

        const Field & needle = needles.front();
        if (needle.getType() != Field::Types::String)
            return false;

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

        const auto & str = needle.get<String>();
        stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, *out.bloom_filter);
        return true;
    }
},
{
    /// empty(column) for the full-text index: the atom takes no constant, so the bloom
    /// filter is built from an empty string and the atom is marked FUNCTION_EQUALS.
    "empty",
    [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx)
    {
        const std::string no_text;

        auto filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
        stringToBloomFilter(no_text.c_str(), no_text.size(), idx.token_extractor_func, *filter);

        out.function = RPNElement::FUNCTION_EQUALS;
        out.bloom_filter = std::move(filter);
        return true;
    }
},
{
    /// notEmpty(column) for the full-text index: same bloom filter as "empty" (built from
    /// an empty string), but the atom is marked FUNCTION_NOT_EQUALS.
    "notEmpty",
    [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx)
    {
        const std::string no_text;

        auto filter = std::make_unique<BloomFilter>(
            idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
        stringToBloomFilter(no_text.c_str(), no_text.size(), idx.token_extractor_func, *filter);

        out.function = RPNElement::FUNCTION_NOT_EQUALS;
        out.bloom_filter = std::move(filter);
        return true;
    }
},
{
"notIn",
[] (RPNElement & out, const Field &, const MergeTreeIndexFullText &)
@ -197,8 +284,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
}
else if (element.function == RPNElement::FUNCTION_EQUALS
|| element.function == RPNElement::FUNCTION_NOT_EQUALS
|| element.function == RPNElement::FUNCTION_LIKE
|| element.function == RPNElement::FUNCTION_NOT_LIKE
|| element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::ALWAYS_FALSE)
@ -255,15 +340,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
if (element.function == RPNElement::FUNCTION_NOT_EQUALS)
rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_LIKE
|| element.function == RPNElement::FUNCTION_NOT_LIKE)
{
rpn_stack.emplace_back(
granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
if (element.function == RPNElement::FUNCTION_NOT_LIKE)
rpn_stack.back() = !rpn_stack.back();
}
else if (element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN)
{
@ -338,42 +414,50 @@ bool MergeTreeConditionFullText::atomFromAST(
{
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
if (args.size() != 2)
return false;
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
size_t key_column_num = -1; /// Number of a key column (inside key_column_names array)
std::string func_name = func->name;
if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out))
if (args.size() == 1)
{
key_arg_pos = 0;
if (!getKey(args[0], key_column_num))
return false;
}
else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0], key_column_num))
else if (args.size() == 2)
{
key_arg_pos = 0;
}
else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1], key_column_num))
{
key_arg_pos = 1;
size_t key_arg_pos; /// Position of argument with key column (non-const argument)
if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out)) {
key_arg_pos = 0;
} else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) &&
getKey(args[0], key_column_num)) {
key_arg_pos = 0;
} else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) &&
getKey(args[1], key_column_num)) {
key_arg_pos = 1;
} else
return false;
if (const_type && const_type->getTypeId() != TypeIndex::String &&
const_type->getTypeId() != TypeIndex::FixedString &&
const_type->getTypeId() != TypeIndex::Array)
return false;
if (key_arg_pos == 1 && (func_name != "equals" || func_name != "notEquals"))
return false;
else if (!index.token_extractor_func->supportLike() && (func_name == "like" || func_name == "notLike"))
return false;
}
else
return false;
if (const_type && const_type->getTypeId() != TypeIndex::String && const_type->getTypeId() != TypeIndex::FixedString)
return false;
if (key_arg_pos == 1 && (func->name != "equals" || func->name != "notEquals"))
return false;
else if (!index.token_extractor_func->supportLike() && (func->name == "like" || func->name == "notLike"))
return false;
else
key_arg_pos = 0;
const auto atom_it = atom_map.find(func->name);
const auto atom_it = atom_map.find(func_name);
if (atom_it == std::end(atom_map))
return false;
out.key_column = key_column_num;
return atom_it->second(out, const_value, index);
}
else if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type))

View File

@ -78,8 +78,6 @@ private:
/// Atoms of a Boolean expression.
FUNCTION_EQUALS,
FUNCTION_NOT_EQUALS,
FUNCTION_LIKE,
FUNCTION_NOT_LIKE,
FUNCTION_IN,
FUNCTION_NOT_IN,
FUNCTION_UNKNOWN, /// Can take any value.
@ -97,7 +95,7 @@ private:
: function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
Function function = FUNCTION_UNKNOWN;
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE.
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
size_t key_column;
std::unique_ptr<BloomFilter> bloom_filter;
/// For FUNCTION_IN and FUNCTION_NOT_IN