diff --git a/dbms/src/Storages/MergeTree/KeyCondition.cpp b/dbms/src/Storages/MergeTree/KeyCondition.cpp index 22cf7cbadad..7993ac4555a 100644 --- a/dbms/src/Storages/MergeTree/KeyCondition.cpp +++ b/dbms/src/Storages/MergeTree/KeyCondition.cpp @@ -178,6 +178,24 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } }, + { + "empty", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_IN_RANGE; + out.range = Range(""); + return true; + } + }, + { + "notEmpty", + [] (RPNElement & out, const Field &) + { + out.function = RPNElement::FUNCTION_NOT_IN_RANGE; + out.range = Range(""); + return true; + } + }, { "like", [] (RPNElement & out, const Field & value) @@ -199,6 +217,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map return true; } }, + { + "notLike", + [] (RPNElement & out, const Field & value) + { + if (value.getType() != Field::Types::String) + return false; + + String prefix = extractFixedPrefixFromLikePattern(value.get()); + if (prefix.empty() || value.get<const String &>() != prefix + "%") /* NOT LIKE maps onto "outside the prefix range" only for a pure "prefix%" pattern; with any further pattern text a key inside the range can still satisfy NOT LIKE */ + return false; + + String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix); + + out.function = RPNElement::FUNCTION_NOT_IN_RANGE; + out.range = !right_bound.empty() + ?
Range(prefix, true, right_bound, false) + : Range::createLeftBounded(prefix, true); + + return true; + } + }, { "startsWith", [] (RPNElement & out, const Field & value) @@ -645,92 +684,102 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo { const ASTs & args = func->arguments->children; - if (args.size() != 2) - return false; - DataTypePtr key_expr_type; /// Type of expression containing key column - size_t key_arg_pos; /// Position of argument with key column (non-const argument) size_t key_column_num = -1; /// Number of a key column (inside key_column_names array) MonotonicFunctionsChain chain; - bool is_set_const = false; - bool is_constant_transformed = false; + std::string func_name = func->name; - if (functionIsInOrGlobalInOperator(func->name) - && tryPrepareSetIndex(args, context, out, key_column_num)) + if (args.size() == 1) { - key_arg_pos = 0; - is_set_const = true; + if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))) + return false; + + if (key_column_num == static_cast(-1)) + throw Exception("`key_column_num` wasn't initialized.
It is a bug.", ErrorCodes::LOGICAL_ERROR); } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + else if (args.size() == 2) { - key_arg_pos = 0; - } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 0; - is_constant_transformed = true; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) - { - key_arg_pos = 1; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 1; - is_constant_transformed = true; + size_t key_arg_pos; /// Position of argument with key column (non-const argument) + bool is_set_const = false; + bool is_constant_transformed = false; + + if (functionIsInOrGlobalInOperator(func_name) + && tryPrepareSetIndex(args, context, out, key_column_num)) + { + key_arg_pos = 0; + is_set_const = true; + } + else if (getConstant(args[1], block_with_constants, const_value, const_type) + && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 0; + } + else if (getConstant(args[1], block_with_constants, const_value, const_type) + && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else if (getConstant(args[0], block_with_constants, const_value, const_type) + && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 1; + } + else if
(getConstant(args[0], block_with_constants, const_value, const_type) + && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 1; + is_constant_transformed = true; + } + else + return false; + + if (key_column_num == static_cast(-1)) + throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR); + + /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5" + if (is_constant_transformed) + { + if (func_name == "less") + func_name = "lessOrEquals"; + else if (func_name == "greater") + func_name = "greaterOrEquals"; + } + + /// Replace on to <-sign> + if (key_arg_pos == 1) + { + if (func_name == "less") + func_name = "greater"; + else if (func_name == "greater") + func_name = "less"; + else if (func_name == "greaterOrEquals") + func_name = "lessOrEquals"; + else if (func_name == "lessOrEquals") + func_name = "greaterOrEquals"; + else if (func_name == "in" || func_name == "notIn" || func_name == "like" || func_name == "notLike" || func_name == "startsWith") + { + /// "const IN data_column" doesn't make sense (unlike "data_column IN const") + return false; + } + } + + bool cast_not_needed = + is_set_const /// Set args are already casted inside Set::createFromAST + || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast. + + if (!cast_not_needed) + castValueToType(key_expr_type, const_value, const_type, node); } else return false; - if (key_column_num == static_cast(-1)) - throw Exception("`key_column_num` wasn't initialized.
It is a bug.", ErrorCodes::LOGICAL_ERROR); - - std::string func_name = func->name; - - /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5" - if (is_constant_transformed) - { - if (func_name == "less") - func_name = "lessOrEquals"; - else if (func_name == "greater") - func_name = "greaterOrEquals"; - } - - /// Replace on to <-sign> - if (key_arg_pos == 1) - { - if (func_name == "less") - func_name = "greater"; - else if (func_name == "greater") - func_name = "less"; - else if (func_name == "greaterOrEquals") - func_name = "lessOrEquals"; - else if (func_name == "lessOrEquals") - func_name = "greaterOrEquals"; - else if (func_name == "in" || func_name == "notIn" || func_name == "like") - { - /// "const IN data_column" doesn't make sense (unlike "data_column IN const") - return false; - } - } - - out.key_column = key_column_num; - out.monotonic_functions_chain = std::move(chain); - const auto atom_it = atom_map.find(func_name); if (atom_it == std::end(atom_map)) return false; - bool cast_not_needed = - is_set_const /// Set args are already casted inside Set::createFromAST - || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
- - if (!cast_not_needed) - castValueToType(key_expr_type, const_value, const_type, node); + out.key_column = key_column_num; + out.monotonic_functions_chain = std::move(chain); return atom_it->second(out, const_value); } @@ -748,7 +797,6 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo return true; } } - return false; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 895764339e5..8a5b942effd 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -142,7 +142,8 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map "like", [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) { - out.function = RPNElement::FUNCTION_LIKE; + /* LIKE is evaluated through the FUNCTION_EQUALS path; this patch removes the separate FUNCTION_LIKE branch. */ + out.function = RPNElement::FUNCTION_EQUALS; out.bloom_filter = std::make_unique( idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); @@ -151,6 +152,92 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map return true; } }, + { + "notLike", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + /* NOT LIKE is evaluated through the FUNCTION_NOT_EQUALS path; this patch removes FUNCTION_NOT_LIKE. */ + out.function = RPNElement::FUNCTION_NOT_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + const auto & str = value.get(); + likeStringToBloomFilter(str, idx.token_extractor_func, *out.bloom_filter); + return true; + } + }, + { + "startsWith", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + const auto & prefix = value.get(); + stringToBloomFilter(prefix.c_str(),
prefix.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + }, + { + "endsWith", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + const auto & suffix = value.get(); + stringToBloomFilter(suffix.c_str(), suffix.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + }, + { + "multiSearchAny", + [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; /* NOTE(review): one filter accumulates the tokens of every needle; confirm the EQUALS check does not require all of them, since multiSearchAny is satisfied by any single needle */ + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + for (const auto & element : value.get()) + { + if (element.getType() != Field::Types::String) + return false; + + const auto & str = element.get(); + stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, *out.bloom_filter); + } + return true; + } + }, + { + "empty", + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + std::string empty_str; + stringToBloomFilter(empty_str.c_str(), empty_str.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + + }, + { + "notEmpty", + [] (RPNElement & out, const Field &, const MergeTreeIndexFullText & idx) + { + out.function = RPNElement::FUNCTION_NOT_EQUALS; + out.bloom_filter = std::make_unique( + idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed); + + std::string empty_str; + stringToBloomFilter(empty_str.c_str(), empty_str.size(), idx.token_extractor_func, *out.bloom_filter); + return true; + } + + }, { "notIn", [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &) @@ -197,8 +284,6 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const } else if
(element.function == RPNElement::FUNCTION_EQUALS || element.function == RPNElement::FUNCTION_NOT_EQUALS - || element.function == RPNElement::FUNCTION_LIKE - || element.function == RPNElement::FUNCTION_NOT_LIKE || element.function == RPNElement::FUNCTION_IN || element.function == RPNElement::FUNCTION_NOT_IN || element.function == RPNElement::ALWAYS_FALSE) @@ -255,15 +340,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (element.function == RPNElement::FUNCTION_NOT_EQUALS) rpn_stack.back() = !rpn_stack.back(); } - else if (element.function == RPNElement::FUNCTION_LIKE - || element.function == RPNElement::FUNCTION_NOT_LIKE) - { - rpn_stack.emplace_back( - granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true); - - if (element.function == RPNElement::FUNCTION_NOT_LIKE) - rpn_stack.back() = !rpn_stack.back(); - } else if (element.function == RPNElement::FUNCTION_IN || element.function == RPNElement::FUNCTION_NOT_IN) { @@ -338,42 +414,50 @@ bool MergeTreeConditionFullText::atomFromAST( { const ASTs & args = typeid_cast(*func->arguments).children; - if (args.size() != 2) - return false; - - size_t key_arg_pos; /// Position of argument with key column (non-const argument) size_t key_column_num = -1; /// Number of a key column (inside key_column_names array) + std::string func_name = func->name; - if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out)) + if (args.size() == 1) { - key_arg_pos = 0; + if (!getKey(args[0], key_column_num)) + return false; } - else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && getKey(args[0], key_column_num)) + else if (args.size() == 2) { - key_arg_pos = 0; - } - else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && getKey(args[1], key_column_num)) - { - key_arg_pos = 1; + + size_t key_arg_pos; /// Position of argument with key column (non-const argument) + + if
(functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out)) { + key_arg_pos = 0; + } else if (KeyCondition::getConstant(args[1], block_with_constants, const_value, const_type) && + getKey(args[0], key_column_num)) { + key_arg_pos = 0; + } else if (KeyCondition::getConstant(args[0], block_with_constants, const_value, const_type) && + getKey(args[1], key_column_num)) { + key_arg_pos = 1; + } else + return false; + + if (const_type && const_type->getTypeId() != TypeIndex::String && + const_type->getTypeId() != TypeIndex::FixedString && + const_type->getTypeId() != TypeIndex::Array) + return false; + + if (key_arg_pos == 1 && func_name != "equals" && func_name != "notEquals") /* a constant on the left is accepted only for the symmetric equals / notEquals; the former "||" made this test always true */ + return false; + + else if (!index.token_extractor_func->supportLike() && (func_name == "like" || func_name == "notLike")) + return false; } else return false; - if (const_type && const_type->getTypeId() != TypeIndex::String && const_type->getTypeId() != TypeIndex::FixedString) - return false; - - if (key_arg_pos == 1 && (func->name != "equals" || func->name != "notEquals")) - return false; - else if (!index.token_extractor_func->supportLike() && (func->name == "like" || func->name == "notLike")) - return false; - else - key_arg_pos = 0; - - const auto atom_it = atom_map.find(func->name); + const auto atom_it = atom_map.find(func_name); if (atom_it == std::end(atom_map)) return false; out.key_column = key_column_num; + return atom_it->second(out, const_value, index); } else if (KeyCondition::getConstant(node, block_with_constants, const_value, const_type)) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h index cd8ac534e64..7cb5bf9e71b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -78,8 +78,6 @@ private: /// Atoms of a Boolean expression.
FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, - FUNCTION_LIKE, - FUNCTION_NOT_LIKE, FUNCTION_IN, FUNCTION_NOT_IN, FUNCTION_UNKNOWN, /// Can take any value. @@ -97,7 +95,7 @@ private: : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} Function function = FUNCTION_UNKNOWN; - /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE. + /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS size_t key_column; std::unique_ptr bloom_filter; /// For FUNCTION_IN and FUNCTION_NOT_IN