Merge pull request #42209 from canhld94/ch_canh_fix_prefix_not_like

Add `notLike` to key condition atom map
This commit is contained in:
Robert Schulze 2022-11-08 10:07:21 +01:00 committed by GitHub
commit bb507356ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 54 additions and 9 deletions

View File

@ -29,6 +29,7 @@
#include <Storages/KeyDescription.h>
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
#include <algorithm>
#include <cassert>
#include <stack>
#include <limits>
@ -55,10 +56,15 @@ String Range::toString() const
}
/// Example: for `Hello\_World% ...` string it returns `Hello_World`, and for `%test%` returns an empty string.
String extractFixedPrefixFromLikePattern(const String & like_pattern)
/// Returns the unescaped prefix of like_pattern before the first wildcard, e.g. 'Hello\_World% ...' --> 'Hello_World'
/// We call a pattern "perfect prefix" if:
/// - (1) the pattern has a wildcard
/// - (2) the first wildcard is '%' and is only followed by nothing or other '%'
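/// e.g. 'test%' and 'test%%' have the perfect prefix 'test'; 'test%x', 'test%_' and 'test_' have no perfect prefix.
/// If requires_perfect_prefix is true, an empty string is returned unless the pattern has a perfect prefix.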
String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix)
{
String fixed_prefix;
fixed_prefix.reserve(like_pattern.size());
const char * pos = like_pattern.data();
const char * end = pos + like_pattern.size();
@ -67,10 +73,13 @@ String extractFixedPrefixFromLikePattern(const String & like_pattern)
switch (*pos)
{
case '%':
[[fallthrough]];
case '_':
if (requires_perfect_prefix)
{
bool is_prefect_prefix = std::all_of(pos, end, [](auto c) { return c == '%'; });
return is_prefect_prefix ? fixed_prefix : "";
}
return fixed_prefix;
case '\\':
++pos;
if (pos == end)
@ -78,12 +87,13 @@ String extractFixedPrefixFromLikePattern(const String & like_pattern)
[[fallthrough]];
default:
fixed_prefix += *pos;
break;
}
++pos;
}
/// Reaching this point means no wildcard was found in the pattern, so the pattern has no perfect prefix.
if (requires_perfect_prefix)
return "";
return fixed_prefix;
}
@ -346,7 +356,7 @@ const KeyCondition::AtomMap KeyCondition::atom_map
if (value.getType() != Field::Types::String)
return false;
String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>());
String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>(), /*requires_perfect_prefix*/ false);
if (prefix.empty())
return false;
@ -360,6 +370,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
return true;
}
},
{
    "notLike",
    [] (RPNElement & out, const Field & value)
    {
        /// Only a string pattern can be analysed.
        if (value.getType() != Field::Types::String)
            return false;

        /// The negation can only be expressed as "not in range" when the pattern is a fixed prefix
        /// followed by nothing but '%' (a "perfect prefix"); otherwise an empty string comes back and we give up.
        String fixed_prefix = extractFixedPrefixFromLikePattern(value.get<const String &>(), /*requires_perfect_prefix*/ true);
        if (fixed_prefix.empty())
            return false;

        /// NOTE(review): an empty upper bound presumably means that no string is greater than
        /// all strings with this prefix - confirm against the helper's definition.
        String upper_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(fixed_prefix);

        out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
        if (upper_bound.empty())
            out.range = Range::createLeftBounded(fixed_prefix, true);
        else
            out.range = Range(fixed_prefix, true, upper_bound, false);

        return true;
    }
},
{
"startsWith",
[] (RPNElement & out, const Field & value)

View File

@ -485,6 +485,6 @@ private:
bool strict;
};
String extractFixedPrefixFromLikePattern(const String & like_pattern);
String extractFixedPrefixFromLikePattern(std::string_view like_pattern, bool requires_perfect_prefix);
}

View File

@ -116,7 +116,7 @@ void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns,
}
else
{
String target = extractFixedPrefixFromLikePattern(key);
String target = extractFixedPrefixFromLikePattern(key, /*requires_perfect_prefix*/ false);
if (target.empty())
throw Exception(
"SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);

View File

@ -0,0 +1,2 @@
200000
200000

View File

@ -0,0 +1,12 @@
-- Checks which NOT LIKE patterns can be evaluated with the primary key on a String column.
-- The table holds 100000 rows each of 'aa', 'ba' and 'ca', ordered by str.
CREATE TABLE data (str String) ENGINE=MergeTree ORDER BY str;
INSERT INTO data (str) SELECT 'aa' FROM numbers(100000);
INSERT INTO data (str) SELECT 'ba' FROM numbers(100000);
INSERT INTO data (str) SELECT 'ca' FROM numbers(100000);
-- Patterns made of a fixed prefix followed only by '%': these queries are expected to succeed
-- with force_primary_key=1 and to count the 'ba' and 'ca' rows.
SELECT count() FROM data WHERE str NOT LIKE 'a%' SETTINGS force_primary_key=1;
SELECT count() FROM data WHERE str NOT LIKE 'a%%' SETTINGS force_primary_key=1;
-- All remaining patterns (no wildcard, leading wildcard, '_' wildcard, or characters after '%')
-- are expected to fail with server error 277 (presumably "index not used" - confirm against the error code list).
SELECT count() FROM data WHERE str NOT LIKE 'a' SETTINGS force_primary_key=1; -- { serverError 277 }
SELECT count() FROM data WHERE str NOT LIKE '%a' SETTINGS force_primary_key=1; -- { serverError 277 }
SELECT count() FROM data WHERE str NOT LIKE 'a_' SETTINGS force_primary_key=1; -- { serverError 277 }
SELECT count() FROM data WHERE str NOT LIKE 'a%_' SETTINGS force_primary_key=1; -- { serverError 277 }
SELECT count() FROM data WHERE str NOT LIKE '_a' SETTINGS force_primary_key=1; -- { serverError 277 }
SELECT count() FROM data WHERE str NOT LIKE 'a%\_' SETTINGS force_primary_key=1; -- { serverError 277 }