Merge pull request #18803 from azat/simhash-sanity

Add sanity checks for Sim/Min hash arguments
This commit is contained in:
alexey-milovidov 2021-01-07 05:57:17 +03:00 committed by GitHub
commit 17108a71cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 3 deletions

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ARGUMENT_OUT_OF_BOUND;
}
// FunctionStringHash
@ -30,6 +31,8 @@ public:
static constexpr auto name = Name::name;
static constexpr size_t default_shingle_size = 3;
static constexpr size_t default_num_hashes = 6;
static constexpr size_t max_shingle_size = 25;
static constexpr size_t max_num_hashes = 25;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringHash>(); }
@ -100,10 +103,14 @@ public:
}
if (shingle_size == 0)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (shingle size) of function {} cannot be zero", getName());
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be zero", getName());
if (num_hashes == 0)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument (num hashes) of function {} cannot be zero", getName());
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be zero", getName());
if (shingle_size > max_shingle_size)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be greater then {}", getName(), max_shingle_size);
if (num_hashes > max_num_hashes)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be greater then {}", getName(), max_num_hashes);
auto type = std::make_shared<DataTypeUInt64>();
if constexpr (is_simhash)

View File

@ -108,4 +108,8 @@ SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinH
SELECT 'wordShingleMinHashCaseInsensitiveUTF8';
SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h;
SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 }
SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 }
SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 }
DROP TABLE defaults;