Add sanity checks for Sim/Min hash arguments

Fixes: #18799
Fixes: #18524
This commit is contained in:
Azat Khuzhin 2021-01-06 22:23:49 +03:00
parent 13e4579052
commit 6261d4135c
2 changed files with 14 additions and 3 deletions

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
extern const int ARGUMENT_OUT_OF_BOUND;
} }
// FunctionStringHash // FunctionStringHash
@ -30,6 +31,8 @@ public:
static constexpr auto name = Name::name; static constexpr auto name = Name::name;
static constexpr size_t default_shingle_size = 3; static constexpr size_t default_shingle_size = 3;
static constexpr size_t default_num_hashes = 6; static constexpr size_t default_num_hashes = 6;
static constexpr size_t max_shingle_size = 1000;
static constexpr size_t max_num_hashes = 1000;
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringHash>(); } static FunctionPtr create(const Context &) { return std::make_shared<FunctionsStringHash>(); }
@ -100,10 +103,14 @@ public:
} }
if (shingle_size == 0) if (shingle_size == 0)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument (shingle size) of function {} cannot be zero", getName()); throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be zero", getName());
if (num_hashes == 0) if (num_hashes == 0)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument (num hashes) of function {} cannot be zero", getName()); throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be zero", getName());
if (shingle_size > max_shingle_size)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument (shingle size) of function {} cannot be greater than {}", getName(), max_shingle_size);
if (num_hashes > max_num_hashes)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Third argument (num hashes) of function {} cannot be greater than {}", getName(), max_num_hashes);
auto type = std::make_shared<DataTypeUInt64>(); auto type = std::make_shared<DataTypeUInt64>();
if constexpr (is_simhash) if constexpr (is_simhash)

View File

@ -108,4 +108,8 @@ SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinH
SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; SELECT 'wordShingleMinHashCaseInsensitiveUTF8';
SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h; SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h;
SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 }
SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 }
SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 }
DROP TABLE defaults; DROP TABLE defaults;