From 28685a80f54d0bbed61eca0e5fceb845eaac8fbf Mon Sep 17 00:00:00 2001 From: Andrei Nekrashevich Date: Thu, 7 May 2020 17:54:33 +0300 Subject: [PATCH 1/6] ClickHouse: functions for fuzzing --- src/Functions/randomString.cpp | 64 +++++++++++++++++++++++ src/Functions/registerFunctionsRandom.cpp | 3 +- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 src/Functions/randomString.cpp diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp new file mode 100644 index 00000000000..024174344f9 --- /dev/null +++ b/src/Functions/randomString.cpp @@ -0,0 +1,64 @@ +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_LARGE_STRING_SIZE; +} + + +/* Generate random string of specified length with fully random bytes(including zero). */ +class FunctionRandomPrintableASCII : public IFunction +{ +public: + static constexpr auto name = "randomString"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception( + "Function " + getName() + " requires at least one argument: the size of resulting string", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (arguments.size() > 2) + throw Exception( + "Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const IDataType & length_type = *arguments[0]; + if (!isNumber(length_type)) + throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + // TODO + } +}; + +void registerFunctionRandomString(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/registerFunctionsRandom.cpp b/src/Functions/registerFunctionsRandom.cpp index 7b72c1cf305..5826fe78419 100644 --- a/src/Functions/registerFunctionsRandom.cpp +++ b/src/Functions/registerFunctionsRandom.cpp @@ -1,6 +1,5 @@ namespace DB { - class FunctionFactory; void registerFunctionRand(FunctionFactory & factory); @@ -8,6 +7,7 @@ void registerFunctionRand64(FunctionFactory & factory); void registerFunctionRandConstant(FunctionFactory & factory); void registerFunctionGenerateUUIDv4(FunctionFactory & factory); void registerFunctionRandomPrintableASCII(FunctionFactory & factory); +void registerFunctionRandomString(FunctionFactory & factory); void registerFunctionsRandom(FunctionFactory & factory) { @@ -16,6 +16,7 @@ void registerFunctionsRandom(FunctionFactory & factory) registerFunctionRandConstant(factory); registerFunctionGenerateUUIDv4(factory); registerFunctionRandomPrintableASCII(factory); + registerFunctionRandomString(factory); } } From 43bc55ab5fcca781625717fdd4ba0685e708da3b Mon Sep 17 00:00:00 2001 From: Andrei Nekrashevich Date: Thu, 7 May 2020 18:36:11 +0300 Subject: [PATCH 2/6] randomString function --- src/Functions/randomString.cpp | 39 ++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 024174344f9..42675d234c8 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -17,15 +17,17 @@ namespace ErrorCodes /* Generate random string of specified length with fully random bytes(including zero). */ -class FunctionRandomPrintableASCII : public IFunction +class FunctionRandomString : public IFunction { public: static constexpr auto name = "randomString"; + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override @@ -52,7 +54,40 @@ public: void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { - // TODO + auto col_to = ColumnString::create(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + offsets_to.resize(input_rows_count); + + const IColumn & length_column = *block.getByPosition(arguments[0]).column; + + IColumn::Offset offset = 0; + + for (size_t row_num = 0; row_num < input_rows_count; ++row_num) + { + size_t length = length_column.getUInt(row_num); + if (length > (1 << 30)) + throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); + + + IColumn::Offset next_offset = offset + length + 1; + data_to.resize(next_offset); + offsets_to[row_num] = next_offset; + + auto * data_to_ptr = data_to.data(); // avoid assert on array indexing after end + for (size_t pos = offset, end = offset + length; pos < end; + pos += 8) // We have padding in column buffers that we can overwrite. + { + UInt64 rand = thread_local_rng(); + data_to_ptr[pos] = rand; + } + + data_to[offset + length] = 0; + + offset = next_offset; + } + + block.getByPosition(result).column = std::move(col_to); } }; From 4a66b10669bbc4bc4091576ac0799d5f57aa2441 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 9 May 2020 21:52:27 +0300 Subject: [PATCH 3/6] Update randomString.cpp --- src/Functions/randomString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 42675d234c8..8a49b895781 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes } -/* Generate random string of specified length with fully random bytes(including zero). */ +/* Generate random string of specified length with fully random bytes (including zero). */ class FunctionRandomString : public IFunction { public: From d459f07eee78876e903abe4a9879dc4bcd5ab107 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 9 May 2020 21:54:23 +0300 Subject: [PATCH 4/6] Update randomString.cpp --- src/Functions/randomString.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 8a49b895781..2a56213a99a 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -69,7 +69,6 @@ public: if (length > (1 << 30)) throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); - IColumn::Offset next_offset = offset + length + 1; data_to.resize(next_offset); offsets_to[row_num] = next_offset; From c3873495c9324966d4f780d665961d7cfef12b8c Mon Sep 17 00:00:00 2001 From: Andrei Nekrashevich Date: Sun, 10 May 2020 22:07:02 +0300 Subject: [PATCH 5/6] fix and performance test template --- src/Functions/randomString.cpp | 4 ++-- tests/performance/random_string.xml | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/performance/random_string.xml diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 2a56213a99a..79e6dd25c81 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -73,9 +73,9 @@ public: data_to.resize(next_offset); offsets_to[row_num] = next_offset; - auto * data_to_ptr = data_to.data(); // avoid assert on array indexing after end + auto data_to_ptr = reinterpret_cast(data_to.data()); // avoid assert on array indexing after end for (size_t pos = offset, end = offset + length; pos < end; - pos += 8) // We have padding in column buffers that we can overwrite. + pos += sizeof(UInt64)) // We have padding in column buffers that we can overwrite. { UInt64 rand = thread_local_rng(); data_to_ptr[pos] = rand; diff --git a/tests/performance/random_string.xml b/tests/performance/random_string.xml new file mode 100644 index 00000000000..5894d4c469a --- /dev/null +++ b/tests/performance/random_string.xml @@ -0,0 +1,12 @@ + + + + + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(10)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(100)) + SELECT count() FROM zeros(100000) WHERE NOT ignore(randomString(1000)) + SELECT count() FROM zeros(10000) WHERE NOT ignore(randomString(10000)) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 10)) + SELECT count() FROM zeros(10000000) WHERE NOT ignore(randomString(rand() % 100)) + SELECT count() FROM zeros(1000000) WHERE NOT ignore(randomString(rand() % 1000)) + From 84ed5b118caa3904d7940fa73c70d51c33d42bee Mon Sep 17 00:00:00 2001 From: Andrei Nekrashevich Date: Sun, 10 May 2020 22:31:47 +0300 Subject: [PATCH 6/6] fix --- src/Functions/randomString.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 79e6dd25c81..eb0dcf6b21a 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -73,12 +73,12 @@ public: data_to.resize(next_offset); offsets_to[row_num] = next_offset; - auto data_to_ptr = reinterpret_cast(data_to.data()); // avoid assert on array indexing after end + auto * data_to_ptr = data_to.data(); // avoid assert on array indexing after end for (size_t pos = offset, end = offset + length; pos < end; pos += sizeof(UInt64)) // We have padding in column buffers that we can overwrite. { UInt64 rand = thread_local_rng(); - data_to_ptr[pos] = rand; + *reinterpret_cast(data_to_ptr + pos) = rand; } data_to[offset + length] = 0;