ClickHouse/src/Functions/randomString.cpp

132 lines
4.3 KiB
C++
Raw Normal View History

2020-05-07 14:54:33 +00:00
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/FunctionsRandom.h>
#include <Functions/PerformanceAdaptors.h>
2020-05-11 00:23:47 +00:00
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
2020-05-17 04:43:53 +00:00
#include <common/unaligned.h>
2020-05-07 14:54:33 +00:00
namespace DB
{
/// Error codes thrown by the functions in this file; they are only declared here (extern).
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int TOO_LARGE_STRING_SIZE;
}
2020-05-09 18:52:27 +00:00
/* Generate random string of specified length with fully random bytes (including zero). */
template <typename RandImpl>
class FunctionRandomStringImpl : public IFunction
2020-05-07 14:54:33 +00:00
{
public:
static constexpr auto name = "randomString";
2020-05-07 15:36:11 +00:00
2020-05-07 14:54:33 +00:00
String getName() const override { return name; }
bool isVariadic() const override { return true; }
2020-05-07 15:36:11 +00:00
2020-05-07 14:54:33 +00:00
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.empty())
throw Exception(
"Function " + getName() + " requires at least one argument: the size of resulting string",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (arguments.size() > 2)
throw Exception(
"Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
2020-05-17 13:07:37 +00:00
const IDataType & length_type = *arguments[0];
if (!isNumber(length_type))
throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
2020-05-07 14:54:33 +00:00
return std::make_shared<DataTypeString>();
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
2020-05-07 15:36:11 +00:00
auto col_to = ColumnString::create();
ColumnString::Chars & data_to = col_to->getChars();
ColumnString::Offsets & offsets_to = col_to->getOffsets();
2020-05-11 00:36:14 +00:00
if (input_rows_count == 0)
{
block.getByPosition(result).column = std::move(col_to);
return;
}
/// Fill offsets.
offsets_to.resize(input_rows_count);
2020-05-07 15:36:11 +00:00
const IColumn & length_column = *block.getByPosition(arguments[0]).column;
IColumn::Offset offset = 0;
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
{
size_t length = length_column.getUInt(row_num);
if (length > (1 << 30))
throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE);
2020-05-11 00:36:14 +00:00
offset += length + 1;
offsets_to[row_num] = offset;
}
2020-05-07 15:36:11 +00:00
2020-05-11 00:36:14 +00:00
/// Fill random bytes.
data_to.resize(offsets_to.back());
RandImpl::execute(reinterpret_cast<char *>(data_to.data()), data_to.size());
2020-05-07 15:36:11 +00:00
2020-05-11 00:36:14 +00:00
/// Put zero bytes in between.
auto * pos = data_to.data();
2020-05-11 00:36:14 +00:00
for (size_t row_num = 0; row_num < input_rows_count; ++row_num)
pos[offsets_to[row_num] - 1] = 0;
2020-05-07 15:36:11 +00:00
block.getByPosition(result).column = std::move(col_to);
2020-05-07 14:54:33 +00:00
}
};
/** The function object that gets registered in the factory.
  *
  * It inherits everything except execution from the default implementation; execution is
  * forwarded to an ImplementationSelector holding the variants registered in the constructor.
  */
class FunctionRandomString : public FunctionRandomStringImpl<TargetSpecific::Default::RandImpl>
{
    using DefaultImpl = FunctionRandomStringImpl<TargetSpecific::Default::RandImpl>;

public:
    /// Factory entry point.
    static FunctionPtr create(const Context & context)
    {
        return std::make_shared<FunctionRandomString>(context);
    }

    /// Registers the default variant always and the AVX2 variant when USE_MULTITARGET_CODE is enabled.
    explicit FunctionRandomString(const Context & context)
        : selector(context)
    {
        selector.registerImplementation<TargetArch::Default, DefaultImpl>();

#if USE_MULTITARGET_CODE
        selector.registerImplementation<TargetArch::AVX2, FunctionRandomStringImpl<TargetSpecific::AVX2::RandImpl>>();
#endif
    }

    /// Hands the call over to the selector, which runs one of the registered implementations.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
        selector.selectAndExecute(block, arguments, result, input_rows_count);
    }

private:
    ImplementationSelector<IFunction> selector;
};
2020-05-07 14:54:33 +00:00
/// Registers FunctionRandomString in the given function factory.
void registerFunctionRandomString(FunctionFactory & factory)
{
    factory.registerFunction<FunctionRandomString>();
}
}