2023-05-22 06:59:08 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <Functions/IFunction.h>
|
|
|
|
#include <cstring>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown by this translation unit; the values themselves are defined elsewhere.
namespace ErrorCodes
{
    /// Thrown when the argument column has a type none of the execution paths can handle.
    extern const int ILLEGAL_COLUMN;
    /// Thrown when a single row requests more spaces than the safety threshold allows.
    extern const int TOO_LARGE_STRING_SIZE;
}
|
|
|
|
|
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Prints whitespace n-times. Actually, space() could also be pushed down to repeat(). Chose a standalone-implementation because
|
|
|
|
/// we can do memset() whereas repeat() does memcpy().
|
|
|
|
class FunctionSpace : public IFunction
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
static constexpr auto space = ' ';
|
|
|
|
|
2023-05-23 12:16:49 +00:00
|
|
|
/// Safety threshold against DoS.
|
2023-05-25 11:55:23 +00:00
|
|
|
static inline void checkRepeatTime(size_t repeat_time)
|
2023-05-23 12:16:49 +00:00
|
|
|
{
|
2023-05-25 11:55:23 +00:00
|
|
|
static constexpr auto max_repeat_times = 1'000'000uz;
|
2023-05-23 12:16:49 +00:00
|
|
|
if (repeat_time > max_repeat_times)
|
|
|
|
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", repeat_time, max_repeat_times);
|
|
|
|
}
|
|
|
|
|
2023-05-22 06:59:08 +00:00
|
|
|
public:
|
|
|
|
static constexpr auto name = "space";
|
|
|
|
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSpace>(); }
|
|
|
|
|
|
|
|
String getName() const override { return name; }
|
|
|
|
size_t getNumberOfArguments() const override { return 1; }
|
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
|
|
|
FunctionArgumentDescriptors args{
|
2024-03-07 16:16:13 +00:00
|
|
|
{"n", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isInteger), nullptr, "Integer"}
|
2023-05-22 06:59:08 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
validateFunctionArgumentTypes(*this, arguments, args);
|
|
|
|
|
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename DataType>
|
|
|
|
bool executeConstant(ColumnPtr col_times, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const
|
|
|
|
{
|
2024-03-29 08:59:07 +00:00
|
|
|
const ColumnConst & col_times_const = checkAndGetColumn<ColumnConst>(*col_times);
|
2023-05-22 06:59:08 +00:00
|
|
|
|
2024-03-29 08:59:07 +00:00
|
|
|
const ColumnPtr & col_times_const_internal = col_times_const.getDataColumnPtr();
|
2023-05-22 06:59:08 +00:00
|
|
|
if (!checkAndGetColumn<typename DataType::ColumnType>(col_times_const_internal.get()))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
using T = typename DataType::FieldType;
|
2024-03-29 08:59:07 +00:00
|
|
|
T times = col_times_const.getValue<T>();
|
2023-05-22 06:59:08 +00:00
|
|
|
|
|
|
|
if (times < 1)
|
|
|
|
times = 0;
|
|
|
|
|
2023-05-23 12:16:49 +00:00
|
|
|
checkRepeatTime(times);
|
|
|
|
|
2023-05-22 06:59:08 +00:00
|
|
|
res_offsets.resize(col_times->size());
|
|
|
|
res_chars.resize(col_times->size() * (times + 1));
|
|
|
|
|
|
|
|
size_t pos = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < col_times->size(); ++i)
|
|
|
|
{
|
|
|
|
memset(res_chars.begin() + pos, space, times);
|
|
|
|
pos += times;
|
|
|
|
|
2023-05-22 19:09:11 +00:00
|
|
|
*(res_chars.begin() + pos) = '\0';
|
2023-05-22 06:59:08 +00:00
|
|
|
pos += 1;
|
|
|
|
|
|
|
|
res_offsets[i] = pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <typename DataType>
|
|
|
|
bool executeVector(ColumnPtr col_times_, ColumnString::Offsets & res_offsets, ColumnString::Chars & res_chars) const
|
|
|
|
{
|
|
|
|
auto * col_times = checkAndGetColumn<typename DataType::ColumnType>(col_times_.get());
|
|
|
|
if (!col_times)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
res_offsets.resize(col_times->size());
|
2023-05-22 19:19:08 +00:00
|
|
|
res_chars.resize(col_times->size() * 10); /// heuristic
|
2023-05-22 06:59:08 +00:00
|
|
|
|
|
|
|
const PaddedPODArray<typename DataType::FieldType> & times_data = col_times->getData();
|
|
|
|
|
|
|
|
size_t pos = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < col_times->size(); ++i)
|
|
|
|
{
|
|
|
|
typename DataType::FieldType times = times_data[i];
|
|
|
|
|
|
|
|
if (times < 1)
|
|
|
|
times = 0;
|
|
|
|
|
2023-05-23 12:16:49 +00:00
|
|
|
checkRepeatTime(times);
|
|
|
|
|
2023-05-25 11:55:23 +00:00
|
|
|
if (pos + times + 1 > res_chars.size())
|
2023-05-25 17:05:18 +00:00
|
|
|
res_chars.resize(std::max(2 * res_chars.size(), static_cast<size_t>(pos + times + 1)));
|
2023-05-22 06:59:08 +00:00
|
|
|
|
|
|
|
memset(res_chars.begin() + pos, space, times);
|
|
|
|
pos += times;
|
|
|
|
|
2023-05-22 19:09:11 +00:00
|
|
|
*(res_chars.begin() + pos) = '\0';
|
2023-05-22 06:59:08 +00:00
|
|
|
pos += 1;
|
|
|
|
|
|
|
|
res_offsets[i] = pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
res_chars.resize(pos);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
|
|
|
{
|
|
|
|
const auto & col_num = arguments[0].column;
|
|
|
|
|
|
|
|
auto col_res = ColumnString::create();
|
|
|
|
|
|
|
|
ColumnString::Offsets & res_offsets = col_res->getOffsets();
|
|
|
|
ColumnString::Chars & res_chars = col_res->getChars();
|
|
|
|
|
|
|
|
if (const ColumnConst * col_num_const = checkAndGetColumn<ColumnConst>(col_num.get()))
|
|
|
|
{
|
|
|
|
if ((executeConstant<DataTypeUInt8>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeUInt16>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeUInt32>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeUInt64>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeInt8>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeInt16>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeInt32>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeConstant<DataTypeInt64>(col_num, res_offsets, res_chars)))
|
|
|
|
return col_res;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if ((executeVector<DataTypeUInt8>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeUInt16>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeUInt32>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeUInt64>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeInt8>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeInt16>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeInt32>(col_num, res_offsets, res_chars))
|
|
|
|
|| (executeVector<DataTypeInt64>(col_num, res_offsets, res_chars)))
|
|
|
|
return col_res;
|
|
|
|
}
|
|
|
|
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Registers FunctionSpace in the function factory with the CaseInsensitive flag and empty documentation.
REGISTER_FUNCTION(Space)
{
    factory.registerFunction<FunctionSpace>({}, FunctionFactory::CaseInsensitive);
}
|
|
|
|
|
|
|
|
}
|