2018-12-17 20:07:31 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionStringToString.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/find_symbols.h>
|
2018-12-17 20:07:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
/// Error codes used in this translation unit; only declared here, the definition lives elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2018-12-18 13:19:23 +00:00
|
|
|
/// Mode tag for trimLeft: only the left side of the string is trimmed.
struct TrimModeLeft
{
    /// Which sides of the string are trimmed.
    static constexpr bool trim_left = true;
    static constexpr bool trim_right = false;

    /// Function name exposed by this mode.
    static constexpr auto name = "trimLeft";
};
|
|
|
|
|
2018-12-18 13:19:23 +00:00
|
|
|
/// Mode tag for trimRight: only the right side of the string is trimmed.
struct TrimModeRight
{
    /// Which sides of the string are trimmed.
    static constexpr bool trim_left = false;
    static constexpr bool trim_right = true;

    /// Function name exposed by this mode.
    static constexpr auto name = "trimRight";
};
|
|
|
|
|
2018-12-18 13:19:23 +00:00
|
|
|
/// Mode tag for trimBoth: both sides of the string are trimmed.
struct TrimModeBoth
{
    /// Which sides of the string are trimmed.
    static constexpr bool trim_left = true;
    static constexpr bool trim_right = true;

    /// Function name exposed by this mode.
    static constexpr auto name = "trimBoth";
};
|
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
template <typename Mode>
|
2018-12-17 20:07:31 +00:00
|
|
|
class FunctionTrimImpl
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static void vector(
|
|
|
|
const ColumnString::Chars & data,
|
|
|
|
const ColumnString::Offsets & offsets,
|
|
|
|
ColumnString::Chars & res_data,
|
|
|
|
ColumnString::Offsets & res_offsets)
|
|
|
|
{
|
|
|
|
size_t size = offsets.size();
|
|
|
|
res_offsets.resize(size);
|
|
|
|
res_data.reserve(data.size());
|
|
|
|
|
|
|
|
size_t prev_offset = 0;
|
|
|
|
size_t res_offset = 0;
|
|
|
|
|
|
|
|
const UInt8 * start;
|
|
|
|
size_t length;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
{
|
|
|
|
execute(reinterpret_cast<const UInt8 *>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
|
|
|
|
|
|
|
|
res_data.resize(res_data.size() + length + 1);
|
2019-08-23 20:32:31 +00:00
|
|
|
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], start, length);
|
2018-12-17 20:07:31 +00:00
|
|
|
res_offset += length + 1;
|
2018-12-19 09:02:59 +00:00
|
|
|
res_data[res_offset - 1] = '\0';
|
2018-12-17 20:07:31 +00:00
|
|
|
|
|
|
|
res_offsets[i] = res_offset;
|
|
|
|
prev_offset = offsets[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
2018-12-17 20:07:31 +00:00
|
|
|
{
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Functions trimLeft, trimRight and trimBoth cannot work with FixedString argument");
|
2018-12-17 20:07:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
static void execute(const UInt8 * data, size_t size, const UInt8 *& res_data, size_t & res_size)
|
|
|
|
{
|
2019-08-23 20:32:31 +00:00
|
|
|
const char * char_data = reinterpret_cast<const char *>(data);
|
|
|
|
const char * char_end = char_data + size;
|
2018-12-17 20:07:31 +00:00
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
if constexpr (Mode::trim_left)
|
2020-03-18 02:02:24 +00:00
|
|
|
{ // NOLINT
|
2019-08-23 20:32:31 +00:00
|
|
|
const char * found = find_first_not_symbols<' '>(char_data, char_end);
|
|
|
|
size_t num_chars = found - char_data;
|
|
|
|
char_data += num_chars;
|
2018-12-17 20:07:31 +00:00
|
|
|
}
|
|
|
|
|
2020-03-23 02:12:31 +00:00
|
|
|
if constexpr (Mode::trim_right)
|
2020-03-18 02:02:24 +00:00
|
|
|
{ // NOLINT
|
2019-08-23 20:32:31 +00:00
|
|
|
const char * found = find_last_not_symbols_or_null<' '>(char_data, char_end);
|
|
|
|
if (found)
|
|
|
|
char_end = found + 1;
|
|
|
|
else
|
|
|
|
char_end = char_data;
|
2018-12-17 20:07:31 +00:00
|
|
|
}
|
|
|
|
|
2019-08-23 20:32:31 +00:00
|
|
|
res_data = reinterpret_cast<const UInt8 *>(char_data);
|
|
|
|
res_size = char_end - char_data;
|
2018-12-17 20:07:31 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Concrete functions: the shared trimming implementation bound to each mode.
/// The mode is also passed as the second argument of FunctionStringToString
/// (presumably as the provider of the function name - confirm in FunctionStringToString.h).
using FunctionTrimLeft
    = FunctionStringToString<FunctionTrimImpl<TrimModeLeft>, TrimModeLeft>;

using FunctionTrimRight
    = FunctionStringToString<FunctionTrimImpl<TrimModeRight>, TrimModeRight>;

using FunctionTrimBoth
    = FunctionStringToString<FunctionTrimImpl<TrimModeBoth>, TrimModeBoth>;
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
}
|
|
|
|
|
2022-07-04 07:01:39 +00:00
|
|
|
/// Registers the three trim functions and their short aliases in the function factory.
REGISTER_FUNCTION(Trim)
{
    /// Functions under their own names.
    factory.registerFunction<FunctionTrimLeft>();
    factory.registerFunction<FunctionTrimRight>();
    factory.registerFunction<FunctionTrimBoth>();

    /// Short aliases, registered after the functions they refer to.
    factory.registerAlias("ltrim", FunctionTrimLeft::name);
    factory.registerAlias("rtrim", FunctionTrimRight::name);
    factory.registerAlias("trim", FunctionTrimBoth::name);
}
|
|
|
|
}
|