mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
dbms: implement appendTrailingCharIfAbsent, refactor concat. [#METR-13772]
This commit is contained in:
parent
813742e5d7
commit
811565b5b1
@ -413,221 +413,6 @@ struct ReverseUTF8Impl
|
||||
};
|
||||
|
||||
|
||||
/** Склеивает две строки.
|
||||
*/
|
||||
struct ConcatImpl
|
||||
{
|
||||
static void vector_vector(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size() - size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
b_offset = b_offsets[i];
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_fixed_vector(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size());
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
offset += 1;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
b_offset += b_n;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const std::string & b,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b.size() * size);
|
||||
c_offsets.assign(a_offsets);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c_offsets[i] += b.size() * (i + 1);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], b.data(), b.size() + 1);
|
||||
offset += b.size() + 1;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_vector(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size());
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
a_offset = a_n;
|
||||
b_offset = b_offsets[i];
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_fixed_vector(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
c_data.resize(a_data.size() + b_data.size() + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_constant(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const std::string & b,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
ColumnString::Offset_t b_n = b.size();
|
||||
c_data.resize(a_data.size() + size * b_n + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], b.data(), b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void constant_vector(
|
||||
const std::string & a,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_offsets.size();
|
||||
c_data.resize(b_data.size() + a.size() * size);
|
||||
c_offsets.assign(b_offsets);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c_offsets[i] += a.size() * (i + 1);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], a.data(), a.size());
|
||||
offset += a.size();
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
b_offset = b_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void constant_fixed_vector(
|
||||
const std::string & a,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_data.size() / b_n;
|
||||
ColumnString::Offset_t a_n = a.size();
|
||||
c_data.resize(size * a_n + b_data.size() + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], a.data(), a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void constant_constant(
|
||||
const std::string & a,
|
||||
const std::string & b,
|
||||
std::string & c)
|
||||
{
|
||||
c = a + b;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/** Выделяет подстроку в строке, как последовательности байт.
|
||||
*/
|
||||
struct SubstringImpl
|
||||
@ -962,12 +747,11 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionStringStringToString : public IFunction
|
||||
class FunctionConcat : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static IFunction * create(const Context & context) { return new FunctionStringStringToString; }
|
||||
static constexpr auto name = "concat";
|
||||
static IFunction * create(const Context & context) { return new FunctionConcat; }
|
||||
|
||||
/// Получить имя функции.
|
||||
String getName() const
|
||||
@ -997,6 +781,15 @@ public:
|
||||
return new DataTypeString;
|
||||
}
|
||||
|
||||
/// Runs the function on the block: exactly two arguments go to executeBinary,
/// any other argument count goes to executeNAry.
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
{
    const bool is_binary = (arguments.size() == 2);

    if (!is_binary)
    {
        executeNAry(block, arguments, result);
        return;
    }

    executeBinary(block, arguments, result);
}
|
||||
|
||||
private:
|
||||
enum class instr_type : uint8_t
|
||||
{
|
||||
copy_string,
|
||||
@ -1056,15 +849,6 @@ public:
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Выполнить функцию над блоком.
|
||||
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||
{
|
||||
if (arguments.size() == 2)
|
||||
executeBinary(block, arguments, result);
|
||||
else
|
||||
executeNAry(block, arguments, result);
|
||||
}
|
||||
|
||||
void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||
{
|
||||
const IColumn * c0 = &*block.getByPosition(arguments[0]).column;
|
||||
@ -1082,7 +866,7 @@ public:
|
||||
{
|
||||
ColumnConstString * c_res = new ColumnConstString(c0_const->size(), "");
|
||||
block.getByPosition(result).column = c_res;
|
||||
Impl::constant_constant(c0_const->getData(), c1_const->getData(), c_res->getData());
|
||||
constant_constant(c0_const->getData(), c1_const->getData(), c_res->getData());
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1092,42 +876,42 @@ public:
|
||||
ColumnString::Offsets_t & offsets_res = c_res->getOffsets();
|
||||
|
||||
if (c0_string && c1_string)
|
||||
Impl::vector_vector(
|
||||
vector_vector(
|
||||
c0_string->getChars(), c0_string->getOffsets(),
|
||||
c1_string->getChars(), c1_string->getOffsets(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_string && c1_fixed_string)
|
||||
Impl::vector_fixed_vector(
|
||||
vector_fixed_vector(
|
||||
c0_string->getChars(), c0_string->getOffsets(),
|
||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_string && c1_const)
|
||||
Impl::vector_constant(
|
||||
vector_constant(
|
||||
c0_string->getChars(), c0_string->getOffsets(),
|
||||
c1_const->getData(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_fixed_string && c1_string)
|
||||
Impl::fixed_vector_vector(
|
||||
fixed_vector_vector(
|
||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||
c1_string->getChars(), c1_string->getOffsets(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_const && c1_string)
|
||||
Impl::constant_vector(
|
||||
constant_vector(
|
||||
c0_const->getData(),
|
||||
c1_string->getChars(), c1_string->getOffsets(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_fixed_string && c1_fixed_string)
|
||||
Impl::fixed_vector_fixed_vector(
|
||||
fixed_vector_fixed_vector(
|
||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_fixed_string && c1_const)
|
||||
Impl::fixed_vector_constant(
|
||||
fixed_vector_constant(
|
||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||
c1_const->getData(),
|
||||
vec_res, offsets_res);
|
||||
else if (c0_const && c1_fixed_string)
|
||||
Impl::constant_fixed_vector(
|
||||
constant_fixed_vector(
|
||||
c0_const->getData(),
|
||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||
vec_res, offsets_res);
|
||||
@ -1214,6 +998,215 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_vector(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size() - size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
b_offset = b_offsets[i];
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_fixed_vector(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size());
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
offset += 1;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
b_offset += b_n;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||
const std::string & b,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_offsets.size();
|
||||
c_data.resize(a_data.size() + b.size() * size);
|
||||
c_offsets.assign(a_offsets);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c_offsets[i] += b.size() * (i + 1);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||
offset += a_offsets[i] - a_offset - 1;
|
||||
memcpy(&c_data[offset], b.data(), b.size() + 1);
|
||||
offset += b.size() + 1;
|
||||
|
||||
a_offset = a_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_vector(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_offsets.size();
|
||||
c_data.resize(a_data.size() + b_data.size());
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t a_offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[a_offset], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
a_offset = a_n;
|
||||
b_offset = b_offsets[i];
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_fixed_vector(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
c_data.resize(a_data.size() + b_data.size() + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void fixed_vector_constant(
|
||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||
const std::string & b,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = a_data.size() / a_n;
|
||||
ColumnString::Offset_t b_n = b.size();
|
||||
c_data.resize(a_data.size() + size * b_n + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], b.data(), b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void constant_vector(
|
||||
const std::string & a,
|
||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_offsets.size();
|
||||
c_data.resize(b_data.size() + a.size() * size);
|
||||
c_offsets.assign(b_offsets);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
c_offsets[i] += a.size() * (i + 1);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
ColumnString::Offset_t b_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], a.data(), a.size());
|
||||
offset += a.size();
|
||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||
offset += b_offsets[i] - b_offset;
|
||||
|
||||
b_offset = b_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void constant_fixed_vector(
|
||||
const std::string & a,
|
||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||
{
|
||||
size_t size = b_data.size() / b_n;
|
||||
ColumnString::Offset_t a_n = a.size();
|
||||
c_data.resize(size * a_n + b_data.size() + size);
|
||||
c_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
memcpy(&c_data[offset], a.data(), a_n);
|
||||
offset += a_n;
|
||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||
offset += b_n;
|
||||
c_data[offset] = 0;
|
||||
++offset;
|
||||
|
||||
c_offsets[i] = offset;
|
||||
}
|
||||
}
|
||||
|
||||
/// Concatenation of two constants: stores `a` followed by `b` into `c`.
static void constant_constant(
    const std::string & a,
    const std::string & b,
    std::string & c)
{
    /// Build the result separately so that `c` is only touched once the value is complete.
    std::string joined;
    joined.reserve(a.size() + b.size());
    joined += a;
    joined += b;
    c.swap(joined);
}
|
||||
};
|
||||
|
||||
|
||||
@ -1299,6 +1292,134 @@ public:
|
||||
};
|
||||
|
||||
|
||||
class FunctionAppendTrailingCharIfAbsent : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "appendTrailingCharIfAbsent";
|
||||
static IFunction * create(const Context & context) { return new FunctionAppendTrailingCharIfAbsent; }
|
||||
|
||||
String getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
private:
|
||||
DataTypePtr getReturnType(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() != 2)
|
||||
throw Exception{
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed "
|
||||
+ toString(arguments.size()) + ", should be 2.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
|
||||
};
|
||||
|
||||
if (!typeid_cast<const DataTypeString *>(arguments[0].get()))
|
||||
throw Exception{
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||
};
|
||||
|
||||
if (!typeid_cast<const DataTypeString *>(arguments[1].get()))
|
||||
throw Exception{
|
||||
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||
};
|
||||
|
||||
return new DataTypeString;
|
||||
}
|
||||
|
||||
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||
{
|
||||
const auto & column = block.getByPosition(arguments[0]).column;
|
||||
const auto & column_char = block.getByPosition(arguments[1]).column;
|
||||
|
||||
if (!typeid_cast<const ColumnConstString *>(column_char.get()))
|
||||
throw Exception{
|
||||
"Second argument of function " + getName() + " must be a constant string",
|
||||
ErrorCodes::ILLEGAL_COLUMN
|
||||
};
|
||||
|
||||
const auto & trailing_char_str = static_cast<const ColumnConstString &>(*column_char).getData();
|
||||
|
||||
if (trailing_char_str.size() != 1)
|
||||
throw Exception{
|
||||
"Second argument of function " + getName() + " must be a one-character string",
|
||||
ErrorCodes::BAD_ARGUMENTS
|
||||
};
|
||||
|
||||
if (const auto col = typeid_cast<const ColumnString *>(&*column))
|
||||
{
|
||||
auto col_res = new ColumnString;
|
||||
block.getByPosition(result).column = col_res;
|
||||
|
||||
const auto & src_data = col->getChars();
|
||||
const auto & src_offsets = col->getOffsets();
|
||||
|
||||
auto & dst_data = col_res->getChars();
|
||||
auto & dst_offsets = col_res->getOffsets();
|
||||
|
||||
const auto size = src_offsets.size();
|
||||
dst_data.resize(src_data.size() + size);
|
||||
dst_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset_t src_offset{};
|
||||
ColumnString::Offset_t dst_offset{};
|
||||
|
||||
for (const auto i : ext::range(0, size))
|
||||
{
|
||||
const auto src_length = src_offsets[i] - src_offset;
|
||||
memcpy(&dst_data[dst_offset], &src_data[src_offset], src_length);
|
||||
src_offset = src_offsets[i];
|
||||
dst_offset += src_length;
|
||||
|
||||
if (dst_data[dst_offset - 2] != trailing_char_str.front())
|
||||
{
|
||||
dst_data[dst_offset - 1] = trailing_char_str.front();
|
||||
dst_data[dst_offset] = 0;
|
||||
++dst_offset;
|
||||
}
|
||||
|
||||
dst_offsets[i] = dst_offset;
|
||||
}
|
||||
|
||||
dst_data.resize_assume_reserved(dst_offset);
|
||||
}
|
||||
else if (const auto col = typeid_cast<const ColumnConstString *>(&*column))
|
||||
{
|
||||
const auto & in_data = col->getData();
|
||||
|
||||
block.getByPosition(result).column = new ColumnConstString{
|
||||
col->size(),
|
||||
in_data.back() == trailing_char_str.front() ? in_data : in_data + trailing_char_str
|
||||
};
|
||||
}
|
||||
else
|
||||
throw Exception{
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN
|
||||
};
|
||||
}
|
||||
|
||||
static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
|
||||
ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
|
||||
{
|
||||
res_data.resize(data.size());
|
||||
res_offsets.assign(offsets);
|
||||
size_t size = offsets.size();
|
||||
|
||||
ColumnString::Offset_t prev_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
for (size_t j = prev_offset; j < offsets[i] - 1; ++j)
|
||||
res_data[j] = data[offsets[i] + prev_offset - 2 - j];
|
||||
res_data[offsets[i] - 1] = 0;
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Name tags: each struct only carries the SQL-visible name of a function
/// and is passed as the `Name` template argument of a function template.
struct NameEmpty
{
    static constexpr auto name = "empty";
};

struct NameNotEmpty
{
    static constexpr auto name = "notEmpty";
};

struct NameLength
{
    static constexpr auto name = "length";
};
|
||||
@ -1309,7 +1430,6 @@ struct NameLowerUTF8 { static constexpr auto name = "lowerUTF8"; };
|
||||
/// Name tags for the remaining string functions (same role as the tags above:
/// each one only holds the function name as a compile-time constant).
struct NameUpperUTF8
{
    static constexpr auto name = "upperUTF8";
};

struct NameReverse
{
    static constexpr auto name = "reverse";
};

struct NameReverseUTF8
{
    static constexpr auto name = "reverseUTF8";
};

struct NameConcat
{
    static constexpr auto name = "concat";
};

struct NameSubstring
{
    static constexpr auto name = "substring";
};

struct NameSubstringUTF8
{
    static constexpr auto name = "substringUTF8";
};
|
||||
|
||||
@ -1323,7 +1443,6 @@ typedef FunctionStringToString<LowerUpperUTF8Impl<Poco::Unicode::toLower>, NameL
|
||||
typedef FunctionStringToString<LowerUpperUTF8Impl<Poco::Unicode::toUpper>, NameUpperUTF8> FunctionUpperUTF8;
|
||||
typedef FunctionStringToString<ReverseImpl, NameReverse> FunctionReverse;
|
||||
typedef FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8> FunctionReverseUTF8;
|
||||
typedef FunctionStringStringToString<ConcatImpl, NameConcat> FunctionConcat;
|
||||
typedef FunctionStringNumNumToString<SubstringImpl, NameSubstring> FunctionSubstring;
|
||||
typedef FunctionStringNumNumToString<SubstringUTF8Impl, NameSubstringUTF8> FunctionSubstringUTF8;
|
||||
|
||||
|
@ -19,6 +19,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionConcat>();
|
||||
factory.registerFunction<FunctionSubstring>();
|
||||
factory.registerFunction<FunctionSubstringUTF8>();
|
||||
factory.registerFunction<FunctionAppendTrailingCharIfAbsent>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,6 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
@ -0,0 +1,6 @@
|
||||
select appendTrailingCharIfAbsent('', 'a') = 'a';
|
||||
select appendTrailingCharIfAbsent('a', 'a') = 'a';
|
||||
select appendTrailingCharIfAbsent('a', 'b') = 'ab';
|
||||
select appendTrailingCharIfAbsent(materialize(''), 'a') = materialize('a');
|
||||
select appendTrailingCharIfAbsent(materialize('a'), 'a') = materialize('a');
|
||||
select appendTrailingCharIfAbsent(materialize('a'), 'b') = materialize('ab');
|
Loading…
Reference in New Issue
Block a user