mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
dbms: implement appendTrailingCharIfAbsent, refactor concat. [#METR-13772]
This commit is contained in:
parent
813742e5d7
commit
811565b5b1
@ -413,221 +413,6 @@ struct ReverseUTF8Impl
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/** Склеивает две строки.
|
|
||||||
*/
|
|
||||||
struct ConcatImpl
|
|
||||||
{
|
|
||||||
static void vector_vector(
|
|
||||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
|
||||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = a_offsets.size();
|
|
||||||
c_data.resize(a_data.size() + b_data.size() - size);
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
ColumnString::Offset_t a_offset = 0;
|
|
||||||
ColumnString::Offset_t b_offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
|
||||||
offset += a_offsets[i] - a_offset - 1;
|
|
||||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
|
||||||
offset += b_offsets[i] - b_offset;
|
|
||||||
|
|
||||||
a_offset = a_offsets[i];
|
|
||||||
b_offset = b_offsets[i];
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vector_fixed_vector(
|
|
||||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
|
||||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = a_offsets.size();
|
|
||||||
c_data.resize(a_data.size() + b_data.size());
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
ColumnString::Offset_t a_offset = 0;
|
|
||||||
ColumnString::Offset_t b_offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
|
||||||
offset += a_offsets[i] - a_offset - 1;
|
|
||||||
memcpy(&c_data[offset], &b_data[b_offset], b_n);
|
|
||||||
offset += b_n;
|
|
||||||
c_data[offset] = 0;
|
|
||||||
offset += 1;
|
|
||||||
|
|
||||||
a_offset = a_offsets[i];
|
|
||||||
b_offset += b_n;
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void vector_constant(
|
|
||||||
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
|
||||||
const std::string & b,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = a_offsets.size();
|
|
||||||
c_data.resize(a_data.size() + b.size() * size);
|
|
||||||
c_offsets.assign(a_offsets);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
c_offsets[i] += b.size() * (i + 1);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
ColumnString::Offset_t a_offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
|
||||||
offset += a_offsets[i] - a_offset - 1;
|
|
||||||
memcpy(&c_data[offset], b.data(), b.size() + 1);
|
|
||||||
offset += b.size() + 1;
|
|
||||||
|
|
||||||
a_offset = a_offsets[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fixed_vector_vector(
|
|
||||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
|
||||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = b_offsets.size();
|
|
||||||
c_data.resize(a_data.size() + b_data.size());
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
ColumnString::Offset_t a_offset = 0;
|
|
||||||
ColumnString::Offset_t b_offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[a_offset], a_n);
|
|
||||||
offset += a_n;
|
|
||||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
|
||||||
offset += b_offsets[i] - b_offset;
|
|
||||||
|
|
||||||
a_offset = a_n;
|
|
||||||
b_offset = b_offsets[i];
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fixed_vector_fixed_vector(
|
|
||||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
|
||||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = a_data.size() / a_n;
|
|
||||||
c_data.resize(a_data.size() + b_data.size() + size);
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
|
||||||
offset += a_n;
|
|
||||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
|
||||||
offset += b_n;
|
|
||||||
c_data[offset] = 0;
|
|
||||||
++offset;
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void fixed_vector_constant(
|
|
||||||
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
|
||||||
const std::string & b,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = a_data.size() / a_n;
|
|
||||||
ColumnString::Offset_t b_n = b.size();
|
|
||||||
c_data.resize(a_data.size() + size * b_n + size);
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
|
||||||
offset += a_n;
|
|
||||||
memcpy(&c_data[offset], b.data(), b_n);
|
|
||||||
offset += b_n;
|
|
||||||
c_data[offset] = 0;
|
|
||||||
++offset;
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void constant_vector(
|
|
||||||
const std::string & a,
|
|
||||||
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = b_offsets.size();
|
|
||||||
c_data.resize(b_data.size() + a.size() * size);
|
|
||||||
c_offsets.assign(b_offsets);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
c_offsets[i] += a.size() * (i + 1);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
ColumnString::Offset_t b_offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], a.data(), a.size());
|
|
||||||
offset += a.size();
|
|
||||||
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
|
||||||
offset += b_offsets[i] - b_offset;
|
|
||||||
|
|
||||||
b_offset = b_offsets[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void constant_fixed_vector(
|
|
||||||
const std::string & a,
|
|
||||||
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
|
||||||
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
|
||||||
{
|
|
||||||
size_t size = b_data.size() / b_n;
|
|
||||||
ColumnString::Offset_t a_n = a.size();
|
|
||||||
c_data.resize(size * a_n + b_data.size() + size);
|
|
||||||
c_offsets.resize(size);
|
|
||||||
|
|
||||||
ColumnString::Offset_t offset = 0;
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
|
||||||
{
|
|
||||||
memcpy(&c_data[offset], a.data(), a_n);
|
|
||||||
offset += a_n;
|
|
||||||
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
|
||||||
offset += b_n;
|
|
||||||
c_data[offset] = 0;
|
|
||||||
++offset;
|
|
||||||
|
|
||||||
c_offsets[i] = offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void constant_constant(
|
|
||||||
const std::string & a,
|
|
||||||
const std::string & b,
|
|
||||||
std::string & c)
|
|
||||||
{
|
|
||||||
c = a + b;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
/** Выделяет подстроку в строке, как последовательности байт.
|
/** Выделяет подстроку в строке, как последовательности байт.
|
||||||
*/
|
*/
|
||||||
struct SubstringImpl
|
struct SubstringImpl
|
||||||
@ -962,12 +747,11 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename Impl, typename Name>
|
class FunctionConcat : public IFunction
|
||||||
class FunctionStringStringToString : public IFunction
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
static constexpr auto name = Name::name;
|
static constexpr auto name = "concat";
|
||||||
static IFunction * create(const Context & context) { return new FunctionStringStringToString; }
|
static IFunction * create(const Context & context) { return new FunctionConcat; }
|
||||||
|
|
||||||
/// Получить имя функции.
|
/// Получить имя функции.
|
||||||
String getName() const
|
String getName() const
|
||||||
@ -997,6 +781,15 @@ public:
|
|||||||
return new DataTypeString;
|
return new DataTypeString;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs the function over the block: exactly two arguments go to executeBinary,
/// any other argument count goes to executeNAry.
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
{
    if (arguments.size() != 2)
    {
        executeNAry(block, arguments, result);
        return;
    }

    executeBinary(block, arguments, result);
}
|
||||||
|
|
||||||
|
private:
|
||||||
enum class instr_type : uint8_t
|
enum class instr_type : uint8_t
|
||||||
{
|
{
|
||||||
copy_string,
|
copy_string,
|
||||||
@ -1056,15 +849,6 @@ public:
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Выполнить функцию над блоком.
|
|
||||||
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
|
|
||||||
{
|
|
||||||
if (arguments.size() == 2)
|
|
||||||
executeBinary(block, arguments, result);
|
|
||||||
else
|
|
||||||
executeNAry(block, arguments, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result)
|
void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||||
{
|
{
|
||||||
const IColumn * c0 = &*block.getByPosition(arguments[0]).column;
|
const IColumn * c0 = &*block.getByPosition(arguments[0]).column;
|
||||||
@ -1082,7 +866,7 @@ public:
|
|||||||
{
|
{
|
||||||
ColumnConstString * c_res = new ColumnConstString(c0_const->size(), "");
|
ColumnConstString * c_res = new ColumnConstString(c0_const->size(), "");
|
||||||
block.getByPosition(result).column = c_res;
|
block.getByPosition(result).column = c_res;
|
||||||
Impl::constant_constant(c0_const->getData(), c1_const->getData(), c_res->getData());
|
constant_constant(c0_const->getData(), c1_const->getData(), c_res->getData());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1092,42 +876,42 @@ public:
|
|||||||
ColumnString::Offsets_t & offsets_res = c_res->getOffsets();
|
ColumnString::Offsets_t & offsets_res = c_res->getOffsets();
|
||||||
|
|
||||||
if (c0_string && c1_string)
|
if (c0_string && c1_string)
|
||||||
Impl::vector_vector(
|
vector_vector(
|
||||||
c0_string->getChars(), c0_string->getOffsets(),
|
c0_string->getChars(), c0_string->getOffsets(),
|
||||||
c1_string->getChars(), c1_string->getOffsets(),
|
c1_string->getChars(), c1_string->getOffsets(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_string && c1_fixed_string)
|
else if (c0_string && c1_fixed_string)
|
||||||
Impl::vector_fixed_vector(
|
vector_fixed_vector(
|
||||||
c0_string->getChars(), c0_string->getOffsets(),
|
c0_string->getChars(), c0_string->getOffsets(),
|
||||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_string && c1_const)
|
else if (c0_string && c1_const)
|
||||||
Impl::vector_constant(
|
vector_constant(
|
||||||
c0_string->getChars(), c0_string->getOffsets(),
|
c0_string->getChars(), c0_string->getOffsets(),
|
||||||
c1_const->getData(),
|
c1_const->getData(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_fixed_string && c1_string)
|
else if (c0_fixed_string && c1_string)
|
||||||
Impl::fixed_vector_vector(
|
fixed_vector_vector(
|
||||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||||
c1_string->getChars(), c1_string->getOffsets(),
|
c1_string->getChars(), c1_string->getOffsets(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_const && c1_string)
|
else if (c0_const && c1_string)
|
||||||
Impl::constant_vector(
|
constant_vector(
|
||||||
c0_const->getData(),
|
c0_const->getData(),
|
||||||
c1_string->getChars(), c1_string->getOffsets(),
|
c1_string->getChars(), c1_string->getOffsets(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_fixed_string && c1_fixed_string)
|
else if (c0_fixed_string && c1_fixed_string)
|
||||||
Impl::fixed_vector_fixed_vector(
|
fixed_vector_fixed_vector(
|
||||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_fixed_string && c1_const)
|
else if (c0_fixed_string && c1_const)
|
||||||
Impl::fixed_vector_constant(
|
fixed_vector_constant(
|
||||||
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
c0_fixed_string->getChars(), c0_fixed_string->getN(),
|
||||||
c1_const->getData(),
|
c1_const->getData(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
else if (c0_const && c1_fixed_string)
|
else if (c0_const && c1_fixed_string)
|
||||||
Impl::constant_fixed_vector(
|
constant_fixed_vector(
|
||||||
c0_const->getData(),
|
c0_const->getData(),
|
||||||
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
c1_fixed_string->getChars(), c1_fixed_string->getN(),
|
||||||
vec_res, offsets_res);
|
vec_res, offsets_res);
|
||||||
@ -1214,6 +998,215 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void vector_vector(
|
||||||
|
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||||
|
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = a_offsets.size();
|
||||||
|
c_data.resize(a_data.size() + b_data.size() - size);
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
ColumnString::Offset_t a_offset = 0;
|
||||||
|
ColumnString::Offset_t b_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||||
|
offset += a_offsets[i] - a_offset - 1;
|
||||||
|
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||||
|
offset += b_offsets[i] - b_offset;
|
||||||
|
|
||||||
|
a_offset = a_offsets[i];
|
||||||
|
b_offset = b_offsets[i];
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vector_fixed_vector(
|
||||||
|
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||||
|
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = a_offsets.size();
|
||||||
|
c_data.resize(a_data.size() + b_data.size());
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
ColumnString::Offset_t a_offset = 0;
|
||||||
|
ColumnString::Offset_t b_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||||
|
offset += a_offsets[i] - a_offset - 1;
|
||||||
|
memcpy(&c_data[offset], &b_data[b_offset], b_n);
|
||||||
|
offset += b_n;
|
||||||
|
c_data[offset] = 0;
|
||||||
|
offset += 1;
|
||||||
|
|
||||||
|
a_offset = a_offsets[i];
|
||||||
|
b_offset += b_n;
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vector_constant(
|
||||||
|
const ColumnString::Chars_t & a_data, const ColumnString::Offsets_t & a_offsets,
|
||||||
|
const std::string & b,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = a_offsets.size();
|
||||||
|
c_data.resize(a_data.size() + b.size() * size);
|
||||||
|
c_offsets.assign(a_offsets);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
c_offsets[i] += b.size() * (i + 1);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
ColumnString::Offset_t a_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[a_offset], a_offsets[i] - a_offset - 1);
|
||||||
|
offset += a_offsets[i] - a_offset - 1;
|
||||||
|
memcpy(&c_data[offset], b.data(), b.size() + 1);
|
||||||
|
offset += b.size() + 1;
|
||||||
|
|
||||||
|
a_offset = a_offsets[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fixed_vector_vector(
|
||||||
|
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||||
|
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = b_offsets.size();
|
||||||
|
c_data.resize(a_data.size() + b_data.size());
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
ColumnString::Offset_t a_offset = 0;
|
||||||
|
ColumnString::Offset_t b_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[a_offset], a_n);
|
||||||
|
offset += a_n;
|
||||||
|
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||||
|
offset += b_offsets[i] - b_offset;
|
||||||
|
|
||||||
|
a_offset = a_n;
|
||||||
|
b_offset = b_offsets[i];
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fixed_vector_fixed_vector(
|
||||||
|
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||||
|
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = a_data.size() / a_n;
|
||||||
|
c_data.resize(a_data.size() + b_data.size() + size);
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||||
|
offset += a_n;
|
||||||
|
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||||
|
offset += b_n;
|
||||||
|
c_data[offset] = 0;
|
||||||
|
++offset;
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void fixed_vector_constant(
|
||||||
|
const ColumnString::Chars_t & a_data, ColumnString::Offset_t a_n,
|
||||||
|
const std::string & b,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = a_data.size() / a_n;
|
||||||
|
ColumnString::Offset_t b_n = b.size();
|
||||||
|
c_data.resize(a_data.size() + size * b_n + size);
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], &a_data[i * a_n], a_n);
|
||||||
|
offset += a_n;
|
||||||
|
memcpy(&c_data[offset], b.data(), b_n);
|
||||||
|
offset += b_n;
|
||||||
|
c_data[offset] = 0;
|
||||||
|
++offset;
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void constant_vector(
|
||||||
|
const std::string & a,
|
||||||
|
const ColumnString::Chars_t & b_data, const ColumnString::Offsets_t & b_offsets,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = b_offsets.size();
|
||||||
|
c_data.resize(b_data.size() + a.size() * size);
|
||||||
|
c_offsets.assign(b_offsets);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
c_offsets[i] += a.size() * (i + 1);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
ColumnString::Offset_t b_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], a.data(), a.size());
|
||||||
|
offset += a.size();
|
||||||
|
memcpy(&c_data[offset], &b_data[b_offset], b_offsets[i] - b_offset);
|
||||||
|
offset += b_offsets[i] - b_offset;
|
||||||
|
|
||||||
|
b_offset = b_offsets[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void constant_fixed_vector(
|
||||||
|
const std::string & a,
|
||||||
|
const ColumnString::Chars_t & b_data, ColumnString::Offset_t b_n,
|
||||||
|
ColumnString::Chars_t & c_data, ColumnString::Offsets_t & c_offsets)
|
||||||
|
{
|
||||||
|
size_t size = b_data.size() / b_n;
|
||||||
|
ColumnString::Offset_t a_n = a.size();
|
||||||
|
c_data.resize(size * a_n + b_data.size() + size);
|
||||||
|
c_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
memcpy(&c_data[offset], a.data(), a_n);
|
||||||
|
offset += a_n;
|
||||||
|
memcpy(&c_data[offset], &b_data[i * b_n], b_n);
|
||||||
|
offset += b_n;
|
||||||
|
c_data[offset] = 0;
|
||||||
|
++offset;
|
||||||
|
|
||||||
|
c_offsets[i] = offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Concatenates two constant strings: stores `a` followed by `b` into `c`.
static void constant_constant(
    const std::string & a,
    const std::string & b,
    std::string & c)
{
    /// Built in a temporary first so the result is right even if `c` aliases `a` or `b`.
    std::string joined = a;
    joined += b;
    c.swap(joined);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -1299,6 +1292,134 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
class FunctionAppendTrailingCharIfAbsent : public IFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr auto name = "appendTrailingCharIfAbsent";
|
||||||
|
static IFunction * create(const Context & context) { return new FunctionAppendTrailingCharIfAbsent; }
|
||||||
|
|
||||||
|
String getName() const
|
||||||
|
{
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
DataTypePtr getReturnType(const DataTypes & arguments) const
|
||||||
|
{
|
||||||
|
if (arguments.size() != 2)
|
||||||
|
throw Exception{
|
||||||
|
"Number of arguments for function " + getName() + " doesn't match: passed "
|
||||||
|
+ toString(arguments.size()) + ", should be 2.",
|
||||||
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!typeid_cast<const DataTypeString *>(arguments[0].get()))
|
||||||
|
throw Exception{
|
||||||
|
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||||
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!typeid_cast<const DataTypeString *>(arguments[1].get()))
|
||||||
|
throw Exception{
|
||||||
|
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(),
|
||||||
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
|
||||||
|
};
|
||||||
|
|
||||||
|
return new DataTypeString;
|
||||||
|
}
|
||||||
|
|
||||||
|
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||||
|
{
|
||||||
|
const auto & column = block.getByPosition(arguments[0]).column;
|
||||||
|
const auto & column_char = block.getByPosition(arguments[1]).column;
|
||||||
|
|
||||||
|
if (!typeid_cast<const ColumnConstString *>(column_char.get()))
|
||||||
|
throw Exception{
|
||||||
|
"Second argument of function " + getName() + " must be a constant string",
|
||||||
|
ErrorCodes::ILLEGAL_COLUMN
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto & trailing_char_str = static_cast<const ColumnConstString &>(*column_char).getData();
|
||||||
|
|
||||||
|
if (trailing_char_str.size() != 1)
|
||||||
|
throw Exception{
|
||||||
|
"Second argument of function " + getName() + " must be a one-character string",
|
||||||
|
ErrorCodes::BAD_ARGUMENTS
|
||||||
|
};
|
||||||
|
|
||||||
|
if (const auto col = typeid_cast<const ColumnString *>(&*column))
|
||||||
|
{
|
||||||
|
auto col_res = new ColumnString;
|
||||||
|
block.getByPosition(result).column = col_res;
|
||||||
|
|
||||||
|
const auto & src_data = col->getChars();
|
||||||
|
const auto & src_offsets = col->getOffsets();
|
||||||
|
|
||||||
|
auto & dst_data = col_res->getChars();
|
||||||
|
auto & dst_offsets = col_res->getOffsets();
|
||||||
|
|
||||||
|
const auto size = src_offsets.size();
|
||||||
|
dst_data.resize(src_data.size() + size);
|
||||||
|
dst_offsets.resize(size);
|
||||||
|
|
||||||
|
ColumnString::Offset_t src_offset{};
|
||||||
|
ColumnString::Offset_t dst_offset{};
|
||||||
|
|
||||||
|
for (const auto i : ext::range(0, size))
|
||||||
|
{
|
||||||
|
const auto src_length = src_offsets[i] - src_offset;
|
||||||
|
memcpy(&dst_data[dst_offset], &src_data[src_offset], src_length);
|
||||||
|
src_offset = src_offsets[i];
|
||||||
|
dst_offset += src_length;
|
||||||
|
|
||||||
|
if (dst_data[dst_offset - 2] != trailing_char_str.front())
|
||||||
|
{
|
||||||
|
dst_data[dst_offset - 1] = trailing_char_str.front();
|
||||||
|
dst_data[dst_offset] = 0;
|
||||||
|
++dst_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
dst_offsets[i] = dst_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
dst_data.resize_assume_reserved(dst_offset);
|
||||||
|
}
|
||||||
|
else if (const auto col = typeid_cast<const ColumnConstString *>(&*column))
|
||||||
|
{
|
||||||
|
const auto & in_data = col->getData();
|
||||||
|
|
||||||
|
block.getByPosition(result).column = new ColumnConstString{
|
||||||
|
col->size(),
|
||||||
|
in_data.back() == trailing_char_str.front() ? in_data : in_data + trailing_char_str
|
||||||
|
};
|
||||||
|
}
|
||||||
|
else
|
||||||
|
throw Exception{
|
||||||
|
"Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
||||||
|
+ " of argument of function " + getName(),
|
||||||
|
ErrorCodes::ILLEGAL_COLUMN
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vector(const ColumnString::Chars_t & data, const ColumnString::Offsets_t & offsets,
|
||||||
|
ColumnString::Chars_t & res_data, ColumnString::Offsets_t & res_offsets)
|
||||||
|
{
|
||||||
|
res_data.resize(data.size());
|
||||||
|
res_offsets.assign(offsets);
|
||||||
|
size_t size = offsets.size();
|
||||||
|
|
||||||
|
ColumnString::Offset_t prev_offset = 0;
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
for (size_t j = prev_offset; j < offsets[i] - 1; ++j)
|
||||||
|
res_data[j] = data[offsets[i] + prev_offset - 2 - j];
|
||||||
|
res_data[offsets[i] - 1] = 0;
|
||||||
|
prev_offset = offsets[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct NameEmpty { static constexpr auto name = "empty"; };
|
struct NameEmpty { static constexpr auto name = "empty"; };
|
||||||
struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
|
struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
|
||||||
struct NameLength { static constexpr auto name = "length"; };
|
struct NameLength { static constexpr auto name = "length"; };
|
||||||
@ -1309,7 +1430,6 @@ struct NameLowerUTF8 { static constexpr auto name = "lowerUTF8"; };
|
|||||||
struct NameUpperUTF8 { static constexpr auto name = "upperUTF8"; };
|
struct NameUpperUTF8 { static constexpr auto name = "upperUTF8"; };
|
||||||
struct NameReverse { static constexpr auto name = "reverse"; };
|
struct NameReverse { static constexpr auto name = "reverse"; };
|
||||||
struct NameReverseUTF8 { static constexpr auto name = "reverseUTF8"; };
|
struct NameReverseUTF8 { static constexpr auto name = "reverseUTF8"; };
|
||||||
struct NameConcat { static constexpr auto name = "concat"; };
|
|
||||||
struct NameSubstring { static constexpr auto name = "substring"; };
|
struct NameSubstring { static constexpr auto name = "substring"; };
|
||||||
struct NameSubstringUTF8 { static constexpr auto name = "substringUTF8"; };
|
struct NameSubstringUTF8 { static constexpr auto name = "substringUTF8"; };
|
||||||
|
|
||||||
@ -1323,7 +1443,6 @@ typedef FunctionStringToString<LowerUpperUTF8Impl<Poco::Unicode::toLower>, NameL
|
|||||||
typedef FunctionStringToString<LowerUpperUTF8Impl<Poco::Unicode::toUpper>, NameUpperUTF8> FunctionUpperUTF8;
|
typedef FunctionStringToString<LowerUpperUTF8Impl<Poco::Unicode::toUpper>, NameUpperUTF8> FunctionUpperUTF8;
|
||||||
typedef FunctionStringToString<ReverseImpl, NameReverse> FunctionReverse;
|
typedef FunctionStringToString<ReverseImpl, NameReverse> FunctionReverse;
|
||||||
typedef FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8> FunctionReverseUTF8;
|
typedef FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8> FunctionReverseUTF8;
|
||||||
typedef FunctionStringStringToString<ConcatImpl, NameConcat> FunctionConcat;
|
|
||||||
typedef FunctionStringNumNumToString<SubstringImpl, NameSubstring> FunctionSubstring;
|
typedef FunctionStringNumNumToString<SubstringImpl, NameSubstring> FunctionSubstring;
|
||||||
typedef FunctionStringNumNumToString<SubstringUTF8Impl, NameSubstringUTF8> FunctionSubstringUTF8;
|
typedef FunctionStringNumNumToString<SubstringUTF8Impl, NameSubstringUTF8> FunctionSubstringUTF8;
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
|||||||
factory.registerFunction<FunctionConcat>();
|
factory.registerFunction<FunctionConcat>();
|
||||||
factory.registerFunction<FunctionSubstring>();
|
factory.registerFunction<FunctionSubstring>();
|
||||||
factory.registerFunction<FunctionSubstringUTF8>();
|
factory.registerFunction<FunctionSubstringUTF8>();
|
||||||
|
factory.registerFunction<FunctionAppendTrailingCharIfAbsent>();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
||||||
|
1
|
@ -0,0 +1,6 @@
|
|||||||
|
-- Constant first argument: empty string, character already present, character absent.
SELECT appendTrailingCharIfAbsent('', 'a') = 'a';
SELECT appendTrailingCharIfAbsent('a', 'a') = 'a';
SELECT appendTrailingCharIfAbsent('a', 'b') = 'ab';
-- The same three cases with a materialized (non-constant) first argument.
SELECT appendTrailingCharIfAbsent(materialize(''), 'a') = materialize('a');
SELECT appendTrailingCharIfAbsent(materialize('a'), 'a') = materialize('a');
SELECT appendTrailingCharIfAbsent(materialize('a'), 'b') = materialize('ab');
|
Loading…
Reference in New Issue
Block a user