This commit is contained in:
Evgeniy Gatov 2015-10-20 17:42:41 +03:00
commit 6db0651b6c
9 changed files with 557 additions and 8 deletions

View File

@ -2101,6 +2101,225 @@ private:
};
/** Implementation of `reverse` for arrays: the elements of every array in the column
  * are written out in the opposite order. The offsets column of the source array is reused as is.
  *
  * Supported nested columns: numeric vectors (UInt8..UInt64, Int8..Int64, Float32, Float64),
  * String and FixedString. Any other nested column results in an ILLEGAL_COLUMN exception.
  * A constant array produces a constant array.
  */
class FunctionArrayReverse : public IFunction
{
public:
    static constexpr auto name = "reverse";
    static IFunction * create(const Context & context) { return new FunctionArrayReverse; }

    /// Get the function name.
    String getName() const override { return name; }

    /// Get the result type from the argument types. Throws if the function is not applicable to the arguments.
    /// Exactly one argument of an array type is accepted; the result has the same type.
    DataTypePtr getReturnType(const DataTypes & arguments) const override
    {
        if (arguments.size() != 1)
            throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
                + toString(arguments.size()) + ", should be 1.",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        if (!typeid_cast<const DataTypeArray *>(arguments[0].get()))
            throw Exception("Argument for function " + getName() + " must be array.",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return arguments[0]->clone();
    }

    /// Execute the function over the block.
    void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
    {
        /// Constant arrays are reversed once and stay constant.
        if (executeConst(block, arguments, result))
            return;

        const ColumnArray * array = typeid_cast<const ColumnArray *>(block.getByPosition(arguments[0]).column.get());
        if (!array)
            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
                ErrorCodes::ILLEGAL_COLUMN);

        ColumnPtr res_ptr = array->cloneEmpty();
        block.getByPosition(result).column = res_ptr;
        ColumnArray & res = static_cast<ColumnArray &>(*res_ptr);

        /// Array boundaries do not change when elements are reversed, so the offsets column is taken from the source.
        res.getOffsetsColumn() = array->getOffsetsColumn();

        const IColumn & src_data = array->getData();
        const ColumnArray::Offsets_t & offsets = array->getOffsets();
        IColumn & res_data = res.getData();

        /// Try each supported nested column type in turn; the first one that matches does the work.
        const bool handled =
               executeNumber<UInt8>(src_data, offsets, res_data)
            || executeNumber<UInt16>(src_data, offsets, res_data)
            || executeNumber<UInt32>(src_data, offsets, res_data)
            || executeNumber<UInt64>(src_data, offsets, res_data)
            || executeNumber<Int8>(src_data, offsets, res_data)
            || executeNumber<Int16>(src_data, offsets, res_data)
            || executeNumber<Int32>(src_data, offsets, res_data)
            || executeNumber<Int64>(src_data, offsets, res_data)
            || executeNumber<Float32>(src_data, offsets, res_data)
            || executeNumber<Float64>(src_data, offsets, res_data)
            || executeString(src_data, offsets, res_data)
            || executeFixedString(src_data, offsets, res_data);

        if (!handled)
            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                + " of first argument of function " + getName(),
                ErrorCodes::ILLEGAL_COLUMN);
    }

private:
    /// Handles a constant array argument. Returns false if the argument is not a constant array.
    bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result)
    {
        const ColumnConstArray * const_array = typeid_cast<const ColumnConstArray *>(block.getByPosition(arguments[0]).column.get());
        if (!const_array)
            return false;

        const Array & source = const_array->getData();
        const size_t count = source.size();

        Array reversed(count);
        for (size_t from = 0, to = count; from < count; ++from)
            reversed[--to] = source[from];

        block.getByPosition(result).column = new ColumnConstArray(
            block.rowsInFirstColumn(),
            reversed,
            block.getByPosition(arguments[0]).type->clone());

        return true;
    }

    /// Handles arrays of numbers of type T. Returns false if the nested column is not ColumnVector<T>.
    template <typename T>
    bool executeNumber(
        const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets,
        IColumn & res_data_col)
    {
        const ColumnVector<T> * typed_src = typeid_cast<const ColumnVector<T> *>(&src_data);
        if (!typed_src)
            return false;

        const PODArray<T> & src_values = typed_src->getData();
        PODArray<T> & res_values = typeid_cast<ColumnVector<T> &>(res_data_col).getData();
        res_values.resize(src_values.size());

        ColumnArray::Offset_t begin = 0;
        for (size_t row = 0, rows = src_offsets.size(); row < rows; ++row)
        {
            const ColumnArray::Offset_t end = src_offsets[row];

            /// The element at `pos` goes to the mirrored position inside [begin, end).
            for (ColumnArray::Offset_t pos = begin; pos < end; ++pos)
                res_values[begin + (end - 1 - pos)] = src_values[pos];

            begin = end;
        }

        return true;
    }

    /// Handles arrays of FixedString. Returns false if the nested column is not ColumnFixedString.
    bool executeFixedString(
        const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets,
        IColumn & res_data_col)
    {
        const ColumnFixedString * typed_src = typeid_cast<const ColumnFixedString *>(&src_data);
        if (!typed_src)
            return false;

        const size_t n = typed_src->getN();
        const ColumnFixedString::Chars_t & src_chars = typed_src->getChars();
        ColumnFixedString::Chars_t & res_chars = typeid_cast<ColumnFixedString &>(res_data_col).getChars();
        res_chars.resize(src_chars.size());

        ColumnArray::Offset_t begin = 0;
        for (size_t row = 0, rows = src_offsets.size(); row < rows; ++row)
        {
            const ColumnArray::Offset_t end = src_offsets[row];

            const UInt8 * const src_begin = &src_chars[begin * n];
            const UInt8 * const src_end = &src_chars[end * n];

            if (src_begin != src_end)
            {
                /// Source is read forwards in chunks of n bytes, destination is filled from the last chunk backwards.
                UInt8 * dst = &res_chars[end * n - n];
                for (const UInt8 * src = src_begin; src < src_end; src += n, dst -= n)
                    memcpy(dst, src, n);
            }

            begin = end;
        }

        return true;
    }

    /// Handles arrays of String. Returns false if the nested column is not ColumnString.
    bool executeString(
        const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets,
        IColumn & res_data_col)
    {
        const ColumnString * typed_src = typeid_cast<const ColumnString *>(&src_data);
        if (!typed_src)
            return false;

        const ColumnString::Offsets_t & src_string_offsets = typed_src->getOffsets();
        const ColumnString::Chars_t & src_chars = typed_src->getChars();

        ColumnString & res_strings = typeid_cast<ColumnString &>(res_data_col);
        ColumnString::Offsets_t & res_string_offsets = res_strings.getOffsets();
        ColumnString::Chars_t & res_chars = res_strings.getChars();

        /// Reversal keeps both the number of strings and the total number of bytes.
        res_string_offsets.resize(src_string_offsets.size());
        res_chars.resize(src_chars.size());

        ColumnArray::Offset_t array_begin = 0;
        ColumnString::Offset_t write_pos = 0;

        for (size_t row = 0, rows = src_array_offsets.size(); row < rows; ++row)
        {
            const ColumnArray::Offset_t array_end = src_array_offsets[row];
            const size_t array_size = array_end - array_begin;    /// Zero for an empty array - nothing is copied then.

            /// Strings are appended to the result one after another, taken from the source array last to first.
            for (size_t written = 0; written < array_size; ++written)
            {
                const size_t src_idx = array_end - 1 - written;

                /// The very first string of the column starts at byte 0; any other starts where the previous one ends.
                const ColumnString::Offset_t str_begin = src_idx == 0 ? 0 : src_string_offsets[src_idx - 1];
                const size_t str_size = src_string_offsets[src_idx] - str_begin;

                memcpy(&res_chars[write_pos], &src_chars[str_begin], str_size);
                write_pos += str_size;
                res_string_offsets[array_begin + written] = write_pos;
            }

            array_begin = array_end;
        }

        return true;
    }
};
/// Tag types, each carrying a function name string as a compile-time constant.
/// NOTE(review): presumably used as template parameters by code that follows this hunk - confirm.
struct NameHas { static constexpr auto name = "has"; };
struct NameIndexOf { static constexpr auto name = "indexOf"; };
struct NameCountEqual { static constexpr auto name = "countEqual"; };

View File

@ -13,6 +13,7 @@
#include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Functions/FunctionsArray.h>
#include <DB/Functions/IFunction.h>
#include <ext/range.hpp>
@ -44,7 +45,7 @@ namespace DB
* Функции работы с URL расположены отдельно.
* Функции кодирования строк, конвертации в другие типы расположены отдельно.
*
* Функции length, empty, notEmpty также работают с массивами.
* Функции length, empty, notEmpty, reverse также работают с массивами.
*/
@ -921,6 +922,72 @@ public:
};
/// Также работает над массивами.
class FunctionReverse : public IFunction
{
public:
static constexpr auto name = "reverse";
static IFunction * create(const Context & context) { return new FunctionReverse; }
/// Получить имя функции.
String getName() const override
{
return name;
}
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
DataTypePtr getReturnType(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!typeid_cast<const DataTypeString *>(&*arguments[0]) && !typeid_cast<const DataTypeFixedString *>(&*arguments[0])
&& !typeid_cast<const DataTypeArray *>(&*arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return arguments[0]->clone();
}
/// Выполнить функцию над блоком.
void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
{
const ColumnPtr column = block.getByPosition(arguments[0]).column;
if (const ColumnString * col = typeid_cast<const ColumnString *>(column.get()))
{
ColumnString * col_res = new ColumnString;
block.getByPosition(result).column = col_res;
ReverseImpl::vector(col->getChars(), col->getOffsets(),
col_res->getChars(), col_res->getOffsets());
}
else if (const ColumnFixedString * col = typeid_cast<const ColumnFixedString *>(column.get()))
{
ColumnFixedString * col_res = new ColumnFixedString(col->getN());
block.getByPosition(result).column = col_res;
ReverseImpl::vector_fixed(col->getChars(), col->getN(),
col_res->getChars());
}
else if (const ColumnConstString * col = typeid_cast<const ColumnConstString *>(column.get()))
{
String res;
ReverseImpl::constant(col->getData(), res);
ColumnConstString * col_res = new ColumnConstString(col->size(), res);
block.getByPosition(result).column = col_res;
}
else if (typeid_cast<const ColumnArray *>(column.get()) || typeid_cast<const ColumnConstArray *>(column.get()))
{
FunctionArrayReverse().execute(block, arguments, result);
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
template <typename Name>
class ConcatImpl : public IFunction
{
@ -1594,7 +1661,6 @@ struct NameLower { static constexpr auto name = "lower"; };
/// Tag types, each carrying the name string of one string function as a compile-time constant.
struct NameUpper { static constexpr auto name = "upper"; };
struct NameLowerUTF8 { static constexpr auto name = "lowerUTF8"; };
struct NameUpperUTF8 { static constexpr auto name = "upperUTF8"; };
struct NameReverse { static constexpr auto name = "reverse"; };
struct NameReverseUTF8 { static constexpr auto name = "reverseUTF8"; };
struct NameSubstring { static constexpr auto name = "substring"; };
struct NameSubstringUTF8 { static constexpr auto name = "substringUTF8"; };
@ -1613,7 +1679,6 @@ typedef FunctionStringToString<
/// Aliases that instantiate the function templates with an implementation type and a name tag.
/// NOTE(review): FunctionStringToString / FunctionStringNumNumToString are defined outside this hunk;
/// their argument signatures are presumably what the template names suggest - confirm.
typedef FunctionStringToString<
LowerUpperUTF8Impl<'a', 'z', Poco::Unicode::toUpper, UTF8CyrillicToCase<false>>,
NameUpperUTF8> FunctionUpperUTF8;
typedef FunctionStringToString<ReverseImpl, NameReverse> FunctionReverse;
typedef FunctionStringToString<ReverseUTF8Impl, NameReverseUTF8> FunctionReverseUTF8;
typedef FunctionStringNumNumToString<SubstringImpl, NameSubstring> FunctionSubstring;
typedef FunctionStringNumNumToString<SubstringUTF8Impl, NameSubstringUTF8> FunctionSubstringUTF8;

View File

@ -23,8 +23,9 @@ namespace DB
* - нулевой subpattern (сматчившуюся часть, иначе);
* - инача, пустой массив
*
* join(sep, arr)
* join(arr)
* arrayStringConcat(arr)
* arrayStringConcat(arr, delimiter)
* - склеить массив строк в одну строку через разделитель.
*
* alphaTokens(s) - выделить из строки подпоследовательности [a-zA-Z]+.
*
@ -414,8 +415,142 @@ public:
};
typedef FunctionTokens<AlphaTokensImpl> FunctionAlphaTokens;
typedef FunctionTokens<SplitByCharImpl> FunctionSplitByChar;
/// Склеивает массив строк в одну строку через разделитель.
class FunctionArrayStringConcat : public IFunction
{
private:
void executeImpl(
const ColumnString::Chars_t & src_chars,
const ColumnString::Offsets_t & src_string_offsets,
const ColumnArray::Offsets_t & src_array_offsets,
const char * delimiter, const size_t delimiter_size,
ColumnString::Chars_t & dst_chars,
ColumnString::Offsets_t & dst_string_offsets)
{
size_t size = src_array_offsets.size();
if (!size)
return;
/// С небольшим запасом - как будто разделитель идёт и после последней строки массива.
dst_chars.resize(
src_chars.size()
+ delimiter_size * src_string_offsets.size() /// Разделители после каждой строки...
+ src_array_offsets.size() /// Нулевой байт после каждой склеенной строки
- src_string_offsets.size()); /// Бывший нулевой байт после каждой строки массива
/// Будет столько строк, сколько было массивов.
dst_string_offsets.resize(src_array_offsets.size());
ColumnArray::Offset_t current_src_array_offset = 0;
ColumnString::Offset_t current_src_string_offset = 0;
ColumnString::Offset_t current_dst_string_offset = 0;
/// Цикл по массивам строк.
for (size_t i = 0; i < size; ++i)
{
/// Цикл по строкам внутри массива. /// NOTE Можно всё сделать за одно копирование, если разделитель имеет размер 1.
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
{
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
memcpy(&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
current_src_string_offset = src_string_offsets[current_src_array_offset];
current_dst_string_offset += bytes_to_copy;
if (current_src_array_offset + 1 != next_src_array_offset)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
}
dst_chars[current_dst_string_offset] = 0;
++current_dst_string_offset;
dst_string_offsets[i] = current_dst_string_offset;
}
dst_chars.resize(dst_string_offsets.back());
}
public:
static constexpr auto name = "arrayStringConcat";
static IFunction * create(const Context & context) { return new FunctionArrayStringConcat; }
/// Получить имя функции.
String getName() const override
{
return name;
}
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
DataTypePtr getReturnType(const DataTypes & arguments) const override
{
if (arguments.size() != 1 && arguments.size() != 2)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1 or 2.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(arguments[0].get());
if (!array_type || !typeid_cast<const DataTypeString *>(array_type->getNestedType().get()))
throw Exception("First argument for function " + getName() + " must be array of strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (arguments.size() == 2
&& !typeid_cast<const DataTypeString *>(arguments[1].get()))
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return new DataTypeString;
}
/// Выполнить функцию над блоком.
void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
{
String delimiter;
if (arguments.size() == 2)
{
const ColumnConstString * col_delim = typeid_cast<const ColumnConstString *>(block.getByPosition(arguments[1]).column.get());
if (!col_delim)
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_COLUMN);
delimiter = col_delim->getData();
}
if (const ColumnConstArray * col_const_arr = typeid_cast<const ColumnConstArray *>(block.getByPosition(arguments[0]).column.get()))
{
ColumnConstString * col_res = new ColumnConstString(col_const_arr->size(), "");
block.getByPosition(result).column = col_res;
const Array & src_arr = col_const_arr->getData();
String & dst_str = col_res->getData();
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
{
if (i != 0)
dst_str += delimiter;
dst_str += src_arr[i].get<const String &>();
}
}
else
{
const ColumnArray & col_arr = static_cast<const ColumnArray &>(*block.getByPosition(arguments[0]).column);
const ColumnString & col_string = static_cast<const ColumnString &>(col_arr.getData());
ColumnString * col_res = new ColumnString;
block.getByPosition(result).column = col_res;
executeImpl(
col_string.getChars(), col_string.getOffsets(), col_arr.getOffsets(),
delimiter.data(), delimiter.size(),
col_res->getChars(), col_res->getOffsets());
}
}
};
/// Aliases that instantiate the FunctionTokens template with one implementation type each.
/// NOTE(review): FunctionTokens and the *Impl types are defined outside this hunk - confirm their contracts there.
typedef FunctionTokens<AlphaTokensImpl> FunctionAlphaTokens;
typedef FunctionTokens<SplitByCharImpl> FunctionSplitByChar;
typedef FunctionTokens<SplitByStringImpl> FunctionSplitByString;
typedef FunctionTokens<ExtractAllImpl> FunctionExtractAll;

View File

@ -10,6 +10,7 @@ void registerFunctionsStringArray(FunctionFactory & factory)
factory.registerFunction<FunctionAlphaTokens>();
factory.registerFunction<FunctionSplitByChar>();
factory.registerFunction<FunctionSplitByString>();
factory.registerFunction<FunctionArrayStringConcat>();
}
}

View File

@ -25,7 +25,8 @@ cat "$CONFIG" | sed -r \
s/<http_port>[0-9]+/<http_port>8124/;
s/<tcp_port>[0-9]+/<tcp_port>9001/;
s/<interserver_http_port>[0-9]+/<interserver_http_port>9010/;
s/<use_olap_http_server>true/<use_olap_http_server>false/;
s/<olap_compatibility>/<!-- <olap_compatibility>/;
s/<\/olap_compatibility>/<\/olap_compatibility> -->/;
s/users\.xml/users-preprocessed.xml/' > $CONFIG2
cp ${CONFIG/config/users} .

View File

@ -0,0 +1,65 @@
HelloWorld
HelloWorld
Hello, World
Hello, World
0
01
012
0123
01234
012345
0123456
01234567
012345678
0
01
012
0123
01234
012345
0123456
01234567
012345678
0
0,1
0,1,2
0,1,2,3
0,1,2,3,4
0,1,2,3,4,5
0,1,2,3,4,5,6
0,1,2,3,4,5,6,7
0,1,2,3,4,5,6,7,8
yandex
yandex google
yandex google test
yandex google test 123
yandex google test 123
yandex google test 123 hello
yandex google test 123 hello world
yandex google test 123 hello world goodbye
yandex google test 123 hello world goodbye xyz
yandex google test 123 hello world goodbye xyz yandex
yandex google test 123 hello world goodbye xyz yandex google
yandex google test 123 hello world goodbye xyz yandex google test
yandex google test 123 hello world goodbye xyz yandex google test 123
yandex google test 123 hello world goodbye xyz yandex google test 123
yandex google test 123 hello world goodbye xyz yandex google test 123 hello
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz
yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz yandex
0
01
012
0
01
012
0

View File

@ -0,0 +1,10 @@
-- Tests for arrayStringConcat(arr[, delimiter]).
-- Literal array, delimiter omitted.
SELECT arrayStringConcat(['Hello', 'World']);
-- Same input wrapped in materialize(); presumably exercises the non-constant column path - confirm.
SELECT arrayStringConcat(materialize(['Hello', 'World']));
-- Literal and materialized arrays with an explicit multi-character delimiter.
SELECT arrayStringConcat(['Hello', 'World'], ', ');
SELECT arrayStringConcat(materialize(['Hello', 'World']), ', ');
-- Empty array.
SELECT arrayStringConcat(emptyArrayString());
-- Arrays built from range(number): no delimiter, explicit empty delimiter, one-character delimiter.
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), '') FROM system.numbers LIMIT 10;
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM system.numbers LIMIT 10;
-- Words of different lengths; index 4 maps to an empty string, so arrays contain empty elements.
SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
-- range(number % 4): the array size repeats with period 4, including a zero-size array each cycle.
SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10;

View File

@ -0,0 +1,44 @@
olleH
olleH
1
01
001
0001
00001
000001
0000001
00000001
000000001
0000000001
['World','Hello']
['World','Hello']
[]
[0]
[1,0]
[2,1,0]
[3,2,1,0]
[4,3,2,1,0]
[5,4,3,2,1,0]
[6,5,4,3,2,1,0]
[7,6,5,4,3,2,1,0]
[8,7,6,5,4,3,2,1,0]
[]
['1']
['10','1']
['100','10','1']
['1000','100','10','1']
['10000','1000','100','10','1']
['100000','10000','1000','100','10','1']
['1000000','100000','10000','1000','100','10','1']
['10000000','1000000','100000','10000','1000','100','10','1']
['100000000','10000000','1000000','100000','10000','1000','100','10','1']
\0\0\0\0\0\0\0\0\01
\0\0\0\0\0\0\0\001
\0\0\0\0\0\0\0001
\0\0\0\0\0\00001
\0\0\0\0\000001
\0\0\0\0000001
\0\0\00000001
\0\000000001
\0000000001
0000000001

View File

@ -0,0 +1,9 @@
-- Tests for reverse() over strings, fixed strings and arrays.
-- String literal, then the same value wrapped in materialize() (presumably the non-constant column path - confirm).
SELECT reverse('Hello');
SELECT reverse(materialize('Hello'));
-- Strings computed per row.
SELECT reverse(toString(exp10(number))) FROM system.numbers LIMIT 10;
-- Array of strings: literal and materialized.
SELECT reverse(['Hello', 'World']);
SELECT reverse(materialize(['Hello', 'World']));
-- Arrays of numbers computed per row; the first row has an empty array.
SELECT reverse(range(number)) FROM system.numbers LIMIT 10;
-- Arrays of strings computed per row.
SELECT reverse(arrayMap(x -> toString(exp10(x)), range(number))) FROM system.numbers LIMIT 10;
-- Fixed-size strings of length 10.
SELECT reverse(toFixedString(toString(exp10(number)), 10)) FROM system.numbers LIMIT 10;