2012-09-17 04:34:19 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <DB/DataTypes/DataTypeArray.h>
|
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
2014-08-22 00:57:20 +00:00
|
|
|
|
#include <DB/Columns/ColumnFixedString.h>
|
2012-09-17 04:34:19 +00:00
|
|
|
|
#include <DB/Columns/ColumnConst.h>
|
|
|
|
|
#include <DB/Columns/ColumnArray.h>
|
|
|
|
|
#include <DB/Functions/IFunction.h>
|
2013-08-02 13:09:42 +00:00
|
|
|
|
#include <DB/Functions/FunctionsStringSearch.h>
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
/** Functions that split a string into an array of strings, or the other way round.
  *
  * splitByChar(sep, s)
  * splitByString(sep, s)
  * splitByRegexp(regexp, s)
  *
  * extractAll(s, regexp) - extract from the string the subsequences that match the regexp:
  * - the first subpattern, if the regexp has a subpattern;
  * - the zeroth subpattern (the whole matched part) otherwise;
  * - otherwise (nothing matched), an empty array.
  *
  * arrayStringConcat(arr)
  * arrayStringConcat(arr, delimiter)
  * - join an array of strings into a single string using the delimiter.
  *
  * alphaTokens(s) - extract from the string the subsequences [a-zA-Z]+.
  *
  * Functions for working with URLs are located separately.
  */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Position inside a string buffer: a pointer to a read-only byte.
using Pos = const char *;
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
|
|
|
|
/// Substring generators. They all share a common interface.
|
|
|
|
|
|
|
|
|
|
class AlphaTokensImpl
|
2012-09-17 04:34:19 +00:00
|
|
|
|
{
|
2012-09-23 05:28:59 +00:00
|
|
|
|
private:
|
|
|
|
|
Pos pos;
|
|
|
|
|
Pos end;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
/// Получить имя фукнции.
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = "alphaTokens";
|
|
|
|
|
static String getName() { return name; }
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Проверить типы агрументов функции.
|
|
|
|
|
static void checkArguments(const DataTypes & arguments)
|
|
|
|
|
{
|
|
|
|
|
if (arguments.size() != 1)
|
|
|
|
|
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
|
2013-06-21 20:34:19 +00:00
|
|
|
|
+ toString(arguments.size()) + ", should be 1.",
|
2012-09-23 05:28:59 +00:00
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
|
2012-09-23 05:28:59 +00:00
|
|
|
|
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
|
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Инициализировать по аргументам функции.
|
|
|
|
|
void init(Block & block, const ColumnNumbers & arguments) {}
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Вызывается для каждой следующей строки.
|
|
|
|
|
void set(Pos pos_, Pos end_)
|
|
|
|
|
{
|
|
|
|
|
pos = pos_;
|
|
|
|
|
end = end_;
|
|
|
|
|
}
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2013-08-07 11:25:02 +00:00
|
|
|
|
/// Возвращает позицию аргумента, являющегося столбцом строк
|
|
|
|
|
size_t getStringsArgumentPosition()
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Получить следующий токен, если есть, или вернуть false.
|
|
|
|
|
bool get(Pos & token_begin, Pos & token_end)
|
|
|
|
|
{
|
|
|
|
|
/// Пропускаем мусор
|
|
|
|
|
while (pos < end && !((*pos >= 'a' && *pos <= 'z') || (*pos >= 'A' && *pos <= 'Z')))
|
|
|
|
|
++pos;
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
if (pos == end)
|
|
|
|
|
return false;
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
token_begin = pos;
|
|
|
|
|
|
|
|
|
|
while (pos < end && ((*pos >= 'a' && *pos <= 'z') || (*pos >= 'A' && *pos <= 'Z')))
|
|
|
|
|
++pos;
|
|
|
|
|
|
|
|
|
|
token_end = pos;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
class SplitByCharImpl
|
2012-09-17 04:34:19 +00:00
|
|
|
|
{
|
2012-09-23 05:28:59 +00:00
|
|
|
|
private:
|
|
|
|
|
Pos pos;
|
|
|
|
|
Pos end;
|
|
|
|
|
|
|
|
|
|
char sep;
|
|
|
|
|
|
2012-09-17 04:34:19 +00:00
|
|
|
|
public:
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = "splitByChar";
|
|
|
|
|
static String getName() { return name; }
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
static void checkArguments(const DataTypes & arguments)
|
2012-09-17 04:34:19 +00:00
|
|
|
|
{
|
2012-09-23 05:28:59 +00:00
|
|
|
|
if (arguments.size() != 2)
|
2012-09-17 04:34:19 +00:00
|
|
|
|
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
|
2013-06-21 20:34:19 +00:00
|
|
|
|
+ toString(arguments.size()) + ", should be 2.",
|
2012-09-17 04:34:19 +00:00
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
|
2012-09-17 04:34:19 +00:00
|
|
|
|
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
|
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[1]))
|
2012-09-23 05:28:59 +00:00
|
|
|
|
throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". Must be String.",
|
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void init(Block & block, const ColumnNumbers & arguments)
|
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnConstString * col = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[0]).column);
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!col)
|
2015-07-20 20:05:34 +00:00
|
|
|
|
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
|
|
|
|
+ " of first argument of function " + getName() + ". Must be constant string.",
|
2012-09-23 05:28:59 +00:00
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
|
|
|
|
const String & sep_str = col->getData();
|
|
|
|
|
|
|
|
|
|
if (sep_str.size() != 1)
|
|
|
|
|
throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte.");
|
|
|
|
|
|
|
|
|
|
sep = sep_str[0];
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-07 11:25:02 +00:00
|
|
|
|
/// Возвращает позицию аргумента, являющегося столбцом строк
|
|
|
|
|
size_t getStringsArgumentPosition()
|
|
|
|
|
{
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
void set(Pos pos_, Pos end_)
|
|
|
|
|
{
|
|
|
|
|
pos = pos_;
|
|
|
|
|
end = end_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool get(Pos & token_begin, Pos & token_end)
|
|
|
|
|
{
|
|
|
|
|
if (!pos)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
token_begin = pos;
|
|
|
|
|
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
|
|
|
|
|
|
|
|
|
|
if (pos)
|
|
|
|
|
{
|
|
|
|
|
token_end = pos;
|
|
|
|
|
++pos;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
token_end = end;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SplitByStringImpl
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
Pos pos;
|
|
|
|
|
Pos end;
|
|
|
|
|
|
|
|
|
|
String sep;
|
|
|
|
|
|
|
|
|
|
public:
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = "splitByString";
|
|
|
|
|
static String getName() { return name; }
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
|
|
|
|
static void checkArguments(const DataTypes & arguments)
|
|
|
|
|
{
|
|
|
|
|
SplitByCharImpl::checkArguments(arguments);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void init(Block & block, const ColumnNumbers & arguments)
|
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnConstString * col = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[0]).column);
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!col)
|
2015-07-20 20:05:34 +00:00
|
|
|
|
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
|
|
|
|
+ " of first argument of function " + getName() + ". Must be constant string.",
|
2012-09-23 05:28:59 +00:00
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
|
|
|
|
sep = col->getData();
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-07 11:25:02 +00:00
|
|
|
|
/// Возвращает позицию аргумента, являющегося столбцом строк
|
|
|
|
|
size_t getStringsArgumentPosition()
|
|
|
|
|
{
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Вызывается для каждой следующей строки.
|
|
|
|
|
void set(Pos pos_, Pos end_)
|
|
|
|
|
{
|
|
|
|
|
pos = pos_;
|
|
|
|
|
end = end_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить следующий токен, если есть, или вернуть false.
|
|
|
|
|
bool get(Pos & token_begin, Pos & token_end)
|
|
|
|
|
{
|
|
|
|
|
if (!pos)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
token_begin = pos;
|
|
|
|
|
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, sep.data(), sep.size()));
|
|
|
|
|
|
|
|
|
|
if (pos)
|
|
|
|
|
{
|
|
|
|
|
token_end = pos;
|
|
|
|
|
pos += sep.size();
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
token_end = end;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2013-08-05 08:40:56 +00:00
|
|
|
|
class ExtractAllImpl
|
2013-08-02 13:09:42 +00:00
|
|
|
|
{
|
|
|
|
|
private:
|
2014-07-23 15:25:48 +00:00
|
|
|
|
Regexps::Pointer re;
|
2013-08-02 13:09:42 +00:00
|
|
|
|
OptimizedRegularExpression::MatchVec matches;
|
2013-08-02 16:31:23 +00:00
|
|
|
|
size_t capture;
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
|
|
|
|
Pos pos;
|
|
|
|
|
Pos end;
|
|
|
|
|
public:
|
|
|
|
|
/// Получить имя функции.
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = "extractAll";
|
|
|
|
|
static String getName() { return name; }
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
|
|
|
|
/// Проверить типы агрументов функции.
|
|
|
|
|
static void checkArguments( const DataTypes & arguments )
|
|
|
|
|
{
|
|
|
|
|
SplitByStringImpl::checkArguments(arguments);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Инициализировать по аргументам функции.
|
|
|
|
|
void init(Block & block, const ColumnNumbers & arguments)
|
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnConstString * col = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[1]).column);
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
|
|
|
|
if (!col)
|
2015-07-20 20:05:34 +00:00
|
|
|
|
throw Exception("Illegal column " + block.getByPosition(arguments[1]).column->getName()
|
|
|
|
|
+ " of first argument of function " + getName() + ". Must be constant string.",
|
2013-08-02 13:09:42 +00:00
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
2015-02-11 01:24:51 +00:00
|
|
|
|
re = Regexps::get<false, false>(col->getData());
|
2013-08-02 16:31:23 +00:00
|
|
|
|
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
|
|
|
|
|
|
2013-08-05 08:40:56 +00:00
|
|
|
|
matches.resize(capture + 1);
|
2013-08-02 13:09:42 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-08-07 11:25:02 +00:00
|
|
|
|
/// Возвращает позицию аргумента, являющегося столбцом строк
|
|
|
|
|
size_t getStringsArgumentPosition()
|
|
|
|
|
{
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-08-02 13:09:42 +00:00
|
|
|
|
/// Вызывается для каждой следующей строки.
|
|
|
|
|
void set(Pos pos_, Pos end_)
|
|
|
|
|
{
|
|
|
|
|
pos = pos_;
|
|
|
|
|
end = end_;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить следующий токен, если есть, или вернуть false.
|
|
|
|
|
bool get(Pos & token_begin, Pos & token_end)
|
|
|
|
|
{
|
2013-08-05 08:40:56 +00:00
|
|
|
|
if (!pos || pos > end)
|
2013-08-02 13:09:42 +00:00
|
|
|
|
return false;
|
|
|
|
|
|
2013-09-10 16:52:40 +00:00
|
|
|
|
if (!re->match(pos, end - pos, matches) || !matches[capture].length)
|
2013-08-02 16:31:23 +00:00
|
|
|
|
return false;
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
2013-08-05 08:40:56 +00:00
|
|
|
|
token_begin = pos + matches[capture].offset;
|
|
|
|
|
token_end = token_begin + matches[capture].length;
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
2013-08-07 11:25:02 +00:00
|
|
|
|
pos += matches[capture].offset + matches[capture].length;
|
2013-08-02 13:09:42 +00:00
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
};
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
|
|
|
|
/// Функция, принимающая строку, и возвращающая массив подстрок, создаваемый некоторым генератором.
|
|
|
|
|
template <typename Generator>
|
|
|
|
|
class FunctionTokens : public IFunction
|
|
|
|
|
{
|
|
|
|
|
public:
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = Generator::name;
|
|
|
|
|
static IFunction * create(const Context & context) { return new FunctionTokens; }
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
/// Получить имя функции.
|
2015-10-11 23:36:45 +00:00
|
|
|
|
String getName() const override
|
2012-09-23 05:28:59 +00:00
|
|
|
|
{
|
2014-11-12 17:23:26 +00:00
|
|
|
|
return name;
|
2012-09-23 05:28:59 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
|
2015-10-11 23:36:45 +00:00
|
|
|
|
DataTypePtr getReturnType(const DataTypes & arguments) const override
|
2012-09-23 05:28:59 +00:00
|
|
|
|
{
|
2013-03-26 19:30:23 +00:00
|
|
|
|
Generator::checkArguments(arguments);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2012-09-17 04:34:19 +00:00
|
|
|
|
return new DataTypeArray(new DataTypeString);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Выполнить функцию над блоком.
|
2015-10-11 23:36:45 +00:00
|
|
|
|
void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
|
2012-09-17 04:34:19 +00:00
|
|
|
|
{
|
2013-03-26 19:30:23 +00:00
|
|
|
|
Generator generator;
|
2012-09-23 05:28:59 +00:00
|
|
|
|
generator.init(block, arguments);
|
2013-08-07 11:25:02 +00:00
|
|
|
|
size_t arrayArgumentPosition = arguments[generator.getStringsArgumentPosition()];
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
const ColumnString * col_str = typeid_cast<const ColumnString *>(&*block.getByPosition(arrayArgumentPosition).column);
|
2013-08-07 11:25:02 +00:00
|
|
|
|
const ColumnConstString * col_const_str =
|
2014-06-26 00:58:14 +00:00
|
|
|
|
typeid_cast<const ColumnConstString *>(&*block.getByPosition(arrayArgumentPosition).column);
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
|
|
|
|
ColumnArray * col_res = new ColumnArray(new ColumnString);
|
2015-03-05 05:42:42 +00:00
|
|
|
|
ColumnPtr col_res_holder = col_res;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
2012-09-17 04:34:19 +00:00
|
|
|
|
ColumnArray::Offsets_t & res_offsets = col_res->getOffsets();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
ColumnString::Chars_t & res_strings_chars = res_strings.getChars();
|
2012-09-17 04:34:19 +00:00
|
|
|
|
ColumnString::Offsets_t & res_strings_offsets = res_strings.getOffsets();
|
|
|
|
|
|
|
|
|
|
if (col_str)
|
|
|
|
|
{
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnString::Chars_t & src_chars = col_str->getChars();
|
2012-09-17 04:34:19 +00:00
|
|
|
|
const ColumnString::Offsets_t & src_offsets = col_str->getOffsets();
|
|
|
|
|
|
|
|
|
|
res_offsets.reserve(src_offsets.size());
|
|
|
|
|
res_strings_offsets.reserve(src_offsets.size() * 5); /// Константа 5 - наугад.
|
|
|
|
|
res_strings_chars.reserve(src_chars.size());
|
|
|
|
|
|
2014-04-08 07:31:51 +00:00
|
|
|
|
Pos token_begin = nullptr;
|
|
|
|
|
Pos token_end = nullptr;
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
|
|
|
|
size_t size = src_offsets.size();
|
|
|
|
|
ColumnString::Offset_t current_src_offset = 0;
|
|
|
|
|
ColumnArray::Offset_t current_dst_offset = 0;
|
|
|
|
|
ColumnString::Offset_t current_dst_strings_offset = 0;
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
|
|
|
|
|
current_src_offset = src_offsets[i];
|
2012-09-23 05:28:59 +00:00
|
|
|
|
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
|
|
|
|
|
|
|
|
|
|
generator.set(pos, end);
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
|
|
|
|
size_t j = 0;
|
2012-09-23 05:28:59 +00:00
|
|
|
|
while (generator.get(token_begin, token_end))
|
2012-09-17 04:34:19 +00:00
|
|
|
|
{
|
|
|
|
|
size_t token_size = token_end - token_begin;
|
|
|
|
|
|
|
|
|
|
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
|
2016-04-15 00:33:21 +00:00
|
|
|
|
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
|
2013-10-31 19:24:22 +00:00
|
|
|
|
res_strings_chars[current_dst_strings_offset + token_size] = 0;
|
|
|
|
|
|
2012-09-17 04:34:19 +00:00
|
|
|
|
current_dst_strings_offset += token_size + 1;
|
|
|
|
|
res_strings_offsets.push_back(current_dst_strings_offset);
|
|
|
|
|
++j;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
current_dst_offset += j;
|
|
|
|
|
res_offsets.push_back(current_dst_offset);
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-05 05:42:42 +00:00
|
|
|
|
block.getByPosition(result).column = col_res_holder;
|
2012-09-17 04:34:19 +00:00
|
|
|
|
}
|
|
|
|
|
else if (col_const_str)
|
|
|
|
|
{
|
|
|
|
|
String src = col_const_str->getData();
|
|
|
|
|
Array dst;
|
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
generator.set(src.data(), src.data() + src.size());
|
2014-04-08 07:31:51 +00:00
|
|
|
|
Pos token_begin = nullptr;
|
|
|
|
|
Pos token_end = nullptr;
|
2012-09-17 04:34:19 +00:00
|
|
|
|
|
2012-09-23 05:28:59 +00:00
|
|
|
|
while (generator.get(token_begin, token_end))
|
2012-09-17 04:34:19 +00:00
|
|
|
|
dst.push_back(String(token_begin, token_end - token_begin));
|
|
|
|
|
|
2013-03-27 10:07:23 +00:00
|
|
|
|
block.getByPosition(result).column = new ColumnConstArray(col_const_str->size(), dst, new DataTypeArray(new DataTypeString));
|
2012-09-17 04:34:19 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
2013-08-07 11:25:02 +00:00
|
|
|
|
throw Exception("Illegal columns " + block.getByPosition(arrayArgumentPosition).column->getName()
|
|
|
|
|
+ ", " + block.getByPosition(arrayArgumentPosition).column->getName()
|
2012-09-17 04:34:19 +00:00
|
|
|
|
+ " of arguments of function " + getName(),
|
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2015-10-19 18:49:52 +00:00
|
|
|
|
/// Склеивает массив строк в одну строку через разделитель.
|
|
|
|
|
class FunctionArrayStringConcat : public IFunction
|
|
|
|
|
{
|
|
|
|
|
private:
|
|
|
|
|
void executeImpl(
|
|
|
|
|
const ColumnString::Chars_t & src_chars,
|
|
|
|
|
const ColumnString::Offsets_t & src_string_offsets,
|
|
|
|
|
const ColumnArray::Offsets_t & src_array_offsets,
|
|
|
|
|
const char * delimiter, const size_t delimiter_size,
|
|
|
|
|
ColumnString::Chars_t & dst_chars,
|
|
|
|
|
ColumnString::Offsets_t & dst_string_offsets)
|
|
|
|
|
{
|
|
|
|
|
size_t size = src_array_offsets.size();
|
|
|
|
|
|
|
|
|
|
if (!size)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/// С небольшим запасом - как будто разделитель идёт и после последней строки массива.
|
|
|
|
|
dst_chars.resize(
|
|
|
|
|
src_chars.size()
|
|
|
|
|
+ delimiter_size * src_string_offsets.size() /// Разделители после каждой строки...
|
|
|
|
|
+ src_array_offsets.size() /// Нулевой байт после каждой склеенной строки
|
|
|
|
|
- src_string_offsets.size()); /// Бывший нулевой байт после каждой строки массива
|
|
|
|
|
|
|
|
|
|
/// Будет столько строк, сколько было массивов.
|
|
|
|
|
dst_string_offsets.resize(src_array_offsets.size());
|
|
|
|
|
|
|
|
|
|
ColumnArray::Offset_t current_src_array_offset = 0;
|
|
|
|
|
ColumnString::Offset_t current_src_string_offset = 0;
|
|
|
|
|
|
|
|
|
|
ColumnString::Offset_t current_dst_string_offset = 0;
|
|
|
|
|
|
|
|
|
|
/// Цикл по массивам строк.
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
/// Цикл по строкам внутри массива. /// NOTE Можно всё сделать за одно копирование, если разделитель имеет размер 1.
|
|
|
|
|
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
|
|
|
|
|
{
|
|
|
|
|
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
|
|
|
|
|
|
2016-04-15 00:33:21 +00:00
|
|
|
|
memcpySmallAllowReadWriteOverflow15(
|
|
|
|
|
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
|
2015-10-19 18:49:52 +00:00
|
|
|
|
|
|
|
|
|
current_src_string_offset = src_string_offsets[current_src_array_offset];
|
|
|
|
|
current_dst_string_offset += bytes_to_copy;
|
|
|
|
|
|
|
|
|
|
if (current_src_array_offset + 1 != next_src_array_offset)
|
|
|
|
|
{
|
|
|
|
|
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
|
|
|
|
|
current_dst_string_offset += delimiter_size;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst_chars[current_dst_string_offset] = 0;
|
|
|
|
|
++current_dst_string_offset;
|
|
|
|
|
|
|
|
|
|
dst_string_offsets[i] = current_dst_string_offset;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst_chars.resize(dst_string_offsets.back());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
static constexpr auto name = "arrayStringConcat";
|
|
|
|
|
static IFunction * create(const Context & context) { return new FunctionArrayStringConcat; }
|
|
|
|
|
|
|
|
|
|
/// Получить имя функции.
|
|
|
|
|
String getName() const override
|
|
|
|
|
{
|
|
|
|
|
return name;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
|
|
|
|
|
DataTypePtr getReturnType(const DataTypes & arguments) const override
|
|
|
|
|
{
|
|
|
|
|
if (arguments.size() != 1 && arguments.size() != 2)
|
|
|
|
|
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
|
|
|
|
|
+ toString(arguments.size()) + ", should be 1 or 2.",
|
|
|
|
|
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
|
|
|
|
|
|
|
|
|
const DataTypeArray * array_type = typeid_cast<const DataTypeArray *>(arguments[0].get());
|
|
|
|
|
if (!array_type || !typeid_cast<const DataTypeString *>(array_type->getNestedType().get()))
|
|
|
|
|
throw Exception("First argument for function " + getName() + " must be array of strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
|
|
if (arguments.size() == 2
|
|
|
|
|
&& !typeid_cast<const DataTypeString *>(arguments[1].get()))
|
|
|
|
|
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
|
|
return new DataTypeString;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Выполнить функцию над блоком.
|
|
|
|
|
void execute(Block & block, const ColumnNumbers & arguments, size_t result) override
|
|
|
|
|
{
|
|
|
|
|
String delimiter;
|
|
|
|
|
if (arguments.size() == 2)
|
|
|
|
|
{
|
|
|
|
|
const ColumnConstString * col_delim = typeid_cast<const ColumnConstString *>(block.getByPosition(arguments[1]).column.get());
|
|
|
|
|
if (!col_delim)
|
|
|
|
|
throw Exception("Second argument for function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
|
|
|
|
delimiter = col_delim->getData();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (const ColumnConstArray * col_const_arr = typeid_cast<const ColumnConstArray *>(block.getByPosition(arguments[0]).column.get()))
|
|
|
|
|
{
|
|
|
|
|
ColumnConstString * col_res = new ColumnConstString(col_const_arr->size(), "");
|
|
|
|
|
block.getByPosition(result).column = col_res;
|
|
|
|
|
|
|
|
|
|
const Array & src_arr = col_const_arr->getData();
|
|
|
|
|
String & dst_str = col_res->getData();
|
|
|
|
|
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i != 0)
|
|
|
|
|
dst_str += delimiter;
|
|
|
|
|
dst_str += src_arr[i].get<const String &>();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
const ColumnArray & col_arr = static_cast<const ColumnArray &>(*block.getByPosition(arguments[0]).column);
|
|
|
|
|
const ColumnString & col_string = static_cast<const ColumnString &>(col_arr.getData());
|
|
|
|
|
|
|
|
|
|
ColumnString * col_res = new ColumnString;
|
|
|
|
|
block.getByPosition(result).column = col_res;
|
|
|
|
|
|
|
|
|
|
executeImpl(
|
|
|
|
|
col_string.getChars(), col_string.getOffsets(), col_arr.getOffsets(),
|
|
|
|
|
delimiter.data(), delimiter.size(),
|
|
|
|
|
col_res->getChars(), col_res->getOffsets());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Concrete functions: FunctionTokens instantiated with each substring generator.
using FunctionAlphaTokens = FunctionTokens<AlphaTokensImpl>;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
|
2012-09-23 05:28:59 +00:00
|
|
|
|
|
2012-09-17 04:34:19 +00:00
|
|
|
|
}
|