2017-03-10 17:52:36 +00:00
|
|
|
|
#pragma once
|
2011-10-16 07:11:36 +00:00
|
|
|
|
|
|
|
|
|
#include <DB/Columns/ColumnConst.h>
|
2017-03-10 17:52:36 +00:00
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
|
|
|
|
#include <DB/DataTypes/DataTypeString.h>
|
|
|
|
|
#include <DB/Functions/FunctionsArithmetic.h>
|
2011-10-16 07:11:36 +00:00
|
|
|
|
#include <DB/Functions/IFunction.h>
|
2016-10-24 13:47:15 +00:00
|
|
|
|
|
|
|
|
|
|
2011-10-16 07:11:36 +00:00
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
/** String search and replace functions:
  *
  * position(haystack, needle) - plain substring search; returns the position (in bytes) of the found substring, starting from 1, or 0 if the substring is not found.
  * positionUTF8(haystack, needle) - the same, but the position is counted in code points, assuming the string is UTF-8 encoded.
  * positionCaseInsensitive(haystack, needle)
  * positionCaseInsensitiveUTF8(haystack, needle)
  *
  * like(haystack, pattern) - search by a LIKE regular expression; returns 0 or 1. Case-insensitive, but only for Latin letters.
  * notLike(haystack, pattern)
  *
  * match(haystack, pattern) - search by an re2 regular expression; returns 0 or 1.
  *
  * Applies an re2 regexp and extracts:
  * - the first subpattern, if the regexp contains a subpattern;
  * - otherwise, the zero subpattern (the whole matched part);
  * - an empty string, if nothing matched.
  * extract(haystack, pattern)
  *
  * replaceOne(haystack, pattern, replacement) - replace the pattern according to the given rules, first occurrence only.
  * replaceAll(haystack, pattern, replacement) - replace the pattern according to the given rules, all occurrences.
  *
  * replaceRegexpOne(haystack, pattern, replacement) - replace the pattern matched by the given regexp, first occurrence only.
  * replaceRegexpAll(haystack, pattern, replacement) - replace the pattern matched by the given regexp, all occurrences.
  *
  * Warning! At the moment, the arguments needle, pattern, n, replacement must be constants.
  */
|
|
|
|
|
|
|
|
|
|
|
2014-01-27 13:49:06 +00:00
|
|
|
|
template <typename Impl, typename Name>
|
2017-03-10 17:52:36 +00:00
|
|
|
|
class FunctionsStringSearch : public IFunction
|
2014-01-27 13:49:06 +00:00
|
|
|
|
{
|
|
|
|
|
public:
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = Name::name;
|
2017-03-10 17:52:36 +00:00
|
|
|
|
static FunctionPtr create(const Context & context)
|
2014-01-27 13:49:06 +00:00
|
|
|
|
{
|
2017-03-10 17:52:36 +00:00
|
|
|
|
return std::make_shared<FunctionsStringSearch>();
|
2014-01-27 13:49:06 +00:00
|
|
|
|
}
|
2014-11-12 17:23:26 +00:00
|
|
|
|
|
2015-10-11 23:36:45 +00:00
|
|
|
|
String getName() const override
|
2011-10-16 07:11:36 +00:00
|
|
|
|
{
|
2014-11-12 17:23:26 +00:00
|
|
|
|
return name;
|
2011-10-16 07:11:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2017-03-10 17:52:36 +00:00
|
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
|
{
|
|
|
|
|
return 2;
|
|
|
|
|
}
|
2016-12-29 19:38:10 +00:00
|
|
|
|
|
2016-07-06 09:47:55 +00:00
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
2011-10-16 07:11:36 +00:00
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception(
|
|
|
|
|
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2011-10-16 07:11:36 +00:00
|
|
|
|
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[1]))
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception(
|
|
|
|
|
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2011-10-16 07:11:36 +00:00
|
|
|
|
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<typename DataTypeFromFieldType<typename Impl::ResultType>::Type>();
|
2011-10-16 07:11:36 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-07-06 09:47:55 +00:00
|
|
|
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
|
2011-10-16 07:11:36 +00:00
|
|
|
|
{
|
2016-05-28 10:35:44 +00:00
|
|
|
|
using ResultType = typename Impl::ResultType;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2017-01-02 20:12:12 +00:00
|
|
|
|
const ColumnPtr & column_haystack = block.safeGetByPosition(arguments[0]).column;
|
|
|
|
|
const ColumnPtr & column_needle = block.safeGetByPosition(arguments[1]).column;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-01-27 03:11:28 +00:00
|
|
|
|
const ColumnConstString * col_haystack_const = typeid_cast<const ColumnConstString *>(&*column_haystack);
|
|
|
|
|
const ColumnConstString * col_needle_const = typeid_cast<const ColumnConstString *>(&*column_needle);
|
2011-10-16 07:11:36 +00:00
|
|
|
|
|
2016-01-27 03:11:28 +00:00
|
|
|
|
if (col_haystack_const && col_needle_const)
|
2011-10-16 07:11:36 +00:00
|
|
|
|
{
|
2015-09-22 13:43:32 +00:00
|
|
|
|
ResultType res{};
|
2016-01-27 03:11:28 +00:00
|
|
|
|
Impl::constant_constant(col_haystack_const->getData(), col_needle_const->getData(), res);
|
2017-01-02 20:12:12 +00:00
|
|
|
|
block.safeGetByPosition(result).column = std::make_shared<ColumnConst<ResultType>>(col_haystack_const->size(), res);
|
2016-01-27 03:11:28 +00:00
|
|
|
|
return;
|
2011-10-16 07:11:36 +00:00
|
|
|
|
}
|
2016-01-27 03:11:28 +00:00
|
|
|
|
|
2016-05-28 05:31:36 +00:00
|
|
|
|
auto col_res = std::make_shared<ColumnVector<ResultType>>();
|
2017-01-02 20:12:12 +00:00
|
|
|
|
block.safeGetByPosition(result).column = col_res;
|
2016-01-27 03:11:28 +00:00
|
|
|
|
|
|
|
|
|
typename ColumnVector<ResultType>::Container_t & vec_res = col_res->getData();
|
|
|
|
|
vec_res.resize(column_haystack->size());
|
|
|
|
|
|
|
|
|
|
const ColumnString * col_haystack_vector = typeid_cast<const ColumnString *>(&*column_haystack);
|
|
|
|
|
const ColumnString * col_needle_vector = typeid_cast<const ColumnString *>(&*column_needle);
|
|
|
|
|
|
|
|
|
|
if (col_haystack_vector && col_needle_vector)
|
2017-03-10 17:52:36 +00:00
|
|
|
|
Impl::vector_vector(col_haystack_vector->getChars(),
|
|
|
|
|
col_haystack_vector->getOffsets(),
|
|
|
|
|
col_needle_vector->getChars(),
|
|
|
|
|
col_needle_vector->getOffsets(),
|
2016-01-27 03:11:28 +00:00
|
|
|
|
vec_res);
|
|
|
|
|
else if (col_haystack_vector && col_needle_const)
|
2017-03-10 17:52:36 +00:00
|
|
|
|
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), col_needle_const->getData(), vec_res);
|
2016-01-27 03:11:28 +00:00
|
|
|
|
else if (col_haystack_const && col_needle_vector)
|
2017-03-10 17:52:36 +00:00
|
|
|
|
Impl::constant_vector(col_haystack_const->getData(), col_needle_vector->getChars(), col_needle_vector->getOffsets(), vec_res);
|
2011-10-16 07:11:36 +00:00
|
|
|
|
else
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception("Illegal columns " + block.safeGetByPosition(arguments[0]).column->getName() + " and "
|
|
|
|
|
+ block.safeGetByPosition(arguments[1]).column->getName()
|
|
|
|
|
+ " of arguments of function "
|
|
|
|
|
+ getName(),
|
2011-10-16 07:11:36 +00:00
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2011-10-17 08:28:39 +00:00
|
|
|
|
|
2013-02-28 13:01:07 +00:00
|
|
|
|
template <typename Impl, typename Name>
|
|
|
|
|
class FunctionsStringSearchToString : public IFunction
|
|
|
|
|
{
|
|
|
|
|
public:
|
2014-11-12 17:23:26 +00:00
|
|
|
|
static constexpr auto name = Name::name;
|
2017-03-10 17:52:36 +00:00
|
|
|
|
static FunctionPtr create(const Context & context)
|
|
|
|
|
{
|
|
|
|
|
return std::make_shared<FunctionsStringSearchToString>();
|
|
|
|
|
}
|
2014-11-12 17:23:26 +00:00
|
|
|
|
|
2015-10-11 23:36:45 +00:00
|
|
|
|
String getName() const override
|
2013-02-28 13:01:07 +00:00
|
|
|
|
{
|
2014-11-12 17:23:26 +00:00
|
|
|
|
return name;
|
2013-02-28 13:01:07 +00:00
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2017-03-10 17:52:36 +00:00
|
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
|
{
|
|
|
|
|
return 2;
|
|
|
|
|
}
|
2016-12-29 19:38:10 +00:00
|
|
|
|
|
2016-07-06 09:47:55 +00:00
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
2013-02-28 13:01:07 +00:00
|
|
|
|
{
|
2014-06-26 00:58:14 +00:00
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception(
|
|
|
|
|
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
|
|
|
|
if (!typeid_cast<const DataTypeString *>(&*arguments[1]))
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception(
|
|
|
|
|
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-05-28 07:48:40 +00:00
|
|
|
|
return std::make_shared<DataTypeString>();
|
2013-02-28 13:01:07 +00:00
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2016-07-06 09:47:55 +00:00
|
|
|
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) override
|
2013-02-28 13:01:07 +00:00
|
|
|
|
{
|
2017-01-02 20:12:12 +00:00
|
|
|
|
const ColumnPtr column = block.safeGetByPosition(arguments[0]).column;
|
|
|
|
|
const ColumnPtr column_needle = block.safeGetByPosition(arguments[1]).column;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
|
|
|
|
const ColumnConstString * col_needle = typeid_cast<const ColumnConstString *>(&*column_needle);
|
2013-02-28 13:01:07 +00:00
|
|
|
|
if (!col_needle)
|
|
|
|
|
throw Exception("Second argument of function " + getName() + " must be constant string.", ErrorCodes::ILLEGAL_COLUMN);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
|
|
|
|
if (const ColumnString * col = typeid_cast<const ColumnString *>(&*column))
|
2013-02-28 13:01:07 +00:00
|
|
|
|
{
|
2016-05-28 05:31:36 +00:00
|
|
|
|
std::shared_ptr<ColumnString> col_res = std::make_shared<ColumnString>();
|
2017-01-02 20:12:12 +00:00
|
|
|
|
block.safeGetByPosition(result).column = col_res;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-05-05 15:25:25 +00:00
|
|
|
|
ColumnString::Chars_t & vec_res = col_res->getChars();
|
2013-02-28 13:01:07 +00:00
|
|
|
|
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
Impl::vector(col->getChars(), col->getOffsets(), col_needle->getData(), vec_res, offsets_res);
|
2013-02-28 13:01:07 +00:00
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
else if (const ColumnConstString * col = typeid_cast<const ColumnConstString *>(&*column))
|
2013-02-28 13:01:07 +00:00
|
|
|
|
{
|
2013-03-05 12:12:47 +00:00
|
|
|
|
const std::string & data = col->getData();
|
2017-03-10 17:52:36 +00:00
|
|
|
|
ColumnString::Chars_t vdata(reinterpret_cast<const ColumnString::Chars_t::value_type *>(data.c_str()),
|
2013-09-15 05:51:43 +00:00
|
|
|
|
reinterpret_cast<const ColumnString::Chars_t::value_type *>(data.c_str() + data.size() + 1));
|
2013-05-05 15:25:25 +00:00
|
|
|
|
ColumnString::Offsets_t offsets(1, vdata.size());
|
2013-09-15 05:51:43 +00:00
|
|
|
|
ColumnString::Chars_t res_vdata;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
ColumnString::Offsets_t res_offsets;
|
2013-03-05 12:12:47 +00:00
|
|
|
|
Impl::vector(vdata, offsets, col_needle->getData(), res_vdata, res_offsets);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-10-03 19:23:08 +00:00
|
|
|
|
std::string res;
|
|
|
|
|
|
|
|
|
|
if (!res_offsets.empty())
|
|
|
|
|
res.assign(&res_vdata[0], &res_vdata[res_vdata.size() - 1]);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2017-01-02 20:12:12 +00:00
|
|
|
|
block.safeGetByPosition(result).column = std::make_shared<ColumnConstString>(col->size(), res);
|
2013-02-28 13:01:07 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
2017-03-10 17:52:36 +00:00
|
|
|
|
throw Exception(
|
|
|
|
|
"Illegal column " + block.safeGetByPosition(arguments[0]).column->getName() + " of argument of function " + getName(),
|
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
2013-02-28 13:01:07 +00:00
|
|
|
|
}
|
|
|
|
|
};
|
2011-10-16 07:11:36 +00:00
|
|
|
|
}
|