2018-12-14 08:43:20 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <DataTypes/DataTypeString.h>
|
2018-12-13 12:58:00 +00:00
|
|
|
#include <Functions/FunctionFactory.h>
|
2018-12-14 08:43:20 +00:00
|
|
|
#include <Functions/FunctionHelpers.h>
|
2018-12-20 19:06:46 +00:00
|
|
|
#include <common/find_symbols.h>
|
2018-12-14 08:43:20 +00:00
|
|
|
|
2018-12-13 12:58:00 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2018-12-14 08:43:20 +00:00
|
|
|
/// Error codes thrown by the function below. They are only declared here
/// (`extern`); their definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
}
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2018-12-14 08:43:20 +00:00
|
|
|
class FunctionRegexpQuoteMeta : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = "regexpQuoteMeta";
|
|
|
|
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr)
|
2018-12-14 08:43:20 +00:00
|
|
|
{
|
|
|
|
return std::make_shared<FunctionRegexpQuoteMeta>();
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
{
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool useDefaultImplementationForConstants() const override
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-05-17 13:06:11 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(ColumnsWithTypeAndName & /*arguments*/) const override { return true; }
|
2021-04-29 14:48:26 +00:00
|
|
|
|
2018-12-14 08:43:20 +00:00
|
|
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
|
|
|
{
|
|
|
|
if (!WhichDataType(arguments[0].type).isString())
|
|
|
|
throw Exception(
|
|
|
|
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.",
|
|
|
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
|
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
2018-12-14 08:43:20 +00:00
|
|
|
{
|
2020-10-19 15:27:41 +00:00
|
|
|
const ColumnPtr & column_string = arguments[0].column;
|
2018-12-14 08:43:20 +00:00
|
|
|
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
|
|
|
|
|
|
|
|
if (!input)
|
|
|
|
throw Exception(
|
2020-10-19 15:27:41 +00:00
|
|
|
"Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(),
|
2018-12-14 08:43:20 +00:00
|
|
|
ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
|
|
|
auto dst_column = ColumnString::create();
|
|
|
|
auto & dst_data = dst_column->getChars();
|
|
|
|
auto & dst_offsets = dst_column->getOffsets();
|
|
|
|
|
|
|
|
dst_offsets.resize(input_rows_count);
|
|
|
|
|
|
|
|
const ColumnString::Offsets & src_offsets = input->getOffsets();
|
|
|
|
|
2020-04-22 08:45:14 +00:00
|
|
|
const auto * src_begin = reinterpret_cast<const char *>(input->getChars().data());
|
|
|
|
const auto * src_pos = src_begin;
|
2018-12-14 08:43:20 +00:00
|
|
|
|
2018-12-20 19:06:46 +00:00
|
|
|
for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx)
|
2018-12-14 08:43:20 +00:00
|
|
|
{
|
2018-12-20 19:06:46 +00:00
|
|
|
/// NOTE This implementation slightly differs from re2::RE2::QuoteMeta.
|
|
|
|
/// It escapes zero byte as \0 instead of \x00
|
|
|
|
/// and it escapes only required characters.
|
|
|
|
/// This is Ok. Look at comments in re2.cc
|
|
|
|
|
|
|
|
const char * src_end = src_begin + src_offsets[row_idx] - 1;
|
|
|
|
|
|
|
|
while (true)
|
|
|
|
{
|
2018-12-23 02:11:56 +00:00
|
|
|
const char * next_src_pos = find_first_symbols<'\0', '\\', '|', '(', ')', '^', '$', '.', '[', ']', '?', '*', '+', '{', ':', '-'>(src_pos, src_end);
|
2018-12-20 19:06:46 +00:00
|
|
|
|
|
|
|
size_t bytes_to_copy = next_src_pos - src_pos;
|
|
|
|
size_t old_dst_size = dst_data.size();
|
|
|
|
dst_data.resize(old_dst_size + bytes_to_copy);
|
|
|
|
memcpySmallAllowReadWriteOverflow15(dst_data.data() + old_dst_size, src_pos, bytes_to_copy);
|
|
|
|
src_pos = next_src_pos + 1;
|
|
|
|
|
|
|
|
if (next_src_pos == src_end)
|
|
|
|
{
|
|
|
|
dst_data.emplace_back('\0');
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
dst_data.emplace_back('\\');
|
|
|
|
dst_data.emplace_back(*next_src_pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
dst_offsets[row_idx] = dst_data.size();
|
2018-12-14 08:43:20 +00:00
|
|
|
}
|
|
|
|
|
2020-10-19 15:27:41 +00:00
|
|
|
return dst_column;
|
2018-12-14 08:43:20 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
}
|
|
|
|
|
2018-12-13 12:58:00 +00:00
|
|
|
/// Registers FunctionRegexpQuoteMeta in the supplied function factory.
void registerFunctionRegexpQuoteMeta(FunctionFactory & factory)
{
    factory.registerFunction<FunctionRegexpQuoteMeta>();
}
|
2020-09-07 18:00:37 +00:00
|
|
|
|
2018-12-13 12:58:00 +00:00
|
|
|
}
|