mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Introduce regexpQuoteMeta function to properly handle regexp special chars in TRIM #3714
This commit is contained in:
parent
034265f044
commit
180311b42b
112
dbms/src/Functions/FunctionRegexpQuoteMeta.h
Normal file
112
dbms/src/Functions/FunctionRegexpQuoteMeta.h
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
#include <Common/config.h>
|
||||||
|
#include <Columns/ColumnConst.h>
|
||||||
|
#include <Columns/ColumnString.h>
|
||||||
|
#include <DataTypes/DataTypeString.h>
|
||||||
|
#include <Functions/FunctionFactory.h>
|
||||||
|
#include <Functions/FunctionHelpers.h>
|
||||||
|
#include <Functions/GatherUtils/Algorithms.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
|
|
||||||
|
#include <re2/re2.h>
|
||||||
|
#include <re2/stringpiece.h>
|
||||||
|
|
||||||
|
#if USE_RE2_ST
|
||||||
|
#include <re2_st/re2.h> // Y_IGNORE
|
||||||
|
#else
|
||||||
|
#define re2_st re2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
using namespace GatherUtils;
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int ILLEGAL_COLUMN;
|
||||||
|
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||||
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||||
|
}
|
||||||
|
|
||||||
|
class FunctionRegexpQuoteMeta : public IFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
static constexpr auto name = "regexpQuoteMeta";
|
||||||
|
|
||||||
|
static FunctionPtr create(const Context &)
|
||||||
|
{
|
||||||
|
return std::make_shared<FunctionRegexpQuoteMeta>();
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() const override
|
||||||
|
{
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t getNumberOfArguments() const override
|
||||||
|
{
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool useDefaultImplementationForConstants() const override
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||||
|
{
|
||||||
|
if (!WhichDataType(arguments[0].type).isString())
|
||||||
|
throw Exception(
|
||||||
|
"Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.",
|
||||||
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
|
||||||
|
return std::make_shared<DataTypeString>();
|
||||||
|
}
|
||||||
|
|
||||||
|
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
|
||||||
|
{
|
||||||
|
const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
|
||||||
|
const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
|
||||||
|
|
||||||
|
if (!input)
|
||||||
|
throw Exception(
|
||||||
|
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||||
|
ErrorCodes::ILLEGAL_COLUMN);
|
||||||
|
|
||||||
|
auto dst_column = ColumnString::create();
|
||||||
|
auto & dst_data = dst_column->getChars();
|
||||||
|
auto & dst_offsets = dst_column->getOffsets();
|
||||||
|
|
||||||
|
dst_data.resize(input->getChars().size() * input->size());
|
||||||
|
dst_offsets.resize(input_rows_count);
|
||||||
|
|
||||||
|
const ColumnString::Offsets & src_offsets = input->getOffsets();
|
||||||
|
|
||||||
|
auto source = reinterpret_cast<const char *>(input->getChars().data());
|
||||||
|
auto dst = reinterpret_cast<char *>(dst_data.data());
|
||||||
|
auto dst_pos = dst;
|
||||||
|
|
||||||
|
size_t src_offset_prev = 0;
|
||||||
|
|
||||||
|
for (size_t row = 0; row < input_rows_count; ++row)
|
||||||
|
{
|
||||||
|
size_t srclen = src_offsets[row] - src_offset_prev - 1;
|
||||||
|
|
||||||
|
re2_st::StringPiece unquoted(source, srclen);
|
||||||
|
const auto & quoted = re2_st::RE2::QuoteMeta(unquoted);
|
||||||
|
std::memcpy(dst_pos, quoted.data(), quoted.size());
|
||||||
|
|
||||||
|
source += srclen + 1;
|
||||||
|
dst_pos += quoted.size() + 1;
|
||||||
|
|
||||||
|
dst_offsets[row] = dst_pos - dst;
|
||||||
|
src_offset_prev = src_offsets[row];
|
||||||
|
}
|
||||||
|
|
||||||
|
dst_data.resize(dst_pos - dst);
|
||||||
|
|
||||||
|
block.getByPosition(result).column = std::move(dst_column);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
10
dbms/src/Functions/regexpQuoteMeta.cpp
Normal file
10
dbms/src/Functions/regexpQuoteMeta.cpp
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
#include <Functions/FunctionRegexpQuoteMeta.h>
|
||||||
|
#include <Functions/FunctionFactory.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
void registerFunctionRegexpQuoteMeta(FunctionFactory & factory)
|
||||||
|
{
|
||||||
|
factory.registerFunction<FunctionRegexpQuoteMeta>();
|
||||||
|
}
|
||||||
|
}
|
@ -21,6 +21,7 @@ void registerFunctionSubstringUTF8(FunctionFactory &);
|
|||||||
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
||||||
void registerFunctionStartsWith(FunctionFactory &);
|
void registerFunctionStartsWith(FunctionFactory &);
|
||||||
void registerFunctionEndsWith(FunctionFactory &);
|
void registerFunctionEndsWith(FunctionFactory &);
|
||||||
|
void registerFunctionRegexpQuoteMeta(FunctionFactory &);
|
||||||
|
|
||||||
#if USE_BASE64
|
#if USE_BASE64
|
||||||
void registerFunctionBase64Encode(FunctionFactory &);
|
void registerFunctionBase64Encode(FunctionFactory &);
|
||||||
@ -46,6 +47,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
|||||||
registerFunctionAppendTrailingCharIfAbsent(factory);
|
registerFunctionAppendTrailingCharIfAbsent(factory);
|
||||||
registerFunctionStartsWith(factory);
|
registerFunctionStartsWith(factory);
|
||||||
registerFunctionEndsWith(factory);
|
registerFunctionEndsWith(factory);
|
||||||
|
registerFunctionRegexpQuoteMeta(factory);
|
||||||
#if USE_BASE64
|
#if USE_BASE64
|
||||||
registerFunctionBase64Encode(factory);
|
registerFunctionBase64Encode(factory);
|
||||||
registerFunctionBase64Decode(factory);
|
registerFunctionBase64Decode(factory);
|
||||||
|
@ -444,10 +444,20 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
|
|||||||
|
|
||||||
if (char_override)
|
if (char_override)
|
||||||
{
|
{
|
||||||
if (!ParserExpression().parse(pos, to_remove, expected)) /// TODO: wrap in RE2::QuoteMeta call
|
if (!ParserExpression().parse(pos, to_remove, expected))
|
||||||
return false;
|
return false;
|
||||||
if (!ParserKeyword("FROM").ignore(pos, expected))
|
if (!ParserKeyword("FROM").ignore(pos, expected))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
auto quote_meta_func_node = std::make_shared<ASTFunction>();
|
||||||
|
auto quote_meta_list_args = std::make_shared<ASTExpressionList>();
|
||||||
|
quote_meta_list_args->children = {to_remove};
|
||||||
|
|
||||||
|
quote_meta_func_node->name = "regexpQuoteMeta";
|
||||||
|
quote_meta_func_node->arguments = std::move(quote_meta_list_args);
|
||||||
|
quote_meta_func_node->children.push_back(quote_meta_func_node->arguments);
|
||||||
|
|
||||||
|
to_remove = std::move(quote_meta_func_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -463,9 +473,9 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
|
|||||||
|
|
||||||
/// Convert to regexp replace function call
|
/// Convert to regexp replace function call
|
||||||
|
|
||||||
auto pattern_func_node = std::make_shared<ASTFunction>();
|
|
||||||
if (char_override)
|
if (char_override)
|
||||||
{
|
{
|
||||||
|
auto pattern_func_node = std::make_shared<ASTFunction>();
|
||||||
auto pattern_list_args = std::make_shared<ASTExpressionList>();
|
auto pattern_list_args = std::make_shared<ASTExpressionList>();
|
||||||
if (trim_left && trim_right)
|
if (trim_left && trim_right)
|
||||||
{
|
{
|
||||||
|
@ -17,3 +17,4 @@ foo
|
|||||||
xxfoo
|
xxfoo
|
||||||
fooabba
|
fooabba
|
||||||
fooabbafoo
|
fooabbafoo
|
||||||
|
foo*
|
||||||
|
@ -19,3 +19,4 @@ select RTRIM(' foo ');
|
|||||||
select trim(TRAILING 'x' FROM 'xxfooxx');
|
select trim(TRAILING 'x' FROM 'xxfooxx');
|
||||||
select Trim(LEADING 'ab' FROM 'abbafooabba');
|
select Trim(LEADING 'ab' FROM 'abbafooabba');
|
||||||
select TRIM(both 'ab' FROM 'abbafooabbafooabba');
|
select TRIM(both 'ab' FROM 'abbafooabbafooabba');
|
||||||
|
select trim(LEADING '*[]{}|\\' FROM '\\|[[[}}}*foo*');
|
||||||
|
Loading…
Reference in New Issue
Block a user