diff --git a/dbms/src/Functions/FunctionRegexpQuoteMeta.h b/dbms/src/Functions/FunctionRegexpQuoteMeta.h
new file mode 100644
index 00000000000..a4e2c0a1771
--- /dev/null
+++ b/dbms/src/Functions/FunctionRegexpQuoteMeta.h
@@ -0,0 +1,124 @@
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#if USE_RE2_ST
+    #include // Y_IGNORE
+#else
+    #define re2_st re2
+#endif
+
+
+namespace DB
+{
+using namespace GatherUtils;
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+/** regexpQuoteMeta(s) - passes every value of a String column through RE2::QuoteMeta,
+  * so that the result can be embedded in a regular expression and match the original text literally.
+  */
+class FunctionRegexpQuoteMeta : public IFunction
+{
+public:
+    static constexpr auto name = "regexpQuoteMeta";
+
+    static FunctionPtr create(const Context &)
+    {
+        return std::make_shared<FunctionRegexpQuoteMeta>();
+    }
+
+    String getName() const override
+    {
+        return name;
+    }
+
+    size_t getNumberOfArguments() const override
+    {
+        return 1;
+    }
+
+    bool useDefaultImplementationForConstants() const override
+    {
+        return true;
+    }
+
+    /// The single argument must be a String; otherwise ILLEGAL_TYPE_OF_ARGUMENT is thrown.
+    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+    {
+        if (!WhichDataType(arguments[0].type).isString())
+            throw Exception(
+                "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". Must be String.",
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    /** Quotes every row of the String column at arguments[0] and puts the new String column at `result`.
+      * Throws ILLEGAL_COLUMN if the argument is not a ColumnString.
+      */
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
+    {
+        const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
+        const ColumnString * input = checkAndGetColumn<ColumnString>(column_string.get());
+
+        if (!input)
+            throw Exception(
+                "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
+                ErrorCodes::ILLEGAL_COLUMN);
+
+        auto dst_column = ColumnString::create();
+        auto & dst_data = dst_column->getChars();
+        auto & dst_offsets = dst_column->getOffsets();
+
+        dst_offsets.resize(input_rows_count);
+
+        const ColumnString::Offsets & src_offsets = input->getOffsets();
+        const auto src_begin = reinterpret_cast<const char *>(input->getChars().data());
+
+        size_t src_offset_prev = 0;
+        size_t dst_size = 0;
+
+        for (size_t row = 0; row < input_rows_count; ++row)
+        {
+            /// Every row is followed by one terminating byte that is not part of the value.
+            const size_t src_len = src_offsets[row] - src_offset_prev - 1;
+
+            const re2_st::StringPiece unquoted(src_begin + src_offset_prev, src_len);
+            const auto quoted = re2_st::RE2::QuoteMeta(unquoted);
+
+            /// The quoted text can be longer than the input, so the output is grown row by row
+            /// to exactly what is needed instead of being sized from the input in advance.
+            dst_data.resize(dst_size + quoted.size() + 1);
+
+            std::memcpy(dst_data.data() + dst_size, quoted.data(), quoted.size());
+            dst_size += quoted.size();
+
+            /// Write the terminating zero explicitly rather than relying on resize() to clear new bytes.
+            dst_data[dst_size] = 0;
+            ++dst_size;
+
+            dst_offsets[row] = dst_size;
+            src_offset_prev = src_offsets[row];
+        }
+
+        block.getByPosition(result).column = std::move(dst_column);
+    }
+
+};
+}
diff --git a/dbms/src/Functions/regexpQuoteMeta.cpp b/dbms/src/Functions/regexpQuoteMeta.cpp
new file mode 100644
index 00000000000..a0d99fdefc6
--- /dev/null
+++ b/dbms/src/Functions/regexpQuoteMeta.cpp
@@ -0,0 +1,10 @@
+#include
+#include
+
+namespace DB
+{
+void registerFunctionRegexpQuoteMeta(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionRegexpQuoteMeta>();
+}
+}
diff --git a/dbms/src/Functions/registerFunctionsString.cpp
b/dbms/src/Functions/registerFunctionsString.cpp index 3a07d8bbd65..d838ac9ff31 100644 --- a/dbms/src/Functions/registerFunctionsString.cpp +++ b/dbms/src/Functions/registerFunctionsString.cpp @@ -21,6 +21,7 @@ void registerFunctionSubstringUTF8(FunctionFactory &); void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &); void registerFunctionEndsWith(FunctionFactory &); +void registerFunctionRegexpQuoteMeta(FunctionFactory &); #if USE_BASE64 void registerFunctionBase64Encode(FunctionFactory &); @@ -46,6 +47,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionAppendTrailingCharIfAbsent(factory); registerFunctionStartsWith(factory); registerFunctionEndsWith(factory); + registerFunctionRegexpQuoteMeta(factory); #if USE_BASE64 registerFunctionBase64Encode(factory); registerFunctionBase64Decode(factory); diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index babea27b3a3..43f39cd0079 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -444,10 +444,20 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expect if (char_override) { - if (!ParserExpression().parse(pos, to_remove, expected)) /// TODO: wrap in RE2::QuoteMeta call + if (!ParserExpression().parse(pos, to_remove, expected)) return false; if (!ParserKeyword("FROM").ignore(pos, expected)) return false; + + auto quote_meta_func_node = std::make_shared(); + auto quote_meta_list_args = std::make_shared(); + quote_meta_list_args->children = {to_remove}; + + quote_meta_func_node->name = "regexpQuoteMeta"; + quote_meta_func_node->arguments = std::move(quote_meta_list_args); + quote_meta_func_node->children.push_back(quote_meta_func_node->arguments); + + to_remove = std::move(quote_meta_func_node); } } @@ -463,9 +473,9 @@ bool ParserTrimExpression::parseImpl(Pos & pos, ASTPtr & node, Expected 
& expect /// Convert to regexp replace function call - auto pattern_func_node = std::make_shared(); if (char_override) { + auto pattern_func_node = std::make_shared(); auto pattern_list_args = std::make_shared(); if (trim_left && trim_right) { diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference index 1f1736ff552..79c3925bde9 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference @@ -17,3 +17,4 @@ foo xxfoo fooabba fooabbafoo +foo* diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql index e83f9f48de7..6cc77010fea 100644 --- a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql @@ -19,3 +19,4 @@ select RTRIM(' foo '); select trim(TRAILING 'x' FROM 'xxfooxx'); select Trim(LEADING 'ab' FROM 'abbafooabba'); select TRIM(both 'ab' FROM 'abbafooabbafooabba'); +select trim(LEADING '*[]{}|\\' FROM '\\|[[[}}}*foo*');