From ae7a586aea59deb84a7355021b06eb3b35d876f7 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 28 Jun 2023 10:45:52 +0800 Subject: [PATCH] fix bugs and add uts --- src/Functions/substringIndex.cpp | 497 +++++++++--------- .../02798_substring_index.reference | 155 ++++++ .../0_stateless/02798_substring_index.sql | 93 ++++ 3 files changed, 496 insertions(+), 249 deletions(-) create mode 100644 tests/queries/0_stateless/02798_substring_index.reference create mode 100644 tests/queries/0_stateless/02798_substring_index.sql diff --git a/src/Functions/substringIndex.cpp b/src/Functions/substringIndex.cpp index 0a5dfd00656..1fca3bbed14 100644 --- a/src/Functions/substringIndex.cpp +++ b/src/Functions/substringIndex.cpp @@ -25,287 +25,287 @@ namespace ErrorCodes namespace { -template -class FunctionSubstringIndex : public IFunction -{ -public: - static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex"; - - - static FunctionPtr create(ContextPtr) + template + class FunctionSubstringIndex : public IFunction { - return std::make_shared(); - } + public: + static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex"; - String getName() const override - { - return name; - } - size_t getNumberOfArguments() const override { return 3; } + static FunctionPtr create(ContextPtr) { return std::make_shared(); } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + String getName() const override { return name; } - bool useDefaultImplementationForConstants() const override { return true; } + size_t getNumberOfArguments() const override { return 3; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}", - arguments[0]->getName(), - getName()); + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - if (!isString(arguments[1])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}", - arguments[1]->getName(), - getName()); + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - if (!isNativeNumber(arguments[2])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}", - arguments[2]->getName(), getName()); - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - ColumnPtr column_string = arguments[0].column; - ColumnPtr column_delim = arguments[1].column; - ColumnPtr column_index = arguments[2].column; - - const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); - if (!column_delim_const) - throw Exception(ErrorCodes::ILLEGAL_COLUMN , "Second argument to {} must be a constant String", getName()); - - String delim = column_delim_const->getValue(); - if constexpr (!is_utf8) + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (delim.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName()); - } - else - { - if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName()); + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}", + arguments[0]->getName(), + getName()); + + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}", + arguments[1]->getName(), + getName()); + + if (!isNativeNumber(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}", + arguments[2]->getName(), + getName()); + + return std::make_shared(); } - auto column_res = ColumnString::create(); - ColumnString::Chars & vec_res = column_res->getChars(); - ColumnString::Offsets & offsets_res = column_res->getOffsets(); - - const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get()); - if (column_string_const) + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - String str = column_string_const->getValue(); - constantVector(str, delim, column_index.get(), vec_res, offsets_res); - } - else - { - const auto * col_str = checkAndGetColumn(column_string.get()); - if (!col_str) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); + ColumnPtr column_string = arguments[0].column; + ColumnPtr column_delim = arguments[1].column; + ColumnPtr column_index = arguments[2].column; - bool is_index_const = isColumnConst(*column_index); - if (is_index_const) + const ColumnConst * column_delim_const = checkAndGetColumnConst(column_delim.get()); + if (!column_delim_const) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName()); + + String delim = column_delim_const->getValue(); + if constexpr (!is_utf8) { - Int64 index = column_index->getInt(0); - vectorConstant(col_str->getChars(), col_str->getOffsets(), delim, index, vec_res, offsets_res); + if (delim.size() != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName()); } else - vectorVector(col_str->getChars(), col_str->getOffsets(), delim, column_index.get(), vec_res, offsets_res); - } - } - -protected: - static void vectorVector( - const ColumnString::Chars & str_data, - const ColumnString::Offsets & str_offsets, - const String & delim, - const IColumn * index_column, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = str_offsets.size(); - res_data.reserve(str_data.size() / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - for (size_t i = 0; i < rows; ++i) - { - StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1}; - Int64 index = index_column->getInt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void vectorConstant( - const ColumnString::Chars & str_data, - const ColumnString::Offsets & str_offsets, - const String & delim, - Int64 index, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = str_offsets.size(); - res_data.reserve(str_data.size() / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - for (size_t i = 0; i(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void constantVector( - const String & str, - const String & delim, - const IColumn * index_column, - ColumnString::Chars & res_data, - ColumnString::Offsets & res_offsets) - { - size_t rows = index_column->size(); - res_data.reserve(str.size() * rows / 2); - res_offsets.reserve(rows); - - std::unique_ptr searcher - = !is_utf8 ? nullptr : std::make_unique(delim); - - StringRef str_ref{str.data(), str.size()}; - for (size_t i=0; igetInt(i); - StringRef res_ref - = !is_utf8 ? substringIndex(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); - appendToResultColumn(res_ref, res_data, res_offsets); - } - } - - static void appendToResultColumn( - const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) - { - size_t res_offset = res_data.size(); - res_data.resize(res_offset + res_ref.size + 1); - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); - res_offset += res_ref.size; - res_data[res_offset] = 0; - ++res_offset; - - res_offsets.emplace_back(res_offset); - } - - static StringRef substringIndexUTF8( - const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) - { - if (index == 0) - return {str_ref.data, 0}; - - const auto * begin = reinterpret_cast(str_ref.data); - const auto * end = reinterpret_cast(str_ref.data + str_ref.size); - const auto * pos = begin; - if (index > 0) - { - Int64 i = 0; - while (i < index) { - pos = searcher->search(pos, end - pos); + if (UTF8::countCodePoints(reinterpret_cast(delim.data()), delim.size()) != 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName()); + } - if (pos != end) + auto column_res = ColumnString::create(); + ColumnString::Chars & vec_res = column_res->getChars(); + ColumnString::Offsets & offsets_res = column_res->getOffsets(); + + const ColumnConst * column_string_const = checkAndGetColumnConst(column_string.get()); + if (column_string_const) + { + String str = column_string_const->getValue(); + constantVector(str, delim, column_index.get(), vec_res, offsets_res); + } + else + { + const auto * col_str = checkAndGetColumn(column_string.get()); + if (!col_str) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName()); + + bool is_index_const = isColumnConst(*column_index); + if (is_index_const) + { + Int64 index = column_index->getInt(0); + vectorConstant(col_str, delim, index, vec_res, offsets_res); + } + else + vectorVector(col_str, delim, column_index.get(), vec_res, offsets_res); + } + return column_res; + } + + protected: + static void vectorVector( + const ColumnString * str_column, + const String & delim, + const IColumn * index_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + Int64 index = index_column->getInt(i); + StringRef res_ref + = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void vectorConstant( + const ColumnString * str_column, + const String & delim, + Int64 index, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = str_column->size(); + res_data.reserve(str_column->getChars().size() / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + for (size_t i = 0; i < rows; ++i) + { + StringRef str_ref = str_column->getDataAt(i); + StringRef res_ref + = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + std::cout << "result:" << res_ref.toString() << std::endl; + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void constantVector( + const String & str, + const String & delim, + const IColumn * index_column, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + size_t rows = index_column->size(); + res_data.reserve(str.size() * rows / 2); + res_offsets.reserve(rows); + + std::unique_ptr searcher + = !is_utf8 ? nullptr : std::make_unique(delim.data(), delim.size()); + + StringRef str_ref{str.data(), str.size()}; + for (size_t i = 0; i < rows; ++i) + { + Int64 index = index_column->getInt(i); + StringRef res_ref + = !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index); + appendToResultColumn(res_ref, res_data, res_offsets); + } + } + + static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) + { + size_t res_offset = res_data.size(); + res_data.resize(res_offset + res_ref.size + 1); + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size); + res_offset += res_ref.size; + res_data[res_offset] = 0; + ++res_offset; + + res_offsets.emplace_back(res_offset); + } + + static StringRef substringIndexUTF8( + const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index) + { + std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; + + if (index == 0) + return {str_ref.data, 0}; + + const auto * begin = reinterpret_cast(str_ref.data); + const auto * end = reinterpret_cast(str_ref.data + str_ref.size); + const auto * pos = begin; + if (index > 0) + { + Int64 i = 0; + while (i < index) + { + pos = searcher->search(pos, end - pos); + + if (pos != end) + { + pos += delim.size(); + ++i; + } + else + return str_ref; + } + return {begin, static_cast(pos - begin - delim.size())}; + } + else + { + Int64 total = 0; + while (pos < end && end != (pos = searcher->search(pos, end - pos))) + { + pos += delim.size(); + ++total; + } + + if (total + index < 0) + return str_ref; + + Int64 index_from_left = total + 1 + index; + std::cout << "total:" << total << ", index_from_left" << index_from_left << std::endl; + pos = begin; + Int64 i = 0; + while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos))) { pos += delim.size(); ++i; + std::cout << "pos offset:" << pos - begin << ", total size:" << end - begin << std::endl; } - else - return str_ref; + std::cout << "pos offset:" << pos - begin << ", size:" << end - pos << std::endl; + StringRef res = {pos, static_cast(end - pos)}; + std::cout << "result:" << res.toString() << std::endl; + return res; } - return {begin, static_cast(pos - begin - delim.size())}; } - else + + static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index) { - Int64 total = 0; - while (pos < end && end != (pos = searcher->search(pos, end - pos))) + std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl; + + if (index == 0) + return {str_ref.data, 0}; + + if (index > 0) { - pos += delim.size(); - ++total; - } - - if (total + index < 0) - return str_ref; - - Int64 index_from_left = total + 1 + index; - pos = begin; - Int64 i = 0; - while (pos < end && end != (pos = searcher->search(pos, end - pos)) && i < index_from_left) - { - pos += delim.size(); - ++i; - } - return {pos, static_cast(end - pos)}; - } - } - - template - static StringRef substringIndex( - const StringRef & str_ref, - Int64 index) - { - if (index == 0) - return {str_ref.data, 0}; - - if (index > 0) - { - const auto * end = str_ref.data + str_ref.size; - const auto * pos = str_ref.data; - Int64 i = 0; - while (i < index) - { - pos = find_first_symbols(pos, end); - - if (pos != end) + const auto * end = str_ref.data + str_ref.size; + const auto * pos = str_ref.data; + Int64 i = 0; + while (i < index) { - ++pos; - ++i; + pos = std::find(pos, end, delim); + if (pos != end) + { + ++pos; + ++i; + } + else + return str_ref; } - else - return str_ref; + return {str_ref.data, static_cast(pos - str_ref.data - 1)}; } - return {str_ref.data, static_cast(pos - str_ref.data - 1)}; - } - else - { - const auto * begin = str_ref.data; - const auto * pos = str_ref.data + str_ref.size; - Int64 i = 0; - while (i < index) + else { - const auto * next_pos = ::detail::find_last_symbols_sse2(begin, pos); - - if (next_pos != pos) + const auto * begin = str_ref.data; + const auto * pos = str_ref.data + str_ref.size; + Int64 i = 0; + while (i + index < 0) { - pos = next_pos; - ++i; - } - else - return str_ref; - } + --pos; + while (pos >= begin && *pos != delim) + --pos; - return {pos + 1, static_cast(str_ref.data + str_ref.size - pos - 1)}; + if (pos >= begin) + ++i; + else + return str_ref; + } + return {pos + 1, static_cast(str_ref.data + str_ref.size - pos - 1)}; + } } - } -}; + }; } @@ -319,4 +319,3 @@ REGISTER_FUNCTION(SubstringIndex) } - diff --git a/tests/queries/0_stateless/02798_substring_index.reference b/tests/queries/0_stateless/02798_substring_index.reference new file mode 100644 index 00000000000..a3084509c12 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.reference @@ -0,0 +1,155 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', -2); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', -1); +com +select substringIndex('www.clickhouse.com', '.', 0); + +select substringIndex('www.clickhouse.com', '.', 1); +www +select substringIndex('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', 3); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +com +select substringIndex(materialize('www.clickhouse.com'), '.', 0); + +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +www +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', 4); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2)); +clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +www +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +www.clickhouse +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +www.clickhouse.com +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +com +select substringIndex('www.clickhouse.com', '.', materialize(0)); + +select substringIndex('www.clickhouse.com', '.', materialize(1)); +www +select substringIndex('www.clickhouse.com', '.', materialize(2)); +www.clickhouse +select substringIndex('www.clickhouse.com', '.', materialize(3)); +www.clickhouse.com +select substringIndex('www.clickhouse.com', '.', materialize(4)); +www.clickhouse.com +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); +www.clickhouse +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +select substringIndexUTF8('富强,民主,文明', ',', -4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -2); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', -1); +文明 +select substringIndexUTF8('富强,民主,文明', ',', 0); + +select substringIndexUTF8('富强,民主,文明', ',', 1); +富强 +select substringIndexUTF8('富强,民主,文明', ',', 2); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', 3); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', 4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +富强 +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +富强,民主 +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',', materialize(4)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +富强 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +富强,民主 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +富强,民主,文明 +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); +富强,民主,文明 +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02798_substring_index.sql b/tests/queries/0_stateless/02798_substring_index.sql new file mode 100644 index 00000000000..520775e8970 --- /dev/null +++ b/tests/queries/0_stateless/02798_substring_index.sql @@ -0,0 +1,93 @@ +-- { echoOn } +select substringIndex('www.clickhouse.com', '.', -4); +select substringIndex('www.clickhouse.com', '.', -3); +select substringIndex('www.clickhouse.com', '.', -2); +select substringIndex('www.clickhouse.com', '.', -1); +select substringIndex('www.clickhouse.com', '.', 0); +select substringIndex('www.clickhouse.com', '.', 1); +select substringIndex('www.clickhouse.com', '.', 2); +select substringIndex('www.clickhouse.com', '.', 3); +select substringIndex('www.clickhouse.com', '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', -4); +select substringIndex(materialize('www.clickhouse.com'), '.', -3); +select substringIndex(materialize('www.clickhouse.com'), '.', -2); +select substringIndex(materialize('www.clickhouse.com'), '.', -1); +select substringIndex(materialize('www.clickhouse.com'), '.', 0); +select substringIndex(materialize('www.clickhouse.com'), '.', 1); +select substringIndex(materialize('www.clickhouse.com'), '.', 2); +select substringIndex(materialize('www.clickhouse.com'), '.', 3); +select substringIndex(materialize('www.clickhouse.com'), '.', 4); + +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3)); +select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4)); + +select substringIndex('www.clickhouse.com', '.', materialize(-4)); +select substringIndex('www.clickhouse.com', '.', materialize(-3)); +select substringIndex('www.clickhouse.com', '.', materialize(-2)); +select substringIndex('www.clickhouse.com', '.', materialize(-1)); +select substringIndex('www.clickhouse.com', '.', materialize(0)); +select substringIndex('www.clickhouse.com', '.', materialize(1)); +select substringIndex('www.clickhouse.com', '.', materialize(2)); +select substringIndex('www.clickhouse.com', '.', materialize(3)); +select substringIndex('www.clickhouse.com', '.', materialize(4)); + +select SUBSTRING_INDEX('www.clickhouse.com', '.', 2); + +select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +select substringIndexUTF8('富强,民主,文明', ',', -4); +select substringIndexUTF8('富强,民主,文明', ',', -3); +select substringIndexUTF8('富强,民主,文明', ',', -2); +select substringIndexUTF8('富强,民主,文明', ',', -1); +select substringIndexUTF8('富强,民主,文明', ',', 0); +select substringIndexUTF8('富强,民主,文明', ',', 1); +select substringIndexUTF8('富强,民主,文明', ',', 2); +select substringIndexUTF8('富强,民主,文明', ',', 3); +select substringIndexUTF8('富强,民主,文明', ',', 4); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4); + +select substringIndexUTF8('富强,民主,文明', ',', materialize(-4)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(-1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(0)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(1)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(2)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(3)); +select substringIndexUTF8('富强,民主,文明', ',', materialize(4)); + +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3)); +select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4)); + +select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS } +select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN } +select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +-- { echoOff }