fix bugs and add uts

This commit is contained in:
taiyang-li 2023-06-28 10:45:52 +08:00
parent 0de5fcfbee
commit ae7a586aea
3 changed files with 496 additions and 249 deletions

View File

@ -25,287 +25,287 @@ namespace ErrorCodes
namespace
{
template <bool is_utf8>
class FunctionSubstringIndex : public IFunction
{
public:
static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
static FunctionPtr create(ContextPtr)
template <bool is_utf8>
class FunctionSubstringIndex : public IFunction
{
return std::make_shared<FunctionSubstringIndex>();
}
public:
static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override { return 3; }
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSubstringIndex>(); }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
String getName() const override { return name; }
bool useDefaultImplementationForConstants() const override { return true; }
size_t getNumberOfArguments() const override { return 3; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}",
arguments[0]->getName(),
getName());
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
if (!isString(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of second argument of function {}",
arguments[1]->getName(),
getName());
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
if (!isNativeNumber(arguments[2]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}",
arguments[2]->getName(), getName());
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
ColumnPtr column_string = arguments[0].column;
ColumnPtr column_delim = arguments[1].column;
ColumnPtr column_index = arguments[2].column;
const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
if (!column_delim_const)
throw Exception(ErrorCodes::ILLEGAL_COLUMN , "Second argument to {} must be a constant String", getName());
String delim = column_delim_const->getValue<String>();
if constexpr (!is_utf8)
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (delim.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
}
else
{
if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
if (!isString(arguments[0]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}",
arguments[0]->getName(),
getName());
if (!isString(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of second argument of function {}",
arguments[1]->getName(),
getName());
if (!isNativeNumber(arguments[2]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of third argument of function {}",
arguments[2]->getName(),
getName());
return std::make_shared<DataTypeString>();
}
auto column_res = ColumnString::create();
ColumnString::Chars & vec_res = column_res->getChars();
ColumnString::Offsets & offsets_res = column_res->getOffsets();
const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
if (column_string_const)
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
String str = column_string_const->getValue<String>();
constantVector(str, delim, column_index.get(), vec_res, offsets_res);
}
else
{
const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
if (!col_str)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
ColumnPtr column_string = arguments[0].column;
ColumnPtr column_delim = arguments[1].column;
ColumnPtr column_index = arguments[2].column;
bool is_index_const = isColumnConst(*column_index);
if (is_index_const)
const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
if (!column_delim_const)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName());
String delim = column_delim_const->getValue<String>();
if constexpr (!is_utf8)
{
Int64 index = column_index->getInt(0);
vectorConstant(col_str->getChars(), col_str->getOffsets(), delim, index, vec_res, offsets_res);
if (delim.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
}
else
vectorVector(col_str->getChars(), col_str->getOffsets(), delim, column_index.get(), vec_res, offsets_res);
}
}
protected:
static void vectorVector(
const ColumnString::Chars & str_data,
const ColumnString::Offsets & str_offsets,
const String & delim,
const IColumn * index_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_offsets.size();
res_data.reserve(str_data.size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
for (size_t i = 0; i < rows; ++i)
{
StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
Int64 index = index_column->getInt(i);
StringRef res_ref
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void vectorConstant(
const ColumnString::Chars & str_data,
const ColumnString::Offsets & str_offsets,
const String & delim,
Int64 index,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_offsets.size();
res_data.reserve(str_data.size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
for (size_t i = 0; i<rows; ++i)
{
StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
StringRef res_ref
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void constantVector(
const String & str,
const String & delim,
const IColumn * index_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = index_column->size();
res_data.reserve(str.size() * rows / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
StringRef str_ref{str.data(), str.size()};
for (size_t i=0; i<rows; ++i)
{
Int64 index = index_column->getInt(i);
StringRef res_ref
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void appendToResultColumn(
const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
{
size_t res_offset = res_data.size();
res_data.resize(res_offset + res_ref.size + 1);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size);
res_offset += res_ref.size;
res_data[res_offset] = 0;
++res_offset;
res_offsets.emplace_back(res_offset);
}
static StringRef substringIndexUTF8(
const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index)
{
if (index == 0)
return {str_ref.data, 0};
const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
const auto * pos = begin;
if (index > 0)
{
Int64 i = 0;
while (i < index)
{
pos = searcher->search(pos, end - pos);
if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
}
if (pos != end)
auto column_res = ColumnString::create();
ColumnString::Chars & vec_res = column_res->getChars();
ColumnString::Offsets & offsets_res = column_res->getOffsets();
const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
if (column_string_const)
{
String str = column_string_const->getValue<String>();
constantVector(str, delim, column_index.get(), vec_res, offsets_res);
}
else
{
const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
if (!col_str)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
bool is_index_const = isColumnConst(*column_index);
if (is_index_const)
{
Int64 index = column_index->getInt(0);
vectorConstant(col_str, delim, index, vec_res, offsets_res);
}
else
vectorVector(col_str, delim, column_index.get(), vec_res, offsets_res);
}
return column_res;
}
protected:
static void vectorVector(
const ColumnString * str_column,
const String & delim,
const IColumn * index_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_column->size();
res_data.reserve(str_column->getChars().size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
for (size_t i = 0; i < rows; ++i)
{
StringRef str_ref = str_column->getDataAt(i);
Int64 index = index_column->getInt(i);
StringRef res_ref
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void vectorConstant(
const ColumnString * str_column,
const String & delim,
Int64 index,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_column->size();
res_data.reserve(str_column->getChars().size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
for (size_t i = 0; i < rows; ++i)
{
StringRef str_ref = str_column->getDataAt(i);
StringRef res_ref
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
std::cout << "result:" << res_ref.toString() << std::endl;
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void constantVector(
const String & str,
const String & delim,
const IColumn * index_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = index_column->size();
res_data.reserve(str.size() * rows / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
StringRef str_ref{str.data(), str.size()};
for (size_t i = 0; i < rows; ++i)
{
Int64 index = index_column->getInt(i);
StringRef res_ref
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
{
size_t res_offset = res_data.size();
res_data.resize(res_offset + res_ref.size + 1);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size);
res_offset += res_ref.size;
res_data[res_offset] = 0;
++res_offset;
res_offsets.emplace_back(res_offset);
}
static StringRef substringIndexUTF8(
const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index)
{
std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl;
if (index == 0)
return {str_ref.data, 0};
const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
const auto * pos = begin;
if (index > 0)
{
Int64 i = 0;
while (i < index)
{
pos = searcher->search(pos, end - pos);
if (pos != end)
{
pos += delim.size();
++i;
}
else
return str_ref;
}
return {begin, static_cast<size_t>(pos - begin - delim.size())};
}
else
{
Int64 total = 0;
while (pos < end && end != (pos = searcher->search(pos, end - pos)))
{
pos += delim.size();
++total;
}
if (total + index < 0)
return str_ref;
Int64 index_from_left = total + 1 + index;
std::cout << "total:" << total << ", index_from_left" << index_from_left << std::endl;
pos = begin;
Int64 i = 0;
while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos)))
{
pos += delim.size();
++i;
std::cout << "pos offset:" << pos - begin << ", total size:" << end - begin << std::endl;
}
else
return str_ref;
std::cout << "pos offset:" << pos - begin << ", size:" << end - pos << std::endl;
StringRef res = {pos, static_cast<size_t>(end - pos)};
std::cout << "result:" << res.toString() << std::endl;
return res;
}
return {begin, static_cast<size_t>(pos - begin - delim.size())};
}
else
static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index)
{
Int64 total = 0;
while (pos < end && end != (pos = searcher->search(pos, end - pos)))
std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl;
if (index == 0)
return {str_ref.data, 0};
if (index > 0)
{
pos += delim.size();
++total;
}
if (total + index < 0)
return str_ref;
Int64 index_from_left = total + 1 + index;
pos = begin;
Int64 i = 0;
while (pos < end && end != (pos = searcher->search(pos, end - pos)) && i < index_from_left)
{
pos += delim.size();
++i;
}
return {pos, static_cast<size_t>(end - pos)};
}
}
template <char delim>
static StringRef substringIndex(
const StringRef & str_ref,
Int64 index)
{
if (index == 0)
return {str_ref.data, 0};
if (index > 0)
{
const auto * end = str_ref.data + str_ref.size;
const auto * pos = str_ref.data;
Int64 i = 0;
while (i < index)
{
pos = find_first_symbols<delim>(pos, end);
if (pos != end)
const auto * end = str_ref.data + str_ref.size;
const auto * pos = str_ref.data;
Int64 i = 0;
while (i < index)
{
++pos;
++i;
pos = std::find(pos, end, delim);
if (pos != end)
{
++pos;
++i;
}
else
return str_ref;
}
else
return str_ref;
return {str_ref.data, static_cast<size_t>(pos - str_ref.data - 1)};
}
return {str_ref.data, static_cast<size_t>(pos - str_ref.data - 1)};
}
else
{
const auto * begin = str_ref.data;
const auto * pos = str_ref.data + str_ref.size;
Int64 i = 0;
while (i < index)
else
{
const auto * next_pos = ::detail::find_last_symbols_sse2<true, ::detail::ReturnMode::End, delim>(begin, pos);
if (next_pos != pos)
const auto * begin = str_ref.data;
const auto * pos = str_ref.data + str_ref.size;
Int64 i = 0;
while (i + index < 0)
{
pos = next_pos;
++i;
}
else
return str_ref;
}
--pos;
while (pos >= begin && *pos != delim)
--pos;
return {pos + 1, static_cast<size_t>(str_ref.data + str_ref.size - pos - 1)};
if (pos >= begin)
++i;
else
return str_ref;
}
return {pos + 1, static_cast<size_t>(str_ref.data + str_ref.size - pos - 1)};
}
}
}
};
};
}
@ -319,4 +319,3 @@ REGISTER_FUNCTION(SubstringIndex)
}

View File

@ -0,0 +1,155 @@
-- { echoOn }
select substringIndex('www.clickhouse.com', '.', -4);
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', -3);
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', -2);
clickhouse.com
select substringIndex('www.clickhouse.com', '.', -1);
com
select substringIndex('www.clickhouse.com', '.', 0);
select substringIndex('www.clickhouse.com', '.', 1);
www
select substringIndex('www.clickhouse.com', '.', 2);
www.clickhouse
select substringIndex('www.clickhouse.com', '.', 3);
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', 4);
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', -4);
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', -3);
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', -2);
clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', -1);
com
select substringIndex(materialize('www.clickhouse.com'), '.', 0);
select substringIndex(materialize('www.clickhouse.com'), '.', 1);
www
select substringIndex(materialize('www.clickhouse.com'), '.', 2);
www.clickhouse
select substringIndex(materialize('www.clickhouse.com'), '.', 3);
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', 4);
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4));
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3));
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2));
clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1));
com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1));
www
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2));
www.clickhouse
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3));
www.clickhouse.com
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4));
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', materialize(-4));
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', materialize(-3));
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', materialize(-2));
clickhouse.com
select substringIndex('www.clickhouse.com', '.', materialize(-1));
com
select substringIndex('www.clickhouse.com', '.', materialize(0));
select substringIndex('www.clickhouse.com', '.', materialize(1));
www
select substringIndex('www.clickhouse.com', '.', materialize(2));
www.clickhouse
select substringIndex('www.clickhouse.com', '.', materialize(3));
www.clickhouse.com
select substringIndex('www.clickhouse.com', '.', materialize(4));
www.clickhouse.com
select SUBSTRING_INDEX('www.clickhouse.com', '.', 2);
www.clickhouse
select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS }
select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN }
select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select substringIndexUTF8('富强,民主,文明', '', -4);
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', -3);
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', -2);
民主,文明
select substringIndexUTF8('富强,民主,文明', '', -1);
文明
select substringIndexUTF8('富强,民主,文明', '', 0);
select substringIndexUTF8('富强,民主,文明', '', 1);
富强
select substringIndexUTF8('富强,民主,文明', '', 2);
富强,民主
select substringIndexUTF8('富强,民主,文明', '', 3);
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', 4);
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', -4);
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', -3);
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', -2);
民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', -1);
文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', 0);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 1);
富强
select substringIndexUTF8(materialize('富强,民主,文明'), '', 2);
富强,民主
select substringIndexUTF8(materialize('富强,民主,文明'), '', 3);
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', 4);
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', materialize(-4));
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', materialize(-3));
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', materialize(-2));
民主,文明
select substringIndexUTF8('富强,民主,文明', '', materialize(-1));
文明
select substringIndexUTF8('富强,民主,文明', '', materialize(0));
select substringIndexUTF8('富强,民主,文明', '', materialize(1));
富强
select substringIndexUTF8('富强,民主,文明', '', materialize(2));
富强,民主
select substringIndexUTF8('富强,民主,文明', '', materialize(3));
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', materialize(4));
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-4));
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-3));
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-2));
民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-1));
文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(0));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(1));
富强
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(2));
富强,民主
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(3));
富强,民主,文明
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(4));
富强,民主,文明
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndexUTF8('富强,民主,文明', materialize(''), 2); -- { serverError ILLEGAL_COLUMN }
select substringIndexUTF8('富强,民主,文明', '', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }

View File

@ -0,0 +1,93 @@
-- { echoOn }
select substringIndex('www.clickhouse.com', '.', -4);
select substringIndex('www.clickhouse.com', '.', -3);
select substringIndex('www.clickhouse.com', '.', -2);
select substringIndex('www.clickhouse.com', '.', -1);
select substringIndex('www.clickhouse.com', '.', 0);
select substringIndex('www.clickhouse.com', '.', 1);
select substringIndex('www.clickhouse.com', '.', 2);
select substringIndex('www.clickhouse.com', '.', 3);
select substringIndex('www.clickhouse.com', '.', 4);
select substringIndex(materialize('www.clickhouse.com'), '.', -4);
select substringIndex(materialize('www.clickhouse.com'), '.', -3);
select substringIndex(materialize('www.clickhouse.com'), '.', -2);
select substringIndex(materialize('www.clickhouse.com'), '.', -1);
select substringIndex(materialize('www.clickhouse.com'), '.', 0);
select substringIndex(materialize('www.clickhouse.com'), '.', 1);
select substringIndex(materialize('www.clickhouse.com'), '.', 2);
select substringIndex(materialize('www.clickhouse.com'), '.', 3);
select substringIndex(materialize('www.clickhouse.com'), '.', 4);
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3));
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4));
select substringIndex('www.clickhouse.com', '.', materialize(-4));
select substringIndex('www.clickhouse.com', '.', materialize(-3));
select substringIndex('www.clickhouse.com', '.', materialize(-2));
select substringIndex('www.clickhouse.com', '.', materialize(-1));
select substringIndex('www.clickhouse.com', '.', materialize(0));
select substringIndex('www.clickhouse.com', '.', materialize(1));
select substringIndex('www.clickhouse.com', '.', materialize(2));
select substringIndex('www.clickhouse.com', '.', materialize(3));
select substringIndex('www.clickhouse.com', '.', materialize(4));
select SUBSTRING_INDEX('www.clickhouse.com', '.', 2);
select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS }
select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN }
select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
select substringIndexUTF8('富强,民主,文明', '', -4);
select substringIndexUTF8('富强,民主,文明', '', -3);
select substringIndexUTF8('富强,民主,文明', '', -2);
select substringIndexUTF8('富强,民主,文明', '', -1);
select substringIndexUTF8('富强,民主,文明', '', 0);
select substringIndexUTF8('富强,民主,文明', '', 1);
select substringIndexUTF8('富强,民主,文明', '', 2);
select substringIndexUTF8('富强,民主,文明', '', 3);
select substringIndexUTF8('富强,民主,文明', '', 4);
select substringIndexUTF8(materialize('富强,民主,文明'), '', -4);
select substringIndexUTF8(materialize('富强,民主,文明'), '', -3);
select substringIndexUTF8(materialize('富强,民主,文明'), '', -2);
select substringIndexUTF8(materialize('富强,民主,文明'), '', -1);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 0);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 1);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 2);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 3);
select substringIndexUTF8(materialize('富强,民主,文明'), '', 4);
select substringIndexUTF8('富强,民主,文明', '', materialize(-4));
select substringIndexUTF8('富强,民主,文明', '', materialize(-3));
select substringIndexUTF8('富强,民主,文明', '', materialize(-2));
select substringIndexUTF8('富强,民主,文明', '', materialize(-1));
select substringIndexUTF8('富强,民主,文明', '', materialize(0));
select substringIndexUTF8('富强,民主,文明', '', materialize(1));
select substringIndexUTF8('富强,民主,文明', '', materialize(2));
select substringIndexUTF8('富强,民主,文明', '', materialize(3));
select substringIndexUTF8('富强,民主,文明', '', materialize(4));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-4));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-3));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-2));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(-1));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(0));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(1));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(2));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(3));
select substringIndexUTF8(materialize('富强,民主,文明'), '', materialize(4));
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
select substringIndexUTF8('富强,民主,文明', materialize(''), 2); -- { serverError ILLEGAL_COLUMN }
select substringIndexUTF8('富强,民主,文明', '', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- { echoOff }