mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-27 10:02:01 +00:00
Fix bugs and add unit tests (UTs)
This commit is contained in:
parent
0de5fcfbee
commit
ae7a586aea
@ -25,287 +25,287 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
template <bool is_utf8>
|
||||
class FunctionSubstringIndex : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
|
||||
|
||||
|
||||
static FunctionPtr create(ContextPtr)
|
||||
template <bool is_utf8>
|
||||
class FunctionSubstringIndex : public IFunction
|
||||
{
|
||||
return std::make_shared<FunctionSubstringIndex>();
|
||||
}
|
||||
public:
|
||||
static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSubstringIndex>(); }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 3; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}",
|
||||
arguments[0]->getName(),
|
||||
getName());
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
if (!isString(arguments[1]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of second argument of function {}",
|
||||
arguments[1]->getName(),
|
||||
getName());
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
|
||||
|
||||
if (!isNativeNumber(arguments[2]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}",
|
||||
arguments[2]->getName(), getName());
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
ColumnPtr column_string = arguments[0].column;
|
||||
ColumnPtr column_delim = arguments[1].column;
|
||||
ColumnPtr column_index = arguments[2].column;
|
||||
|
||||
const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
|
||||
if (!column_delim_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN , "Second argument to {} must be a constant String", getName());
|
||||
|
||||
String delim = column_delim_const->getValue<String>();
|
||||
if constexpr (!is_utf8)
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (delim.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}",
|
||||
arguments[0]->getName(),
|
||||
getName());
|
||||
|
||||
if (!isString(arguments[1]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of second argument of function {}",
|
||||
arguments[1]->getName(),
|
||||
getName());
|
||||
|
||||
if (!isNativeNumber(arguments[2]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of third argument of function {}",
|
||||
arguments[2]->getName(),
|
||||
getName());
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
auto column_res = ColumnString::create();
|
||||
ColumnString::Chars & vec_res = column_res->getChars();
|
||||
ColumnString::Offsets & offsets_res = column_res->getOffsets();
|
||||
|
||||
const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
|
||||
if (column_string_const)
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
String str = column_string_const->getValue<String>();
|
||||
constantVector(str, delim, column_index.get(), vec_res, offsets_res);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
|
||||
if (!col_str)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
|
||||
ColumnPtr column_string = arguments[0].column;
|
||||
ColumnPtr column_delim = arguments[1].column;
|
||||
ColumnPtr column_index = arguments[2].column;
|
||||
|
||||
bool is_index_const = isColumnConst(*column_index);
|
||||
if (is_index_const)
|
||||
const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
|
||||
if (!column_delim_const)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName());
|
||||
|
||||
String delim = column_delim_const->getValue<String>();
|
||||
if constexpr (!is_utf8)
|
||||
{
|
||||
Int64 index = column_index->getInt(0);
|
||||
vectorConstant(col_str->getChars(), col_str->getOffsets(), delim, index, vec_res, offsets_res);
|
||||
if (delim.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
|
||||
}
|
||||
else
|
||||
vectorVector(col_str->getChars(), col_str->getOffsets(), delim, column_index.get(), vec_res, offsets_res);
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
static void vectorVector(
|
||||
const ColumnString::Chars & str_data,
|
||||
const ColumnString::Offsets & str_offsets,
|
||||
const String & delim,
|
||||
const IColumn * index_column,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = str_offsets.size();
|
||||
res_data.reserve(str_data.size() / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
|
||||
Int64 index = index_column->getInt(i);
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstant(
|
||||
const ColumnString::Chars & str_data,
|
||||
const ColumnString::Offsets & str_offsets,
|
||||
const String & delim,
|
||||
Int64 index,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = str_offsets.size();
|
||||
res_data.reserve(str_data.size() / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
|
||||
|
||||
for (size_t i = 0; i<rows; ++i)
|
||||
{
|
||||
StringRef str_ref{&str_data[str_offsets[i]], str_offsets[i] - str_offsets[i - 1] - 1};
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void constantVector(
|
||||
const String & str,
|
||||
const String & delim,
|
||||
const IColumn * index_column,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = index_column->size();
|
||||
res_data.reserve(str.size() * rows / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim);
|
||||
|
||||
StringRef str_ref{str.data(), str.size()};
|
||||
for (size_t i=0; i<rows; ++i)
|
||||
{
|
||||
Int64 index = index_column->getInt(i);
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex<delim[0]>(str_ref, index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void appendToResultColumn(
|
||||
const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t res_offset = res_data.size();
|
||||
res_data.resize(res_offset + res_ref.size + 1);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size);
|
||||
res_offset += res_ref.size;
|
||||
res_data[res_offset] = 0;
|
||||
++res_offset;
|
||||
|
||||
res_offsets.emplace_back(res_offset);
|
||||
}
|
||||
|
||||
static StringRef substringIndexUTF8(
|
||||
const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index)
|
||||
{
|
||||
if (index == 0)
|
||||
return {str_ref.data, 0};
|
||||
|
||||
const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
|
||||
const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
|
||||
const auto * pos = begin;
|
||||
if (index > 0)
|
||||
{
|
||||
Int64 i = 0;
|
||||
while (i < index)
|
||||
{
|
||||
pos = searcher->search(pos, end - pos);
|
||||
if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
|
||||
}
|
||||
|
||||
if (pos != end)
|
||||
auto column_res = ColumnString::create();
|
||||
ColumnString::Chars & vec_res = column_res->getChars();
|
||||
ColumnString::Offsets & offsets_res = column_res->getOffsets();
|
||||
|
||||
const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
|
||||
if (column_string_const)
|
||||
{
|
||||
String str = column_string_const->getValue<String>();
|
||||
constantVector(str, delim, column_index.get(), vec_res, offsets_res);
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
|
||||
if (!col_str)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
|
||||
|
||||
bool is_index_const = isColumnConst(*column_index);
|
||||
if (is_index_const)
|
||||
{
|
||||
Int64 index = column_index->getInt(0);
|
||||
vectorConstant(col_str, delim, index, vec_res, offsets_res);
|
||||
}
|
||||
else
|
||||
vectorVector(col_str, delim, column_index.get(), vec_res, offsets_res);
|
||||
}
|
||||
return column_res;
|
||||
}
|
||||
|
||||
protected:
|
||||
static void vectorVector(
|
||||
const ColumnString * str_column,
|
||||
const String & delim,
|
||||
const IColumn * index_column,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = str_column->size();
|
||||
res_data.reserve(str_column->getChars().size() / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
StringRef str_ref = str_column->getDataAt(i);
|
||||
Int64 index = index_column->getInt(i);
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstant(
|
||||
const ColumnString * str_column,
|
||||
const String & delim,
|
||||
Int64 index,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = str_column->size();
|
||||
res_data.reserve(str_column->getChars().size() / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
|
||||
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
StringRef str_ref = str_column->getDataAt(i);
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
std::cout << "result:" << res_ref.toString() << std::endl;
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void constantVector(
|
||||
const String & str,
|
||||
const String & delim,
|
||||
const IColumn * index_column,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t rows = index_column->size();
|
||||
res_data.reserve(str.size() * rows / 2);
|
||||
res_offsets.reserve(rows);
|
||||
|
||||
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
|
||||
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
|
||||
|
||||
StringRef str_ref{str.data(), str.size()};
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
{
|
||||
Int64 index = index_column->getInt(i);
|
||||
StringRef res_ref
|
||||
= !is_utf8 ? substringIndex(str_ref, delim[0], index) : substringIndexUTF8(searcher.get(), str_ref, delim, index);
|
||||
appendToResultColumn(res_ref, res_data, res_offsets);
|
||||
}
|
||||
}
|
||||
|
||||
static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
size_t res_offset = res_data.size();
|
||||
res_data.resize(res_offset + res_ref.size + 1);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], res_ref.data, res_ref.size);
|
||||
res_offset += res_ref.size;
|
||||
res_data[res_offset] = 0;
|
||||
++res_offset;
|
||||
|
||||
res_offsets.emplace_back(res_offset);
|
||||
}
|
||||
|
||||
static StringRef substringIndexUTF8(
|
||||
const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 index)
|
||||
{
|
||||
std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl;
|
||||
|
||||
if (index == 0)
|
||||
return {str_ref.data, 0};
|
||||
|
||||
const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
|
||||
const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
|
||||
const auto * pos = begin;
|
||||
if (index > 0)
|
||||
{
|
||||
Int64 i = 0;
|
||||
while (i < index)
|
||||
{
|
||||
pos = searcher->search(pos, end - pos);
|
||||
|
||||
if (pos != end)
|
||||
{
|
||||
pos += delim.size();
|
||||
++i;
|
||||
}
|
||||
else
|
||||
return str_ref;
|
||||
}
|
||||
return {begin, static_cast<size_t>(pos - begin - delim.size())};
|
||||
}
|
||||
else
|
||||
{
|
||||
Int64 total = 0;
|
||||
while (pos < end && end != (pos = searcher->search(pos, end - pos)))
|
||||
{
|
||||
pos += delim.size();
|
||||
++total;
|
||||
}
|
||||
|
||||
if (total + index < 0)
|
||||
return str_ref;
|
||||
|
||||
Int64 index_from_left = total + 1 + index;
|
||||
std::cout << "total:" << total << ", index_from_left" << index_from_left << std::endl;
|
||||
pos = begin;
|
||||
Int64 i = 0;
|
||||
while (i < index_from_left && pos < end && end != (pos = searcher->search(pos, end - pos)))
|
||||
{
|
||||
pos += delim.size();
|
||||
++i;
|
||||
std::cout << "pos offset:" << pos - begin << ", total size:" << end - begin << std::endl;
|
||||
}
|
||||
else
|
||||
return str_ref;
|
||||
std::cout << "pos offset:" << pos - begin << ", size:" << end - pos << std::endl;
|
||||
StringRef res = {pos, static_cast<size_t>(end - pos)};
|
||||
std::cout << "result:" << res.toString() << std::endl;
|
||||
return res;
|
||||
}
|
||||
return {begin, static_cast<size_t>(pos - begin - delim.size())};
|
||||
}
|
||||
else
|
||||
|
||||
static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 index)
|
||||
{
|
||||
Int64 total = 0;
|
||||
while (pos < end && end != (pos = searcher->search(pos, end - pos)))
|
||||
std::cout << "str:" << str_ref.toString() << ", delim" << delim << ",index:" << index << std::endl;
|
||||
|
||||
if (index == 0)
|
||||
return {str_ref.data, 0};
|
||||
|
||||
if (index > 0)
|
||||
{
|
||||
pos += delim.size();
|
||||
++total;
|
||||
}
|
||||
|
||||
if (total + index < 0)
|
||||
return str_ref;
|
||||
|
||||
Int64 index_from_left = total + 1 + index;
|
||||
pos = begin;
|
||||
Int64 i = 0;
|
||||
while (pos < end && end != (pos = searcher->search(pos, end - pos)) && i < index_from_left)
|
||||
{
|
||||
pos += delim.size();
|
||||
++i;
|
||||
}
|
||||
return {pos, static_cast<size_t>(end - pos)};
|
||||
}
|
||||
}
|
||||
|
||||
template <char delim>
|
||||
static StringRef substringIndex(
|
||||
const StringRef & str_ref,
|
||||
Int64 index)
|
||||
{
|
||||
if (index == 0)
|
||||
return {str_ref.data, 0};
|
||||
|
||||
if (index > 0)
|
||||
{
|
||||
const auto * end = str_ref.data + str_ref.size;
|
||||
const auto * pos = str_ref.data;
|
||||
Int64 i = 0;
|
||||
while (i < index)
|
||||
{
|
||||
pos = find_first_symbols<delim>(pos, end);
|
||||
|
||||
if (pos != end)
|
||||
const auto * end = str_ref.data + str_ref.size;
|
||||
const auto * pos = str_ref.data;
|
||||
Int64 i = 0;
|
||||
while (i < index)
|
||||
{
|
||||
++pos;
|
||||
++i;
|
||||
pos = std::find(pos, end, delim);
|
||||
if (pos != end)
|
||||
{
|
||||
++pos;
|
||||
++i;
|
||||
}
|
||||
else
|
||||
return str_ref;
|
||||
}
|
||||
else
|
||||
return str_ref;
|
||||
return {str_ref.data, static_cast<size_t>(pos - str_ref.data - 1)};
|
||||
}
|
||||
return {str_ref.data, static_cast<size_t>(pos - str_ref.data - 1)};
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto * begin = str_ref.data;
|
||||
const auto * pos = str_ref.data + str_ref.size;
|
||||
Int64 i = 0;
|
||||
while (i < index)
|
||||
else
|
||||
{
|
||||
const auto * next_pos = ::detail::find_last_symbols_sse2<true, ::detail::ReturnMode::End, delim>(begin, pos);
|
||||
|
||||
if (next_pos != pos)
|
||||
const auto * begin = str_ref.data;
|
||||
const auto * pos = str_ref.data + str_ref.size;
|
||||
Int64 i = 0;
|
||||
while (i + index < 0)
|
||||
{
|
||||
pos = next_pos;
|
||||
++i;
|
||||
}
|
||||
else
|
||||
return str_ref;
|
||||
}
|
||||
--pos;
|
||||
while (pos >= begin && *pos != delim)
|
||||
--pos;
|
||||
|
||||
return {pos + 1, static_cast<size_t>(str_ref.data + str_ref.size - pos - 1)};
|
||||
if (pos >= begin)
|
||||
++i;
|
||||
else
|
||||
return str_ref;
|
||||
}
|
||||
return {pos + 1, static_cast<size_t>(str_ref.data + str_ref.size - pos - 1)};
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -319,4 +319,3 @@ REGISTER_FUNCTION(SubstringIndex)
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
155
tests/queries/0_stateless/02798_substring_index.reference
Normal file
155
tests/queries/0_stateless/02798_substring_index.reference
Normal file
@ -0,0 +1,155 @@
|
||||
-- { echoOn }
|
||||
select substringIndex('www.clickhouse.com', '.', -4);
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', -3);
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', -2);
|
||||
clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', -1);
|
||||
com
|
||||
select substringIndex('www.clickhouse.com', '.', 0);
|
||||
|
||||
select substringIndex('www.clickhouse.com', '.', 1);
|
||||
www
|
||||
select substringIndex('www.clickhouse.com', '.', 2);
|
||||
www.clickhouse
|
||||
select substringIndex('www.clickhouse.com', '.', 3);
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', 4);
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -4);
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -3);
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -2);
|
||||
clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -1);
|
||||
com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 0);
|
||||
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 1);
|
||||
www
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 2);
|
||||
www.clickhouse
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 3);
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 4);
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4));
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3));
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2));
|
||||
clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1));
|
||||
com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0));
|
||||
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1));
|
||||
www
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2));
|
||||
www.clickhouse
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3));
|
||||
www.clickhouse.com
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4));
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-4));
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-3));
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-2));
|
||||
clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-1));
|
||||
com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(0));
|
||||
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(1));
|
||||
www
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(2));
|
||||
www.clickhouse
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(3));
|
||||
www.clickhouse.com
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(4));
|
||||
www.clickhouse.com
|
||||
select SUBSTRING_INDEX('www.clickhouse.com', '.', 2);
|
||||
www.clickhouse
|
||||
select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN }
|
||||
select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -4);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -3);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -2);
|
||||
民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -1);
|
||||
文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 0);
|
||||
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 1);
|
||||
富强
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 2);
|
||||
富强,民主
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 3);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 4);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2);
|
||||
民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1);
|
||||
文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0);
|
||||
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1);
|
||||
富强
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2);
|
||||
富强,民主
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4);
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-4));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-3));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-2));
|
||||
民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-1));
|
||||
文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(0));
|
||||
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(1));
|
||||
富强
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(2));
|
||||
富强,民主
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(3));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(4));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2));
|
||||
民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1));
|
||||
文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0));
|
||||
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1));
|
||||
富强
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2));
|
||||
富强,民主
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4));
|
||||
富强,民主,文明
|
||||
select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN }
|
||||
select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
93
tests/queries/0_stateless/02798_substring_index.sql
Normal file
93
tests/queries/0_stateless/02798_substring_index.sql
Normal file
@ -0,0 +1,93 @@
|
||||
-- { echoOn }
|
||||
select substringIndex('www.clickhouse.com', '.', -4);
|
||||
select substringIndex('www.clickhouse.com', '.', -3);
|
||||
select substringIndex('www.clickhouse.com', '.', -2);
|
||||
select substringIndex('www.clickhouse.com', '.', -1);
|
||||
select substringIndex('www.clickhouse.com', '.', 0);
|
||||
select substringIndex('www.clickhouse.com', '.', 1);
|
||||
select substringIndex('www.clickhouse.com', '.', 2);
|
||||
select substringIndex('www.clickhouse.com', '.', 3);
|
||||
select substringIndex('www.clickhouse.com', '.', 4);
|
||||
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -4);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -3);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -2);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', -1);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 0);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 1);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 2);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 3);
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', 4);
|
||||
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-4));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-3));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-2));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(-1));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(0));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(1));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(2));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(3));
|
||||
select substringIndex(materialize('www.clickhouse.com'), '.', materialize(4));
|
||||
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-4));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-3));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-2));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(-1));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(0));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(1));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(2));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(3));
|
||||
select substringIndex('www.clickhouse.com', '.', materialize(4));
|
||||
|
||||
select SUBSTRING_INDEX('www.clickhouse.com', '.', 2);
|
||||
|
||||
select substringIndex('www.clickhouse.com', '..', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndex('www.clickhouse.com', '', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndex('www.clickhouse.com', materialize('.'), 2); -- { serverError ILLEGAL_COLUMN }
|
||||
select substringIndex('www.clickhouse.com', '.', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -4);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -3);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -2);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', -1);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 0);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 1);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 2);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 3);
|
||||
select substringIndexUTF8('富强,民主,文明', ',', 4);
|
||||
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -4);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -3);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -2);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', -1);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 0);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 1);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 2);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 3);
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', 4);
|
||||
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-4));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-3));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-2));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(-1));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(0));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(1));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(2));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(3));
|
||||
select substringIndexUTF8('富强,民主,文明', ',', materialize(4));
|
||||
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-4));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-3));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-2));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(-1));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(0));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(1));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(2));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(3));
|
||||
select substringIndexUTF8(materialize('富强,民主,文明'), ',', materialize(4));
|
||||
|
||||
select substringIndexUTF8('富强,民主,文明', ',,', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndexUTF8('富强,民主,文明', '', 2); -- { serverError BAD_ARGUMENTS }
|
||||
select substringIndexUTF8('富强,民主,文明', materialize(','), 2); -- { serverError ILLEGAL_COLUMN }
|
||||
select substringIndexUTF8('富强,民主,文明', ',', cast(2 as Int128)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
|
||||
-- { echoOff }
|
Loading…
Reference in New Issue
Block a user