Fix incorrect character counting in PositionImpl::vectorVector

This commit is contained in:
heymind 2024-10-24 14:09:56 +08:00
parent 4c0ce6ad10
commit f158c0346c

View File

@@ -370,10 +370,14 @@ struct PositionImpl
{ {
size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1; size_t haystack_size = haystack_offsets[i] - prev_haystack_offset - 1;
size_t haystack_chars_size = Impl::countChars(
reinterpret_cast<const char *>(&haystack_data[prev_haystack_offset]),
reinterpret_cast<const char *>(&haystack_data[haystack_offsets[i] - 1]));
auto start = start_pos != nullptr ? std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1); auto start = start_pos != nullptr ? std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1);
if (start > haystack_size + 1) if (start > haystack_chars_size + 1)
{ {
res[i] = 0; res[i] = 0;
} }
@@ -429,13 +433,15 @@ struct PositionImpl
/// NOTE You could use haystack indexing. But this is a rare case. /// NOTE You could use haystack indexing. But this is a rare case.
ColumnString::Offset prev_needle_offset = 0; ColumnString::Offset prev_needle_offset = 0;
size_t haystack_size = Impl::countChars(haystack.data(), haystack.data() + haystack.size());
for (size_t i = 0; i < input_rows_count; ++i) for (size_t i = 0; i < input_rows_count; ++i)
{ {
size_t needle_size = needle_offsets[i] - prev_needle_offset - 1; size_t needle_size = needle_offsets[i] - prev_needle_offset - 1;
auto start = start_pos != nullptr ? std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1); auto start = start_pos != nullptr ? std::max(start_pos->getUInt(i), UInt64(1)) : UInt64(1);
if (start > haystack.size() + 1) if (start > haystack_size + 1)
{ {
res[i] = 0; res[i] = 0;
} }