2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Defines.h>
|
2018-09-02 05:23:20 +00:00
|
|
|
#include <Common/Arena.h>
|
2019-03-03 20:08:39 +00:00
|
|
|
#include <Common/memcmpSmall.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2020-03-13 17:31:50 +00:00
|
|
|
#include <Common/WeakHash.h>
|
|
|
|
#include <Common/HashTable/Hash.h>
|
2017-11-20 06:01:05 +00:00
|
|
|
#include <Columns/Collator.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Columns/ColumnsCommon.h>
|
2017-07-06 13:54:55 +00:00
|
|
|
#include <DataStreams/ColumnGathererStream.h>
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2018-12-14 17:50:10 +00:00
|
|
|
#include <common/unaligned.h>
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
namespace ErrorCodes
{
    /// Error codes thrown by the methods below; only declared here (extern).
    extern const int LOGICAL_ERROR;
    extern const int PARAMETER_OUT_OF_BOUND;
    extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
|
|
|
|
|
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
/// Builds a new column with exactly `to_size` rows: either a prefix of this column,
/// or a full copy of it followed by empty strings.
MutableColumnPtr ColumnString::cloneResized(size_t to_size) const
{
    auto result = ColumnString::create();

    if (to_size == 0)
        return result;

    const size_t current_rows = size();

    if (to_size > current_rows)
    {
        /// Take everything we have, then pad the tail with empty strings.
        Offset last_offset = 0;

        if (current_rows > 0)
        {
            result->offsets.assign(offsets.begin(), offsets.end());
            result->chars.assign(chars.begin(), chars.end());
            last_offset = offsets.back();
        }

        /// An empty string is just its zero terminating byte, so one extra byte per added row.
        result->chars.resize_fill(result->chars.size() + to_size - current_rows);

        result->offsets.resize(to_size);
        for (size_t row = current_rows; row < to_size; ++row)
            result->offsets[row] = ++last_offset;
    }
    else
    {
        /// Shrinking (or same size): keep the first `to_size` offsets and the bytes they cover.
        result->offsets.assign(offsets.begin(), offsets.begin() + to_size);
        result->chars.assign(chars.begin(), chars.begin() + offsets[to_size - 1]);
    }

    return result;
}
|
|
|
|
|
2020-03-13 17:31:50 +00:00
|
|
|
/// Updates every element of `hash` with the bytes of the corresponding row.
/// Throws LOGICAL_ERROR if `hash` does not have one element per row.
void ColumnString::updateWeakHash32(WeakHash32 & hash) const
{
    const auto rows = offsets.size();

    if (hash.getData().size() != rows)
        throw Exception("Size of WeakHash32 does not match size of column: column size is " + std::to_string(rows) +
                        ", hash size is " + std::to_string(hash.getData().size()), ErrorCodes::LOGICAL_ERROR);

    const UInt8 * chars_begin = chars.data();
    UInt32 * hashes = hash.getData().data();

    Offset row_begin = 0;
    for (size_t row = 0; row < rows; ++row)
    {
        const Offset row_end = offsets[row];
        const auto row_bytes = row_end - row_begin;

        /// Skip last zero byte.
        hashes[row] = ::updateWeakHash32(chars_begin + row_begin, row_bytes - 1, hashes[row]);

        row_begin = row_end;
    }
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
|
|
|
|
void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
if (length == 0)
|
|
|
|
return;
|
|
|
|
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & src_concrete = assert_cast<const ColumnString &>(src);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
if (start + length > src_concrete.offsets.size())
|
|
|
|
throw Exception("Parameter out of bound in IColumnString::insertRangeFrom method.",
|
|
|
|
ErrorCodes::PARAMETER_OUT_OF_BOUND);
|
|
|
|
|
|
|
|
size_t nested_offset = src_concrete.offsetAt(start);
|
|
|
|
size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset;
|
|
|
|
|
|
|
|
size_t old_chars_size = chars.size();
|
|
|
|
chars.resize(old_chars_size + nested_length);
|
|
|
|
memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length);
|
|
|
|
|
|
|
|
if (start == 0 && offsets.empty())
|
|
|
|
{
|
|
|
|
offsets.assign(src_concrete.offsets.begin(), src_concrete.offsets.begin() + length);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
size_t old_size = offsets.size();
|
2019-03-30 10:46:17 +00:00
|
|
|
size_t prev_max_offset = offsets.back(); /// -1th index is Ok, see PaddedPODArray
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.resize(old_size + length);
|
|
|
|
|
|
|
|
for (size_t i = 0; i < length; ++i)
|
|
|
|
offsets[old_size + i] = src_concrete.offsets[start + i] - nested_offset + prev_max_offset;
|
|
|
|
}
|
2017-03-09 03:34:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
/// Returns a new column produced by running filterArraysImpl over this column's
/// chars and offsets with the filter `filt`. An empty column yields an empty result.
ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) const
{
    auto filtered = ColumnString::create();

    if (!offsets.empty())
        filterArraysImpl<UInt8>(chars, offsets, filtered->chars, filtered->offsets, filt, result_size_hint);

    return filtered;
}
|
|
|
|
|
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
/// Returns a new column whose i-th row is row perm[i] of this column.
/// `limit` == 0 means "all rows"; otherwise at most `limit` rows are produced.
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH if `perm` has fewer entries than rows to produce.
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{
    const size_t rows = offsets.size();
    const size_t rows_to_take = (limit == 0) ? rows : std::min(rows, limit);

    if (perm.size() < rows_to_take)
        throw Exception("Size of permutation is less than required.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto permuted = ColumnString::create();

    if (rows_to_take == 0)
        return permuted;

    /// Size the destination chars up front.
    if (rows_to_take != rows)
    {
        size_t total_bytes = 0;
        for (size_t i = 0; i < rows_to_take; ++i)
            total_bytes += sizeAt(perm[i]);
        permuted->chars.resize(total_bytes);
    }
    else
    {
        /// All rows are taken, so the result gets the same number of bytes as this column.
        permuted->chars.resize(chars.size());
    }

    permuted->offsets.resize(rows_to_take);

    Offset write_pos = 0;
    for (size_t i = 0; i < rows_to_take; ++i)
    {
        const size_t src_row = perm[i];

        /// src_row - 1 may be the -1th index; the code relies on that being readable for these arrays.
        const size_t src_begin = offsets[src_row - 1];
        const size_t src_bytes = offsets[src_row] - src_begin;

        memcpySmallAllowReadWriteOverflow15(&permuted->chars[write_pos], &chars[src_begin], src_bytes);

        write_pos += src_bytes;
        permuted->offsets[i] = write_pos;
    }

    return permuted;
}
|
|
|
|
|
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
/// Writes row `n` into `arena` as a size_t length (as returned by sizeAt) followed by that many bytes.
/// Returns a StringRef covering the written region.
StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
    const size_t value_bytes = sizeAt(n);
    const size_t value_begin = offsetAt(n);
    const size_t total_bytes = sizeof(value_bytes) + value_bytes;

    char * out = arena.allocContinue(total_bytes, begin);

    /// Length prefix first, payload right after it.
    memcpy(out, &value_bytes, sizeof(value_bytes));
    memcpy(out + sizeof(value_bytes), &chars[value_begin], value_bytes);

    StringRef serialized;
    serialized.data = out;
    serialized.size = total_bytes;
    return serialized;
}
|
|
|
|
|
|
|
|
const char * ColumnString::deserializeAndInsertFromArena(const char * pos)
|
|
|
|
{
|
2018-12-14 17:50:10 +00:00
|
|
|
const size_t string_size = unalignedLoad<size_t>(pos);
|
2018-09-02 05:23:20 +00:00
|
|
|
pos += sizeof(string_size);
|
|
|
|
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + string_size;
|
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, string_size);
|
2018-09-02 05:23:20 +00:00
|
|
|
|
|
|
|
offsets.push_back(new_size);
|
|
|
|
return pos + string_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-02-18 17:28:53 +00:00
|
|
|
/// Selects rows by the positions stored in the `indexes` column.
/// The work is delegated to selectIndexImpl with this column as the source.
ColumnPtr ColumnString::index(const IColumn & indexes, size_t limit) const
{
    const ColumnString & self = *this;
    return selectIndexImpl(self, indexes, limit);
}
|
|
|
|
|
|
|
|
template <typename Type>
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
|
2018-04-23 16:40:25 +00:00
|
|
|
{
|
|
|
|
if (limit == 0)
|
|
|
|
return ColumnString::create();
|
|
|
|
|
|
|
|
auto res = ColumnString::create();
|
|
|
|
|
2018-11-25 00:08:50 +00:00
|
|
|
Chars & res_chars = res->chars;
|
2018-04-23 16:40:25 +00:00
|
|
|
Offsets & res_offsets = res->offsets;
|
|
|
|
|
|
|
|
size_t new_chars_size = 0;
|
|
|
|
for (size_t i = 0; i < limit; ++i)
|
|
|
|
new_chars_size += sizeAt(indexes[i]);
|
|
|
|
res_chars.resize(new_chars_size);
|
|
|
|
|
|
|
|
res_offsets.resize(limit);
|
|
|
|
|
|
|
|
Offset current_new_offset = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < limit; ++i)
|
|
|
|
{
|
|
|
|
size_t j = indexes[i];
|
2018-12-24 14:26:38 +00:00
|
|
|
size_t string_offset = offsets[j - 1];
|
2018-04-23 16:40:25 +00:00
|
|
|
size_t string_size = offsets[j] - string_offset;
|
|
|
|
|
|
|
|
memcpySmallAllowReadWriteOverflow15(&res_chars[current_new_offset], &chars[string_offset], string_size);
|
|
|
|
|
|
|
|
current_new_offset += string_size;
|
|
|
|
res_offsets[i] = current_new_offset;
|
|
|
|
}
|
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
return res;
|
2018-04-23 16:40:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
template <bool positive>
|
|
|
|
struct ColumnString::less
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const ColumnString & parent;
|
2017-09-07 21:04:48 +00:00
|
|
|
explicit less(const ColumnString & parent_) : parent(parent_) {}
|
2017-04-01 07:20:54 +00:00
|
|
|
bool operator()(size_t lhs, size_t rhs) const
|
|
|
|
{
|
2019-03-03 20:08:39 +00:00
|
|
|
int res = memcmpSmallAllowOverflow15(
|
2019-03-04 00:03:20 +00:00
|
|
|
parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1,
|
|
|
|
parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-03 20:08:39 +00:00
|
|
|
return positive ? (res < 0) : (res > 0);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2017-03-09 03:34:09 +00:00
|
|
|
};
|
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const
|
2017-03-09 03:34:09 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t s = offsets.size();
|
|
|
|
res.resize(s);
|
|
|
|
for (size_t i = 0; i < s; ++i)
|
|
|
|
res[i] = i;
|
|
|
|
|
|
|
|
if (limit >= s)
|
|
|
|
limit = 0;
|
|
|
|
|
|
|
|
if (limit)
|
|
|
|
{
|
|
|
|
if (reverse)
|
|
|
|
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<false>(*this));
|
|
|
|
else
|
|
|
|
std::partial_sort(res.begin(), res.begin() + limit, res.end(), less<true>(*this));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (reverse)
|
|
|
|
std::sort(res.begin(), res.end(), less<false>(*this));
|
|
|
|
else
|
|
|
|
std::sort(res.begin(), res.end(), less<true>(*this));
|
|
|
|
}
|
2017-03-09 03:34:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
/// Returns a new column where row i of this column is repeated
/// replicate_offsets[i] - replicate_offsets[i - 1] times (the first row: replicate_offsets[0] times).
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH if `replicate_offsets` does not have one entry per row.
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{
    const size_t rows = size();

    if (rows != replicate_offsets.size())
        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    auto replicated = ColumnString::create();

    if (rows == 0)
        return replicated;

    Chars & out_chars = replicated->chars;
    Offsets & out_offsets = replicated->offsets;

    /// Reserve by the average row size; the exact amount is reached by the resizes in the loop.
    out_chars.reserve(chars.size() / rows * replicate_offsets.back());
    out_offsets.reserve(replicate_offsets.back());

    Offset replicate_begin = 0;
    Offset src_begin = 0;
    Offset out_end = 0;

    for (size_t row = 0; row < rows; ++row)
    {
        const size_t copies = replicate_offsets[row] - replicate_begin;
        const size_t src_bytes = offsets[row] - src_begin;

        for (size_t copy = 0; copy < copies; ++copy)
        {
            out_end += src_bytes;
            out_offsets.push_back(out_end);

            /// Grow the chars by one row and copy the source row into the new tail.
            const size_t write_pos = out_chars.size();
            out_chars.resize(write_pos + src_bytes);
            memcpySmallAllowReadWriteOverflow15(&out_chars[write_pos], &chars[src_begin], src_bytes);
        }

        replicate_begin = replicate_offsets[row];
        src_begin = offsets[row];
    }

    return replicated;
}
|
|
|
|
|
|
|
|
|
2017-07-06 13:54:55 +00:00
|
|
|
/// Hands this column over to the gatherer stream, which does the actual gathering.
void ColumnString::gather(ColumnGathererStream & gatherer)
{
    ColumnString & self = *this;
    gatherer.gather(self);
}
|
|
|
|
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
void ColumnString::reserve(size_t n)
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.reserve(n);
|
2017-03-09 03:34:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Stores the smallest and the largest row (by the `less<true>` ordering) into `min` and `max`.
/// For an empty column both are set to an empty String.
void ColumnString::getExtremes(Field & min, Field & max) const
{
    min = String();
    max = String();

    const size_t rows = size();
    if (rows == 0)
        return;

    const less<true> is_less(*this);

    size_t smallest_row = 0;
    size_t largest_row = 0;

    for (size_t row = 1; row < rows; ++row)
    {
        if (is_less(row, smallest_row))
        {
            smallest_row = row;
            continue;
        }

        /// Only checked when the row did not become the new minimum.
        if (is_less(largest_row, row))
            largest_row = row;
    }

    get(smallest_row, min);
    get(largest_row, max);
}
|
|
|
|
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const
|
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return collator.compare(
|
|
|
|
reinterpret_cast<const char *>(&chars[offsetAt(n)]), sizeAt(n),
|
|
|
|
reinterpret_cast<const char *>(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m));
|
2016-11-20 12:43:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-11-20 12:46:32 +00:00
|
|
|
template <bool positive>
|
2017-03-09 03:34:09 +00:00
|
|
|
struct ColumnString::lessWithCollation
|
2016-11-20 12:43:20 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
const ColumnString & parent;
|
|
|
|
const Collator & collator;
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {}
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
bool operator()(size_t lhs, size_t rhs) const
|
|
|
|
{
|
|
|
|
int res = collator.compare(
|
|
|
|
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs),
|
|
|
|
reinterpret_cast<const char *>(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs));
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return positive ? (res < 0) : (res > 0);
|
|
|
|
}
|
2016-11-20 12:46:32 +00:00
|
|
|
};
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const
|
2016-11-20 12:43:20 +00:00
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t s = offsets.size();
|
|
|
|
res.resize(s);
|
|
|
|
for (size_t i = 0; i < s; ++i)
|
|
|
|
res[i] = i;
|
|
|
|
|
|
|
|
if (limit >= s)
|
|
|
|
limit = 0;
|
|
|
|
|
|
|
|
if (limit)
|
|
|
|
{
|
|
|
|
if (reverse)
|
|
|
|
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<false>(*this, collator));
|
|
|
|
else
|
|
|
|
std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation<true>(*this, collator));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (reverse)
|
|
|
|
std::sort(res.begin(), res.end(), lessWithCollation<false>(*this, collator));
|
|
|
|
else
|
|
|
|
std::sort(res.begin(), res.end(), lessWithCollation<true>(*this, collator));
|
|
|
|
}
|
2016-11-20 12:43:20 +00:00
|
|
|
}
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
|
|
|
|
void ColumnString::protect()
|
|
|
|
{
|
|
|
|
getChars().protect();
|
|
|
|
getOffsets().protect();
|
|
|
|
}
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
}
|