2011-09-04 00:22:19 +00:00
|
|
|
#pragma once
|
2010-03-12 18:25:35 +00:00
|
|
|
|
2019-03-08 12:59:06 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <cassert>
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-10-07 18:56:03 +00:00
|
|
|
#include <Columns/IColumnImpl.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/PODArray.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/memcpySmall.h>
|
2019-03-03 20:08:39 +00:00
|
|
|
#include <Common/memcmpSmall.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2019-09-27 13:44:33 +00:00
|
|
|
#include <Core/Field.h>
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
class Collator;
|
|
|
|
|
|
|
|
|
2010-03-12 18:25:35 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/** Column for String values.
|
2010-05-13 16:13:38 +00:00
|
|
|
*/
|
2019-04-19 20:21:17 +00:00
|
|
|
class ColumnString final : public COWHelper<IColumn, ColumnString>
|
2010-05-13 16:13:38 +00:00
|
|
|
{
|
2013-03-05 10:29:26 +00:00
|
|
|
public:
|
2019-04-12 00:34:11 +00:00
|
|
|
using Char = UInt8;
|
2018-11-25 00:08:50 +00:00
|
|
|
using Chars = PaddedPODArray<UInt8>;
|
2013-03-05 10:29:26 +00:00
|
|
|
|
2010-05-20 19:29:04 +00:00
|
|
|
private:
|
2019-04-19 20:21:17 +00:00
|
|
|
friend class COWHelper<IColumn, ColumnString>;
|
2017-12-14 04:25:22 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
|
2017-12-15 21:32:25 +00:00
|
|
|
Offsets offsets;
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Bytes of strings, placed contiguously.
|
|
|
|
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
|
2018-11-25 00:08:50 +00:00
|
|
|
Chars chars;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2018-12-26 05:34:25 +00:00
|
|
|
/// Offset in `chars` at which the i-th element starts, i.e. the end offset recorded for element i - 1.
/// For i == 0 this reads offsets[-1]; NOTE(review): presumably relies on PaddedPODArray padding, as insertFrom's comment states - confirm.
size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return offsets[i - 1]; }
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Size of i-th element, including terminating zero.
|
2018-12-26 05:34:25 +00:00
|
|
|
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
|
2014-08-20 04:57:03 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
template <bool positive>
|
|
|
|
struct less;
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
template <bool positive>
|
|
|
|
struct lessWithCollation;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-15 02:36:40 +00:00
|
|
|
ColumnString() = default;
|
|
|
|
|
2017-12-14 04:25:22 +00:00
|
|
|
ColumnString(const ColumnString & src)
|
|
|
|
: offsets(src.offsets.begin(), src.offsets.end()),
|
2018-06-03 17:43:56 +00:00
|
|
|
chars(src.chars.begin(), src.chars.end()) {}
|
2017-12-14 03:56:56 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
public:
|
2017-12-07 22:11:51 +00:00
|
|
|
/// Returns the constant family name of this column kind: "String".
const char * getFamilyName() const override { return "String"; }
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
size_t size() const override
|
|
|
|
{
|
|
|
|
return offsets.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t byteSize() const override
|
|
|
|
{
|
|
|
|
return chars.size() + offsets.size() * sizeof(offsets[0]);
|
|
|
|
}
|
|
|
|
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t allocatedBytes() const override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-13 16:49:09 +00:00
|
|
|
return chars.allocated_bytes() + offsets.allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
void protect() override;
|
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
MutableColumnPtr cloneResized(size_t to_size) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Returns the n-th string as a Field built from its bytes, without the terminating zero.
Field operator[](size_t n) const override
{
    assert(n < size());

    const size_t begin = offsetAt(n);
    const size_t length_without_zero = sizeAt(n) - 1;
    return Field(&chars[begin], length_without_zero);
}
|
|
|
|
|
|
|
|
void get(size_t n, Field & res) const override
|
|
|
|
{
|
2019-03-07 20:04:59 +00:00
|
|
|
assert(n < size());
|
2017-04-01 07:20:54 +00:00
|
|
|
res.assignString(&chars[offsetAt(n)], sizeAt(n) - 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns a StringRef pointing at the n-th string inside `chars`; the terminating zero is not counted in the length.
StringRef getDataAt(size_t n) const override
{
    assert(n < size());

    const size_t begin = offsetAt(n);
    const size_t length_without_zero = sizeAt(n) - 1;
    return StringRef(&chars[begin], length_without_zero);
}
|
|
|
|
|
|
|
|
/// Same as getDataAt, but the returned length also covers the terminating zero byte.
StringRef getDataAtWithTerminatingZero(size_t n) const override
{
    assert(n < size());

    const size_t begin = offsetAt(n);
    const size_t length_with_zero = sizeAt(n);
    return StringRef(&chars[begin], length_with_zero);
}
|
|
|
|
|
2018-03-29 21:15:14 +00:00
|
|
|
/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
|
2018-03-29 17:01:47 +00:00
|
|
|
#if !__clang__
|
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
|
|
|
#endif
|
|
|
|
|
2018-03-29 20:21:40 +00:00
|
|
|
void insert(const Field & x) override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
const String & s = DB::get<const String &>(x);
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t size_to_append = s.size() + 1;
|
|
|
|
const size_t new_size = old_size + size_to_append;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, s.c_str(), size_to_append);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
2018-03-29 17:01:47 +00:00
|
|
|
#if !__clang__
|
|
|
|
#pragma GCC diagnostic pop
|
|
|
|
#endif
|
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
void insertFrom(const IColumn & src_, size_t n) override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & src = assert_cast<const ColumnString &>(src_);
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-10 22:01:12 +00:00
|
|
|
if (size_to_append == 1)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-03-10 22:01:12 +00:00
|
|
|
/// shortcut for empty string
|
|
|
|
chars.push_back(0);
|
|
|
|
offsets.push_back(chars.size());
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t offset = src.offsets[n - 1];
|
2017-04-01 07:20:54 +00:00
|
|
|
const size_t new_size = old_size + size_to_append;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-10 22:01:12 +00:00
|
|
|
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[offset], size_to_append);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void insertData(const char * pos, size_t length) override
|
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + length + 1;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2018-12-27 20:48:21 +00:00
|
|
|
if (length)
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, length);
|
2017-04-01 07:20:54 +00:00
|
|
|
chars[old_size + length] = 0;
|
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
2018-12-12 19:21:48 +00:00
|
|
|
/// Like getData, but inserting data should be zero-ending (i.e. length is 1 byte greater than real string size).
|
|
|
|
void insertDataWithTerminatingZero(const char * pos, size_t length)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + length;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, length);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void popBack(size_t n) override
|
|
|
|
{
|
|
|
|
size_t nested_n = offsets.back() - offsetAt(offsets.size() - n);
|
|
|
|
chars.resize(chars.size() - nested_n);
|
|
|
|
offsets.resize_assume_reserved(offsets.size() - n);
|
|
|
|
}
|
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
void updateHashWithValue(size_t n, SipHash & hash) const override
|
|
|
|
{
|
|
|
|
size_t string_size = sizeAt(n);
|
|
|
|
size_t offset = offsetAt(n);
|
|
|
|
|
|
|
|
hash.update(reinterpret_cast<const char *>(&string_size), sizeof(string_size));
|
|
|
|
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
|
|
|
}
|
|
|
|
|
2020-03-13 17:31:50 +00:00
|
|
|
void updateWeakHash32(WeakHash32 & hash) const override;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
|
|
|
template <typename Type>
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void insertDefault() override
|
|
|
|
{
|
|
|
|
chars.push_back(0);
|
2018-12-24 14:26:38 +00:00
|
|
|
offsets.push_back(offsets.back() + 1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-11-08 11:15:20 +00:00
|
|
|
virtual void insertManyDefaults(size_t length) override
|
|
|
|
{
|
|
|
|
chars.resize_fill(chars.size() + length);
|
|
|
|
for (size_t i = 0; i < length; ++i)
|
|
|
|
offsets.push_back(offsets.back() + 1);
|
|
|
|
}
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
2019-03-04 00:03:20 +00:00
|
|
|
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 12:10:32 +00:00
|
|
|
std::vector<UInt8> compareAt(const IColumn & rhs, size_t rhs_row_num, const std::vector<UInt8> & mask, int nan_direction_hint) const override
|
|
|
|
{
|
|
|
|
return compareImpl<ColumnString>(assert_cast<const ColumnString &>(rhs), rhs_row_num, mask, nan_direction_hint);
|
|
|
|
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Variant of compareAt for string comparison with respect of collation.
|
|
|
|
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const;
|
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-05-25 14:33:31 +00:00
|
|
|
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override;
|
2020-05-12 00:58:58 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Sorting with respect of collation.
|
2019-02-18 19:44:26 +00:00
|
|
|
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-05-12 00:58:58 +00:00
|
|
|
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const;
|
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
/// Forwards to scatterImpl instantiated for ColumnString, passing `num_columns` and `selector` through unchanged.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override { return scatterImpl<ColumnString>(num_columns, selector); }
|
|
|
|
|
2017-07-06 13:54:55 +00:00
|
|
|
void gather(ColumnGathererStream & gatherer_stream) override;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void reserve(size_t n) override;
|
|
|
|
|
|
|
|
void getExtremes(Field & min, Field & max) const override;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-09 10:14:45 +00:00
|
|
|
|
|
|
|
/// Always true for this column type.
bool canBeInsideNullable() const override { return true; }
|
|
|
|
|
2019-03-14 23:10:51 +00:00
|
|
|
bool structureEquals(const IColumn & rhs) const override
|
|
|
|
{
|
|
|
|
return typeid(rhs) == typeid(ColumnString);
|
|
|
|
}
|
|
|
|
|
2017-12-09 10:14:45 +00:00
|
|
|
|
2018-11-25 00:08:50 +00:00
|
|
|
/// Mutable access to the array holding the bytes of all strings.
Chars & getChars()
{
    return chars;
}

/// Read-only access to the array holding the bytes of all strings.
const Chars & getChars() const
{
    return chars;
}
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-15 21:32:25 +00:00
|
|
|
/// Mutable access to the per-row end offsets into the chars array.
Offsets & getOffsets()
{
    return offsets;
}

/// Read-only access to the per-row end offsets into the chars array.
const Offsets & getOffsets() const
{
    return offsets;
}
|
2010-05-13 16:13:38 +00:00
|
|
|
};
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|