2011-09-04 00:22:19 +00:00
|
|
|
#pragma once
|
2010-03-12 18:25:35 +00:00
|
|
|
|
2019-03-08 12:59:06 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <cassert>
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-10-07 18:56:03 +00:00
|
|
|
#include <Columns/IColumnImpl.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/PODArray.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/memcpySmall.h>
|
2019-03-03 20:08:39 +00:00
|
|
|
#include <Common/memcmpSmall.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2019-09-27 13:44:33 +00:00
|
|
|
#include <Core/Field.h>
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
class Collator;
|
|
|
|
|
|
|
|
|
2010-03-12 18:25:35 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2023-11-21 22:32:47 +00:00
|
|
|
class Arena;
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/** Column for String values.
|
2010-05-13 16:13:38 +00:00
|
|
|
*/
|
2024-02-25 18:29:00 +00:00
|
|
|
class ColumnString final : public COWHelper<IColumnHelper<ColumnString>, ColumnString>
|
2010-05-13 16:13:38 +00:00
|
|
|
{
|
2013-03-05 10:29:26 +00:00
|
|
|
public:
|
2019-04-12 00:34:11 +00:00
|
|
|
using Char = UInt8;
|
2018-11-25 00:08:50 +00:00
|
|
|
using Chars = PaddedPODArray<UInt8>;
|
2013-03-05 10:29:26 +00:00
|
|
|
|
2010-05-20 19:29:04 +00:00
|
|
|
private:
|
2024-02-25 18:29:00 +00:00
|
|
|
friend class COWHelper<IColumnHelper<ColumnString>, ColumnString>;
|
2017-12-14 04:25:22 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
|
2017-12-15 21:32:25 +00:00
|
|
|
Offsets offsets;
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/// Bytes of strings, placed contiguously.
|
|
|
|
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
|
2018-11-25 00:08:50 +00:00
|
|
|
Chars chars;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2018-12-26 05:34:25 +00:00
|
|
|
/// Position inside `chars` at which the i-th string starts, i.e. the offset recorded for element i - 1.
/// For i == 0 this reads index -1 of `offsets`, which is valid for PaddedPODArray.
size_t ALWAYS_INLINE offsetAt(ssize_t i) const
{
    const ssize_t previous = i - 1;
    return offsets[previous];
}
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/// Size of i-th element, including terminating zero.
|
2018-12-26 05:34:25 +00:00
|
|
|
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
|
2014-08-20 04:57:03 +00:00
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
struct ComparatorBase;
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
using ComparatorAscendingUnstable = ComparatorAscendingUnstableImpl<ComparatorBase>;
|
|
|
|
using ComparatorAscendingStable = ComparatorAscendingStableImpl<ComparatorBase>;
|
|
|
|
using ComparatorDescendingUnstable = ComparatorDescendingUnstableImpl<ComparatorBase>;
|
|
|
|
using ComparatorDescendingStable = ComparatorDescendingStableImpl<ComparatorBase>;
|
|
|
|
using ComparatorEqual = ComparatorEqualImpl<ComparatorBase>;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2022-03-12 18:04:08 +00:00
|
|
|
struct ComparatorCollationBase;
|
|
|
|
|
|
|
|
using ComparatorCollationAscendingUnstable = ComparatorAscendingUnstableImpl<ComparatorCollationBase>;
|
|
|
|
using ComparatorCollationAscendingStable = ComparatorAscendingStableImpl<ComparatorCollationBase>;
|
|
|
|
using ComparatorCollationDescendingUnstable = ComparatorDescendingUnstableImpl<ComparatorCollationBase>;
|
|
|
|
using ComparatorCollationDescendingStable = ComparatorDescendingStableImpl<ComparatorCollationBase>;
|
|
|
|
using ComparatorCollationEqual = ComparatorEqualImpl<ComparatorCollationBase>;
|
|
|
|
|
2017-12-15 02:36:40 +00:00
|
|
|
ColumnString() = default;
|
2020-08-07 19:36:04 +00:00
|
|
|
ColumnString(const ColumnString & src);
|
2017-12-14 03:56:56 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
public:
|
2017-12-07 22:11:51 +00:00
|
|
|
/// Name of the column family; always the literal "String" for this column.
const char * getFamilyName() const override
{
    return "String";
}
|
2020-06-04 22:02:59 +00:00
|
|
|
/// Type tag of the values stored in this column; always TypeIndex::String.
TypeIndex getDataType() const override
{
    return TypeIndex::String;
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
size_t size() const override
|
2010-05-13 16:13:38 +00:00
|
|
|
{
|
2013-05-05 15:25:25 +00:00
|
|
|
return offsets.size();
|
2010-05-13 16:13:38 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
size_t byteSize() const override
|
2013-05-05 15:25:25 +00:00
|
|
|
{
|
|
|
|
return chars.size() + offsets.size() * sizeof(offsets[0]);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-01-02 22:58:10 +00:00
|
|
|
size_t byteSizeAt(size_t n) const override
|
|
|
|
{
|
|
|
|
assert(n < size());
|
|
|
|
return sizeAt(n) + sizeof(offsets[0]);
|
|
|
|
}
|
|
|
|
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t allocatedBytes() const override
|
2017-01-17 20:54:32 +00:00
|
|
|
{
|
2017-07-13 16:49:09 +00:00
|
|
|
return chars.allocated_bytes() + offsets.allocated_bytes();
|
2017-01-17 20:54:32 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
void protect() override;
|
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
MutableColumnPtr cloneResized(size_t to_size) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
/// Returns the n-th string as a Field. The terminating zero byte is not part of the value.
/// `n` must be a valid row index (checked with assert in debug builds).
Field operator[](size_t n) const override
{
    assert(n < size());

    const auto * string_begin = &chars[offsetAt(n)];
    const size_t string_length = sizeAt(n) - 1; /// sizeAt() counts the terminating zero
    return Field(string_begin, string_length);
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
void get(size_t n, Field & res) const override
|
2013-01-07 06:47:15 +00:00
|
|
|
{
|
2019-03-07 20:04:59 +00:00
|
|
|
assert(n < size());
|
2020-07-26 21:41:27 +00:00
|
|
|
res = std::string_view{reinterpret_cast<const char *>(&chars[offsetAt(n)]), sizeAt(n) - 1};
|
2013-01-07 06:47:15 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
/// Returns a StringRef built from a pointer into `chars` for the n-th string,
/// with a length that excludes the terminating zero byte.
/// `n` must be a valid row index (checked with assert in debug builds).
StringRef getDataAt(size_t n) const override
{
    assert(n < size());

    const auto * string_begin = &chars[offsetAt(n)];
    const size_t string_length = sizeAt(n) - 1;
    return StringRef(string_begin, string_length);
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-15 02:16:09 +00:00
|
|
|
bool isDefaultAt(size_t n) const override
|
|
|
|
{
|
|
|
|
assert(n < size());
|
2021-06-07 13:50:27 +00:00
|
|
|
return sizeAt(n) == 1;
|
2021-04-15 02:16:09 +00:00
|
|
|
}
|
|
|
|
|
2018-03-29 20:21:40 +00:00
|
|
|
void insert(const Field & x) override
|
2010-05-20 19:29:04 +00:00
|
|
|
{
|
2022-09-10 03:04:40 +00:00
|
|
|
const String & s = x.get<const String &>();
|
2016-08-12 20:50:34 +00:00
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t size_to_append = s.size() + 1;
|
|
|
|
const size_t new_size = old_size + size_to_append;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-08-12 20:50:34 +00:00
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, s.c_str(), size_to_append);
|
2016-08-12 20:50:34 +00:00
|
|
|
offsets.push_back(new_size);
|
2010-05-20 19:29:04 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2024-02-20 18:58:06 +00:00
|
|
|
bool tryInsert(const Field & x) override
|
|
|
|
{
|
|
|
|
if (x.getType() != Field::Types::Which::String)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
insert(x);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
void insertFrom(const IColumn & src_, size_t n) override
|
2012-12-16 00:52:06 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & src = assert_cast<const ColumnString &>(src_);
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-10 22:01:12 +00:00
|
|
|
if (size_to_append == 1)
|
2016-08-12 20:50:34 +00:00
|
|
|
{
|
2019-03-10 22:01:12 +00:00
|
|
|
/// shortcut for empty string
|
|
|
|
chars.push_back(0);
|
|
|
|
offsets.push_back(chars.size());
|
2016-08-12 20:50:34 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t offset = src.offsets[n - 1];
|
2016-08-12 20:50:34 +00:00
|
|
|
const size_t new_size = old_size + size_to_append;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-08-12 20:50:34 +00:00
|
|
|
chars.resize(new_size);
|
2019-03-10 22:01:12 +00:00
|
|
|
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[offset], size_to_append);
|
2016-08-12 20:50:34 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
2012-12-16 00:52:06 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
void insertData(const char * pos, size_t length) override
|
2013-02-16 20:15:45 +00:00
|
|
|
{
|
2016-08-12 20:50:34 +00:00
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + length + 1;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-08-12 20:50:34 +00:00
|
|
|
chars.resize(new_size);
|
2018-12-27 20:48:21 +00:00
|
|
|
if (length)
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, length);
|
2013-05-05 15:25:25 +00:00
|
|
|
chars[old_size + length] = 0;
|
2016-08-12 20:50:34 +00:00
|
|
|
offsets.push_back(new_size);
|
2013-02-16 20:15:45 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-02-16 16:39:39 +00:00
|
|
|
void popBack(size_t n) override
|
|
|
|
{
|
|
|
|
size_t nested_n = offsets.back() - offsetAt(offsets.size() - n);
|
|
|
|
chars.resize(chars.size() - nested_n);
|
|
|
|
offsets.resize_assume_reserved(offsets.size() - n);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2024-02-25 18:29:00 +00:00
|
|
|
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
|
|
|
|
|
|
|
|
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
|
|
|
void serializeValueIntoMemory(size_t n, char *& memory) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-02-16 21:26:06 +00:00
|
|
|
const char * skipSerializedInArena(const char * pos) const override;
|
|
|
|
|
2016-07-10 15:58:58 +00:00
|
|
|
void updateHashWithValue(size_t n, SipHash & hash) const override
|
|
|
|
{
|
|
|
|
size_t string_size = sizeAt(n);
|
|
|
|
size_t offset = offsetAt(n);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-07-10 15:58:58 +00:00
|
|
|
hash.update(reinterpret_cast<const char *>(&string_size), sizeof(string_size));
|
|
|
|
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-03-13 17:31:50 +00:00
|
|
|
void updateWeakHash32(WeakHash32 & hash) const override;
|
|
|
|
|
2020-05-20 22:16:08 +00:00
|
|
|
/// Feeds the whole column into `hash` in two bulk updates:
/// the raw bytes of the offsets array followed by the raw bytes of the chars array.
void updateHashFast(SipHash & hash) const override
{
    const size_t offsets_bytes = offsets.size() * sizeof(offsets[0]);
    const size_t chars_bytes = chars.size() * sizeof(chars[0]);

    hash.update(reinterpret_cast<const char *>(offsets.data()), offsets_bytes);
    hash.update(reinterpret_cast<const char *>(chars.data()), chars_bytes);
}
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-06-07 10:55:55 +00:00
|
|
|
void expand(const Filter & mask, bool inverted) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
|
|
|
template <typename Type>
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
2014-10-31 22:15:17 +00:00
|
|
|
void insertDefault() override
|
2010-05-20 19:29:04 +00:00
|
|
|
{
|
2013-05-05 15:25:25 +00:00
|
|
|
chars.push_back(0);
|
2018-12-24 14:26:38 +00:00
|
|
|
offsets.push_back(offsets.back() + 1);
|
2010-05-20 19:29:04 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-10-04 15:33:29 +00:00
|
|
|
void insertManyDefaults(size_t length) override
|
2019-11-08 11:15:20 +00:00
|
|
|
{
|
|
|
|
chars.resize_fill(chars.size() + length);
|
|
|
|
for (size_t i = 0; i < length; ++i)
|
|
|
|
offsets.push_back(offsets.back() + 1);
|
|
|
|
}
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
|
2011-09-04 00:22:19 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
2019-03-04 00:03:20 +00:00
|
|
|
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);
|
2011-09-26 11:05:38 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/// Variant of compareAt for string comparison with respect to collation.
|
2020-10-29 11:24:01 +00:00
|
|
|
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int nan_direction_hint, Permutation & res) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
2020-05-12 00:58:58 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/// Sorting with respect to collation.
|
2022-02-23 17:34:19 +00:00
|
|
|
void getPermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int, Permutation & res) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
|
|
|
|
size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
2020-05-12 00:58:58 +00:00
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-02-17 23:09:46 +00:00
|
|
|
ColumnPtr compress() const override;
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
void reserve(size_t n) override;
|
2024-01-27 16:40:11 +00:00
|
|
|
void shrinkToFit() override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
void getExtremes(Field & min, Field & max) const override;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-09 10:14:45 +00:00
|
|
|
/// Always true for this column.
bool canBeInsideNullable() const override
{
    return true;
}
|
|
|
|
|
2019-03-14 23:10:51 +00:00
|
|
|
bool structureEquals(const IColumn & rhs) const override
|
|
|
|
{
|
|
|
|
return typeid(rhs) == typeid(ColumnString);
|
|
|
|
}
|
|
|
|
|
2018-11-25 00:08:50 +00:00
|
|
|
/// Direct access to the contiguous byte storage of all strings.
/// The mutable overload lets callers modify it; nothing here keeps `offsets` in sync.
Chars & getChars()
{
    return chars;
}

const Chars & getChars() const
{
    return chars;
}
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-15 21:32:25 +00:00
|
|
|
/// Direct access to the per-row end offsets.
/// The mutable overload lets callers modify them; nothing here keeps `chars` in sync.
Offsets & getOffsets()
{
    return offsets;
}

const Offsets & getOffsets() const
{
    return offsets;
}
|
encrypt, aes_encrypt_mysql, decrypt, aes_decrypt_mysql functions
Functions to encrypt/decrypt any input data with OpenSSL's ciphers
with custom key, iv, and aad (-gcm mode only).
_mysql versions are 100% compatible with corresponding MySQL functions
Supported modes depend on OpenSSL version, but generally are:
aes-{128,192,256}-{ecb,cbc,cfb1,cfb8,cfb128,ofb,gcm}
Please note that in a -gcm mode a 16-byte tag is appended to the ciphertext
on encryption and is expected to be found at the end of ciphertext on decryption.
Added tests that verify compatibility with MySQL functions,
and test vectors for GCM mode from OpenSSL.
Added masking rules for aes_X functions
Rules are installed by default to config.d/query_masking_rules.xml
2020-06-16 09:22:55 +00:00
|
|
|
|
|
|
|
// Throws an exception if offsets/chars are messed up
|
|
|
|
void validate() const;
|
2020-10-29 11:24:01 +00:00
|
|
|
|
|
|
|
/// Always true for this column.
bool isCollationSupported() const override
{
    return true;
}
|
2010-05-13 16:13:38 +00:00
|
|
|
};
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|