2011-09-04 00:22:19 +00:00
|
|
|
#pragma once
|
2010-03-12 18:25:35 +00:00
|
|
|
|
2019-03-08 12:59:06 +00:00
|
|
|
#include <cstring>
|
|
|
|
#include <cassert>
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2019-10-07 18:56:03 +00:00
|
|
|
#include <Columns/IColumnImpl.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/PODArray.h>
|
|
|
|
#include <Common/SipHash.h>
|
|
|
|
#include <Common/memcpySmall.h>
|
2019-03-03 20:08:39 +00:00
|
|
|
#include <Common/memcmpSmall.h>
|
2019-08-21 02:28:04 +00:00
|
|
|
#include <Common/assert_cast.h>
|
2019-09-27 13:44:33 +00:00
|
|
|
#include <Core/Field.h>
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
2016-11-20 12:43:20 +00:00
|
|
|
class Collator;
|
|
|
|
|
|
|
|
|
2010-03-12 18:25:35 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
/** Column for String values.
|
2010-05-13 16:13:38 +00:00
|
|
|
*/
|
2019-04-19 20:21:17 +00:00
|
|
|
class ColumnString final : public COWHelper<IColumn, ColumnString>
|
2010-05-13 16:13:38 +00:00
|
|
|
{
|
2013-03-05 10:29:26 +00:00
|
|
|
public:
|
2019-04-12 00:34:11 +00:00
|
|
|
using Char = UInt8;
|
2018-11-25 00:08:50 +00:00
|
|
|
using Chars = PaddedPODArray<UInt8>;
|
2013-03-05 10:29:26 +00:00
|
|
|
|
2010-05-20 19:29:04 +00:00
|
|
|
private:
|
2019-04-19 20:21:17 +00:00
|
|
|
friend class COWHelper<IColumn, ColumnString>;
|
2017-12-14 04:25:22 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
|
2017-12-15 21:32:25 +00:00
|
|
|
Offsets offsets;
|
2010-05-20 19:29:04 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Bytes of strings, placed contiguously.
|
|
|
|
/// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
|
2018-11-25 00:08:50 +00:00
|
|
|
Chars chars;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2018-12-26 05:34:25 +00:00
|
|
|
/// Position in `chars` where the i-th string starts, i.e. the end offset recorded for the previous row.
/// NOTE(review): for i == 0 this reads offsets[-1]; insertFrom's comment states the -1th index of PaddedPODArray is Ok — confirm it reads as 0.
size_t ALWAYS_INLINE offsetAt(ssize_t i) const
{
    const ssize_t previous_row = i - 1;
    return offsets[previous_row];
}
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Size of i-th element, including terminating zero.
|
2018-12-26 05:34:25 +00:00
|
|
|
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
|
2014-08-20 04:57:03 +00:00
|
|
|
|
2020-11-03 14:25:52 +00:00
|
|
|
template <bool positive>
|
2020-10-29 11:24:01 +00:00
|
|
|
struct Cmp;
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2020-11-03 14:25:52 +00:00
|
|
|
template <bool positive>
|
2020-10-29 11:24:01 +00:00
|
|
|
struct CmpWithCollation;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-15 02:36:40 +00:00
|
|
|
ColumnString() = default;
|
2020-08-07 19:36:04 +00:00
|
|
|
ColumnString(const ColumnString & src);
|
2017-12-14 03:56:56 +00:00
|
|
|
|
2020-10-29 11:24:01 +00:00
|
|
|
template <typename Comparator>
|
|
|
|
void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const;
|
2020-10-22 20:23:44 +00:00
|
|
|
|
2020-10-29 11:24:01 +00:00
|
|
|
template <typename Comparator>
|
|
|
|
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const;
|
2020-10-22 20:23:44 +00:00
|
|
|
|
2017-03-09 03:34:09 +00:00
|
|
|
public:
|
2017-12-07 22:11:51 +00:00
|
|
|
/// Family name of this column kind; always the literal "String".
const char * getFamilyName() const override
{
    return "String";
}
|
2020-06-04 22:02:59 +00:00
|
|
|
/// Type index of the values held by this column; always TypeIndex::String.
TypeIndex getDataType() const override
{
    return TypeIndex::String;
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
size_t size() const override
|
|
|
|
{
|
|
|
|
return offsets.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t byteSize() const override
|
|
|
|
{
|
|
|
|
return chars.size() + offsets.size() * sizeof(offsets[0]);
|
|
|
|
}
|
|
|
|
|
2021-01-02 22:58:10 +00:00
|
|
|
size_t byteSizeAt(size_t n) const override
|
|
|
|
{
|
|
|
|
assert(n < size());
|
|
|
|
return sizeAt(n) + sizeof(offsets[0]);
|
|
|
|
}
|
|
|
|
|
2017-07-13 16:49:09 +00:00
|
|
|
size_t allocatedBytes() const override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-07-13 16:49:09 +00:00
|
|
|
return chars.allocated_bytes() + offsets.allocated_bytes();
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-03-10 03:16:51 +00:00
|
|
|
void protect() override;
|
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
MutableColumnPtr cloneResized(size_t to_size) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
/// Returns the n-th string as a Field built from its bytes, without the terminating zero.
/// The row index is only checked by assert.
Field operator[](size_t n) const override
{
    assert(n < size());

    const auto * begin = &chars[offsetAt(n)];
    const size_t length = sizeAt(n) - 1;   /// Drop the terminating zero.
    return Field(begin, length);
}
|
|
|
|
|
|
|
|
void get(size_t n, Field & res) const override
|
|
|
|
{
|
2019-03-07 20:04:59 +00:00
|
|
|
assert(n < size());
|
2020-07-26 21:41:27 +00:00
|
|
|
res = std::string_view{reinterpret_cast<const char *>(&chars[offsetAt(n)]), sizeAt(n) - 1};
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Reference to the bytes of the n-th string inside `chars`, without the terminating zero.
/// The row index is only checked by assert.
StringRef getDataAt(size_t n) const override
{
    assert(n < size());

    const auto * begin = &chars[offsetAt(n)];
    const size_t length = sizeAt(n) - 1;   /// Drop the terminating zero.
    return StringRef(begin, length);
}
|
|
|
|
|
|
|
|
/// Reference to the bytes of the n-th string inside `chars`, with the terminating zero included in the length.
/// The row index is only checked by assert.
StringRef getDataAtWithTerminatingZero(size_t n) const override
{
    assert(n < size());

    const auto * begin = &chars[offsetAt(n)];
    const size_t length_with_zero = sizeAt(n);
    return StringRef(begin, length_with_zero);
}
|
|
|
|
|
2018-03-29 21:15:14 +00:00
|
|
|
/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
|
2018-03-29 17:01:47 +00:00
|
|
|
#if !__clang__
|
|
|
|
#pragma GCC diagnostic push
|
|
|
|
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
|
|
|
|
#endif
|
|
|
|
|
2018-03-29 20:21:40 +00:00
|
|
|
void insert(const Field & x) override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
const String & s = DB::get<const String &>(x);
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t size_to_append = s.size() + 1;
|
|
|
|
const size_t new_size = old_size + size_to_append;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, s.c_str(), size_to_append);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
2018-03-29 17:01:47 +00:00
|
|
|
#if !__clang__
|
|
|
|
#pragma GCC diagnostic pop
|
|
|
|
#endif
|
|
|
|
|
Get rid of useless std::move to get NRVO
http://eel.is/c++draft/class.copy.elision#:constructor,copy,elision
Some quote:
> Speaking of RVO, return std::move(w); prohibits it. It means "use move constructor or fail to compile", whereas return w; means "use RVO, and if you can't, use move constructor, and if you can't, use copy constructor, and if you can't, fail to compile."
There is one exception to this rule:
```cpp
Block FilterBlockInputStream::removeFilterIfNeed(Block && block)
{
if (block && remove_filter)
block.erase(static_cast<size_t>(filter_column));
return std::move(block);
}
```
because references are not eligible for NRVO, which is another rule "always move rvalue references and forward universal references" that takes precedence.
2018-08-27 14:04:22 +00:00
|
|
|
void insertFrom(const IColumn & src_, size_t n) override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & src = assert_cast<const ColumnString &>(src_);
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-10 22:01:12 +00:00
|
|
|
if (size_to_append == 1)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-03-10 22:01:12 +00:00
|
|
|
/// shortcut for empty string
|
|
|
|
chars.push_back(0);
|
|
|
|
offsets.push_back(chars.size());
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
2019-03-10 22:01:12 +00:00
|
|
|
const size_t offset = src.offsets[n - 1];
|
2017-04-01 07:20:54 +00:00
|
|
|
const size_t new_size = old_size + size_to_append;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-10 22:01:12 +00:00
|
|
|
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[offset], size_to_append);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void insertData(const char * pos, size_t length) override
|
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + length + 1;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2018-12-27 20:48:21 +00:00
|
|
|
if (length)
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, length);
|
2017-04-01 07:20:54 +00:00
|
|
|
chars[old_size + length] = 0;
|
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
2018-12-12 19:21:48 +00:00
|
|
|
/// Like getData, but inserting data should be zero-ending (i.e. length is 1 byte greater than real string size).
|
|
|
|
void insertDataWithTerminatingZero(const char * pos, size_t length)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
const size_t old_size = chars.size();
|
|
|
|
const size_t new_size = old_size + length;
|
|
|
|
|
|
|
|
chars.resize(new_size);
|
2019-03-07 20:04:59 +00:00
|
|
|
memcpy(chars.data() + old_size, pos, length);
|
2017-04-01 07:20:54 +00:00
|
|
|
offsets.push_back(new_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
void popBack(size_t n) override
|
|
|
|
{
|
|
|
|
size_t nested_n = offsets.back() - offsetAt(offsets.size() - n);
|
|
|
|
chars.resize(chars.size() - nested_n);
|
|
|
|
offsets.resize_assume_reserved(offsets.size() - n);
|
|
|
|
}
|
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-09-02 05:23:20 +00:00
|
|
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
void updateHashWithValue(size_t n, SipHash & hash) const override
|
|
|
|
{
|
|
|
|
size_t string_size = sizeAt(n);
|
|
|
|
size_t offset = offsetAt(n);
|
|
|
|
|
|
|
|
hash.update(reinterpret_cast<const char *>(&string_size), sizeof(string_size));
|
|
|
|
hash.update(reinterpret_cast<const char *>(&chars[offset]), string_size);
|
|
|
|
}
|
|
|
|
|
2020-03-13 17:31:50 +00:00
|
|
|
void updateWeakHash32(WeakHash32 & hash) const override;
|
|
|
|
|
2020-05-20 22:16:08 +00:00
|
|
|
/// Feeds the whole column into `hash` in two bulk updates: the raw bytes of `offsets`,
/// then the raw bytes of `chars`.
void updateHashFast(SipHash & hash) const override
{
    hash.update(reinterpret_cast<const char *>(offsets.data()), offsets.size() * sizeof(offsets[0]));

    /// The length must be the number of bytes in `chars`, not the number of rows:
    /// using size() here would hash only a prefix of the string data.
    hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
|
|
|
template <typename Type>
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
|
2018-04-23 16:40:25 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void insertDefault() override
|
|
|
|
{
|
|
|
|
chars.push_back(0);
|
2018-12-24 14:26:38 +00:00
|
|
|
offsets.push_back(offsets.back() + 1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2019-11-08 11:15:20 +00:00
|
|
|
virtual void insertManyDefaults(size_t length) override
|
|
|
|
{
|
|
|
|
chars.resize_fill(chars.size() + length);
|
|
|
|
for (size_t i = 0; i < length; ++i)
|
|
|
|
offsets.push_back(offsets.back() + 1);
|
|
|
|
}
|
|
|
|
|
2017-12-01 19:34:51 +00:00
|
|
|
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2019-08-21 02:28:04 +00:00
|
|
|
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
2019-03-04 00:03:20 +00:00
|
|
|
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
2020-06-02 00:23:41 +00:00
|
|
|
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
2020-06-17 11:43:55 +00:00
|
|
|
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
|
2020-06-03 22:50:58 +00:00
|
|
|
int direction, int nan_direction_hint) const override;
|
2020-06-01 12:10:32 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Variant of compareAt for string comparison with respect to collation.
|
2020-10-29 11:24:01 +00:00
|
|
|
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-10-22 20:23:44 +00:00
|
|
|
void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
2020-05-12 00:58:58 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// Sorting with respect to collation.
|
2020-10-29 11:24:01 +00:00
|
|
|
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2020-10-29 11:24:01 +00:00
|
|
|
void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
|
2020-05-12 00:58:58 +00:00
|
|
|
|
2018-03-20 14:17:09 +00:00
|
|
|
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-12-14 03:56:56 +00:00
|
|
|
/// Splits the column into `num_columns` columns according to `selector`;
/// all work is delegated to the generic scatterImpl instantiated for ColumnString.
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
    return this->scatterImpl<ColumnString>(num_columns, selector);
}
|
|
|
|
|
2017-07-06 13:54:55 +00:00
|
|
|
void gather(ColumnGathererStream & gatherer_stream) override;
|
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
void reserve(size_t n) override;
|
|
|
|
|
|
|
|
void getExtremes(Field & min, Field & max) const override;
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-09 10:14:45 +00:00
|
|
|
|
|
|
|
/// Always true for this column.
bool canBeInsideNullable() const override
{
    return true;
}
|
|
|
|
|
2019-03-14 23:10:51 +00:00
|
|
|
bool structureEquals(const IColumn & rhs) const override
|
|
|
|
{
|
|
|
|
return typeid(rhs) == typeid(ColumnString);
|
|
|
|
}
|
|
|
|
|
2017-12-09 10:14:45 +00:00
|
|
|
|
2018-11-25 00:08:50 +00:00
|
|
|
/// Direct mutable access to the contiguous byte storage of all strings.
Chars & getChars()
{
    return chars;
}

/// Read-only access to the contiguous byte storage of all strings.
const Chars & getChars() const
{
    return chars;
}
|
2013-05-05 15:25:25 +00:00
|
|
|
|
2017-12-15 21:32:25 +00:00
|
|
|
/// Direct mutable access to the per-row end offsets.
Offsets & getOffsets()
{
    return offsets;
}

/// Read-only access to the per-row end offsets.
const Offsets & getOffsets() const
{
    return offsets;
}
|
encrypt, aes_encrypt_mysql, decrypt, aes_decrypt_mysql functions
Functions to encrypt/decrypt any input data with OpenSSL's ciphers
with custom key, iv, and add (-gcm mode only).
_mysql versions are 100% compatible with corresponding MySQL functions
Supported modes depend on OpenSSL version, but generally are:
aes-{128,192,256}-{ecb,cbc,cfb1,cfb8,cfb128,ofb,gcm}
Please note that in a -gcm mode a 16-byte tag is appended to the ciphertext
on encryption and is expected to be found at the end of ciphertext on decryption.
Added tests that verify compatibility with MySQL functions,
and test vectors for GCM mode from OpenSSL.
Added masking rules for aes_X functions
Rules are installed by default to config.d/query_masking_rules.xml
2020-06-16 09:22:55 +00:00
|
|
|
|
|
|
|
// Throws an exception if offsets/chars are messed up
|
|
|
|
void validate() const;
|
2020-10-29 11:24:01 +00:00
|
|
|
|
|
|
|
/// Always true: this column declares the collation-aware compare and permutation methods.
bool isCollationSupported() const override
{
    return true;
}
|
2010-05-13 16:13:38 +00:00
|
|
|
};
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
}
|