2018-03-29 14:04:24 +00:00
|
|
|
#pragma once
|
2020-07-17 12:44:26 +00:00
|
|
|
#include <optional>
|
2018-03-14 10:47:13 +00:00
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
2020-02-25 18:10:48 +00:00
|
|
|
namespace ErrorCodes
{
    /// Error code passed to the exceptions thrown by the IColumn operations
    /// that IColumnUnique does not support.
    /// Only declared here (extern); the definition is provided elsewhere.
    extern const int NOT_IMPLEMENTED;
}
|
2018-03-14 10:47:13 +00:00
|
|
|
|
2020-07-16 15:08:51 +00:00
|
|
|
/// Sort of a dictionary
|
2018-03-19 11:45:17 +00:00
|
|
|
class IColumnUnique : public IColumn
|
2018-03-14 10:47:13 +00:00
|
|
|
{
|
|
|
|
public:
|
2018-06-07 18:14:37 +00:00
|
|
|
using ColumnUniquePtr = IColumn::template immutable_ptr<IColumnUnique>;
|
|
|
|
using MutableColumnUniquePtr = IColumn::template mutable_ptr<IColumnUnique>;
|
|
|
|
|
2018-03-14 10:47:13 +00:00
|
|
|
/// Column always contains Null if it's Nullable and empty string if it's String or Nullable(String).
|
|
|
|
/// So, size may be greater than the number of inserted unique values.
|
2018-07-09 18:19:03 +00:00
|
|
|
virtual const ColumnPtr & getNestedColumn() const = 0;
|
2018-06-26 17:07:08 +00:00
|
|
|
/// The same as getNestedColumn, but removes null map if nested column is nullable.
|
|
|
|
virtual const ColumnPtr & getNestedNotNullableColumn() const = 0;
|
2018-06-07 18:14:37 +00:00
|
|
|
|
2018-12-03 13:00:01 +00:00
|
|
|
virtual bool nestedColumnIsNullable() const = 0;
|
2021-04-17 19:03:32 +00:00
|
|
|
virtual void nestedToNullable() = 0;
|
2021-05-15 14:09:51 +00:00
|
|
|
virtual void nestedRemoveNullable() = 0;
|
2018-12-03 13:00:01 +00:00
|
|
|
|
2018-09-20 14:19:12 +00:00
|
|
|
/// Returns array with StringRefHash calculated for each row of getNestedNotNullableColumn() column.
|
|
|
|
/// Returns nullptr if nested column doesn't contain strings. Otherwise calculates hash (if it wasn't).
|
|
|
|
/// Uses thread-safe cache.
|
2018-08-23 13:22:03 +00:00
|
|
|
virtual const UInt64 * tryGetSavedHash() const = 0;
|
|
|
|
|
2018-11-12 08:40:23 +00:00
|
|
|
size_t size() const override { return getNestedNotNullableColumn()->size(); }
|
2018-03-14 10:47:13 +00:00
|
|
|
|
|
|
|
/// Appends new value at the end of column (column's size is increased by 1).
|
|
|
|
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
|
2018-03-19 11:45:17 +00:00
|
|
|
virtual size_t uniqueInsert(const Field & x) = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
|
2018-03-19 11:45:17 +00:00
|
|
|
virtual size_t uniqueInsertFrom(const IColumn & src, size_t n) = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
/// Appends range of elements from other column.
|
|
|
|
/// Could be used to concatenate columns.
|
2018-06-07 18:14:37 +00:00
|
|
|
virtual MutableColumnPtr uniqueInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
|
|
|
|
|
|
|
|
struct IndexesWithOverflow
|
|
|
|
{
|
|
|
|
MutableColumnPtr indexes;
|
|
|
|
MutableColumnPtr overflowed_keys;
|
|
|
|
};
|
|
|
|
/// Like uniqueInsertRangeFrom, but doesn't insert keys if inner dictionary has more than max_dictionary_size keys.
|
|
|
|
/// Keys that won't be inserted into dictionary will be into overflowed_keys, indexes will be calculated for
|
|
|
|
/// concatenation of nested column (which can be got from getNestedColumn() function) and overflowed_keys.
|
|
|
|
virtual IndexesWithOverflow uniqueInsertRangeWithOverflow(const IColumn & src, size_t start,
|
|
|
|
size_t length, size_t max_dictionary_size) = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
|
|
|
|
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
|
|
|
|
/// Is used to optimize some computations (in aggregation, for example).
|
|
|
|
/// Parameter length could be ignored if column values have fixed size.
|
2018-03-19 11:45:17 +00:00
|
|
|
virtual size_t uniqueInsertData(const char * pos, size_t length) = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
|
2018-09-19 11:59:40 +00:00
|
|
|
virtual size_t getDefaultValueIndex() const = 0; /// Nullable ? getNullValueIndex : getNestedTypeDefaultValueIndex
|
|
|
|
virtual size_t getNullValueIndex() const = 0; /// Throws if not nullable.
|
|
|
|
virtual size_t getNestedTypeDefaultValueIndex() const = 0; /// removeNullable()->getDefault() value index
|
2018-03-19 11:45:17 +00:00
|
|
|
virtual bool canContainNulls() const = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
|
2018-03-19 11:45:17 +00:00
|
|
|
virtual size_t uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos) = 0;
|
|
|
|
|
2022-09-12 14:29:23 +00:00
|
|
|
/// Returns dictionary hash which is SipHash is applied to each row of nested column.
|
2018-09-07 10:08:09 +00:00
|
|
|
virtual UInt128 getHash() const = 0;
|
|
|
|
|
2018-03-19 11:45:17 +00:00
|
|
|
const char * getFamilyName() const override { return "ColumnUnique"; }
|
2020-06-04 22:02:59 +00:00
|
|
|
TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-03-29 14:04:24 +00:00
|
|
|
void insert(const Field &) override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2018-03-29 14:04:24 +00:00
|
|
|
void insertRangeFrom(const IColumn &, size_t, size_t) override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method insertRangeFrom is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2018-03-29 14:04:24 +00:00
|
|
|
void insertData(const char *, size_t) override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method insertData is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
|
|
|
void insertDefault() override
|
|
|
|
{
|
|
|
|
throw Exception("Method insertDefault is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2018-03-29 14:04:24 +00:00
|
|
|
void popBack(size_t) override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method popBack is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2018-03-29 14:04:24 +00:00
|
|
|
void gather(ColumnGathererStream &) override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method gather is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2018-03-29 14:04:24 +00:00
|
|
|
|
|
|
|
const char * deserializeAndInsertFromArena(const char *) override
|
|
|
|
{
|
|
|
|
throw Exception("Method deserializeAndInsertFromArena is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2018-04-23 16:40:25 +00:00
|
|
|
|
2019-02-18 17:31:18 +00:00
|
|
|
ColumnPtr index(const IColumn &, size_t) const override
|
2018-04-23 16:40:25 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method index is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2018-06-06 11:20:27 +00:00
|
|
|
|
|
|
|
ColumnPtr cut(size_t, size_t) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method cut is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2021-08-10 11:31:15 +00:00
|
|
|
ColumnPtr filter(const IColumn::Filter &, ssize_t) const override
|
2018-06-06 11:20:27 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method filter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2021-04-27 12:49:58 +00:00
|
|
|
void expand(const IColumn::Filter &, bool) override
|
|
|
|
{
|
|
|
|
throw Exception("Method expand is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2019-02-18 17:31:18 +00:00
|
|
|
ColumnPtr permute(const IColumn::Permutation &, size_t) const override
|
2018-06-06 11:20:27 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method permute is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr replicate(const IColumn::Offsets &) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method replicate is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2022-02-23 17:34:19 +00:00
|
|
|
void getPermutation(IColumn::PermutationSortDirection, IColumn::PermutationSortStability,
|
|
|
|
size_t, int, IColumn::Permutation &) const override
|
2018-06-06 11:20:27 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method getPermutation is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2022-03-11 21:16:25 +00:00
|
|
|
void updatePermutation(PermutationSortDirection, PermutationSortStability,
|
|
|
|
size_t, int, Permutation &, EqualRanges &) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method getPermutation is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
|
|
|
|
2018-06-06 11:20:27 +00:00
|
|
|
std::vector<MutableColumnPtr> scatter(IColumn::ColumnIndex, const IColumn::Selector &) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method scatter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2020-03-18 16:28:50 +00:00
|
|
|
|
|
|
|
void updateWeakHash32(WeakHash32 &) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method updateWeakHash32 is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2020-06-02 07:53:55 +00:00
|
|
|
|
2020-05-20 22:16:08 +00:00
|
|
|
void updateHashFast(SipHash &) const override
|
|
|
|
{
|
|
|
|
throw Exception("Method updateHashFast is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2020-06-26 11:27:19 +00:00
|
|
|
|
2020-06-17 11:43:55 +00:00
|
|
|
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
|
2020-06-02 07:53:55 +00:00
|
|
|
{
|
|
|
|
throw Exception("Method compareColumn is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2021-02-26 04:50:04 +00:00
|
|
|
|
|
|
|
bool hasEqualValues() const override
|
|
|
|
{
|
|
|
|
throw Exception("Method hasEqualValues is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2022-09-12 14:29:23 +00:00
|
|
|
|
|
|
|
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
|
|
|
|
* index in the DB::ColumnUnique::reverse_index hashtable.
|
|
|
|
*
|
|
|
|
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:
|
|
|
|
* - On the function call it's present. Therefore we obtain the index in O(1).
|
|
|
|
* - The reverse index is absent. We search for the index linearly.
|
|
|
|
*
|
|
|
|
* @see DB::ReverseIndex
|
|
|
|
* @see DB::ColumnUnique
|
|
|
|
*
|
|
|
|
* The most common example uses https://clickhouse.com/docs/en/sql-reference/data-types/lowcardinality/ columns.
|
|
|
|
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
|
|
|
|
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
|
|
|
|
* index in the dictionary, which can be used to operate with the indices column.
|
|
|
|
*/
|
|
|
|
virtual std::optional<UInt64> getOrFindValueIndex(StringRef value) const = 0;
|
2018-03-14 10:47:13 +00:00
|
|
|
};
|
|
|
|
|
2018-06-07 18:14:37 +00:00
|
|
|
/// Namespace-level shorthands for the pointer aliases declared inside IColumnUnique.
using ColumnUniquePtr = IColumnUnique::ColumnUniquePtr;
using MutableColumnUniquePtr = IColumnUnique::MutableColumnUniquePtr;
|
|
|
|
|
2018-03-14 10:47:13 +00:00
|
|
|
}
|