2018-03-29 14:04:24 +00:00
|
|
|
#pragma once
|
2018-03-19 11:45:17 +00:00
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <Columns/IColumnUnique.h>
|
2018-04-17 17:47:27 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
#include <AggregateFunctions/AggregateFunctionCount.h>
|
|
|
|
#include "ColumnsNumber.h"
|
2018-03-19 11:45:17 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced by this header. The constants themselves are defined elsewhere (hence `extern`).
/// Every code used in this header is declared here so that it does not depend on
/// a declaration leaking in from some other include.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
2018-09-27 15:55:22 +00:00
|
|
|
class ColumnLowCardinality final : public COWPtrHelper<IColumn, ColumnLowCardinality>
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
friend class COWPtrHelper<IColumn, ColumnLowCardinality>;
|
2018-03-29 14:04:24 +00:00
|
|
|
|
2018-09-27 15:55:22 +00:00
|
|
|
ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
|
|
|
|
ColumnLowCardinality(const ColumnLowCardinality & other) = default;
|
2018-03-29 14:04:24 +00:00
|
|
|
|
2018-03-19 11:45:17 +00:00
|
|
|
public:
|
2018-04-17 10:45:05 +00:00
|
|
|
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
|
|
|
|
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
|
|
|
|
*/
|
2018-09-27 15:55:22 +00:00
|
|
|
using Base = COWPtrHelper<IColumn, ColumnLowCardinality>;
|
2018-09-21 14:15:21 +00:00
|
|
|
static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false)
|
2018-04-17 10:45:05 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
|
2018-04-17 10:45:05 +00:00
|
|
|
}
|
|
|
|
|
2018-09-21 14:15:21 +00:00
|
|
|
static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false)
|
|
|
|
{
|
|
|
|
return Base::create(std::move(column_unique), std::move(indexes), is_shared);
|
|
|
|
}
|
2018-04-17 10:45:05 +00:00
|
|
|
|
2018-09-27 15:55:22 +00:00
|
|
|
std::string getName() const override { return "ColumnLowCardinality"; }
|
|
|
|
const char * getFamilyName() const override { return "ColumnLowCardinality"; }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); }
|
2018-09-27 15:55:22 +00:00
|
|
|
ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); }
|
2018-06-06 13:43:16 +00:00
|
|
|
|
2018-06-28 18:12:29 +00:00
|
|
|
MutableColumnPtr cloneResized(size_t size) const override;
|
2018-07-09 18:19:03 +00:00
|
|
|
size_t size() const override { return getIndexes().size(); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
Field operator[](size_t n) const override { return getDictionary()[getIndexes().getUInt(n)]; }
|
|
|
|
void get(size_t n, Field & res) const override { getDictionary().get(getIndexes().getUInt(n), res); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
StringRef getDataAt(size_t n) const override { return getDictionary().getDataAt(getIndexes().getUInt(n)); }
|
2018-03-19 11:45:17 +00:00
|
|
|
StringRef getDataAtWithTerminatingZero(size_t n) const override
|
|
|
|
{
|
2018-07-09 18:19:03 +00:00
|
|
|
return getDictionary().getDataAtWithTerminatingZero(getIndexes().getUInt(n));
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
UInt64 get64(size_t n) const override { return getDictionary().get64(getIndexes().getUInt(n)); }
|
|
|
|
UInt64 getUInt(size_t n) const override { return getDictionary().getUInt(getIndexes().getUInt(n)); }
|
|
|
|
Int64 getInt(size_t n) const override { return getDictionary().getInt(getIndexes().getUInt(n)); }
|
|
|
|
bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
|
2018-04-17 10:45:05 +00:00
|
|
|
ColumnPtr cut(size_t start, size_t length) const override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length));
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void insert(const Field & x) override;
|
|
|
|
void insertDefault() override;
|
2018-05-04 10:48:09 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void insertFrom(const IColumn & src, size_t n) override;
|
|
|
|
void insertFromFullColumn(const IColumn & src, size_t n);
|
2018-05-21 16:21:15 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
|
|
|
void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length);
|
|
|
|
void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions);
|
2018-05-04 10:48:09 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void insertData(const char * pos, size_t length) override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void popBack(size_t n) override { idx.popBack(n); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
const char * deserializeAndInsertFromArena(const char * pos) override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
|
|
|
void updateHashWithValue(size_t n, SipHash & hash) const override
|
|
|
|
{
|
2018-07-09 18:19:03 +00:00
|
|
|
return getDictionary().updateHashWithValue(getIndexes().getUInt(n), hash);
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2018-04-17 10:45:05 +00:00
|
|
|
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
ColumnPtr permute(const Permutation & perm, size_t limit) const override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2019-02-18 17:28:53 +00:00
|
|
|
ColumnPtr index(const IColumn & indexes_, size_t limit) const override
|
2018-04-23 16:40:25 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
|
2018-04-23 16:40:25 +00:00
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2019-02-18 19:44:26 +00:00
|
|
|
void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-04-17 10:45:05 +00:00
|
|
|
ColumnPtr replicate(const Offsets & offsets) const override
|
2018-03-19 11:45:17 +00:00
|
|
|
{
|
2018-09-27 15:55:22 +00:00
|
|
|
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-08-24 00:07:25 +00:00
|
|
|
void gather(ColumnGathererStream & gatherer_stream) override;
|
|
|
|
|
|
|
|
void getExtremes(Field & min, Field & max) const override
|
|
|
|
{
|
2018-11-01 17:03:05 +00:00
|
|
|
return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize
|
2018-07-09 18:19:03 +00:00
|
|
|
}
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void reserve(size_t n) override { idx.reserve(n); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
|
|
|
|
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
|
|
|
void forEachSubcolumn(ColumnCallback callback) override
|
|
|
|
{
|
2018-07-09 18:19:03 +00:00
|
|
|
callback(idx.getPositionsPtr());
|
|
|
|
|
|
|
|
/// Column doesn't own dictionary if it's shared.
|
|
|
|
if (!dictionary.isShared())
|
|
|
|
callback(dictionary.getColumnUniquePtr());
|
2018-03-19 11:45:17 +00:00
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); }
|
2019-01-09 14:47:51 +00:00
|
|
|
bool isFixedAndContiguous() const override { return false; }
|
2018-07-09 18:19:03 +00:00
|
|
|
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
|
|
|
|
bool isNumeric() const override { return getDictionary().isNumeric(); }
|
2018-09-27 15:55:22 +00:00
|
|
|
bool lowCardinality() const override { return true; }
|
2018-07-09 18:19:03 +00:00
|
|
|
|
|
|
|
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
|
2018-09-07 10:08:09 +00:00
|
|
|
const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); }
|
2018-07-09 18:19:03 +00:00
|
|
|
/// IColumnUnique & getUnique() { return static_cast<IColumnUnique &>(*column_unique->assumeMutable()); }
|
|
|
|
/// ColumnPtr getUniquePtr() const { return column_unique; }
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
/// IColumn & getIndexes() { return idx.getPositions()->assumeMutableRef(); }
|
|
|
|
const IColumn & getIndexes() const { return *idx.getPositions(); }
|
|
|
|
const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); }
|
2018-08-23 13:22:03 +00:00
|
|
|
size_t getSizeOfIndexType() const { return idx.getSizeOfIndexType(); }
|
2018-03-29 14:04:24 +00:00
|
|
|
|
2018-09-14 13:02:03 +00:00
|
|
|
ALWAYS_INLINE size_t getIndexAt(size_t row) const
|
|
|
|
{
|
|
|
|
const IColumn * indexes = &getIndexes();
|
|
|
|
|
|
|
|
switch (idx.getSizeOfIndexType())
|
|
|
|
{
|
|
|
|
case sizeof(UInt8): return static_cast<const ColumnUInt8 *>(indexes)->getElement(row);
|
|
|
|
case sizeof(UInt16): return static_cast<const ColumnUInt16 *>(indexes)->getElement(row);
|
|
|
|
case sizeof(UInt32): return static_cast<const ColumnUInt32 *>(indexes)->getElement(row);
|
|
|
|
case sizeof(UInt64): return static_cast<const ColumnUInt64 *>(indexes)->getElement(row);
|
|
|
|
default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); }
|
2018-03-29 14:04:24 +00:00
|
|
|
|
2018-09-27 15:55:22 +00:00
|
|
|
/// Set shared ColumnUnique for empty low cardinality column.
|
2018-07-09 18:19:03 +00:00
|
|
|
void setSharedDictionary(const ColumnPtr & column_unique);
|
2018-08-24 15:45:17 +00:00
|
|
|
bool isSharedDictionary() const { return dictionary.isShared(); }
|
2018-07-09 18:19:03 +00:00
|
|
|
|
|
|
|
/// Create column new dictionary with only keys that are mentioned in index.
|
|
|
|
MutablePtr compact();
|
|
|
|
|
|
|
|
/// Cut + compact.
|
|
|
|
MutablePtr cutAndCompact(size_t start, size_t length) const;
|
2018-05-03 12:47:14 +00:00
|
|
|
|
2018-07-19 19:12:48 +00:00
|
|
|
struct DictionaryEncodedColumn
|
|
|
|
{
|
|
|
|
ColumnPtr dictionary;
|
|
|
|
ColumnPtr indexes;
|
|
|
|
};
|
|
|
|
|
2019-02-10 16:22:38 +00:00
|
|
|
DictionaryEncodedColumn getMinimalDictionaryEncodedColumn(UInt64 offset, UInt64 limit) const;
|
2018-04-17 17:47:27 +00:00
|
|
|
|
2018-08-13 16:23:40 +00:00
|
|
|
ColumnPtr countKeys() const;
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
class Index
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Index();
|
|
|
|
Index(const Index & other) = default;
|
|
|
|
explicit Index(MutableColumnPtr && positions);
|
|
|
|
explicit Index(ColumnPtr positions);
|
|
|
|
|
|
|
|
const ColumnPtr & getPositions() const { return positions; }
|
|
|
|
ColumnPtr & getPositionsPtr() { return positions; }
|
2018-08-23 13:22:03 +00:00
|
|
|
size_t getPositionAt(size_t row) const;
|
2018-07-09 18:19:03 +00:00
|
|
|
void insertPosition(UInt64 position);
|
2019-02-10 16:22:38 +00:00
|
|
|
void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit);
|
2018-07-09 18:19:03 +00:00
|
|
|
|
|
|
|
void popBack(size_t n) { positions->assumeMutableRef().popBack(n); }
|
|
|
|
void reserve(size_t n) { positions->assumeMutableRef().reserve(n); }
|
|
|
|
|
|
|
|
UInt64 getMaxPositionForCurrentType() const;
|
|
|
|
|
|
|
|
static size_t getSizeOfIndexType(const IColumn & column, size_t hint);
|
2018-08-23 13:22:03 +00:00
|
|
|
size_t getSizeOfIndexType() const { return size_of_type; }
|
2018-07-09 18:19:03 +00:00
|
|
|
|
|
|
|
void check(size_t max_dictionary_size);
|
|
|
|
void checkSizeOfType();
|
|
|
|
|
|
|
|
ColumnPtr detachPositions() { return std::move(positions); }
|
|
|
|
void attachPositions(ColumnPtr positions_);
|
|
|
|
|
2018-08-13 16:23:40 +00:00
|
|
|
void countKeys(ColumnUInt64::Container & counts) const;
|
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
private:
|
|
|
|
ColumnPtr positions;
|
|
|
|
size_t size_of_type = 0;
|
|
|
|
|
|
|
|
void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); }
|
|
|
|
void expandType();
|
|
|
|
|
|
|
|
template <typename IndexType>
|
|
|
|
typename ColumnVector<IndexType>::Container & getPositionsData();
|
|
|
|
|
2018-08-13 16:23:40 +00:00
|
|
|
template <typename IndexType>
|
2018-08-21 14:53:51 +00:00
|
|
|
const typename ColumnVector<IndexType>::Container & getPositionsData() const;
|
2018-08-13 16:23:40 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
template <typename IndexType>
|
|
|
|
void convertPositions();
|
|
|
|
|
|
|
|
template <typename Callback>
|
|
|
|
static void callForType(Callback && callback, size_t size_of_type);
|
|
|
|
};
|
|
|
|
|
2018-03-19 11:45:17 +00:00
|
|
|
private:
|
2018-07-09 18:19:03 +00:00
|
|
|
class Dictionary
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
Dictionary(const Dictionary & other) = default;
|
2018-09-21 14:15:21 +00:00
|
|
|
explicit Dictionary(MutableColumnPtr && column_unique, bool is_shared);
|
|
|
|
explicit Dictionary(ColumnPtr column_unique, bool is_shared);
|
2018-07-09 18:19:03 +00:00
|
|
|
|
|
|
|
const ColumnPtr & getColumnUniquePtr() const { return column_unique; }
|
|
|
|
ColumnPtr & getColumnUniquePtr() { return column_unique; }
|
|
|
|
|
|
|
|
const IColumnUnique & getColumnUnique() const { return static_cast<const IColumnUnique &>(*column_unique); }
|
|
|
|
IColumnUnique & getColumnUnique() { return static_cast<IColumnUnique &>(column_unique->assumeMutableRef()); }
|
|
|
|
|
|
|
|
/// Dictionary may be shared for several mutable columns.
|
|
|
|
/// Immutable columns may have the same column unique, which isn't necessarily shared dictionary.
|
|
|
|
void setShared(const ColumnPtr & dictionary);
|
|
|
|
bool isShared() const { return shared; }
|
|
|
|
|
|
|
|
/// Create new dictionary with only keys that are mentioned in positions.
|
|
|
|
void compact(ColumnPtr & positions);
|
|
|
|
|
|
|
|
private:
|
|
|
|
ColumnPtr column_unique;
|
|
|
|
bool shared = false;
|
2018-03-19 11:45:17 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void checkColumn(const IColumn & column);
|
|
|
|
};
|
2018-06-28 18:12:29 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
Dictionary dictionary;
|
|
|
|
Index idx;
|
2018-06-28 18:12:29 +00:00
|
|
|
|
2018-07-09 18:19:03 +00:00
|
|
|
void compactInplace();
|
|
|
|
void compactIfSharedDictionary();
|
2018-03-19 11:45:17 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|