2018-03-29 14:04:24 +00:00
|
|
|
#pragma once
|
2018-03-19 11:45:17 +00:00
|
|
|
#include <Columns/IColumn.h>
|
|
|
|
#include <Columns/IColumnUnique.h>
|
2018-04-17 17:47:27 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
|
|
|
#include <AggregateFunctions/AggregateFunctionCount.h>
|
|
|
|
#include "ColumnsNumber.h"
|
2018-03-19 11:45:17 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes used by this header. Only declared here (extern); the definition lives in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
}
|
|
|
|
|
|
|
|
/** Column that keeps its values in dictionary-encoded form.
  *
  * `column_unique` is the dictionary (used through the IColumnUnique interface) and `indexes` stores,
  * for every row, the position of that row's value inside the dictionary.
  * Reading row n is therefore column_unique[indexes[n]].
  *
  * Most read-only accessors simply translate the row number through `indexes` and forward to the dictionary;
  * most row-reshaping operations (cut, filter, permute, ...) reshape `indexes` only and keep the same dictionary.
  */
class ColumnWithDictionary final : public COWPtrHelper<IColumn, ColumnWithDictionary>
{
    friend class COWPtrHelper<IColumn, ColumnWithDictionary>;

    /// Constructors are private: instances are obtained through create().
    /// They are only declared here; the definitions are not part of this class body.
    ColumnWithDictionary(MutableColumnPtr && column_unique, MutableColumnPtr && indexes);
    ColumnWithDictionary(const ColumnWithDictionary & other);

public:
    using Base = COWPtrHelper<IColumn, ColumnWithDictionary>;

    /** Create immutable column using immutable arguments. These arguments may be shared with other columns.
      * Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
      */
    static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_)
    {
        return ColumnWithDictionary::create(column_unique_->assumeMutable(), indexes_->assumeMutable());
    }

    /// Create a mutable column from mutable arguments (enabled only when every argument is a mutable column).
    template <typename ... Args, typename = typename std::enable_if<IsMutableColumns<Args ...>::value>::type>
    static MutablePtr create(Args &&... args) { return Base::create(std::forward<Args>(args)...); }

    std::string getName() const override { return "ColumnWithDictionary"; }
    const char * getFamilyName() const override { return "ColumnWithDictionary"; }

    /// Builds an ordinary column by indexing the dictionary's nested column with `indexes`.
    /// 0 is passed as the limit (NOTE(review): presumably "no limit" per the IColumn::index convention - confirm).
    ColumnPtr convertToFullColumn() const
    {
        return getUnique()->getNestedColumn()->index(indexes, 0);
    }

    /// Returns a column whose indexes are `indexes->cloneResized(size)` and whose dictionary is obtained
    /// by calling mutate() on a copy of the dictionary pointer.
    MutableColumnPtr cloneResized(size_t size) const override
    {
        auto unique_ptr = column_unique;
        return ColumnWithDictionary::create((*std::move(unique_ptr)).mutate(), indexes->cloneResized(size));
    }

    /// Number of rows, i.e. number of stored indexes (not the dictionary size).
    size_t size() const override { return indexes->size(); }

    /// Row accessors: translate row n into a dictionary position and forward to the dictionary.
    Field operator[](size_t n) const override { return (*column_unique)[indexes->getUInt(n)]; }
    void get(size_t n, Field & res) const override { column_unique->get(indexes->getUInt(n), res); }

    StringRef getDataAt(size_t n) const override { return column_unique->getDataAt(indexes->getUInt(n)); }

    StringRef getDataAtWithTerminatingZero(size_t n) const override
    {
        return column_unique->getDataAtWithTerminatingZero(indexes->getUInt(n));
    }

    UInt64 get64(size_t n) const override { return column_unique->get64(indexes->getUInt(n)); }

    UInt64 getUInt(size_t n) const override { return column_unique->getUInt(indexes->getUInt(n)); }
    Int64 getInt(size_t n) const override { return column_unique->getInt(indexes->getUInt(n)); }
    bool isNullAt(size_t n) const override { return column_unique->isNullAt(indexes->getUInt(n)); }

    /// Cuts the indexes only; the result refers to the same dictionary.
    ColumnPtr cut(size_t start, size_t length) const override
    {
        return ColumnWithDictionary::create(column_unique, indexes->cut(start, length));
    }

    /// Adds the value to the dictionary via uniqueInsert and appends the returned position to the indexes.
    void insert(const Field & x) override { getIndexes()->insert(Field(UInt64(getUnique()->uniqueInsert(x)))); }

    /// Inserts row n of `src`, where `src` is an ordinary (not dictionary-encoded) column
    /// compatible with the dictionary's nested column.
    void insertFromFullColumn(const IColumn & src, size_t n)
    {
        getIndexes()->insert(getUnique()->uniqueInsertFrom(src, n));
    }

    /// Inserts row n of another ColumnWithDictionary.
    /// Throws ILLEGAL_COLUMN if `src` is not a ColumnWithDictionary.
    void insertFrom(const IColumn & src, size_t n) override
    {
        if (!typeid_cast<const ColumnWithDictionary *>(&src))
            throw Exception("Expected ColumnWithDictionary, got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN);

        auto & src_with_dict = static_cast<const ColumnWithDictionary &>(src);

        /// Position of the value inside the source dictionary; the value itself is re-inserted into our dictionary.
        size_t idx = src_with_dict.getIndexes()->getUInt(n);
        insertFromFullColumn(*src_with_dict.getUnique()->getNestedColumn(), idx);
    }

    /// Inserts `length` rows of the ordinary column `src`, starting at `start`.
    void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
    {
        auto inserted_indexes = getUnique()->uniqueInsertRangeFrom(src, start, length);
        getIndexes()->insertRangeFrom(*inserted_indexes, 0, length);
    }

    /// Inserts `length` rows of another ColumnWithDictionary, starting at `start`.
    /// Throws ILLEGAL_COLUMN if `src` is not a ColumnWithDictionary.
    void insertRangeFrom(const IColumn & src, size_t start, size_t length) override
    {
        if (!typeid_cast<const ColumnWithDictionary *>(&src))
            throw Exception("Expected ColumnWithDictionary, got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN);

        auto & src_with_dict = static_cast<const ColumnWithDictionary &>(src);
        auto & src_nested = src_with_dict.getUnique()->getNestedColumn();

        /// The whole source dictionary is inserted (independently of start/length);
        /// inserted_idx maps a source dictionary position to a position in our dictionary.
        auto inserted_idx = getUnique()->uniqueInsertRangeFrom(*src_nested, 0, src_nested->size());

        /// Remap the requested slice of source indexes through that mapping.
        auto idx = inserted_idx->index(src_with_dict.getIndexes()->cut(start, length), 0);
        getIndexes()->insertRangeFrom(*idx, 0, length);
    }

    void insertData(const char * pos, size_t length) override
    {
        getIndexes()->insert(Field(UInt64(getUnique()->uniqueInsertData(pos, length))));
    }

    void insertDataWithTerminatingZero(const char * pos, size_t length) override
    {
        getIndexes()->insert(Field(UInt64(getUnique()->uniqueInsertDataWithTerminatingZero(pos, length))));
    }

    /// Appends the dictionary position that the dictionary reports for the default value.
    void insertDefault() override
    {
        getIndexes()->insert(getUnique()->getDefaultValueIndex());
    }

    /// Removes the last n indexes; the dictionary is left untouched.
    void popBack(size_t n) override { getIndexes()->popBack(n); }

    StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override
    {
        return getUnique()->serializeValueIntoArena(indexes->getUInt(n), arena, begin);
    }

    const char * deserializeAndInsertFromArena(const char * pos) override
    {
        /// Out-parameter filled by the dictionary; initialized so it never holds an indeterminate value.
        const char * new_pos = nullptr;
        getIndexes()->insert(getUnique()->uniqueDeserializeAndInsertFromArena(pos, new_pos));
        return new_pos;
    }

    void updateHashWithValue(size_t n, SipHash & hash) const override
    {
        getUnique()->updateHashWithValue(indexes->getUInt(n), hash);
    }

    /// The operations below reshape the indexes only; the result refers to the same dictionary.
    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
    {
        return ColumnWithDictionary::create(column_unique, indexes->filter(filt, result_size_hint));
    }

    ColumnPtr permute(const Permutation & perm, size_t limit) const override
    {
        return ColumnWithDictionary::create(column_unique, indexes->permute(perm, limit));
    }

    ColumnPtr index(const ColumnPtr & indexes_, size_t limit) const override
    {
        return ColumnWithDictionary::create(column_unique, indexes->index(indexes_, limit));
    }

    /// Compares row n of this column with row m of `rhs`.
    /// `rhs` is cast to ColumnWithDictionary without a check, so the caller must pass a column of this type.
    int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override
    {
        const auto & column_with_dictionary = static_cast<const ColumnWithDictionary &>(rhs);
        size_t n_index = indexes->getUInt(n);
        size_t m_index = column_with_dictionary.indexes->getUInt(m);
        return getUnique()->compareAt(n_index, m_index, *column_with_dictionary.column_unique, nan_direction_hint);
    }

    /** Fills `res` with row numbers ordered by value.
      * The dictionary is sorted first, then every dictionary entry is expanded into the rows that refer to it.
      * limit == 0 is treated as "no limit" (the IColumn::getPermutation convention); otherwise at most
      * `limit` row numbers are produced.
      */
    void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override
    {
        const size_t indexes_size = indexes->size();

        if (limit == 0)
            limit = indexes_size;

        /// The dictionary is sorted completely: it may contain entries that no row refers to,
        /// so sorting only its first `limit` entries would not guarantee `limit` correctly ordered rows.
        const size_t unique_size = getUnique()->size();
        Permutation unique_perm;
        getUnique()->getPermutation(reverse, unique_size, nan_direction_hint, unique_perm);

        /// TODO: optimize with sse.

        /// For every dictionary position, collect the rows that refer to it.
        std::vector<std::vector<size_t>> rows_per_index(unique_size);
        for (size_t row = 0; row < indexes_size; ++row)
            rows_per_index[indexes->getUInt(row)].push_back(row);

        /// Replicate permutation: walk dictionary entries in sorted order and emit their rows.
        const size_t perm_size = std::min(indexes_size, limit);
        res.resize(perm_size);
        size_t perm_index = 0;
        for (size_t pos = 0; pos < unique_perm.size() && perm_index < perm_size; ++pos)
        {
            for (auto row_index : rows_per_index[unique_perm[pos]])
            {
                res[perm_index] = row_index;
                ++perm_index;

                if (perm_index == perm_size)
                    break;
            }
        }
    }

    ColumnPtr replicate(const Offsets & offsets) const override
    {
        return ColumnWithDictionary::create(column_unique, indexes->replicate(offsets));
    }

    /// Scatters the indexes, then wraps every resulting index column into a ColumnWithDictionary
    /// whose dictionary is obtained by calling mutate() on a copy of the dictionary pointer.
    std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override
    {
        auto columns = indexes->scatter(num_columns, selector);

        for (auto & column : columns)
        {
            auto unique_ptr = column_unique;
            column = ColumnWithDictionary::create((*std::move(unique_ptr)).mutate(), std::move(column));
        }

        return columns;
    }

    /// Declared only; the definition is not part of this class body.
    void gather(ColumnGathererStream & gatherer_stream) override;

    /// Forwards to the dictionary.
    /// NOTE(review): this yields extremes of the dictionary, which may contain values no row refers to - confirm intended.
    void getExtremes(Field & min, Field & max) const override { column_unique->getExtremes(min, max); }

    /// Reserves space for n indexes; the dictionary is left untouched.
    void reserve(size_t n) override { getIndexes()->reserve(n); }

    size_t byteSize() const override { return indexes->byteSize() + column_unique->byteSize(); }
    size_t allocatedBytes() const override { return indexes->allocatedBytes() + column_unique->allocatedBytes(); }

    void forEachSubcolumn(ColumnCallback callback) override
    {
        callback(column_unique);
        callback(indexes);
    }

    /// Value-layout properties are those of the dictionary.
    bool valuesHaveFixedSize() const override { return column_unique->valuesHaveFixedSize(); }
    bool isFixedAndContiguous() const override { return column_unique->isFixedAndContiguous(); }
    size_t sizeOfValueIfFixed() const override { return column_unique->sizeOfValueIfFixed(); }
    bool isNumeric() const override { return column_unique->isNumeric(); }

    /// Dictionary access. The non-const overload goes through assumeMutable(), i.e. it does not clone a shared dictionary.
    IColumnUnique * getUnique() { return static_cast<IColumnUnique *>(column_unique->assumeMutable().get()); }
    /// Read-only access needs no mutable pointer, so the raw pointer is taken directly (same as getIndexes() const).
    const IColumnUnique * getUnique() const { return static_cast<const IColumnUnique *>(column_unique.get()); }
    const ColumnPtr & getUniquePtr() const { return column_unique; }

    /// Index access. The non-const overload goes through assumeMutable(), i.e. it does not clone shared indexes.
    IColumn * getIndexes() { return indexes->assumeMutable().get(); }
    const IColumn * getIndexes() const { return indexes.get(); }
    const ColumnPtr & getIndexesPtr() const { return indexes; }

    /// Replaces the index column; the dictionary is left untouched.
    void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); }

    bool withDictionary() const override { return true; }

private:
    /// Dictionary of values; used through the IColumnUnique interface (see getUnique()).
    ColumnPtr column_unique;
    /// For every row, the position of its value inside column_unique.
    ColumnPtr indexes;
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|