mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-25 11:10:49 +00:00
initial more or less working version
This commit is contained in:
parent
9e776e6907
commit
bc8e8c25e9
@ -90,14 +90,13 @@ public:
|
||||
void protect() override { column_holder->protect(); }
|
||||
size_t allocatedBytes() const override
|
||||
{
|
||||
return column_holder->allocatedBytes()
|
||||
+ index.allocatedBytes()
|
||||
return column_holder->allocatedBytes() + reverse_index.allocatedBytes()
|
||||
+ (nested_null_mask ? nested_null_mask->allocatedBytes() : 0);
|
||||
}
|
||||
void forEachSubcolumn(IColumn::ColumnCallback callback) override
|
||||
{
|
||||
callback(column_holder);
|
||||
index.setColumn(getRawColumnPtr());
|
||||
reverse_index.setColumn(getRawColumnPtr());
|
||||
if (is_nullable)
|
||||
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
|
||||
}
|
||||
@ -109,16 +108,21 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
const UInt64 * tryGetSavedHash() const override { return index.tryGetSavedHash(); }
|
||||
const UInt64 * tryGetSavedHash() const override { return reverse_index.tryGetSavedHash(); }
|
||||
|
||||
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
|
||||
|
||||
private:
|
||||
/// Looks up `value` in `reverse_index` without modifying it and returns whatever
/// ReverseIndex::getInsertionPointConst reports for that value.
inline UInt64 getIndexByValue(const StringRef& value) const override
|
||||
{
|
||||
return reverse_index.getInsertionPointConst(value);
|
||||
}
|
||||
|
||||
private:
|
||||
IColumn::WrappedPtr column_holder;
|
||||
bool is_nullable;
|
||||
size_t size_of_value_if_fixed = 0;
|
||||
ReverseIndex<UInt64, ColumnType> index;
|
||||
|
||||
ReverseIndex<UInt64, ColumnType> reverse_index;
|
||||
|
||||
/// For DataTypeNullable, stores null map.
|
||||
IColumn::WrappedPtr nested_null_mask;
|
||||
@ -170,20 +174,19 @@ ColumnUnique<ColumnType>::ColumnUnique(const ColumnUnique & other)
|
||||
: column_holder(other.column_holder)
|
||||
, is_nullable(other.is_nullable)
|
||||
, size_of_value_if_fixed(other.size_of_value_if_fixed)
|
||||
, index(numSpecialValues(is_nullable), 0)
|
||||
, reverse_index(numSpecialValues(is_nullable), 0)
|
||||
{
|
||||
index.setColumn(getRawColumnPtr());
|
||||
reverse_index.setColumn(getRawColumnPtr());
|
||||
createNullMask();
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
ColumnUnique<ColumnType>::ColumnUnique(const IDataType & type)
|
||||
: is_nullable(type.isNullable())
|
||||
, index(numSpecialValues(is_nullable), 0)
|
||||
: is_nullable(type.isNullable()), reverse_index(numSpecialValues(is_nullable), 0)
|
||||
{
|
||||
const auto & holder_type = is_nullable ? *static_cast<const DataTypeNullable &>(type).getNestedType() : type;
|
||||
column_holder = holder_type.createColumn()->cloneResized(numSpecialValues());
|
||||
index.setColumn(getRawColumnPtr());
|
||||
reverse_index.setColumn(getRawColumnPtr());
|
||||
createNullMask();
|
||||
|
||||
if (column_holder->valuesHaveFixedSize())
|
||||
@ -192,16 +195,14 @@ ColumnUnique<ColumnType>::ColumnUnique(const IDataType & type)
|
||||
|
||||
template <typename ColumnType>
|
||||
ColumnUnique<ColumnType>::ColumnUnique(MutableColumnPtr && holder, bool is_nullable_)
|
||||
: column_holder(std::move(holder))
|
||||
, is_nullable(is_nullable_)
|
||||
, index(numSpecialValues(is_nullable_), 0)
|
||||
: column_holder(std::move(holder)), is_nullable(is_nullable_), reverse_index(numSpecialValues(is_nullable_), 0)
|
||||
{
|
||||
if (column_holder->size() < numSpecialValues())
|
||||
throw Exception("Too small holder column for ColumnUnique.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
if (isColumnNullable(*column_holder))
|
||||
throw Exception("Holder column for ColumnUnique can't be nullable.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
index.setColumn(getRawColumnPtr());
|
||||
reverse_index.setColumn(getRawColumnPtr());
|
||||
createNullMask();
|
||||
|
||||
if (column_holder->valuesHaveFixedSize())
|
||||
@ -288,12 +289,10 @@ size_t ColumnUnique<ColumnType>::uniqueInsertFrom(const IColumn & src, size_t n)
|
||||
template <typename ColumnType>
|
||||
size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t length)
|
||||
{
|
||||
auto column = getRawColumnPtr();
|
||||
if (auto index = getNestedTypeDefaultValueIndex(); getRawColumnPtr()->getDataAt(index) == StringRef(pos, length))
|
||||
return index;
|
||||
|
||||
if (column->getDataAt(getNestedTypeDefaultValueIndex()) == StringRef(pos, length))
|
||||
return getNestedTypeDefaultValueIndex();
|
||||
|
||||
auto insertion_point = index.insert(StringRef(pos, length));
|
||||
auto insertion_point = reverse_index.insert({pos, length});
|
||||
|
||||
updateNullMask();
|
||||
|
||||
@ -320,6 +319,7 @@ StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & ar
|
||||
return StringRef(nested_ref.data - s, nested_ref.size + s);
|
||||
}
|
||||
|
||||
|
||||
return column_holder->serializeValueIntoArena(n, arena, begin);
|
||||
}
|
||||
|
||||
@ -513,14 +513,14 @@ MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeImpl(
|
||||
|
||||
if (secondary_index && next_position >= max_dictionary_size)
|
||||
{
|
||||
auto insertion_point = index.getInsertionPoint(ref);
|
||||
if (insertion_point == index.lastInsertionPoint())
|
||||
auto insertion_point = reverse_index.getInsertionPoint(ref);
|
||||
if (insertion_point == reverse_index.lastInsertionPoint())
|
||||
res = insert_key(ref, *secondary_index);
|
||||
else
|
||||
positions[num_added_rows] = insertion_point;
|
||||
}
|
||||
else
|
||||
res = insert_key(ref, index);
|
||||
res = insert_key(ref, reverse_index);
|
||||
|
||||
if (res)
|
||||
return res;
|
||||
|
@ -9,6 +9,7 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
/// Sort of a dictionary
|
||||
class IColumnUnique : public IColumn
|
||||
{
|
||||
public:
|
||||
@ -68,6 +69,20 @@ public:
|
||||
const char * getFamilyName() const override { return "ColumnUnique"; }
|
||||
TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); }
|
||||
|
||||
/**
|
||||
* Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
|
||||
* index in the DB::ColumnUnique::reverse_index hash table.
|
||||
*
|
||||
* @see DB::ReverseIndex
|
||||
* @see DB::ColumnUnique
|
||||
*
|
||||
* The most common example uses https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/ columns.
|
||||
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contiguous memory
|
||||
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
|
||||
* index in the dictionary, which can be used to operate with the indices column.
|
||||
*/
|
||||
virtual inline UInt64 getIndexByValue(const StringRef& value) const = 0;
|
||||
|
||||
void insert(const Field &) override
|
||||
{
|
||||
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
@ -325,9 +325,16 @@ public:
|
||||
static constexpr bool use_saved_hash = !is_numeric_column;
|
||||
|
||||
UInt64 insert(const StringRef & data);
|
||||
|
||||
/// If index is not built, builds it.
|
||||
UInt64 getInsertionPoint(const StringRef & data);
|
||||
|
||||
/// If index is not built, throws ErrorCodes::LOGICAL_ERROR. If data is not found, returns lastInsertionPoint().
|
||||
UInt64 getInsertionPointConst(const StringRef & data) const;
|
||||
|
||||
UInt64 lastInsertionPoint() const { return size() + base_index; }
|
||||
|
||||
|
||||
ColumnType * getColumn() const { return column; }
|
||||
size_t size() const;
|
||||
|
||||
@ -513,4 +520,19 @@ UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef &
|
||||
return iterator == index->end() ? size() + base_index : iterator->getValue();
|
||||
}
|
||||
|
||||
/// Const lookup of `data` in the reverse index.
/// Throws ErrorCodes::LOGICAL_ERROR when `index` is null (the index has not been built).
/// Otherwise hashes `data`, searches the index with reverseIndexFind, and returns the stored
/// value of the found entry, or size() + base_index when there is no such entry.
template <typename IndexType, typename ColumnType>
|
||||
UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPointConst(const StringRef & data) const
|
||||
{
|
||||
if (!index)
|
||||
throw Exception("No built index in ReverseIndex", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
using IteratorType = typename IndexMapType::iterator;
|
||||
IteratorType iterator;
|
||||
|
||||
auto hash = getHash(data);
|
||||
iterator = index->reverseIndexFind(data, hash);
|
||||
|
||||
return iterator == index->end() ? size() + base_index : iterator->getValue();
|
||||
}
|
||||
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user