initial more or less working version

This commit is contained in:
myrrc 2020-07-16 18:08:51 +03:00
parent 9e776e6907
commit bc8e8c25e9
4 changed files with 444 additions and 288 deletions

View File

@ -90,14 +90,13 @@ public:
void protect() override { column_holder->protect(); }
size_t allocatedBytes() const override
{
return column_holder->allocatedBytes()
+ index.allocatedBytes()
return column_holder->allocatedBytes() + reverse_index.allocatedBytes()
+ (nested_null_mask ? nested_null_mask->allocatedBytes() : 0);
}
void forEachSubcolumn(IColumn::ColumnCallback callback) override
{
callback(column_holder);
index.setColumn(getRawColumnPtr());
reverse_index.setColumn(getRawColumnPtr());
if (is_nullable)
nested_column_nullable = ColumnNullable::create(column_holder, nested_null_mask);
}
@ -109,16 +108,21 @@ public:
return false;
}
const UInt64 * tryGetSavedHash() const override { return index.tryGetSavedHash(); }
const UInt64 * tryGetSavedHash() const override { return reverse_index.tryGetSavedHash(); }
UInt128 getHash() const override { return hash.getHash(*getRawColumnPtr()); }
private:
inline UInt64 getIndexByValue(const StringRef& value) const override
{
return reverse_index.getInsertionPointConst(value);
}
private:
IColumn::WrappedPtr column_holder;
bool is_nullable;
size_t size_of_value_if_fixed = 0;
ReverseIndex<UInt64, ColumnType> index;
ReverseIndex<UInt64, ColumnType> reverse_index;
/// For DataTypeNullable, stores null map.
IColumn::WrappedPtr nested_null_mask;
@ -170,20 +174,19 @@ ColumnUnique<ColumnType>::ColumnUnique(const ColumnUnique & other)
: column_holder(other.column_holder)
, is_nullable(other.is_nullable)
, size_of_value_if_fixed(other.size_of_value_if_fixed)
, index(numSpecialValues(is_nullable), 0)
, reverse_index(numSpecialValues(is_nullable), 0)
{
index.setColumn(getRawColumnPtr());
reverse_index.setColumn(getRawColumnPtr());
createNullMask();
}
template <typename ColumnType>
ColumnUnique<ColumnType>::ColumnUnique(const IDataType & type)
: is_nullable(type.isNullable())
, index(numSpecialValues(is_nullable), 0)
: is_nullable(type.isNullable()), reverse_index(numSpecialValues(is_nullable), 0)
{
const auto & holder_type = is_nullable ? *static_cast<const DataTypeNullable &>(type).getNestedType() : type;
column_holder = holder_type.createColumn()->cloneResized(numSpecialValues());
index.setColumn(getRawColumnPtr());
reverse_index.setColumn(getRawColumnPtr());
createNullMask();
if (column_holder->valuesHaveFixedSize())
@ -192,16 +195,14 @@ ColumnUnique<ColumnType>::ColumnUnique(const IDataType & type)
template <typename ColumnType>
ColumnUnique<ColumnType>::ColumnUnique(MutableColumnPtr && holder, bool is_nullable_)
: column_holder(std::move(holder))
, is_nullable(is_nullable_)
, index(numSpecialValues(is_nullable_), 0)
: column_holder(std::move(holder)), is_nullable(is_nullable_), reverse_index(numSpecialValues(is_nullable_), 0)
{
if (column_holder->size() < numSpecialValues())
throw Exception("Too small holder column for ColumnUnique.", ErrorCodes::ILLEGAL_COLUMN);
if (isColumnNullable(*column_holder))
throw Exception("Holder column for ColumnUnique can't be nullable.", ErrorCodes::ILLEGAL_COLUMN);
index.setColumn(getRawColumnPtr());
reverse_index.setColumn(getRawColumnPtr());
createNullMask();
if (column_holder->valuesHaveFixedSize())
@ -288,12 +289,10 @@ size_t ColumnUnique<ColumnType>::uniqueInsertFrom(const IColumn & src, size_t n)
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t length)
{
auto column = getRawColumnPtr();
if (auto index = getNestedTypeDefaultValueIndex(); getRawColumnPtr()->getDataAt(index) == StringRef(pos, length))
return index;
if (column->getDataAt(getNestedTypeDefaultValueIndex()) == StringRef(pos, length))
return getNestedTypeDefaultValueIndex();
auto insertion_point = index.insert(StringRef(pos, length));
auto insertion_point = reverse_index.insert({pos, length});
updateNullMask();
@ -320,6 +319,7 @@ StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & ar
return StringRef(nested_ref.data - s, nested_ref.size + s);
}
return column_holder->serializeValueIntoArena(n, arena, begin);
}
@ -513,14 +513,14 @@ MutableColumnPtr ColumnUnique<ColumnType>::uniqueInsertRangeImpl(
if (secondary_index && next_position >= max_dictionary_size)
{
auto insertion_point = index.getInsertionPoint(ref);
if (insertion_point == index.lastInsertionPoint())
auto insertion_point = reverse_index.getInsertionPoint(ref);
if (insertion_point == reverse_index.lastInsertionPoint())
res = insert_key(ref, *secondary_index);
else
positions[num_added_rows] = insertion_point;
}
else
res = insert_key(ref, index);
res = insert_key(ref, reverse_index);
if (res)
return res;

View File

@ -9,6 +9,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
/// Sort of a dictionary
class IColumnUnique : public IColumn
{
public:
@ -68,6 +69,20 @@ public:
const char * getFamilyName() const override { return "ColumnUnique"; }
TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); }
/**
* Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hastable.
*
* @see DB::ReverseIndex
* @see DB::ColumnUnique
*
* The most common example uses https://clickhouse.tech/docs/en/sql-reference/data-types/lowcardinality/ columns.
* Consider data type @e LC(String). The inner type here is @e String which is more or less a contigous memory
* region, so it can be easily represented as a @e StringRef. So we pass that ref to this function and get its
* index in the dictionary, which can be used to operate with the indices column.
*/
virtual inline UInt64 getIndexByValue(const StringRef& value) const = 0;
void insert(const Field &) override
{
throw Exception("Method insert is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -325,9 +325,16 @@ public:
static constexpr bool use_saved_hash = !is_numeric_column;
UInt64 insert(const StringRef & data);
/// If index is not built, builds it.
UInt64 getInsertionPoint(const StringRef & data);
/// If index is not found, throws a ErrorCodes::LOGICAL_ERROR
UInt64 getInsertionPointConst(const StringRef & data) const;
UInt64 lastInsertionPoint() const { return size() + base_index; }
ColumnType * getColumn() const { return column; }
size_t size() const;
@ -513,4 +520,19 @@ UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef &
return iterator == index->end() ? size() + base_index : iterator->getValue();
}
template <typename IndexType, typename ColumnType>
UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPointConst(const StringRef & data) const
{
if (!index)
throw Exception("No built index in ReverseIndex", ErrorCodes::LOGICAL_ERROR);
using IteratorType = typename IndexMapType::iterator;
IteratorType iterator;
auto hash = getHash(data);
iterator = index->reverseIndexFind(data, hash);
return iterator == index->end() ? size() + base_index : iterator->getValue();
}
}

File diff suppressed because it is too large Load Diff