added ColumnUnique [#CLICKHOUSE-3621]

This commit is contained in:
Nikolai Kochetov 2018-03-14 13:47:13 +03:00
parent b7475b1d2c
commit b7ee1be6df
4 changed files with 116 additions and 0 deletions

View File

@ -0,0 +1,50 @@
#include <Columns/IColumnUnique.h>
#include <Common/HashTable/HashMap.h>
namespace DB
{
template <typename ColumnType, typename IndexType, bool is_nullable, bool may_has_empty_data>
class ColumnUnique : public IColumnUnique
{
public:
ColumnPtr getColumn() const overrdie;
size_t insert(const Field & x) overrdie;
ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
size_t insertData(const char * pos, size_t length) overrdie;
private:
struct StringRefWrapper
{
const ColumnType * column = nullptr;
size_t offset = 0;
size_t size = 0;
StringRefWrapper(const ColumnType * column, size_t row) : column(column)
{
auto ref = column->getDataAt(row);
offset = ref.data - column->getDataAt(0).data;
size = res.size;
}
operator StringRef() const { return StringRef(column->getDataAt(0).data + offset, size); }
bool operator== (const StringRefWrapper & other)
{
return (column == other.column && offset == other.offset && size == other.size)
|| StringRef(*this) == other;
}
};
using IndexType = HashMap<StringRefWrapper, IndexType, StringRefHash>;
MutableColumnPtr column;
/// Lazy initialized.
std::unique_ptr<IndexType> index;
};
}

View File

@ -0,0 +1,32 @@
#include <Columns/IColumn.h>
namespace DB
{
/// Interface of a column that holds a set of unique values and allows looking them up by their raw bytes.
class IColumnUnique
{
public:
    /// Implementations are used through this interface, so destruction through
    /// a pointer to the base must be well-defined.
    virtual ~IColumnUnique() = default;

    /// Column always contains Null if it's Nullable and empty string if it's String or Nullable(String).
    /// So, size may be greater than the number of inserted unique values.
    virtual ColumnPtr getColumn() const = 0;

    /// Number of rows in the column returned by getColumn().
    virtual size_t size() const { return getColumn()->size(); }

    /// Inserts the value held by a Field.
    /// Is used to transform raw strings to Blocks (for example, inside input format parsers).
    /// NOTE(review): the returned size_t is presumably the position of the value
    /// inside the unique column - confirm against the implementation.
    virtual size_t insert(const Field & x) = 0;

    /// Inserts `length` elements of `src` starting at `start`.
    /// NOTE(review): the returned column presumably holds the positions of the inserted
    /// values inside the unique column - confirm against the implementation.
    virtual ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;

    /// Inserts data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
    /// Is used to optimize some computations (in aggregation, for example).
    /// Parameter length could be ignored if column values have fixed size.
    virtual size_t insertData(const char * pos, size_t length) = 0;

    /// Position for the value located in the specified memory chunk.
    /// has() treats a result equal to size() as "value is not present".
    virtual size_t getInsertionPoint(const char * pos, size_t length) const = 0;

    /// True when getInsertionPoint() for the value differs from size().
    virtual bool has(const char * pos, size_t length) const { return getInsertionPoint(pos, length) != size(); }
};
}

View File

@ -0,0 +1,34 @@
#include <Columns/IColumn.h>
#include <memory>
#include <Common/HashTable/HashMap.h>
namespace DB
{
/** One level of a chain of dictionaries.
  * Positions below prev_dictionary_size are resolved by the previous dictionary;
  * the remaining positions are resolved by this level's own column.
  */
template <typename IndexType>
class CountingRecursiveDictionary
{
public:
    using DictionaryType = HashMap<StringRef, IndexType, StringRefHash>;

    /// Forwards the memory chunk [pos, pos + length) to this level's own column.
    /// NOTE(review): this calls insertData through a ColumnPtr; confirm that
    /// ColumnPtr allows mutation of the pointed-to column.
    void insertData(const char * pos, size_t length)
    {
        column->insertData(pos, length);
    }

    /// Returns the value at position `n`, delegating to the previous dictionary
    /// when `n` lies in its range and rebasing `n` onto the own column otherwise.
    StringRef getDataAt(size_t n) const
    {
        const bool stored_in_own_column = !(n < prev_dictionary_size);
        return stored_in_own_column
            ? column->getDataAt(n - prev_dictionary_size)
            : prev_dictionary->getDataAt(n);
    }

private:
    /// Values that belong to this level.
    ColumnPtr column;
    /// Hash map keyed by StringRef with IndexType values; not used by the methods visible here.
    DictionaryType dictionary;
    /// Dictionary that answers for positions [0, prev_dictionary_size).
    std::shared_ptr<CountingRecursiveDictionary> prev_dictionary;
    size_t prev_dictionary_size = 0;
};
}