From b7ee1be6df1db69501198740c8731b3370b72511 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 14 Mar 2018 13:47:13 +0300
Subject: [PATCH] added ColumnUnique [#CLICKHOUSE-3621]

---
 dbms/src/Columns/ColumnUnique.h               | 56 +++++++++++++++++++
 dbms/src/Columns/IColumnUnique.h              | 39 +++++++++++++
 dbms/src/DataTypes/DataTypeWithDictionary.cpp |  0
 dbms/src/DataTypes/DataTypeWithDictionary.h   | 41 ++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 dbms/src/Columns/ColumnUnique.h
 create mode 100644 dbms/src/Columns/IColumnUnique.h
 create mode 100644 dbms/src/DataTypes/DataTypeWithDictionary.cpp
 create mode 100644 dbms/src/DataTypes/DataTypeWithDictionary.h

diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h
new file mode 100644
index 00000000000..75c82e6ab06
--- /dev/null
+++ b/dbms/src/Columns/ColumnUnique.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+/// Implementation of IColumnUnique parameterized by the concrete column type (ColumnType).
+template <typename ColumnType>
+class ColumnUnique : public IColumnUnique
+{
+public:
+
+    ColumnPtr getColumn() const override;
+    size_t insert(const Field & x) override;
+    ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
+    size_t insertData(const char * pos, size_t length) override;
+
+private:
+
+    /// Refers to a value of a column by (byte offset from the data of row 0, size) rather than by pointer;
+    /// the StringRef is rebuilt from the column's current data on every conversion.
+    struct StringRefWrapper
+    {
+        const ColumnType * column = nullptr;
+        size_t offset = 0;
+        size_t size = 0;
+
+        StringRefWrapper(const ColumnType * column, size_t row) : column(column)
+        {
+            auto ref = column->getDataAt(row);
+            offset = ref.data - column->getDataAt(0).data;
+            size = ref.size;
+        }
+
+        operator StringRef() const { return StringRef(column->getDataAt(0).data + offset, size); }
+
+        /// Equal if both denote the same slot of the same column (checked first) or the referenced StringRefs compare equal.
+        bool operator== (const StringRefWrapper & other) const
+        {
+            return (column == other.column && offset == other.offset && size == other.size)
+                || StringRef(*this) == other;
+        }
+    };
+    using IndexType = HashMap;
+
+
+    MutableColumnPtr column;
+    /// Lazy initialized.
+    std::unique_ptr<IndexType> index;
+
+
+};
+
+}
diff --git a/dbms/src/Columns/IColumnUnique.h b/dbms/src/Columns/IColumnUnique.h
new file mode 100644
index 00000000000..d4af8d0f6c4
--- /dev/null
+++ b/dbms/src/Columns/IColumnUnique.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include
+
+namespace DB
+{
+
+class IColumnUnique
+{
+public:
+    /// Polymorphic interface: a virtual destructor makes deletion through IColumnUnique * well-defined.
+    virtual ~IColumnUnique() = default;
+
+    /// Column always contains Null if it's Nullable and empty string if it's String or Nullable(String).
+    /// So, size may be greater than the number of inserted unique values.
+    virtual ColumnPtr getColumn() const = 0;
+    virtual size_t size() const { return getColumn()->size(); }
+
+    /// Appends new value at the end of column (column's size is increased by 1).
+    /// Is used to transform raw strings to Blocks (for example, inside input format parsers)
+    virtual size_t insert(const Field & x) = 0;
+
+    /// Appends range of elements from other column.
+    /// Could be used to concatenate columns.
+    virtual ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
+
+    /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
+    /// Is used to optimize some computations (in aggregation, for example).
+    /// Parameter length could be ignored if column values have fixed size.
+    virtual size_t insertData(const char * pos, size_t length) = 0;
+
+    /// Position for the given value; has() treats a result equal to size() as "value is not present".
+    virtual size_t getInsertionPoint(const char * pos, size_t length) const = 0;
+
+    /// True if getInsertionPoint() returns something other than size() for the given value.
+    virtual bool has(const char * pos, size_t length) const { return getInsertionPoint(pos, length) != size(); }
+};
+
+}
diff --git a/dbms/src/DataTypes/DataTypeWithDictionary.cpp b/dbms/src/DataTypes/DataTypeWithDictionary.cpp
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/src/DataTypes/DataTypeWithDictionary.h b/dbms/src/DataTypes/DataTypeWithDictionary.h
new file mode 100644
index 00000000000..631e3e42970
--- /dev/null
+++ b/dbms/src/DataTypes/DataTypeWithDictionary.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+
+
+/// Values are addressed by one running index: indexes below prev_dictionary_size are served by
+/// prev_dictionary, the remaining ones by this object's own column.
+template
+class CountingRecursiveDictionary
+{
+public:
+    using DictionaryType = HashMap;
+
+    /// Forwards the value to insertData() of the own column.
+    void insertData(const char * pos, size_t length) { column->insertData(pos, length); }
+
+    /// Returns the n-th value: from prev_dictionary if n < prev_dictionary_size,
+    /// otherwise from the own column at position n - prev_dictionary_size.
+    StringRef getDataAt(size_t n) const
+    {
+        if (n < prev_dictionary_size)
+            return prev_dictionary->getDataAt(n);
+        else
+            return column->getDataAt(n - prev_dictionary_size);
+    }
+
+private:
+    ColumnPtr column;
+    DictionaryType dictionary;
+
+    std::shared_ptr prev_dictionary;
+    size_t prev_dictionary_size = 0;
+};
+
+}