mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 19:12:03 +00:00
added ColumnUnique [#CLICKHOUSE-3621]
This commit is contained in:
parent
b7475b1d2c
commit
b7ee1be6df
50
dbms/src/Columns/ColumnUnique.h
Normal file
50
dbms/src/Columns/ColumnUnique.h
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#include <Columns/IColumnUnique.h>
|
||||||
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename ColumnType, typename IndexType, bool is_nullable, bool may_has_empty_data>
|
||||||
|
class ColumnUnique : public IColumnUnique
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
|
||||||
|
ColumnPtr getColumn() const overrdie;
|
||||||
|
size_t insert(const Field & x) overrdie;
|
||||||
|
ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
|
||||||
|
size_t insertData(const char * pos, size_t length) overrdie;
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
struct StringRefWrapper
|
||||||
|
{
|
||||||
|
const ColumnType * column = nullptr;
|
||||||
|
size_t offset = 0;
|
||||||
|
size_t size = 0;
|
||||||
|
|
||||||
|
StringRefWrapper(const ColumnType * column, size_t row) : column(column)
|
||||||
|
{
|
||||||
|
auto ref = column->getDataAt(row);
|
||||||
|
offset = ref.data - column->getDataAt(0).data;
|
||||||
|
size = res.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
operator StringRef() const { return StringRef(column->getDataAt(0).data + offset, size); }
|
||||||
|
|
||||||
|
bool operator== (const StringRefWrapper & other)
|
||||||
|
{
|
||||||
|
return (column == other.column && offset == other.offset && size == other.size)
|
||||||
|
|| StringRef(*this) == other;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
using IndexType = HashMap<StringRefWrapper, IndexType, StringRefHash>;
|
||||||
|
|
||||||
|
|
||||||
|
MutableColumnPtr column;
|
||||||
|
/// Lazy initialized.
|
||||||
|
std::unique_ptr<IndexType> index;
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
32
dbms/src/Columns/IColumnUnique.h
Normal file
32
dbms/src/Columns/IColumnUnique.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#include <Columns/IColumn.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
class IColumnUnique
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// Column always contains Null if it's Nullable and empty string if it's String or Nullable(String).
|
||||||
|
/// So, size may be greater than the number of inserted unique values.
|
||||||
|
virtual ColumnPtr getColumn() const = 0;
|
||||||
|
virtual size_t size() const { return getColumn()->size(); }
|
||||||
|
|
||||||
|
/// Appends new value at the end of column (column's size is increased by 1).
|
||||||
|
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
|
||||||
|
virtual size_t insert(const Field & x) = 0;
|
||||||
|
|
||||||
|
/// Appends range of elements from other column.
|
||||||
|
/// Could be used to concatenate columns.
|
||||||
|
virtual ColumnPtr insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
|
||||||
|
|
||||||
|
/// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
|
||||||
|
/// Is used to optimize some computations (in aggregation, for example).
|
||||||
|
/// Parameter length could be ignored if column values have fixed size.
|
||||||
|
virtual size_t insertData(const char * pos, size_t length) = 0;
|
||||||
|
|
||||||
|
virtual size_t getInsertionPoint(const char * pos, size_t length) const = 0;
|
||||||
|
|
||||||
|
virtual bool has(const char * pos, size_t length) const { return getInsertionPoint(pos, length) != size(); }
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
0
dbms/src/DataTypes/DataTypeWithDictionary.cpp
Normal file
0
dbms/src/DataTypes/DataTypeWithDictionary.cpp
Normal file
34
dbms/src/DataTypes/DataTypeWithDictionary.h
Normal file
34
dbms/src/DataTypes/DataTypeWithDictionary.h
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
#include <Columns/IColumn.h>
|
||||||
|
#include <memory>
|
||||||
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
template <typename IndexType>
|
||||||
|
class CountingRecursiveDictionary
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using DictionaryType = HashMap<StringRef, IndexType, StringRefHash>;
|
||||||
|
|
||||||
|
void insertData(const char * pos, size_t length) { column->insertData(pos, length); }
|
||||||
|
|
||||||
|
StringRef getDataAt(size_t n) const
|
||||||
|
{
|
||||||
|
if (n < prev_dictionary_size)
|
||||||
|
return prev_dictionary->getDataAt(n);
|
||||||
|
else
|
||||||
|
return column->getDataAt(n - prev_dictionary_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
ColumnPtr column;
|
||||||
|
DictionaryType dictionary;
|
||||||
|
|
||||||
|
std::shared_ptr<CountingRecursiveDictionary> prev_dictionary;
|
||||||
|
size_t prev_dictionary_size = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user