Merge branch 'master' into joins

This commit is contained in:
chertus 2018-09-28 20:09:05 +03:00
commit 69b9c25801
29 changed files with 383 additions and 371 deletions

View File

@ -13,7 +13,7 @@
#include <Common/typeid_cast.h>
#include <Poco/String.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
@ -42,14 +42,14 @@ void AggregateFunctionFactory::registerFunction(const String & name, Creator cre
ErrorCodes::LOGICAL_ERROR);
}
static DataTypes convertTypesWithDictionaryToNested(const DataTypes & types)
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
{
DataTypes res_types;
res_types.reserve(types.size());
for (const auto & type : types)
{
if (auto * type_with_dict = typeid_cast<const DataTypeWithDictionary *>(type.get()))
res_types.push_back(type_with_dict->getDictionaryType());
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
res_types.push_back(low_cardinality_type->getDictionaryType());
else
res_types.push_back(type);
}
@ -63,7 +63,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
const Array & parameters,
int recursion_level) const
{
auto type_without_dictionary = convertTypesWithDictionaryToNested(argument_types);
auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types);
/// If one of the types is Nullable, we apply aggregate function combinator "Null".
@ -74,7 +74,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
if (!combinator)
throw Exception("Logical error: cannot find aggregate function combinator to apply a function to Nullable arguments.", ErrorCodes::LOGICAL_ERROR);
DataTypes nested_types = combinator->transformArguments(type_without_dictionary);
DataTypes nested_types = combinator->transformArguments(type_without_low_cardinality);
AggregateFunctionPtr nested_function;
@ -87,7 +87,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
return combinator->transformAggregateFunction(nested_function, argument_types, parameters);
}
auto res = getImpl(name, type_without_dictionary, parameters, recursion_level);
auto res = getImpl(name, type_without_low_cardinality, parameters, recursion_level);
if (!res)
throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR);
return res;

View File

@ -32,7 +32,7 @@ ColumnPtr ColumnConst::convertToFullColumn() const
ColumnPtr ColumnConst::removeLowCardinality() const
{
return ColumnConst::create(data->convertToFullColumnIfWithDictionary(), s);
return ColumnConst::create(data->convertToFullColumnIfLowCardinality(), s);
}
ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/) const

View File

@ -1,4 +1,4 @@
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnsNumber.h>
#include <DataStreams/ColumnGathererStream.h>
#include <DataTypes/NumberTraits.h>
@ -109,34 +109,34 @@ namespace
}
ColumnWithDictionary::ColumnWithDictionary(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared)
ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, MutableColumnPtr && indexes_, bool is_shared)
: dictionary(std::move(column_unique_), is_shared), idx(std::move(indexes_))
{
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insert(const Field & x)
void ColumnLowCardinality::insert(const Field & x)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertDefault()
void ColumnLowCardinality::insertDefault()
{
idx.insertPosition(getDictionary().getDefaultValueIndex());
}
void ColumnWithDictionary::insertFrom(const IColumn & src, size_t n)
void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
{
auto * src_with_dict = typeid_cast<const ColumnWithDictionary *>(&src);
auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
if (!src_with_dict)
throw Exception("Expected ColumnWithDictionary, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
if (!low_cardinality_src)
throw Exception("Expected ColumnLowCardinality, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
size_t position = src_with_dict->getIndexes().getUInt(n);
size_t position = low_cardinality_src->getIndexes().getUInt(n);
if (&src_with_dict->getDictionary() == &getDictionary())
if (&low_cardinality_src->getDictionary() == &getDictionary())
{
/// Dictionary is shared with src column. Insert only index.
idx.insertPosition(position);
@ -144,31 +144,31 @@ void ColumnWithDictionary::insertFrom(const IColumn & src, size_t n)
else
{
compactIfSharedDictionary();
const auto & nested = *src_with_dict->getDictionary().getNestedColumn();
const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
}
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertFromFullColumn(const IColumn & src, size_t n)
void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertRangeFrom(const IColumn & src, size_t start, size_t length)
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
auto * src_with_dict = typeid_cast<const ColumnWithDictionary *>(&src);
auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
if (!src_with_dict)
throw Exception("Expected ColumnWithDictionary, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
if (!low_cardinality_src)
throw Exception("Expected ColumnLowCardinality, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN);
if (&src_with_dict->getDictionary() == &getDictionary())
if (&low_cardinality_src->getDictionary() == &getDictionary())
{
/// Dictionary is shared with src column. Insert only indexes.
idx.insertPositionsRange(src_with_dict->getIndexes(), start, length);
idx.insertPositionsRange(low_cardinality_src->getIndexes(), start, length);
}
else
{
@ -176,10 +176,10 @@ void ColumnWithDictionary::insertRangeFrom(const IColumn & src, size_t start, si
/// TODO: Support native insertion from other unique column. It will help to avoid null map creation.
auto sub_idx = (*src_with_dict->getIndexes().cut(start, length)).mutate();
auto sub_idx = (*low_cardinality_src->getIndexes().cut(start, length)).mutate();
auto idx_map = mapUniqueIndex(*sub_idx);
auto src_nested = src_with_dict->getDictionary().getNestedColumn();
auto src_nested = low_cardinality_src->getDictionary().getNestedColumn();
auto used_keys = src_nested->index(*idx_map, 0);
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
@ -188,7 +188,7 @@ void ColumnWithDictionary::insertRangeFrom(const IColumn & src, size_t start, si
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
{
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
@ -196,7 +196,7 @@ void ColumnWithDictionary::insertRangeFromFullColumn(const IColumn & src, size_t
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions)
void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions)
{
Index(positions.getPtr()).check(keys.size());
compactIfSharedDictionary();
@ -205,26 +205,26 @@ void ColumnWithDictionary::insertRangeFromDictionaryEncodedColumn(const IColumn
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertData(const char * pos, size_t length)
void ColumnLowCardinality::insertData(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
idx.check(getDictionary().size());
}
void ColumnWithDictionary::insertDataWithTerminatingZero(const char * pos, size_t length)
void ColumnLowCardinality::insertDataWithTerminatingZero(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertDataWithTerminatingZero(pos, length));
idx.check(getDictionary().size());
}
StringRef ColumnWithDictionary::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}
const char * ColumnWithDictionary::deserializeAndInsertFromArena(const char * pos)
const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * pos)
{
compactIfSharedDictionary();
@ -235,26 +235,26 @@ const char * ColumnWithDictionary::deserializeAndInsertFromArena(const char * po
return new_pos;
}
void ColumnWithDictionary::gather(ColumnGathererStream & gatherer)
void ColumnLowCardinality::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
MutableColumnPtr ColumnWithDictionary::cloneResized(size_t size) const
MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
{
auto unique_ptr = dictionary.getColumnUniquePtr();
return ColumnWithDictionary::create((*std::move(unique_ptr)).mutate(), getIndexes().cloneResized(size));
return ColumnLowCardinality::create((*std::move(unique_ptr)).mutate(), getIndexes().cloneResized(size));
}
int ColumnWithDictionary::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
const auto & column_with_dictionary = static_cast<const ColumnWithDictionary &>(rhs);
const auto & low_cardinality_column = static_cast<const ColumnLowCardinality &>(rhs);
size_t n_index = getIndexes().getUInt(n);
size_t m_index = column_with_dictionary.getIndexes().getUInt(m);
return getDictionary().compareAt(n_index, m_index, column_with_dictionary.getDictionary(), nan_direction_hint);
size_t m_index = low_cardinality_column.getIndexes().getUInt(m);
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
void ColumnWithDictionary::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const
{
if (limit == 0)
limit = size();
@ -289,65 +289,65 @@ void ColumnWithDictionary::getPermutation(bool reverse, size_t limit, int nan_di
}
}
std::vector<MutableColumnPtr> ColumnWithDictionary::scatter(ColumnIndex num_columns, const Selector & selector) const
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{
auto columns = getIndexes().scatter(num_columns, selector);
for (auto & column : columns)
{
auto unique_ptr = dictionary.getColumnUniquePtr();
column = ColumnWithDictionary::create((*std::move(unique_ptr)).mutate(), std::move(column));
column = ColumnLowCardinality::create((*std::move(unique_ptr)).mutate(), std::move(column));
}
return columns;
}
void ColumnWithDictionary::setSharedDictionary(const ColumnPtr & column_unique)
void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
{
if (!empty())
throw Exception("Can't set ColumnUnique for ColumnWithDictionary because is't not empty.",
throw Exception("Can't set ColumnUnique for ColumnLowCardinality because is't not empty.",
ErrorCodes::LOGICAL_ERROR);
dictionary.setShared(column_unique);
}
ColumnWithDictionary::MutablePtr ColumnWithDictionary::compact()
ColumnLowCardinality::MutablePtr ColumnLowCardinality::compact()
{
auto positions = idx.getPositions();
/// Create column with new indexes and old dictionary.
auto column = ColumnWithDictionary::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate());
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), (*std::move(positions)).mutate());
/// Will create new dictionary.
column->compactInplace();
return column;
}
ColumnWithDictionary::MutablePtr ColumnWithDictionary::cutAndCompact(size_t start, size_t length) const
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
{
auto sub_positions = (*idx.getPositions()->cut(start, length)).mutate();
/// Create column with new indexes and old dictionary.
auto column = ColumnWithDictionary::create(getDictionary().assumeMutable(), std::move(sub_positions));
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions));
/// Will create new dictionary.
column->compactInplace();
return column;
}
void ColumnWithDictionary::compactInplace()
void ColumnLowCardinality::compactInplace()
{
auto positions = idx.detachPositions();
dictionary.compact(positions);
idx.attachPositions(std::move(positions));
}
void ColumnWithDictionary::compactIfSharedDictionary()
void ColumnLowCardinality::compactIfSharedDictionary()
{
if (dictionary.isShared())
compactInplace();
}
ColumnWithDictionary::DictionaryEncodedColumn
ColumnWithDictionary::getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const
ColumnLowCardinality::DictionaryEncodedColumn
ColumnLowCardinality::getMinimalDictionaryEncodedColumn(size_t offset, size_t limit) const
{
MutableColumnPtr sub_indexes = (*std::move(idx.getPositions()->cut(offset, limit))).mutate();
auto indexes_map = mapUniqueIndex(*sub_indexes);
@ -356,7 +356,7 @@ ColumnWithDictionary::getMinimalDictionaryEncodedColumn(size_t offset, size_t li
return {std::move(sub_keys), std::move(sub_indexes)};
}
ColumnPtr ColumnWithDictionary::countKeys() const
ColumnPtr ColumnLowCardinality::countKeys() const
{
const auto & nested_column = getDictionary().getNestedColumn();
size_t dict_size = nested_column->size();
@ -368,20 +368,20 @@ ColumnPtr ColumnWithDictionary::countKeys() const
ColumnWithDictionary::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {}
ColumnLowCardinality::Index::Index() : positions(ColumnUInt8::create()), size_of_type(sizeof(UInt8)) {}
ColumnWithDictionary::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions))
ColumnLowCardinality::Index::Index(MutableColumnPtr && positions) : positions(std::move(positions))
{
updateSizeOfType();
}
ColumnWithDictionary::Index::Index(ColumnPtr positions) : positions(std::move(positions))
ColumnLowCardinality::Index::Index(ColumnPtr positions) : positions(std::move(positions))
{
updateSizeOfType();
}
template <typename Callback>
void ColumnWithDictionary::Index::callForType(Callback && callback, size_t size_of_type)
void ColumnLowCardinality::Index::callForType(Callback && callback, size_t size_of_type)
{
switch (size_of_type)
{
@ -390,13 +390,13 @@ void ColumnWithDictionary::Index::callForType(Callback && callback, size_t size_
case sizeof(UInt32): { callback(UInt32()); break; }
case sizeof(UInt64): { callback(UInt64()); break; }
default: {
throw Exception("Unexpected size of index type for ColumnWithDictionary: " + toString(size_of_type),
throw Exception("Unexpected size of index type for ColumnLowCardinality: " + toString(size_of_type),
ErrorCodes::LOGICAL_ERROR);
}
}
}
size_t ColumnWithDictionary::Index::getSizeOfIndexType(const IColumn & column, size_t hint)
size_t ColumnLowCardinality::Index::getSizeOfIndexType(const IColumn & column, size_t hint)
{
auto checkFor = [&](auto type) { return typeid_cast<const ColumnVector<decltype(type)> *>(&column) != nullptr; };
auto tryGetSizeFor = [&](auto type) -> size_t { return checkFor(type) ? sizeof(decltype(type)) : 0; };
@ -419,22 +419,22 @@ size_t ColumnWithDictionary::Index::getSizeOfIndexType(const IColumn & column, s
if (auto size = tryGetSizeFor(UInt64()))
return size;
throw Exception("Unexpected indexes type for ColumnWithDictionary. Expected UInt, got " + column.getName(),
throw Exception("Unexpected indexes type for ColumnLowCardinality. Expected UInt, got " + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
void ColumnWithDictionary::Index::attachPositions(ColumnPtr positions_)
void ColumnLowCardinality::Index::attachPositions(ColumnPtr positions_)
{
positions = std::move(positions_);
updateSizeOfType();
}
template <typename IndexType>
typename ColumnVector<IndexType>::Container & ColumnWithDictionary::Index::getPositionsData()
typename ColumnVector<IndexType>::Container & ColumnLowCardinality::Index::getPositionsData()
{
auto * positions_ptr = typeid_cast<ColumnVector<IndexType> *>(positions->assumeMutable().get());
if (!positions_ptr)
throw Exception("Invalid indexes type for ColumnWithDictionary."
throw Exception("Invalid indexes type for ColumnLowCardinality."
" Expected UInt" + toString(8 * sizeof(IndexType)) + ", got " + positions->getName(),
ErrorCodes::LOGICAL_ERROR);
@ -442,11 +442,11 @@ typename ColumnVector<IndexType>::Container & ColumnWithDictionary::Index::getPo
}
template <typename IndexType>
const typename ColumnVector<IndexType>::Container & ColumnWithDictionary::Index::getPositionsData() const
const typename ColumnVector<IndexType>::Container & ColumnLowCardinality::Index::getPositionsData() const
{
const auto * positions_ptr = typeid_cast<const ColumnVector<IndexType> *>(positions.get());
if (!positions_ptr)
throw Exception("Invalid indexes type for ColumnWithDictionary."
throw Exception("Invalid indexes type for ColumnLowCardinality."
" Expected UInt" + toString(8 * sizeof(IndexType)) + ", got " + positions->getName(),
ErrorCodes::LOGICAL_ERROR);
@ -454,7 +454,7 @@ const typename ColumnVector<IndexType>::Container & ColumnWithDictionary::Index:
}
template <typename IndexType>
void ColumnWithDictionary::Index::convertPositions()
void ColumnLowCardinality::Index::convertPositions()
{
auto convert = [&](auto x)
{
@ -485,14 +485,14 @@ void ColumnWithDictionary::Index::convertPositions()
checkSizeOfType();
}
void ColumnWithDictionary::Index::expandType()
void ColumnLowCardinality::Index::expandType()
{
auto expand = [&](auto type)
{
using CurIndexType = decltype(type);
constexpr auto next_size = NumberTraits::nextSize(sizeof(CurIndexType));
if (next_size == sizeof(CurIndexType))
throw Exception("Can't expand indexes type for ColumnWithDictionary from type: "
throw Exception("Can't expand indexes type for ColumnLowCardinality from type: "
+ demangle(typeid(CurIndexType).name()), ErrorCodes::LOGICAL_ERROR);
using NewIndexType = typename NumberTraits::Construct<false, false, next_size>::Type;
@ -502,14 +502,14 @@ void ColumnWithDictionary::Index::expandType()
callForType(std::move(expand), size_of_type);
}
UInt64 ColumnWithDictionary::Index::getMaxPositionForCurrentType() const
UInt64 ColumnLowCardinality::Index::getMaxPositionForCurrentType() const
{
UInt64 value = 0;
callForType([&](auto type) { value = std::numeric_limits<decltype(type)>::max(); }, size_of_type);
return value;
}
size_t ColumnWithDictionary::Index::getPositionAt(size_t row) const
size_t ColumnLowCardinality::Index::getPositionAt(size_t row) const
{
size_t pos;
auto getPosition = [&](auto type)
@ -522,7 +522,7 @@ size_t ColumnWithDictionary::Index::getPositionAt(size_t row) const
return pos;
}
void ColumnWithDictionary::Index::insertPosition(UInt64 position)
void ColumnLowCardinality::Index::insertPosition(UInt64 position)
{
while (position > getMaxPositionForCurrentType())
expandType();
@ -531,7 +531,7 @@ void ColumnWithDictionary::Index::insertPosition(UInt64 position)
checkSizeOfType();
}
void ColumnWithDictionary::Index::insertPositionsRange(const IColumn & column, size_t offset, size_t limit)
void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, size_t offset, size_t limit)
{
auto insertForType = [&](auto type)
{
@ -571,13 +571,13 @@ void ColumnWithDictionary::Index::insertPositionsRange(const IColumn & column, s
!insertForType(UInt16()) &&
!insertForType(UInt32()) &&
!insertForType(UInt64()))
throw Exception("Invalid column for ColumnWithDictionary index. Expected UInt, got " + column.getName(),
throw Exception("Invalid column for ColumnLowCardinality index. Expected UInt, got " + column.getName(),
ErrorCodes::ILLEGAL_COLUMN);
checkSizeOfType();
}
void ColumnWithDictionary::Index::check(size_t /*max_dictionary_size*/)
void ColumnLowCardinality::Index::check(size_t /*max_dictionary_size*/)
{
/// TODO: remove
/*
@ -601,14 +601,14 @@ void ColumnWithDictionary::Index::check(size_t /*max_dictionary_size*/)
*/
}
void ColumnWithDictionary::Index::checkSizeOfType()
void ColumnLowCardinality::Index::checkSizeOfType()
{
if (size_of_type != getSizeOfIndexType(*positions, size_of_type))
throw Exception("Invalid size of type. Expected " + toString(8 * size_of_type) +
", but positions are " + positions->getName(), ErrorCodes::LOGICAL_ERROR);
}
void ColumnWithDictionary::Index::countKeys(ColumnUInt64::Container & counts) const
void ColumnLowCardinality::Index::countKeys(ColumnUInt64::Container & counts) const
{
auto counter = [&](auto x)
{
@ -621,25 +621,25 @@ void ColumnWithDictionary::Index::countKeys(ColumnUInt64::Container & counts) co
}
ColumnWithDictionary::Dictionary::Dictionary(MutableColumnPtr && column_unique_, bool is_shared)
ColumnLowCardinality::Dictionary::Dictionary(MutableColumnPtr && column_unique_, bool is_shared)
: column_unique(std::move(column_unique_)), shared(is_shared)
{
checkColumn(*column_unique);
}
ColumnWithDictionary::Dictionary::Dictionary(ColumnPtr column_unique_, bool is_shared)
ColumnLowCardinality::Dictionary::Dictionary(ColumnPtr column_unique_, bool is_shared)
: column_unique(std::move(column_unique_)), shared(is_shared)
{
checkColumn(*column_unique);
}
void ColumnWithDictionary::Dictionary::checkColumn(const IColumn & column)
void ColumnLowCardinality::Dictionary::checkColumn(const IColumn & column)
{
if (!dynamic_cast<const IColumnUnique *>(&column))
throw Exception("ColumnUnique expected as an argument of ColumnWithDictionary.", ErrorCodes::ILLEGAL_COLUMN);
throw Exception("ColumnUnique expected as an argument of ColumnLowCardinality.", ErrorCodes::ILLEGAL_COLUMN);
}
void ColumnWithDictionary::Dictionary::setShared(const ColumnPtr & dictionary)
void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & dictionary)
{
checkColumn(*dictionary);
@ -647,7 +647,7 @@ void ColumnWithDictionary::Dictionary::setShared(const ColumnPtr & dictionary)
shared = true;
}
void ColumnWithDictionary::Dictionary::compact(ColumnPtr & positions)
void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions)
{
auto new_column_unique = column_unique->cloneEmpty();

View File

@ -13,21 +13,21 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
}
class ColumnWithDictionary final : public COWPtrHelper<IColumn, ColumnWithDictionary>
class ColumnLowCardinality final : public COWPtrHelper<IColumn, ColumnLowCardinality>
{
friend class COWPtrHelper<IColumn, ColumnWithDictionary>;
friend class COWPtrHelper<IColumn, ColumnLowCardinality>;
ColumnWithDictionary(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
ColumnWithDictionary(const ColumnWithDictionary & other) = default;
ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
ColumnLowCardinality(const ColumnLowCardinality & other) = default;
public:
/** Create immutable column using immutable arguments. These arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWPtrHelper<IColumn, ColumnWithDictionary>;
using Base = COWPtrHelper<IColumn, ColumnLowCardinality>;
static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false)
{
return ColumnWithDictionary::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
}
static MutablePtr create(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false)
@ -35,11 +35,11 @@ public:
return Base::create(std::move(column_unique), std::move(indexes), is_shared);
}
std::string getName() const override { return "ColumnWithDictionary"; }
const char * getFamilyName() const override { return "ColumnWithDictionary"; }
std::string getName() const override { return "ColumnLowCardinality"; }
const char * getFamilyName() const override { return "ColumnLowCardinality"; }
ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); }
ColumnPtr convertToFullColumnIfWithDictionary() const override { return convertToFullColumn(); }
ColumnPtr convertToFullColumnIfLowCardinality() const override { return convertToFullColumn(); }
MutableColumnPtr cloneResized(size_t size) const override;
size_t size() const override { return getIndexes().size(); }
@ -59,7 +59,7 @@ public:
bool isNullAt(size_t n) const override { return getDictionary().isNullAt(getIndexes().getUInt(n)); }
ColumnPtr cut(size_t start, size_t length) const override
{
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().cut(start, length));
}
void insert(const Field & x) override;
@ -89,17 +89,17 @@ public:
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
{
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override
{
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));
}
ColumnPtr index(const IColumn & indexes_, size_t limit) const override
{
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
}
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
@ -108,7 +108,7 @@ public:
ColumnPtr replicate(const Offsets & offsets) const override
{
return ColumnWithDictionary::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));
}
std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
@ -138,7 +138,7 @@ public:
bool isFixedAndContiguous() const override { return getDictionary().isFixedAndContiguous(); }
size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); }
bool isNumeric() const override { return getDictionary().isNumeric(); }
bool withDictionary() const override { return true; }
bool lowCardinality() const override { return true; }
const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); }
const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); }
@ -166,7 +166,7 @@ public:
///void setIndexes(MutableColumnPtr && indexes_) { indexes = std::move(indexes_); }
/// Set shared ColumnUnique for empty column with dictionary.
/// Set shared ColumnUnique for empty low cardinality column.
void setSharedDictionary(const ColumnPtr & column_unique);
bool isSharedDictionary() const { return dictionary.isShared(); }

View File

@ -27,7 +27,7 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
if (column.isColumnConst())
{
const ColumnConst & column_const = static_cast<const ColumnConst &>(column);
ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfWithDictionary();
ColumnPtr column_nested = column_const.getDataColumnPtr()->convertToFullColumnIfLowCardinality();
if (!typeid_cast<const ColumnUInt8 *>(column_nested.get()))
{
@ -50,8 +50,8 @@ ConstantFilterDescription::ConstantFilterDescription(const IColumn & column)
FilterDescription::FilterDescription(const IColumn & column_)
{
if (column_.withDictionary())
data_holder = column_.convertToFullColumnIfWithDictionary();
if (column_.lowCardinality())
data_holder = column_.convertToFullColumnIfLowCardinality();
const auto & column = data_holder ? *data_holder : column_;

View File

@ -47,9 +47,9 @@ public:
*/
virtual Ptr convertToFullColumnIfConst() const { return {}; }
/// If column isn't ColumnWithDictionary, return itself.
/// If column is ColumnWithDictionary, transforms it to full column.
virtual Ptr convertToFullColumnIfWithDictionary() const { return getPtr(); }
/// If column isn't ColumnLowCardinality, return itself.
/// If column is ColumnLowCardinality, transforms it to full column.
virtual Ptr convertToFullColumnIfLowCardinality() const { return getPtr(); }
/// Creates empty column with the same type.
virtual MutablePtr cloneEmpty() const { return cloneResized(0); }
@ -333,7 +333,7 @@ public:
/// Can be inside ColumnNullable.
virtual bool canBeInsideNullable() const { return false; }
virtual bool withDictionary() const { return false; }
virtual bool lowCardinality() const { return false; }
virtual ~IColumn() {}

View File

@ -1,8 +1,8 @@
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
#include <Columns/ColumnWithDictionary.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnConst.h>
namespace DB
@ -13,15 +13,15 @@ namespace DB
* Unlike UnionBlockInputStream, it does this sequentially.
* Blocks of different sources are not interleaved with each other.
*/
class ConvertColumnWithDictionaryToFullBlockInputStream : public IProfilingBlockInputStream
class ConvertColumnLowCardinalityToFullBlockInputStream : public IProfilingBlockInputStream
{
public:
explicit ConvertColumnWithDictionaryToFullBlockInputStream(const BlockInputStreamPtr & input)
explicit ConvertColumnLowCardinalityToFullBlockInputStream(const BlockInputStreamPtr & input)
{
children.push_back(input);
}
String getName() const override { return "ConvertColumnWithDictionaryToFull"; }
String getName() const override { return "ConvertColumnLowCardinalityToFull"; }
Block getHeader() const override { return convert(children.at(0)->getHeader()); }
@ -36,9 +36,9 @@ private:
if (auto * column_const = typeid_cast<const ColumnConst *>(column.column.get()))
column.column = column_const->removeLowCardinality();
else
column.column = column.column->convertToFullColumnIfWithDictionary();
column.column = column.column->convertToFullColumnIfLowCardinality();
if (auto * low_cardinality_type = typeid_cast<const DataTypeWithDictionary *>(column.type.get()))
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(column.type.get()))
column.type = low_cardinality_type->getDictionaryType();
}

View File

@ -143,7 +143,7 @@ void registerDataTypeUUID(DataTypeFactory & factory);
void registerDataTypeAggregateFunction(DataTypeFactory & factory);
void registerDataTypeNested(DataTypeFactory & factory);
void registerDataTypeInterval(DataTypeFactory & factory);
void registerDataTypeWithDictionary(DataTypeFactory & factory);
void registerDataTypeLowCardinality(DataTypeFactory & factory);
DataTypeFactory::DataTypeFactory()
@ -163,7 +163,7 @@ DataTypeFactory::DataTypeFactory()
registerDataTypeAggregateFunction(*this);
registerDataTypeNested(*this);
registerDataTypeInterval(*this);
registerDataTypeWithDictionary(*this);
registerDataTypeLowCardinality(*this);
}
}

View File

@ -1,4 +1,4 @@
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnUnique.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsCommon.h>
@ -6,7 +6,7 @@
#include <Common/typeid_cast.h>
#include <Core/TypeListNumber.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
@ -24,18 +24,18 @@ namespace ErrorCodes
namespace
{
/// Downcasts a generic column reference to the low-cardinality column type (const overload).
/// The reference form of typeid_cast is used, so the result is never a null pointer;
/// how a type mismatch is reported depends on typeid_cast itself (not visible here).
const ColumnWithDictionary & getColumnWithDictionary(const IColumn & column)
const ColumnLowCardinality & getColumnLowCardinality(const IColumn & column)
{
return typeid_cast<const ColumnWithDictionary &>(column);
return typeid_cast<const ColumnLowCardinality &>(column);
}
/// Mutable overload of the same downcast.
ColumnWithDictionary & getColumnWithDictionary(IColumn & column)
ColumnLowCardinality & getColumnLowCardinality(IColumn & column)
{
return typeid_cast<ColumnWithDictionary &>(column);
return typeid_cast<ColumnLowCardinality &>(column);
}
}
DataTypeWithDictionary::DataTypeWithDictionary(DataTypePtr dictionary_type_)
DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
: dictionary_type(std::move(dictionary_type_))
{
auto inner_type = dictionary_type;
@ -45,11 +45,11 @@ DataTypeWithDictionary::DataTypeWithDictionary(DataTypePtr dictionary_type_)
if (!isStringOrFixedString(inner_type)
&& !isDateOrDateTime(inner_type)
&& !isNumber(inner_type))
throw Exception("DataTypeWithDictionary is supported only for numbers, strings, Date or DateTime, but got "
throw Exception("DataTypeLowCardinality is supported only for numbers, strings, Date or DateTime, but got "
+ dictionary_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void DataTypeWithDictionary::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::DictionaryKeys);
dictionary_type->enumerateStreams(callback, path);
@ -74,7 +74,7 @@ struct KeysSerializationVersion
/// Validates a keys serialization version.
/// Throws LOGICAL_ERROR unless `version` equals SharedDictionariesWithAdditionalKeys,
/// which is the only value accepted by this check.
static void checkVersion(UInt64 version)
{
if (version != SharedDictionariesWithAdditionalKeys)
throw Exception("Invalid version for DataTypeWithDictionary key column.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Invalid version for DataTypeLowCardinality key column.", ErrorCodes::LOGICAL_ERROR);
}
/// Stores the raw version converted to Value, then validates it with checkVersion
/// (which throws LOGICAL_ERROR for an unsupported version).
KeysSerializationVersion(UInt64 version) : value(static_cast<Value>(version)) { checkVersion(version); }
@ -115,7 +115,7 @@ struct IndexesSerializationType
if (value <= TUInt64)
return;
throw Exception("Invalid type for DataTypeWithDictionary index column.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Invalid type for DataTypeLowCardinality index column.", ErrorCodes::LOGICAL_ERROR);
}
void serialize(WriteBuffer & buffer) const
@ -179,15 +179,15 @@ struct IndexesSerializationType
IndexesSerializationType() = default;
};
/// Serialization state for the low-cardinality data type.
/// Holds the keys serialization version and the dictionary used while serializing.
/// shared_dictionary may be null: serializeBinaryBulkStateSuffix resets it to nullptr
/// after writing its keys, and serializeBinaryBulkWithMultipleStreams checks it against nullptr.
struct SerializeStateWithDictionary : public IDataType::SerializeBinaryBulkState
struct SerializeStateLowCardinality : public IDataType::SerializeBinaryBulkState
{
KeysSerializationVersion key_version;
MutableColumnUniquePtr shared_dictionary;
/// The UInt64 is converted to KeysSerializationVersion, whose constructor validates it.
explicit SerializeStateWithDictionary(UInt64 key_version) : key_version(key_version) {}
explicit SerializeStateLowCardinality(UInt64 key_version) : key_version(key_version) {}
};
struct DeserializeStateWithDictionary : public IDataType::DeserializeBinaryBulkState
struct DeserializeStateLowCardinality : public IDataType::DeserializeBinaryBulkState
{
KeysSerializationVersion key_version;
ColumnUniquePtr global_dictionary;
@ -197,46 +197,46 @@ struct DeserializeStateWithDictionary : public IDataType::DeserializeBinaryBulkS
ColumnPtr null_map;
UInt64 num_pending_rows = 0;
explicit DeserializeStateWithDictionary(UInt64 key_version) : key_version(key_version) {}
explicit DeserializeStateLowCardinality(UInt64 key_version) : key_version(key_version) {}
};
/// Validates the generic serialization state and downcasts it to the low-cardinality one.
/// Throws LOGICAL_ERROR if `state` is empty, or if it does not hold the expected state type
/// (the message then names both the expected and the actual type).
/// Returns a non-owning pointer obtained from state.get().
static SerializeStateWithDictionary * checkAndGetWithDictionarySerializeState(
static SerializeStateLowCardinality * checkAndGetLowCardinalitySerializeState(
IDataType::SerializeBinaryBulkStatePtr & state)
{
if (!state)
throw Exception("Got empty state for DataTypeWithDictionary.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty state for DataTypeLowCardinality.", ErrorCodes::LOGICAL_ERROR);
auto * with_dictionary_state = typeid_cast<SerializeStateWithDictionary *>(state.get());
if (!with_dictionary_state)
auto * low_cardinality_state = typeid_cast<SerializeStateLowCardinality *>(state.get());
if (!low_cardinality_state)
{
/// typeid is applied to the referenced object, not to the smart pointer.
auto & state_ref = *state;
throw Exception("Invalid SerializeBinaryBulkState for DataTypeWithDictionary. Expected: "
+ demangle(typeid(SerializeStateWithDictionary).name()) + ", got "
throw Exception("Invalid SerializeBinaryBulkState for DataTypeLowCardinality. Expected: "
+ demangle(typeid(SerializeStateLowCardinality).name()) + ", got "
+ demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
}
return with_dictionary_state;
return low_cardinality_state;
}
/// Validates the generic deserialization state and downcasts it to the low-cardinality one.
/// Throws LOGICAL_ERROR if `state` is empty, or if it does not hold the expected state type
/// (the message then names both the expected and the actual type).
/// Returns a non-owning pointer obtained from state.get().
static DeserializeStateWithDictionary * checkAndGetWithDictionaryDeserializeState(
static DeserializeStateLowCardinality * checkAndGetLowCardinalityDeserializeState(
IDataType::DeserializeBinaryBulkStatePtr & state)
{
if (!state)
throw Exception("Got empty state for DataTypeWithDictionary.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty state for DataTypeLowCardinality.", ErrorCodes::LOGICAL_ERROR);
auto * with_dictionary_state = typeid_cast<DeserializeStateWithDictionary *>(state.get());
if (!with_dictionary_state)
auto * low_cardinality_state = typeid_cast<DeserializeStateLowCardinality *>(state.get());
if (!low_cardinality_state)
{
/// typeid is applied to the referenced object, not to the smart pointer.
auto & state_ref = *state;
throw Exception("Invalid DeserializeBinaryBulkState for DataTypeWithDictionary. Expected: "
+ demangle(typeid(DeserializeStateWithDictionary).name()) + ", got "
throw Exception("Invalid DeserializeBinaryBulkState for DataTypeLowCardinality. Expected: "
+ demangle(typeid(DeserializeStateLowCardinality).name()) + ", got "
+ demangle(typeid(state_ref).name()), ErrorCodes::LOGICAL_ERROR);
}
return with_dictionary_state;
return low_cardinality_state;
}
void DataTypeWithDictionary::serializeBinaryBulkStatePrefix(
void DataTypeLowCardinality::serializeBinaryBulkStatePrefix(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -245,7 +245,7 @@ void DataTypeWithDictionary::serializeBinaryBulkStatePrefix(
settings.path.pop_back();
if (!stream)
throw Exception("Got empty stream in DataTypeWithDictionary::serializeBinaryBulkStatePrefix",
throw Exception("Got empty stream in DataTypeLowCardinality::serializeBinaryBulkStatePrefix",
ErrorCodes::LOGICAL_ERROR);
/// Write version and create SerializeBinaryBulkState.
@ -253,36 +253,36 @@ void DataTypeWithDictionary::serializeBinaryBulkStatePrefix(
writeIntBinary(key_version, *stream);
state = std::make_shared<SerializeStateWithDictionary>(key_version);
state = std::make_shared<SerializeStateLowCardinality>(key_version);
}
/// Finishes bulk serialization.
/// If the state still holds a shared dictionary and low_cardinality_max_dictionary_size is non-zero,
/// writes the number of keys followed by the keys themselves into the DictionaryKeys substream
/// and then drops the dictionary from the state. Otherwise nothing is written.
/// Throws LOGICAL_ERROR if the state is missing or of the wrong type, if the key version is
/// unsupported, or if the stream getter returns no stream for the DictionaryKeys path.
void DataTypeWithDictionary::serializeBinaryBulkStateSuffix(
void DataTypeLowCardinality::serializeBinaryBulkStateSuffix(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
auto * state_with_dictionary = checkAndGetWithDictionarySerializeState(state);
KeysSerializationVersion::checkVersion(state_with_dictionary->key_version.value);
auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state);
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
if (state_with_dictionary->shared_dictionary && settings.low_cardinality_max_dictionary_size)
if (low_cardinality_state->shared_dictionary && settings.low_cardinality_max_dictionary_size)
{
auto nested_column = state_with_dictionary->shared_dictionary->getNestedNotNullableColumn();
auto nested_column = low_cardinality_state->shared_dictionary->getNestedNotNullableColumn();
/// The path is extended only for the duration of the stream lookup and restored right after.
settings.path.push_back(Substream::DictionaryKeys);
auto * stream = settings.getter(settings.path);
settings.path.pop_back();
if (!stream)
throw Exception("Got empty stream in DataTypeWithDictionary::serializeBinaryBulkStateSuffix",
throw Exception("Got empty stream in DataTypeLowCardinality::serializeBinaryBulkStateSuffix",
ErrorCodes::LOGICAL_ERROR);
UInt64 num_keys = nested_column->size();
writeIntBinary(num_keys, *stream);
/// Keys are written with the non-nullable version of the dictionary type,
/// matching the not-nullable nested column obtained above.
removeNullable(dictionary_type)->serializeBinaryBulk(*nested_column, *stream, 0, num_keys);
state_with_dictionary->shared_dictionary = nullptr;
low_cardinality_state->shared_dictionary = nullptr;
}
}
void DataTypeWithDictionary::deserializeBinaryBulkStatePrefix(
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefix(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
@ -296,7 +296,7 @@ void DataTypeWithDictionary::deserializeBinaryBulkStatePrefix(
UInt64 keys_version;
readIntBinary(keys_version, *stream);
state = std::make_shared<DeserializeStateWithDictionary>(keys_version);
state = std::make_shared<DeserializeStateLowCardinality>(keys_version);
}
namespace
@ -475,7 +475,7 @@ namespace
}
}
void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
@ -492,16 +492,16 @@ void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
return;
if (!keys_stream)
throw Exception("Got empty stream for DataTypeWithDictionary keys.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty stream for DataTypeLowCardinality keys.", ErrorCodes::LOGICAL_ERROR);
if (!indexes_stream)
throw Exception("Got empty stream for DataTypeWithDictionary indexes.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty stream for DataTypeLowCardinality indexes.", ErrorCodes::LOGICAL_ERROR);
const ColumnWithDictionary & column_with_dictionary = typeid_cast<const ColumnWithDictionary &>(column);
const ColumnLowCardinality & low_cardinality_column = typeid_cast<const ColumnLowCardinality &>(column);
auto * state_with_dictionary = checkAndGetWithDictionarySerializeState(state);
auto & global_dictionary = state_with_dictionary->shared_dictionary;
KeysSerializationVersion::checkVersion(state_with_dictionary->key_version.value);
auto * low_cardinality_state = checkAndGetLowCardinalitySerializeState(state);
auto & global_dictionary = low_cardinality_state->shared_dictionary;
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
bool need_update_dictionary = global_dictionary == nullptr;
if (need_update_dictionary)
@ -510,7 +510,7 @@ void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
size_t max_limit = column.size() - offset;
limit = limit ? std::min(limit, max_limit) : max_limit;
auto sub_column = column_with_dictionary.cutAndCompact(offset, limit);
auto sub_column = low_cardinality_column.cutAndCompact(offset, limit);
ColumnPtr positions = sub_column->getIndexesPtr();
ColumnPtr keys = sub_column->getDictionary().getNestedColumn();
@ -520,7 +520,7 @@ void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
auto indexes_with_overflow = global_dictionary->uniqueInsertRangeWithOverflow(*keys, 0, keys->size(),
settings.low_cardinality_max_dictionary_size);
size_t max_size = settings.low_cardinality_max_dictionary_size + indexes_with_overflow.overflowed_keys->size();
ColumnWithDictionary::Index(indexes_with_overflow.indexes->getPtr()).check(max_size);
ColumnLowCardinality::Index(indexes_with_overflow.indexes->getPtr()).check(max_size);
if (global_dictionary->size() > settings.low_cardinality_max_dictionary_size)
throw Exception("Got dictionary with size " + toString(global_dictionary->size()) +
@ -553,7 +553,7 @@ void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
UInt64 num_keys = nested_column->size();
writeIntBinary(num_keys, *keys_stream);
removeNullable(dictionary_type)->serializeBinaryBulk(*nested_column, *keys_stream, 0, num_keys);
state_with_dictionary->shared_dictionary = nullptr;
low_cardinality_state->shared_dictionary = nullptr;
}
if (need_additional_keys)
@ -568,13 +568,13 @@ void DataTypeWithDictionary::serializeBinaryBulkWithMultipleStreams(
index_version.getDataType()->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows);
}
void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
ColumnWithDictionary & column_with_dictionary = typeid_cast<ColumnWithDictionary &>(column);
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(column);
settings.path.push_back(Substream::DictionaryKeys);
auto * keys_stream = settings.getter(settings.path);
@ -586,15 +586,15 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
return;
if (!keys_stream)
throw Exception("Got empty stream for DataTypeWithDictionary keys.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty stream for DataTypeLowCardinality keys.", ErrorCodes::LOGICAL_ERROR);
if (!indexes_stream)
throw Exception("Got empty stream for DataTypeWithDictionary indexes.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Got empty stream for DataTypeLowCardinality indexes.", ErrorCodes::LOGICAL_ERROR);
auto * state_with_dictionary = checkAndGetWithDictionaryDeserializeState(state);
KeysSerializationVersion::checkVersion(state_with_dictionary->key_version.value);
auto * low_cardinality_state = checkAndGetLowCardinalityDeserializeState(state);
KeysSerializationVersion::checkVersion(low_cardinality_state->key_version.value);
auto readDictionary = [this, state_with_dictionary, keys_stream]()
auto readDictionary = [this, low_cardinality_state, keys_stream]()
{
UInt64 num_keys;
readIntBinary(num_keys, *keys_stream);
@ -604,62 +604,62 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
keys_type->deserializeBinaryBulk(*global_dict_keys, *keys_stream, num_keys, 0);
auto column_unique = createColumnUnique(*dictionary_type, std::move(global_dict_keys));
state_with_dictionary->global_dictionary = std::move(column_unique);
low_cardinality_state->global_dictionary = std::move(column_unique);
};
auto readAdditionalKeys = [this, state_with_dictionary, indexes_stream]()
auto readAdditionalKeys = [this, low_cardinality_state, indexes_stream]()
{
UInt64 num_keys;
readIntBinary(num_keys, *indexes_stream);
auto keys_type = removeNullable(dictionary_type);
auto additional_keys = keys_type->createColumn();
keys_type->deserializeBinaryBulk(*additional_keys, *indexes_stream, num_keys, 0);
state_with_dictionary->additional_keys = std::move(additional_keys);
low_cardinality_state->additional_keys = std::move(additional_keys);
if (!state_with_dictionary->index_type.need_global_dictionary && dictionary_type->isNullable())
if (!low_cardinality_state->index_type.need_global_dictionary && dictionary_type->isNullable())
{
auto null_map = ColumnUInt8::create(num_keys, 0);
if (num_keys)
null_map->getElement(0) = 1;
state_with_dictionary->null_map = std::move(null_map);
low_cardinality_state->null_map = std::move(null_map);
}
};
auto readIndexes = [this, state_with_dictionary, indexes_stream, &column_with_dictionary](UInt64 num_rows)
auto readIndexes = [this, low_cardinality_state, indexes_stream, &low_cardinality_column](UInt64 num_rows)
{
auto indexes_type = state_with_dictionary->index_type.getDataType();
auto indexes_type = low_cardinality_state->index_type.getDataType();
MutableColumnPtr indexes_column = indexes_type->createColumn();
indexes_type->deserializeBinaryBulk(*indexes_column, *indexes_stream, num_rows, 0);
auto & global_dictionary = state_with_dictionary->global_dictionary;
const auto & additional_keys = state_with_dictionary->additional_keys;
auto & global_dictionary = low_cardinality_state->global_dictionary;
const auto & additional_keys = low_cardinality_state->additional_keys;
bool has_additional_keys = state_with_dictionary->index_type.has_additional_keys;
bool column_is_empty = column_with_dictionary.empty();
bool has_additional_keys = low_cardinality_state->index_type.has_additional_keys;
bool column_is_empty = low_cardinality_column.empty();
if (!state_with_dictionary->index_type.need_global_dictionary)
if (!low_cardinality_state->index_type.need_global_dictionary)
{
ColumnPtr keys_column = additional_keys;
if (state_with_dictionary->null_map)
keys_column = ColumnNullable::create(additional_keys, state_with_dictionary->null_map);
column_with_dictionary.insertRangeFromDictionaryEncodedColumn(*keys_column, *indexes_column);
if (low_cardinality_state->null_map)
keys_column = ColumnNullable::create(additional_keys, low_cardinality_state->null_map);
low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*keys_column, *indexes_column);
}
else if (!has_additional_keys)
{
if (column_is_empty)
column_with_dictionary.setSharedDictionary(global_dictionary);
low_cardinality_column.setSharedDictionary(global_dictionary);
auto local_column = ColumnWithDictionary::create(global_dictionary, std::move(indexes_column));
column_with_dictionary.insertRangeFrom(*local_column, 0, num_rows);
auto local_column = ColumnLowCardinality::create(global_dictionary, std::move(indexes_column));
low_cardinality_column.insertRangeFrom(*local_column, 0, num_rows);
}
else
{
auto maps = mapIndexWithAdditionalKeys(*indexes_column, global_dictionary->size());
ColumnWithDictionary::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size());
ColumnLowCardinality::Index(maps.additional_keys_map->getPtr()).check(additional_keys->size());
ColumnWithDictionary::Index(indexes_column->getPtr()).check(
ColumnLowCardinality::Index(indexes_column->getPtr()).check(
maps.dictionary_map->size() + maps.additional_keys_map->size());
auto used_keys = (*std::move(global_dictionary->getNestedColumn()->index(*maps.dictionary_map, 0))).mutate();
@ -677,23 +677,23 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
used_keys->insertRangeFrom(*used_add_keys, 0, used_add_keys->size());
}
column_with_dictionary.insertRangeFromDictionaryEncodedColumn(*used_keys, *indexes_column);
low_cardinality_column.insertRangeFromDictionaryEncodedColumn(*used_keys, *indexes_column);
}
};
if (!settings.continuous_reading)
state_with_dictionary->num_pending_rows = 0;
low_cardinality_state->num_pending_rows = 0;
bool first_dictionary = true;
while (limit)
{
if (state_with_dictionary->num_pending_rows == 0)
if (low_cardinality_state->num_pending_rows == 0)
{
if (indexes_stream->eof())
break;
auto & index_type = state_with_dictionary->index_type;
auto & global_dictionary = state_with_dictionary->global_dictionary;
auto & index_type = low_cardinality_state->index_type;
auto & global_dictionary = low_cardinality_state->global_dictionary;
index_type.deserialize(*indexes_stream);
@ -703,51 +703,51 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams(
first_dictionary = false;
}
if (state_with_dictionary->index_type.has_additional_keys)
if (low_cardinality_state->index_type.has_additional_keys)
readAdditionalKeys();
else
state_with_dictionary->additional_keys = nullptr;
low_cardinality_state->additional_keys = nullptr;
readIntBinary(state_with_dictionary->num_pending_rows, *indexes_stream);
readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream);
}
size_t num_rows_to_read = std::min(limit, state_with_dictionary->num_pending_rows);
size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows);
readIndexes(num_rows_to_read);
limit -= num_rows_to_read;
state_with_dictionary->num_pending_rows -= num_rows_to_read;
low_cardinality_state->num_pending_rows -= num_rows_to_read;
}
}
/// Serializes a single Field by delegating to the dictionary (nested) type.
void DataTypeWithDictionary::serializeBinary(const Field & field, WriteBuffer & ostr) const
void DataTypeLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
dictionary_type->serializeBinary(field, ostr);
}
/// Deserializes a single Field by delegating to the dictionary (nested) type.
void DataTypeWithDictionary::deserializeBinary(Field & field, ReadBuffer & istr) const
void DataTypeLowCardinality::deserializeBinary(Field & field, ReadBuffer & istr) const
{
dictionary_type->deserializeBinary(field, istr);
}
/// Serializes one row of a low-cardinality column through a member function of the dictionary type.
/// Reads the row's position from the column's indexes, then calls `func` on the dictionary type
/// with the dictionary's nested column and that position, forwarding `args`.
/// `column` must be a low-cardinality column (it is downcast with typeid_cast).
template <typename ... Args>
void DataTypeWithDictionary::serializeImpl(
void DataTypeLowCardinality::serializeImpl(
const IColumn & column, size_t row_num, WriteBuffer & ostr,
DataTypeWithDictionary::SerealizeFunctionPtr<Args ...> func, Args & ... args) const
DataTypeLowCardinality::SerealizeFunctionPtr<Args ...> func, Args & ... args) const
{
auto & column_with_dictionary = getColumnWithDictionary(column);
size_t unique_row_number = column_with_dictionary.getIndexes().getUInt(row_num);
(dictionary_type.get()->*func)(*column_with_dictionary.getDictionary().getNestedColumn(), unique_row_number, ostr, std::forward<Args>(args)...);
auto & low_cardinality_column = getColumnLowCardinality(column);
size_t unique_row_number = low_cardinality_column.getIndexes().getUInt(row_num);
(dictionary_type.get()->*func)(*low_cardinality_column.getDictionary().getNestedColumn(), unique_row_number, ostr, std::forward<Args>(args)...);
}
/// Deserializes one value into a low-cardinality column through a member function of the dictionary type.
/// Creates an empty clone of the dictionary's nested column, lets `func` read into it (forwarding `args`),
/// then inserts element 0 of that temporary column into the low-cardinality column.
/// `column` must be a low-cardinality column (it is downcast with typeid_cast).
template <typename ... Args>
void DataTypeWithDictionary::deserializeImpl(
void DataTypeLowCardinality::deserializeImpl(
IColumn & column, ReadBuffer & istr,
DataTypeWithDictionary::DeserealizeFunctionPtr<Args ...> func, Args & ... args) const
DataTypeLowCardinality::DeserealizeFunctionPtr<Args ...> func, Args & ... args) const
{
auto & column_with_dictionary = getColumnWithDictionary(column);
auto temp_column = column_with_dictionary.getDictionary().getNestedColumn()->cloneEmpty();
auto & low_cardinality_column= getColumnLowCardinality(column);
auto temp_column = low_cardinality_column.getDictionary().getNestedColumn()->cloneEmpty();
(dictionary_type.get()->*func)(*temp_column, istr, std::forward<Args>(args)...);
column_with_dictionary.insertFromFullColumn(*temp_column, 0);
low_cardinality_column.insertFromFullColumn(*temp_column, 0);
}
namespace
@ -774,7 +774,7 @@ namespace
}
template <typename Creator>
MutableColumnUniquePtr DataTypeWithDictionary::createColumnUniqueImpl(const IDataType & keys_type,
MutableColumnUniquePtr DataTypeLowCardinality::createColumnUniqueImpl(const IDataType & keys_type,
const Creator & creator)
{
auto * type = &keys_type;
@ -800,12 +800,12 @@ MutableColumnUniquePtr DataTypeWithDictionary::createColumnUniqueImpl(const IDat
return column;
}
throw Exception("Unexpected dictionary type for DataTypeWithDictionary: " + type->getName(),
throw Exception("Unexpected dictionary type for DataTypeLowCardinality: " + type->getName(),
ErrorCodes::LOGICAL_ERROR);
}
MutableColumnUniquePtr DataTypeWithDictionary::createColumnUnique(const IDataType & keys_type)
MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataType & keys_type)
{
auto creator = [&](auto x)
{
@ -815,7 +815,7 @@ MutableColumnUniquePtr DataTypeWithDictionary::createColumnUnique(const IDataTyp
return createColumnUniqueImpl(keys_type, creator);
}
MutableColumnUniquePtr DataTypeWithDictionary::createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys)
MutableColumnUniquePtr DataTypeLowCardinality::createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys)
{
auto creator = [&](auto x)
{
@ -825,20 +825,20 @@ MutableColumnUniquePtr DataTypeWithDictionary::createColumnUnique(const IDataTyp
return createColumnUniqueImpl(keys_type, creator);
}
/// Creates a new low-cardinality column for this type.
/// The indexes column is created as UInt8 and the dictionary is created by
/// createColumnUnique for this type's dictionary type.
MutableColumnPtr DataTypeWithDictionary::createColumn() const
MutableColumnPtr DataTypeLowCardinality::createColumn() const
{
MutableColumnPtr indexes = DataTypeUInt8().createColumn();
MutableColumnPtr dictionary = createColumnUnique(*dictionary_type);
return ColumnWithDictionary::create(std::move(dictionary), std::move(indexes));
return ColumnLowCardinality::create(std::move(dictionary), std::move(indexes));
}
/// Two types are equal when `rhs` has exactly the same dynamic type as this object
/// and their dictionary types compare equal.
bool DataTypeWithDictionary::equals(const IDataType & rhs) const
bool DataTypeLowCardinality::equals(const IDataType & rhs) const
{
if (typeid(rhs) != typeid(*this))
return false;
/// The typeid check above has already established the dynamic type, so static_cast is sufficient.
auto & rhs_with_dictionary = static_cast<const DataTypeWithDictionary &>(rhs);
return dictionary_type->equals(*rhs_with_dictionary.dictionary_type);
auto & low_cardinality_rhs= static_cast<const DataTypeLowCardinality &>(rhs);
return dictionary_type->equals(*low_cardinality_rhs.dictionary_type);
}
@ -848,10 +848,10 @@ static DataTypePtr create(const ASTPtr & arguments)
throw Exception("LowCardinality data type family must have single argument - type of elements",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return std::make_shared<DataTypeWithDictionary>(DataTypeFactory::instance().get(arguments->children[0]));
return std::make_shared<DataTypeLowCardinality>(DataTypeFactory::instance().get(arguments->children[0]));
}
/// Registers the `create` function above in the factory under the type name "LowCardinality".
void registerDataTypeWithDictionary(DataTypeFactory & factory)
void registerDataTypeLowCardinality(DataTypeFactory & factory)
{
factory.registerDataType("LowCardinality", create);
}
@ -859,8 +859,8 @@ void registerDataTypeWithDictionary(DataTypeFactory & factory)
/// Returns the dictionary (nested) type when `type` is the low-cardinality data type;
/// otherwise returns `type` unchanged. Only the outermost type is inspected.
DataTypePtr removeLowCardinality(const DataTypePtr & type)
{
if (auto * type_with_dictionary = typeid_cast<const DataTypeWithDictionary *>(type.get()))
return type_with_dictionary->getDictionaryType();
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
return low_cardinality_type->getDictionaryType();
return type;
}

View File

@ -5,13 +5,13 @@
namespace DB
{
class DataTypeWithDictionary : public IDataType
class DataTypeLowCardinality : public IDataType
{
private:
DataTypePtr dictionary_type;
public:
DataTypeWithDictionary(DataTypePtr dictionary_type_);
DataTypeLowCardinality(DataTypePtr dictionary_type_);
const DataTypePtr & getDictionaryType() const { return dictionary_type; }
@ -136,7 +136,7 @@ public:
bool isCategorial() const override { return false; }
bool isNullable() const override { return false; }
bool onlyNull() const override { return false; }
bool withDictionary() const override { return true; }
bool lowCardinality() const override { return true; }
static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type);
static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys);
@ -161,7 +161,7 @@ private:
static MutableColumnUniquePtr createColumnUniqueImpl(const IDataType & keys_type, const Creator & creator);
};
/// Returns dictionary type if type is DataTypeWithDictionary, type otherwise.
/// Returns dictionary type if type is DataTypeLowCardinality, type otherwise.
DataTypePtr removeLowCardinality(const DataTypePtr & type);
}

View File

@ -396,7 +396,7 @@ public:
*/
virtual bool canBeInsideNullable() const { return false; }
virtual bool withDictionary() const { return false; }
virtual bool lowCardinality() const { return false; }
/// Updates avg_value_size_hint for a newly read column. Used to optimize deserialization. Zero is expected for the first column.

View File

@ -37,8 +37,8 @@
#include <Functions/FunctionsMiscellaneous.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/DateTimeTransforms.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <Columns/ColumnWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnLowCardinality.h>
namespace DB
@ -1374,7 +1374,7 @@ protected:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
private:
@ -1750,10 +1750,10 @@ private:
WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const
{
const auto * from_with_dict = typeid_cast<const DataTypeWithDictionary *>(from_type.get());
const auto * to_with_dict = typeid_cast<const DataTypeWithDictionary *>(to_type.get());
const auto & from_nested = from_with_dict ? from_with_dict->getDictionaryType() : from_type;
const auto & to_nested = to_with_dict ? to_with_dict->getDictionaryType() : to_type;
const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type;
const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type;
if (from_type->onlyNull())
{
@ -1768,10 +1768,10 @@ private:
}
auto wrapper = prepareRemoveNullable(from_nested, to_nested);
if (!from_with_dict && !to_with_dict)
if (!from_low_cardinality && !to_low_cardinality)
return wrapper;
return [wrapper, from_with_dict, to_with_dict]
return [wrapper, from_low_cardinality, to_low_cardinality]
(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
auto & arg = block.getByPosition(arguments[0]);
@ -1790,21 +1790,21 @@ private:
auto tmp_rows_count = input_rows_count;
if (to_with_dict)
res.type = to_with_dict->getDictionaryType();
if (to_low_cardinality)
res.type = to_low_cardinality->getDictionaryType();
if (from_with_dict)
if (from_low_cardinality)
{
auto * col_with_dict = typeid_cast<const ColumnWithDictionary *>(prev_arg_col.get());
arg.column = col_with_dict->getDictionary().getNestedColumn();
arg.type = from_with_dict->getDictionaryType();
auto * col_low_cardinality = typeid_cast<const ColumnLowCardinality *>(prev_arg_col.get());
arg.column = col_low_cardinality->getDictionary().getNestedColumn();
arg.type = from_low_cardinality->getDictionaryType();
/// TODO: Make map with defaults conversion.
src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res.type));
if (src_converted_to_full_column)
arg.column = arg.column->index(col_with_dict->getIndexes(), 0);
arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0);
else
res_indexes = col_with_dict->getIndexesPtr();
res_indexes = col_low_cardinality->getIndexesPtr();
tmp_rows_count = arg.column->size();
}
@ -1817,18 +1817,18 @@ private:
res.type = prev_res_type;
}
if (to_with_dict)
if (to_low_cardinality)
{
auto res_column = to_with_dict->createColumn();
auto * col_with_dict = typeid_cast<ColumnWithDictionary *>(res_column.get());
auto res_column = to_low_cardinality->createColumn();
auto * col_low_cardinality = typeid_cast<ColumnLowCardinality *>(res_column.get());
if (from_with_dict && !src_converted_to_full_column)
if (from_low_cardinality && !src_converted_to_full_column)
{
auto res_keys = std::move(res.column);
col_with_dict->insertRangeFromDictionaryEncodedColumn(*res_keys, *res_indexes);
col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*res_keys, *res_indexes);
}
else
col_with_dict->insertRangeFromFullColumn(*res.column, 0, res.column->size());
col_low_cardinality->insertRangeFromFullColumn(*res.column, 0, res.column->size());
res.column = std::move(res_column);
}
@ -2026,7 +2026,7 @@ protected:
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
private:
template <typename DataType>

View File

@ -8,12 +8,12 @@
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/Native.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/ExpressionActions.h>
@ -42,6 +42,9 @@ namespace ErrorCodes
}
/// Cache for functions result if it was executed on low cardinality column.
/// It's an LRUCache which stores the function result computed on the dictionary, together with the index mapping.
/// It's expected that cache_size is the number of reading streams (so it will store a single cached value per thread).
class PreparedFunctionLowCardinalityResultCache
{
public:
@ -120,7 +123,7 @@ static DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
return std::make_shared<DataTypeTuple>(elements);
}
if (const auto * low_cardinality_type = typeid_cast<const DataTypeWithDictionary *>(type.get()))
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(type.get()))
return low_cardinality_type->getDictionaryType();
return type;
@ -145,7 +148,7 @@ static ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
return ColumnTuple::create(columns);
}
if (const auto * column_low_cardinality = typeid_cast<const ColumnWithDictionary *>(column.get()))
if (const auto * column_low_cardinality = typeid_cast<const ColumnLowCardinality *>(column.get()))
return column_low_cardinality->convertToFullColumn();
return column;
@ -306,7 +309,7 @@ bool PreparedFunctionImpl::defaultImplementationForConstantArguments(Block & blo
for (size_t i = 0; i < arguments_size; ++i)
temporary_argument_numbers[i] = i;
executeWithoutColumnsWithDictionary(temporary_block, temporary_argument_numbers, arguments_size, temporary_block.rows());
executeWithoutLowCardinalityColumns(temporary_block, temporary_argument_numbers, arguments_size, temporary_block.rows());
block.getByPosition(result).column = ColumnConst::create(temporary_block.getByPosition(arguments_size).column, input_rows_count);
return true;
@ -330,7 +333,7 @@ bool PreparedFunctionImpl::defaultImplementationForNulls(Block & block, const Co
if (null_presence.has_nullable)
{
Block temporary_block = createBlockWithNestedColumns(block, args, result);
executeWithoutColumnsWithDictionary(temporary_block, args, result, temporary_block.rows());
executeWithoutLowCardinalityColumns(temporary_block, args, result, temporary_block.rows());
block.getByPosition(result).column = wrapInNullable(temporary_block.getByPosition(result).column, block, args,
result, input_rows_count);
return true;
@ -339,7 +342,7 @@ bool PreparedFunctionImpl::defaultImplementationForNulls(Block & block, const Co
return false;
}
void PreparedFunctionImpl::executeWithoutColumnsWithDictionary(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count)
void PreparedFunctionImpl::executeWithoutLowCardinalityColumns(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count)
{
if (defaultImplementationForConstantArguments(block, args, result, input_rows_count))
return;
@ -350,14 +353,14 @@ void PreparedFunctionImpl::executeWithoutColumnsWithDictionary(Block & block, co
executeImpl(block, args, result, input_rows_count);
}
static const ColumnWithDictionary * findLowCardinalityArgument(const Block & block, const ColumnNumbers & args)
static const ColumnLowCardinality * findLowCardinalityArgument(const Block & block, const ColumnNumbers & args)
{
const ColumnWithDictionary * result_column = nullptr;
const ColumnLowCardinality * result_column = nullptr;
for (auto arg : args)
{
const ColumnWithTypeAndName & column = block.getByPosition(arg);
if (auto * low_cardinality_column = checkAndGetColumn<ColumnWithDictionary>(column.column.get()))
if (auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
if (result_column)
throw Exception("Expected single dictionary argument for function.", ErrorCodes::LOGICAL_ERROR);
@ -369,7 +372,7 @@ static const ColumnWithDictionary * findLowCardinalityArgument(const Block & blo
return result_column;
}
static ColumnPtr replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes(
static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
Block & block, const ColumnNumbers & args, bool can_be_executed_on_default_arguments)
{
size_t num_rows = 0;
@ -378,13 +381,13 @@ static ColumnPtr replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes(
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
if (auto * column_with_dict = checkAndGetColumn<ColumnWithDictionary>(column.column.get()))
if (auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
if (indexes)
throw Exception("Expected single dictionary argument for function.", ErrorCodes::LOGICAL_ERROR);
indexes = column_with_dict->getIndexesPtr();
num_rows = column_with_dict->getDictionary().size();
indexes = low_cardinality_column->getIndexesPtr();
num_rows = low_cardinality_column->getDictionary().size();
}
}
@ -393,30 +396,30 @@ static ColumnPtr replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes(
ColumnWithTypeAndName & column = block.getByPosition(arg);
if (auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
column.column = column_const->removeLowCardinality()->cloneResized(num_rows);
else if (auto * column_with_dict = checkAndGetColumn<ColumnWithDictionary>(column.column.get()))
else if (auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
auto * type_with_dict = checkAndGetDataType<DataTypeWithDictionary>(column.type.get());
auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!type_with_dict)
throw Exception("Incompatible type for column with dictionary: " + column.type->getName(),
if (!low_cardinality_type)
throw Exception("Incompatible type for low cardinality column: " + column.type->getName(),
ErrorCodes::LOGICAL_ERROR);
if (can_be_executed_on_default_arguments)
column.column = column_with_dict->getDictionary().getNestedColumn();
column.column = low_cardinality_column->getDictionary().getNestedColumn();
else
{
auto dict_encoded = column_with_dict->getMinimalDictionaryEncodedColumn(0, column_with_dict->size());
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
column.type = type_with_dict->getDictionaryType();
column.type = low_cardinality_type->getDictionaryType();
}
}
return indexes;
}
static void convertColumnsWithDictionaryToFull(Block & block, const ColumnNumbers & args)
static void convertLowCardinalityColumnsToFull(Block & block, const ColumnNumbers & args)
{
for (auto arg : args)
{
@ -429,18 +432,19 @@ static void convertColumnsWithDictionaryToFull(Block & block, const ColumnNumber
void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count)
{
if (useDefaultImplementationForColumnsWithDictionary())
if (useDefaultImplementationForLowCardinalityColumns())
{
auto & res = block.safeGetByPosition(result);
Block block_without_dicts = block.cloneWithoutColumns();
Block block_without_low_cardinality = block.cloneWithoutColumns();
for (auto arg : args)
block_without_dicts.safeGetByPosition(arg).column = block.safeGetByPosition(arg).column;
block_without_low_cardinality.safeGetByPosition(arg).column = block.safeGetByPosition(arg).column;
if (auto * res_type_with_dict = typeid_cast<const DataTypeWithDictionary *>(res.type.get()))
if (auto * res_low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(res.type.get()))
{
const auto * low_cardinality_column = findLowCardinalityArgument(block, args);
bool use_cache = low_cardinality_result_cache
bool can_be_executed_on_default_arguments = canBeExecutedOnDefaultArguments();
bool use_cache = low_cardinality_result_cache && can_be_executed_on_default_arguments
&& low_cardinality_column && low_cardinality_column->isSharedDictionary();
PreparedFunctionLowCardinalityResultCache::DictionaryKey key;
@ -453,22 +457,22 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si
if (cached_values)
{
auto indexes = cached_values->index_mapping->index(low_cardinality_column->getIndexes(), 0);
res.column = ColumnWithDictionary::create(cached_values->function_result, indexes, true);
res.column = ColumnLowCardinality::create(cached_values->function_result, indexes, true);
return;
}
}
block_without_dicts.safeGetByPosition(result).type = res_type_with_dict->getDictionaryType();
ColumnPtr indexes = replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes(
block_without_dicts, args, canBeExecutedOnDefaultArguments());
block_without_low_cardinality.safeGetByPosition(result).type = res_low_cardinality_type->getDictionaryType();
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
block_without_low_cardinality, args, can_be_executed_on_default_arguments);
executeWithoutColumnsWithDictionary(block_without_dicts, args, result, block_without_dicts.rows());
executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, block_without_low_cardinality.rows());
auto & keys = block_without_dicts.safeGetByPosition(result).column;
auto & keys = block_without_low_cardinality.safeGetByPosition(result).column;
if (auto full_column = keys->convertToFullColumnIfConst())
keys = full_column;
auto res_mut_dictionary = DataTypeWithDictionary::createColumnUnique(*res_type_with_dict->getDictionaryType());
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
ColumnUniquePtr res_dictionary = std::move(res_mut_dictionary);
@ -486,22 +490,22 @@ void PreparedFunctionImpl::execute(Block & block, const ColumnNumbers & args, si
res_indexes = cache_values->index_mapping;
}
res.column = ColumnWithDictionary::create(res_dictionary, res_indexes->index(*indexes, 0), use_cache);
res.column = ColumnLowCardinality::create(res_dictionary, res_indexes->index(*indexes, 0), use_cache);
}
else
{
res.column = ColumnWithDictionary::create(res_dictionary, res_indexes);
res.column = ColumnLowCardinality::create(res_dictionary, res_indexes);
}
}
else
{
convertColumnsWithDictionaryToFull(block_without_dicts, args);
executeWithoutColumnsWithDictionary(block_without_dicts, args, result, input_rows_count);
res.column = block_without_dicts.safeGetByPosition(result).column;
convertLowCardinalityColumnsToFull(block_without_low_cardinality, args);
executeWithoutLowCardinalityColumns(block_without_low_cardinality, args, result, input_rows_count);
res.column = block_without_low_cardinality.safeGetByPosition(result).column;
}
}
else
executeWithoutColumnsWithDictionary(block, args, result, input_rows_count);
executeWithoutLowCardinalityColumns(block, args, result, input_rows_count);
}
void FunctionBuilderImpl::checkNumberOfArguments(size_t number_of_arguments) const
@ -517,7 +521,7 @@ void FunctionBuilderImpl::checkNumberOfArguments(size_t number_of_arguments) con
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
DataTypePtr FunctionBuilderImpl::getReturnTypeWithoutDictionary(const ColumnsWithTypeAndName & arguments) const
DataTypePtr FunctionBuilderImpl::getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const
{
checkNumberOfArguments(arguments.size());
@ -609,23 +613,23 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const DataTypes
DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & arguments) const
{
if (useDefaultImplementationForColumnsWithDictionary())
if (useDefaultImplementationForLowCardinalityColumns())
{
bool has_low_cardinality = false;
size_t num_full_low_cardinality_columns = 0;
size_t num_full_ordinary_columns = 0;
ColumnsWithTypeAndName args_without_dictionary(arguments);
ColumnsWithTypeAndName args_without_low_cardinality(arguments);
for (ColumnWithTypeAndName & arg : args_without_dictionary)
for (ColumnWithTypeAndName & arg : args_without_low_cardinality)
{
bool is_const = arg.column && arg.column->isColumnConst();
if (is_const)
arg.column = static_cast<const ColumnConst &>(*arg.column).removeLowCardinality();
if (auto * type_with_dictionary = typeid_cast<const DataTypeWithDictionary *>(arg.type.get()))
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(arg.type.get()))
{
arg.type = type_with_dictionary->getDictionaryType();
arg.type = low_cardinality_type->getDictionaryType();
has_low_cardinality = true;
if (!is_const)
@ -635,7 +639,7 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar
++num_full_ordinary_columns;
}
for (auto & arg : args_without_dictionary)
for (auto & arg : args_without_low_cardinality)
{
arg.column = recursiveRemoveLowCardinality(arg.column);
arg.type = recursiveRemoveLowCardinality(arg.type);
@ -643,11 +647,11 @@ DataTypePtr FunctionBuilderImpl::getReturnType(const ColumnsWithTypeAndName & ar
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality
&& num_full_low_cardinality_columns <= 1 && num_full_ordinary_columns == 0)
return std::make_shared<DataTypeWithDictionary>(getReturnTypeWithoutDictionary(args_without_dictionary));
return std::make_shared<DataTypeLowCardinality>(getReturnTypeWithoutLowCardinality(args_without_low_cardinality));
else
return getReturnTypeWithoutDictionary(args_without_dictionary);
return getReturnTypeWithoutLowCardinality(args_without_low_cardinality);
}
return getReturnTypeWithoutDictionary(arguments);
return getReturnTypeWithoutLowCardinality(arguments);
}
}

View File

@ -77,11 +77,11 @@ protected:
*/
virtual bool useDefaultImplementationForConstants() const { return false; }
/** If function arguments has single column with dictionary and all other arguments are constants, call function on nested column.
* Otherwise, convert all columns with dictionary to ordinary columns.
* Returns ColumnWithDictionary if at least one argument is ColumnWithDictionary.
/** If the function arguments contain a single low cardinality column and all other arguments are constants, call the function on the nested column.
* Otherwise, convert all low cardinality columns to ordinary columns.
* Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality.
*/
virtual bool useDefaultImplementationForColumnsWithDictionary() const { return true; }
virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; }
/** Some arguments could remain constant during this implementation.
*/
@ -97,7 +97,7 @@ private:
size_t input_rows_count);
bool defaultImplementationForConstantArguments(Block & block, const ColumnNumbers & args, size_t result,
size_t input_rows_count);
void executeWithoutColumnsWithDictionary(Block & block, const ColumnNumbers & arguments, size_t result,
void executeWithoutLowCardinalityColumns(Block & block, const ColumnNumbers & arguments, size_t result,
size_t input_rows_count);
/// Cache is created by function createLowCardinalityResultCache()
@ -292,12 +292,12 @@ protected:
virtual bool useDefaultImplementationForNulls() const { return true; }
/** If useDefaultImplementationForNulls() is true, then change arguments for getReturnType() and buildImpl().
* If function arguments has types with dictionary, convert them to ordinary types.
* getReturnType returns ColumnWithDictionary if at least one argument type is ColumnWithDictionary.
* If the function arguments have low cardinality types, convert them to ordinary types.
* getReturnType returns DataTypeLowCardinality if at least one argument type is DataTypeLowCardinality.
*/
virtual bool useDefaultImplementationForColumnsWithDictionary() const { return true; }
virtual bool useDefaultImplementationForLowCardinalityColumns() const { return true; }
/// If it isn't, will convert all ColumnWithDictionary arguments to full columns.
/// If it isn't, will convert all ColumnLowCardinality arguments to full columns.
virtual bool canBeExecutedOnLowCardinalityDictionary() const { return true; }
virtual FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const = 0;
@ -309,7 +309,7 @@ protected:
private:
DataTypePtr getReturnTypeWithoutDictionary(const ColumnsWithTypeAndName & arguments) const;
DataTypePtr getReturnTypeWithoutLowCardinality(const ColumnsWithTypeAndName & arguments) const;
};
/// Previous function interface.
@ -324,7 +324,7 @@ public:
/// Override these functions to change default implementation behavior. See details in IMyFunction.
bool useDefaultImplementationForNulls() const override { return true; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return true; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {}; }
bool canBeExecutedOnDefaultArguments() const override { return true; }
bool canBeExecutedOnLowCardinalityDictionary() const override { return isDeterministicInScopeOfQuery(); }
@ -406,7 +406,7 @@ protected:
}
bool useDefaultImplementationForNulls() const final { return function->useDefaultImplementationForNulls(); }
bool useDefaultImplementationForConstants() const final { return function->useDefaultImplementationForConstants(); }
bool useDefaultImplementationForColumnsWithDictionary() const final { return function->useDefaultImplementationForColumnsWithDictionary(); }
bool useDefaultImplementationForLowCardinalityColumns() const final { return function->useDefaultImplementationForLowCardinalityColumns(); }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return function->getArgumentsThatAreAlwaysConstant(); }
bool canBeExecutedOnDefaultArguments() const override { return function->canBeExecutedOnDefaultArguments(); }
@ -477,7 +477,7 @@ protected:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { return function->getReturnTypeImpl(arguments); }
bool useDefaultImplementationForNulls() const override { return function->useDefaultImplementationForNulls(); }
bool useDefaultImplementationForColumnsWithDictionary() const override { return function->useDefaultImplementationForColumnsWithDictionary(); }
bool useDefaultImplementationForLowCardinalityColumns() const override { return function->useDefaultImplementationForLowCardinalityColumns(); }
bool canBeExecutedOnLowCardinalityDictionary() const override { return function->canBeExecutedOnLowCardinalityDictionary(); }
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override

View File

@ -1,9 +1,9 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/typeid_cast.h>
@ -27,13 +27,13 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
auto * type = typeid_cast<const DataTypeWithDictionary *>(arguments[0].get());
auto * type = typeid_cast<const DataTypeLowCardinality *>(arguments[0].get());
if (!type)
throw Exception("First first argument of function lowCardinalityIndexes must be ColumnWithDictionary, but got"
throw Exception("First first argument of function lowCardinalityIndexes must be ColumnLowCardinality, but got"
+ arguments[0]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeUInt64>();
@ -44,7 +44,7 @@ public:
auto arg_num = arguments[0];
const auto & arg = block.getByPosition(arg_num);
auto & res = block.getByPosition(result);
auto indexes_col = typeid_cast<const ColumnWithDictionary *>(arg.column.get())->getIndexesPtr();
auto indexes_col = typeid_cast<const ColumnLowCardinality *>(arg.column.get())->getIndexesPtr();
auto new_indexes_col = ColumnUInt64::create(indexes_col->size());
auto & data = new_indexes_col->getData();
for (size_t i = 0; i < data.size(); ++i)

View File

@ -1,7 +1,7 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <Columns/ColumnWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/typeid_cast.h>
@ -26,13 +26,13 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
auto * type = typeid_cast<const DataTypeWithDictionary *>(arguments[0].get());
auto * type = typeid_cast<const DataTypeLowCardinality *>(arguments[0].get());
if (!type)
throw Exception("First first argument of function lowCardinalityKeys must be ColumnWithDictionary, but got"
throw Exception("First first argument of function lowCardinalityKeys must be ColumnLowCardinality, but got"
+ arguments[0]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return type->getDictionaryType();
@ -43,8 +43,8 @@ public:
auto arg_num = arguments[0];
const auto & arg = block.getByPosition(arg_num);
auto & res = block.getByPosition(result);
const auto * column_with_dictionary = typeid_cast<const ColumnWithDictionary *>(arg.column.get());
res.column = column_with_dictionary->getDictionary().getNestedColumn()->cloneResized(arg.column->size());
const auto * low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(arg.column.get());
res.column = low_cardinality_column->getDictionary().getNestedColumn()->cloneResized(arg.column->size());
}
};

View File

@ -1,9 +1,9 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/typeid_cast.h>
@ -22,14 +22,14 @@ public:
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments[0]->withDictionary())
if (arguments[0]->lowCardinality())
return arguments[0];
return std::make_shared<DataTypeWithDictionary>(arguments[0]);
return std::make_shared<DataTypeLowCardinality>(arguments[0]);
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
@ -38,12 +38,12 @@ public:
const auto & arg = block.getByPosition(arg_num);
auto & res = block.getByPosition(result);
if (arg.type->withDictionary())
if (arg.type->lowCardinality())
res.column = arg.column;
else
{
auto column = res.type->createColumn();
typeid_cast<ColumnWithDictionary &>(*column).insertRangeFromFullColumn(*arg.column, 0, arg.column->size());
typeid_cast<ColumnLowCardinality &>(*column).insertRangeFromFullColumn(*arg.column, 0, arg.column->size());
res.column = std::move(column);
}
}

View File

@ -24,7 +24,7 @@ public:
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForColumnsWithDictionary() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
size_t getNumberOfArguments() const override
{

View File

@ -12,7 +12,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
template <>

View File

@ -24,8 +24,8 @@
#include <common/demangle.h>
#if __has_include(<Interpreters/config_compile.h>)
#include <Interpreters/config_compile.h>
#include <Columns/ColumnWithDictionary.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#endif
@ -405,7 +405,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
{
DataTypePtr type = (params.src_header ? params.src_header : params.intermediate_header).safeGetByPosition(pos).type;
if (type->withDictionary())
if (type->lowCardinality())
{
has_low_cardinality = true;
type = removeLowCardinality(type);
@ -748,7 +748,6 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
* To make them work anyway, we materialize them.
*/
Columns materialized_columns;
// ColumnRawPtrs key_counts;
/// Remember the columns we will work with
for (size_t i = 0; i < params.keys_size; ++i)
@ -761,14 +760,13 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
key_columns[i] = materialized_columns.back().get();
}
if (const auto * column_with_dictionary = typeid_cast<const ColumnWithDictionary *>(key_columns[i]))
if (const auto * low_cardinality_column = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
{
if (!result.isLowCardinality())
{
materialized_columns.push_back(column_with_dictionary->convertToFullColumn());
materialized_columns.push_back(low_cardinality_column->convertToFullColumn());
key_columns[i] = materialized_columns.back().get();
}
//key_counts.push_back(materialized_columns.back().get());
}
}
@ -787,9 +785,9 @@ bool Aggregator::executeOnBlock(const Block & block, AggregatedDataVariants & re
aggregate_columns[i][j] = materialized_columns.back().get();
}
if (auto * col_with_dict = typeid_cast<const ColumnWithDictionary *>(aggregate_columns[i][j]))
if (auto * col_low_cardinality = typeid_cast<const ColumnLowCardinality *>(aggregate_columns[i][j]))
{
materialized_columns.push_back(col_with_dict->convertToFullColumn());
materialized_columns.push_back(col_low_cardinality->convertToFullColumn());
aggregate_columns[i][j] = materialized_columns.back().get();
}
}

View File

@ -28,7 +28,7 @@
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnWithDictionary.h>
#include <Columns/ColumnLowCardinality.h>
namespace DB
@ -413,7 +413,7 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
void init(ColumnRawPtrs & key_columns, const AggregationStateCachePtr & cache_ptr)
{
auto column = typeid_cast<const ColumnWithDictionary *>(key_columns[0]);
auto column = typeid_cast<const ColumnLowCardinality *>(key_columns[0]);
if (!column)
throw Exception("Invalid aggregation key type for AggregationMethodSingleLowCardinalityColumn method. "
"Excepted LowCardinality, got " + key_columns[0]->getName(), ErrorCodes::LOGICAL_ERROR);
@ -583,7 +583,7 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, size_t /*keys_size*/, const Sizes & /*key_sizes*/)
{
auto ref = Base::getValueRef(value);
static_cast<ColumnWithDictionary *>(key_columns[0].get())->insertData(ref.data, ref.size);
static_cast<ColumnLowCardinality *>(key_columns[0].get())->insertData(ref.data, ref.size);
}
};
@ -732,7 +732,7 @@ struct AggregationMethodKeysFixed
low_cardinality_keys.position_sizes.resize(key_columns.size());
for (size_t i = 0; i < key_columns.size(); ++i)
{
if (auto * low_cardinality_col = typeid_cast<const ColumnWithDictionary *>(key_columns[i]))
if (auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(key_columns[i]))
{
low_cardinality_keys.nested_columns[i] = low_cardinality_col->getDictionary().getNestedColumn().get();
low_cardinality_keys.positions[i] = &low_cardinality_col->getIndexes();

View File

@ -89,8 +89,11 @@ public:
ColumnPtr added_column;
/// For APPLY_FUNCTION and LEFT ARRAY JOIN.
/// FunctionBuilder is used before action was added to ExpressionActions (when we don't know types of arguments).
FunctionBuilderPtr function_builder;
/// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
FunctionBasePtr function_base;
/// Prepared function which is used in function execution.
PreparedFunctionPtr function;
Names argument_names;
bool is_function_compiled = false;

View File

@ -70,7 +70,7 @@
#include <Interpreters/evaluateQualified.h>
#include <Interpreters/QueryNormalizer.h>
#include <Interpreters/QueryAliasesVisitor.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#if 0
@ -1443,7 +1443,7 @@ void ExpressionAnalyzer::makeExplicitSet(const ASTFunction * node, const Block &
set_element_types = left_tuple_type->getElements();
for (auto & element_type : set_element_types)
if (const auto * low_cardinality_type = typeid_cast<const DataTypeWithDictionary *>(element_type.get()))
if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(element_type.get()))
element_type = low_cardinality_type->getDictionaryType();
ASTPtr elements_ast = nullptr;

View File

@ -373,7 +373,7 @@ void InterpreterCreateQuery::checkSupportedTypes(const ColumnsDescription & colu
{
for (const auto & column : list)
{
if (!allow_low_cardinality && column.type && column.type->withDictionary())
if (!allow_low_cardinality && column.type && column.type->lowCardinality())
{
String message = "Cannot create table with column '" + column.name + "' which type is '"
+ column.type->getName() + "' because LowCardinality type is not allowed. "

View File

@ -20,7 +20,7 @@
#include <DataStreams/ConcatBlockInputStream.h>
#include <DataStreams/RollupBlockInputStream.h>
#include <DataStreams/CubeBlockInputStream.h>
#include <DataStreams/ConvertColumnWithDictionaryToFullBlockInputStream.h>
#include <DataStreams/ConvertColumnLowCardinalityToFullBlockInputStream.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
@ -962,8 +962,7 @@ void InterpreterSelectQuery::executeAggregation(Pipeline & pipeline, const Expre
{
pipeline.transform([&](auto & stream)
{
stream = //std::make_shared<ConvertColumnWithDictionaryToFullBlockInputStream>(
std::make_shared<ExpressionBlockInputStream>(stream, expression); //);
stream = std::make_shared<ExpressionBlockInputStream>(stream, expression);
});
Names key_names;

View File

@ -27,7 +27,7 @@
#include <Storages/MergeTree/KeyCondition.h>
#include <ext/range.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
namespace DB
@ -131,10 +131,10 @@ void Set::setHeader(const Block & block)
}
/// Convert low cardinality column to full.
if (auto * low_cardinality_type = typeid_cast<const DataTypeWithDictionary *>(data_types.back().get()))
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_types.back().get()))
{
data_types.back() = low_cardinality_type->getDictionaryType();
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfWithDictionary());
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality());
key_columns.back() = materialized_columns.back().get();
}
}
@ -184,9 +184,9 @@ bool Set::insertFromBlock(const Block & block)
}
/// Convert low cardinality column to full.
if (key_columns.back()->withDictionary())
if (key_columns.back()->lowCardinality())
{
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfWithDictionary());
materialized_columns.emplace_back(key_columns.back()->convertToFullColumnIfLowCardinality());
key_columns.back() = materialized_columns.back().get();
}
}

View File

@ -19,7 +19,7 @@
#include <Common/typeid_cast.h>
#include <Common/NaNUtils.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeWithDictionary.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <common/DateLUT.h>
@ -257,8 +257,8 @@ Field convertFieldToType(const Field & from_value, const IDataType & to_type, co
if (from_type_hint && from_type_hint->equals(to_type))
return from_value;
if (auto * with_dict_type = typeid_cast<const DataTypeWithDictionary *>(&to_type))
return convertFieldToType(from_value, *with_dict_type->getDictionaryType(), from_type_hint);
if (auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(&to_type))
return convertFieldToType(from_value, *low_cardinality_type->getDictionaryType(), from_type_hint);
else if (auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type))
return convertFieldToTypeImpl(from_value, *nullable_type->getNestedType(), from_type_hint);
else

View File

@ -200,7 +200,7 @@ MergeTreeReader::Stream::Stream(
getMark(right).offset_in_compressed_file - getMark(all_mark_ranges[i].begin).offset_in_compressed_file);
}
/// Avoid empty buffer. May happen while reading dictionary for DataTypeWithDictionary.
/// Avoid empty buffer. May happen while reading dictionary for DataTypeLowCardinality.
/// For example: part has single dictionary and all marks point to the same position.
if (max_mark_range == 0)
max_mark_range = max_read_buffer_size;

View File

@ -355,6 +355,14 @@ def test_optimize_query(started_cluster):
ddl_check_query(instance, "CREATE TABLE test_optimize ON CLUSTER cluster (p Date, i Int32) ENGINE = MergeTree(p, p, 8192)")
ddl_check_query(instance, "OPTIMIZE TABLE test_optimize ON CLUSTER cluster FORMAT TSV")
def test_create_as_select(started_cluster):
    """Check that CREATE TABLE ... ON CLUSTER ... AS SELECT fills the new table.

    The table is created through ddl_check_query from a two-row UNION ALL
    select, read back from instance 'ch2' in sorted order, and then dropped.

    NOTE(review): `started_cluster` is not referenced in the body; presumably
    it is a fixture that brings the cluster up -- confirm against the fixture
    definition. `cluster`, `ddl_check_query` and `TSV` come from the enclosing
    module.
    """
    node = cluster.instances['ch2']

    # Create the table and populate it in a single distributed DDL statement.
    create_statement = "CREATE TABLE test_as_select ON CLUSTER cluster ENGINE = Memory AS (SELECT 1 AS x UNION ALL SELECT 2 AS x)"
    ddl_check_query(node, create_statement)

    # Both selected rows must be visible on the queried instance.
    actual_rows = node.query("SELECT x FROM test_as_select ORDER BY x")
    expected_rows = "1\n2\n"
    assert TSV(actual_rows) == TSV(expected_rows)

    # Remove the table once the check has passed.
    drop_statement = "DROP TABLE IF EXISTS test_as_select ON CLUSTER cluster"
    ddl_check_query(node, drop_statement)
if __name__ == '__main__':
with contextmanager(started_cluster)() as cluster:
for name, instance in cluster.instances.items():