mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Updated RangeHashedDictionary to new interface
This commit is contained in:
parent
d92d843e20
commit
d61e8c083b
@ -48,6 +48,7 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn(
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
|
||||
/// TODO: Check that attribute type is same as result type
|
||||
/// TODO: Check if const will work as expected
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
|
||||
@ -433,8 +434,8 @@ ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingTyp
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributType>(attr, null_value);
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
|
@ -116,12 +116,12 @@ ColumnPtr FlatDictionary::getColumn(
|
||||
ColumnPtr result;
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
|
||||
const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
|
||||
const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
|
||||
/// TODO: Check that attribute type is same as result type
|
||||
/// TODO: Check if const will work as expected
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
@ -266,7 +266,7 @@ ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataType
|
||||
assert(!key_columns.empty());
|
||||
|
||||
PaddedPODArray<Key> backup_storage;
|
||||
const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage);
|
||||
const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage);
|
||||
|
||||
auto result = ColumnUInt8::create(ext::size(ids));
|
||||
auto& out = result->getData();
|
||||
@ -472,8 +472,8 @@ FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const Attribut
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributType>(attr, null_value);
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
@ -595,29 +595,6 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string
|
||||
return attributes[it->second];
|
||||
}
|
||||
|
||||
/// Returns the identifiers held by `column` as a PaddedPODArray<Key>.
///
/// - A ColumnUInt64 is returned by reference to its own data; `backup_storage` is not touched.
/// - A constant UInt64 column is expanded row by row into `backup_storage`, which is then returned.
/// - Any other column raises ILLEGAL_COLUMN.
const PaddedPODArray<FlatDictionary::Key> & FlatDictionary::getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const
{
    if (const auto * id_col = checkAndGetColumn<ColumnUInt64>(&column))
        return id_col->getData();

    if (const auto * id_col_const = checkAndGetColumnConst<ColumnUInt64>(&column))
    {
        const auto full_column = id_col_const->convertToFullColumnIfConst();
        const auto size = full_column->size();

        backup_storage.resize(size);
        for (size_t i = 0; i < size; ++i)
            backup_storage[i] = full_column->getUInt(i);

        return backup_storage;
    }

    /// Both accepted column kinds have returned above, so no fallback return is needed.
    throw Exception{"Identifier column must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
}
|
||||
|
||||
PaddedPODArray<FlatDictionary::Key> FlatDictionary::getIds() const
|
||||
{
|
||||
const auto ids_count = ext::size(loaded_ids);
|
||||
|
@ -78,7 +78,6 @@ public:
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_untyped) const override;
|
||||
|
||||
|
||||
ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
@ -90,7 +89,6 @@ private:
|
||||
struct Attribute final
|
||||
{
|
||||
AttributeUnderlyingType type;
|
||||
// bool is_array;
|
||||
|
||||
std::variant<
|
||||
UInt8,
|
||||
@ -127,7 +125,6 @@ private:
|
||||
ContainerType<StringRef>>
|
||||
arrays;
|
||||
|
||||
std::optional<ContainerType<size_t>> array_offsets;
|
||||
std::unique_ptr<Arena> string_arena;
|
||||
};
|
||||
|
||||
@ -163,8 +160,6 @@ private:
|
||||
template <typename ChildType, typename AncestorType>
|
||||
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
||||
|
||||
const PaddedPODArray<Key> & getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray<Key> & backup_storage) const;
|
||||
|
||||
PaddedPODArray<Key> getIds() const;
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
|
@ -155,6 +155,7 @@ struct IDictionary : IDictionaryBase
|
||||
|
||||
virtual void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const = 0;
|
||||
|
||||
/// TODO: Rewrite
|
||||
/// Methods for hierarchy.
|
||||
|
||||
virtual void isInVectorVector(
|
||||
@ -196,4 +197,34 @@ inline void checkAttributeType(const IDictionaryBase * dictionary, const std::st
|
||||
attribute_name, toString(attribute_type), toString(to)};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static const PaddedPODArray<T> &
|
||||
getColumnDataAsPaddedPODArray(const IDictionaryBase * dictionary, const ColumnPtr column, PaddedPODArray<T> & backup_storage)
|
||||
{
|
||||
bool is_const_column = isColumnConst(*column);
|
||||
auto full_column = column->convertToFullColumnIfConst();
|
||||
auto vector_col = checkAndGetColumn<ColumnVector<T>>(full_column.get());
|
||||
|
||||
if (!vector_col)
|
||||
{
|
||||
throw Exception{
|
||||
ErrorCodes::TYPE_MISMATCH,
|
||||
"{}: type mismatch: column has wrong type expected {}",
|
||||
dictionary->getDictionaryID().getNameForLogs(),
|
||||
"" /* TODO: Type name*/};
|
||||
}
|
||||
|
||||
if (is_const_column)
|
||||
{
|
||||
// With type conversion and const columns we need to use backup storage here
|
||||
auto & data = vector_col->getData();
|
||||
backup_storage.assign(data);
|
||||
|
||||
return backup_storage;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vector_col->getData();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -37,26 +37,6 @@ protected:
|
||||
Block getBlock(size_t start, size_t length) const override;
|
||||
|
||||
private:
|
||||
template <typename Type>
|
||||
using DictionaryGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, PaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename Type>
|
||||
using DictionaryDecimalGetter = void (DictionaryType::*)(
|
||||
const std::string &, const PaddedPODArray<Key> &, const PaddedPODArray<Int64> &, DecimalPaddedPODArray<Type> &) const;
|
||||
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
ColumnPtr getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const;
|
||||
template <typename T>
|
||||
ColumnPtr getColumnFromPODArray(const PaddedPODArray<T> & array) const;
|
||||
|
||||
@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getBlock(
|
||||
return fillBlock(block_ids, block_start_dates, block_end_dates);
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename AttributeType, typename Getter>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttribute(
|
||||
Getter getter,
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
auto column = ColumnDecimal<AttributeType>::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused.
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData());
|
||||
return column;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto column_vector = ColumnVector<AttributeType>::create(ids_to_fill.size());
|
||||
(concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData());
|
||||
return column_vector;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromAttributeString(
|
||||
const PaddedPODArray<Key> & ids_to_fill,
|
||||
const PaddedPODArray<Int64> & dates,
|
||||
const DictionaryAttribute & attribute,
|
||||
const DictionaryType & concrete_dictionary) const
|
||||
{
|
||||
auto column_string = ColumnString::create();
|
||||
concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get());
|
||||
return column_string;
|
||||
}
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename T>
|
||||
ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getColumnFromPODArray(const PaddedPODArray<T> & array) const
|
||||
@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::getCo
|
||||
return column_vector;
|
||||
}
|
||||
|
||||
|
||||
template <typename DictionaryType, typename RangeType, typename Key>
|
||||
template <typename DictionarySpecialAttributeType, typename T>
|
||||
void RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::addSpecialColumn(
|
||||
@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream<DictionaryType, RangeType, Key>::fillBlock
|
||||
std::unordered_set<std::string> names(column_names.begin(), column_names.end());
|
||||
|
||||
addSpecialColumn(structure.id, std::make_shared<DataTypeUInt64>(), "ID", names, ids_to_fill, columns);
|
||||
auto ids_column = columns.back().column;
|
||||
addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns);
|
||||
addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns);
|
||||
|
||||
auto date_key = makeDateKey(block_start_dates, block_end_dates);
|
||||
auto date_column = getColumnFromPODArray(date_key);
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (names.find(attribute.name) != names.end())
|
||||
{
|
||||
ColumnPtr column;
|
||||
#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \
|
||||
column = getColumnFromAttribute<TYPE>(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary)
|
||||
switch (attribute.underlying_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(UInt128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int8);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int16);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Int64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Float64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal32);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal64);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
GET_COLUMN_FORM_ATTRIBUTE(Decimal128);
|
||||
break;
|
||||
case AttributeUnderlyingType::utString:
|
||||
column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary);
|
||||
break;
|
||||
}
|
||||
#undef GET_COLUMN_FORM_ATTRIBUTE
|
||||
ColumnPtr column = dictionary->getColumn(
|
||||
attribute.name,
|
||||
attribute.type,
|
||||
{ids_column, date_column},
|
||||
{std::make_shared<DataTypeUInt64>(), structure.range_max->type},
|
||||
nullptr);
|
||||
columns.emplace_back(column, attribute.type, attribute.name);
|
||||
}
|
||||
}
|
||||
|
@ -50,6 +50,7 @@ namespace ErrorCodes
|
||||
extern const int DICTIONARY_IS_EMPTY;
|
||||
extern const int TYPE_MISMATCH;
|
||||
extern const int UNSUPPORTED_METHOD;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date)
|
||||
@ -85,66 +86,163 @@ RangeHashedDictionary::RangeHashedDictionary(
|
||||
calculateBytesAllocated();
|
||||
}
|
||||
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void RangeHashedDictionary::get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const \
|
||||
{ \
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \
|
||||
getItems<TYPE>(attribute, ids, dates, out); \
|
||||
}
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
|
||||
void RangeHashedDictionary::getString(
|
||||
ColumnPtr RangeHashedDictionary::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const
|
||||
const DataTypePtr &,
|
||||
const Columns & key_columns,
|
||||
const DataTypes &,
|
||||
const ColumnPtr default_untyped) const
|
||||
{
|
||||
const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString);
|
||||
const auto & attr = *std::get<Ptr<StringRef>>(attribute.maps);
|
||||
const auto & null_value = std::get<String>(attribute.null_values);
|
||||
/// TODO: Validate input types
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
ColumnPtr result;
|
||||
|
||||
const auto & attribute = getAttribute(attribute_name);
|
||||
|
||||
/// TODO: Check that attribute type is same as result type
|
||||
|
||||
auto size = key_columns.front()->size();
|
||||
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
const auto * it = attr.find(ids[i]);
|
||||
if (it)
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
|
||||
if constexpr (std::is_same_v<AttributeType, String>)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
|
||||
{
|
||||
return v.range.contains(date);
|
||||
});
|
||||
auto column_string = ColumnString::create();
|
||||
auto out = column_string.get();
|
||||
|
||||
const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value};
|
||||
out->insertData(string_ref.data, string_ref.size);
|
||||
if (default_untyped != nullptr)
|
||||
{
|
||||
if (const auto default_col = checkAndGetColumn<ColumnString>(*default_untyped))
|
||||
{
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t row) { return default_col->getDataAt(row); });
|
||||
}
|
||||
else if (const auto default_col_const = checkAndGetColumnConst<ColumnString>(default_untyped.get()))
|
||||
{
|
||||
const auto & def = default_col_const->template getValue<String>();
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return def; });
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & null_value = std::get<StringRef>(attribute.null_values);
|
||||
|
||||
getItemsImpl<StringRef, StringRef>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t, const StringRef value) { out->insertData(value.data, value.size); },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
result = std::move(column_string);
|
||||
}
|
||||
else
|
||||
out->insertData(null_value.data(), null_value.size());
|
||||
}
|
||||
else if constexpr (IsNumber<AttributeType>)
|
||||
{
|
||||
auto column = ColumnVector<AttributeType>::create(size);
|
||||
auto& out = column->getData();
|
||||
|
||||
query_count.fetch_add(ids.size(), std::memory_order_relaxed);
|
||||
if (default_untyped != nullptr)
|
||||
{
|
||||
if (const auto default_col = checkAndGetColumn<ColumnVector<AttributeType>>(*default_untyped))
|
||||
{
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t row) { return default_col->getData()[row]; }
|
||||
);
|
||||
}
|
||||
else if (const auto default_col_const = checkAndGetColumnConst<ColumnVector<AttributeType>>(default_untyped.get()))
|
||||
{
|
||||
const auto & def = default_col_const->template getValue<AttributeType>();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t) { return def; }
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto null_value = std::get<AttributeType>(attribute.null_values);
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t) { return null_value; });
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
}
|
||||
else if constexpr (IsDecimalNumber<AttributeType>)
|
||||
{
|
||||
// auto scale = getDecimalScale(*attribute.type);
|
||||
auto column = ColumnDecimal<AttributeType>::create(size, 0);
|
||||
auto& out = column->getData();
|
||||
|
||||
if (default_untyped != nullptr)
|
||||
{
|
||||
if (const auto default_col = checkAndGetColumn<ColumnDecimal<AttributeType>>(*default_untyped))
|
||||
{
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t row) { return default_col->getData()[row]; }
|
||||
);
|
||||
}
|
||||
else if (const auto default_col_const = checkAndGetColumnConst<ColumnDecimal<AttributeType>>(default_untyped.get()))
|
||||
{
|
||||
const auto & def = default_col_const->template getValue<AttributeType>();
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t) { return def; }
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto null_value = std::get<AttributeType>(attribute.null_values);
|
||||
|
||||
getItemsImpl<AttributeType, AttributeType>(
|
||||
attribute,
|
||||
key_columns,
|
||||
[&](const size_t row, const auto value) { return out[row] = value; },
|
||||
[&](const size_t) { return null_value; }
|
||||
);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
}
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Key-presence checks are not implemented for this dictionary: always throws NOT_IMPLEMENTED.
ColumnUInt8::Ptr RangeHashedDictionary::has(const Columns &, const DataTypes &) const
{
    /// The format string needs a placeholder; without one the dictionary name argument
    /// never reaches the message.
    throw Exception(ErrorCodes::NOT_IMPLEMENTED,
        "{}: has is not supported", getDictionaryID().getNameForLogs());
}
|
||||
|
||||
void RangeHashedDictionary::createAttributes()
|
||||
{
|
||||
@ -220,66 +318,84 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||
bucket_count = map_ref->getBufferSizeInCells();
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeHashedDictionary::addAttributeSize<String>(const Attribute & attribute)
|
||||
{
|
||||
const auto & map_ref = std::get<Ptr<StringRef>>(attribute.maps);
|
||||
bytes_allocated += sizeof(Collection<StringRef>) + map_ref->getBufferSizeInBytes();
|
||||
bucket_count = map_ref->getBufferSizeInCells();
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
}
|
||||
|
||||
void RangeHashedDictionary::calculateBytesAllocated()
|
||||
{
|
||||
bytes_allocated += attributes.size() * sizeof(attributes.front());
|
||||
|
||||
for (const auto & attribute : attributes)
|
||||
{
|
||||
switch (attribute.type)
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
addAttributeSize<UInt8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
addAttributeSize<UInt16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
addAttributeSize<UInt32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
addAttributeSize<UInt64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
addAttributeSize<UInt128>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
addAttributeSize<Int8>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
addAttributeSize<Int16>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
addAttributeSize<Int32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
addAttributeSize<Int64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
addAttributeSize<Float32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
addAttributeSize<Float64>(attribute);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
addAttributeSize<AttributeType>(attribute);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
addAttributeSize<Decimal32>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
addAttributeSize<Decimal64>(attribute);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
addAttributeSize<Decimal128>(attribute);
|
||||
break;
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
addAttributeSize<StringRef>(attribute);
|
||||
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
// switch (attribute.type)
|
||||
// {
|
||||
// case AttributeUnderlyingType::utUInt8:
|
||||
// addAttributeSize<UInt8>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utUInt16:
|
||||
// addAttributeSize<UInt16>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utUInt32:
|
||||
// addAttributeSize<UInt32>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utUInt64:
|
||||
// addAttributeSize<UInt64>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utUInt128:
|
||||
// addAttributeSize<UInt128>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utInt8:
|
||||
// addAttributeSize<Int8>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utInt16:
|
||||
// addAttributeSize<Int16>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utInt32:
|
||||
// addAttributeSize<Int32>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utInt64:
|
||||
// addAttributeSize<Int64>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utFloat32:
|
||||
// addAttributeSize<Float32>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utFloat64:
|
||||
// addAttributeSize<Float64>(attribute);
|
||||
// break;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
// case AttributeUnderlyingType::utDecimal32:
|
||||
// addAttributeSize<Decimal32>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utDecimal64:
|
||||
// addAttributeSize<Decimal64>(attribute);
|
||||
// break;
|
||||
// case AttributeUnderlyingType::utDecimal128:
|
||||
// addAttributeSize<Decimal128>(attribute);
|
||||
// break;
|
||||
|
||||
// case AttributeUnderlyingType::utString:
|
||||
// {
|
||||
// addAttributeSize<StringRef>(attribute);
|
||||
// bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
|
||||
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
}
|
||||
}
|
||||
|
||||
@ -290,113 +406,54 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie
|
||||
attribute.maps = std::make_unique<Collection<T>>();
|
||||
}
|
||||
|
||||
template <>
|
||||
void RangeHashedDictionary::createAttributeImpl<String>(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.string_arena = std::make_unique<Arena>();
|
||||
const String & string = null_value.get<String>();
|
||||
const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||
attribute.null_values.emplace<StringRef>(string_in_arena, string.size());
|
||||
attribute.maps = std::make_unique<Collection<StringRef>>();
|
||||
}
|
||||
|
||||
RangeHashedDictionary::Attribute
|
||||
RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||
{
|
||||
Attribute attr{type, {}, {}, {}};
|
||||
|
||||
switch (type)
|
||||
auto type_call = [&](const auto &dictionary_attribute_type)
|
||||
{
|
||||
case AttributeUnderlyingType::utUInt8:
|
||||
createAttributeImpl<UInt8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt16:
|
||||
createAttributeImpl<UInt16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt32:
|
||||
createAttributeImpl<UInt32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt64:
|
||||
createAttributeImpl<UInt64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utUInt128:
|
||||
createAttributeImpl<UInt128>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt8:
|
||||
createAttributeImpl<Int8>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt16:
|
||||
createAttributeImpl<Int16>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt32:
|
||||
createAttributeImpl<Int32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utInt64:
|
||||
createAttributeImpl<Int64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat32:
|
||||
createAttributeImpl<Float32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utFloat64:
|
||||
createAttributeImpl<Float64>(attr, null_value);
|
||||
break;
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
createAttributeImpl<AttributeType>(attr, null_value);
|
||||
};
|
||||
|
||||
case AttributeUnderlyingType::utDecimal32:
|
||||
createAttributeImpl<Decimal32>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal64:
|
||||
createAttributeImpl<Decimal64>(attr, null_value);
|
||||
break;
|
||||
case AttributeUnderlyingType::utDecimal128:
|
||||
createAttributeImpl<Decimal128>(attr, null_value);
|
||||
break;
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
attr.maps = std::make_unique<Collection<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
callOnDictionaryAttributeType(type, type_call);
|
||||
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void RangeHashedDictionary::getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
{
|
||||
if (false) {} // NOLINT
|
||||
#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl<TYPE, OutputType>(attribute, ids, dates, out);
|
||||
DISPATCH(UInt8)
|
||||
DISPATCH(UInt16)
|
||||
DISPATCH(UInt32)
|
||||
DISPATCH(UInt64)
|
||||
DISPATCH(UInt128)
|
||||
DISPATCH(Int8)
|
||||
DISPATCH(Int16)
|
||||
DISPATCH(Int32)
|
||||
DISPATCH(Int64)
|
||||
DISPATCH(Float32)
|
||||
DISPATCH(Float64)
|
||||
DISPATCH(Decimal32)
|
||||
DISPATCH(Decimal64)
|
||||
DISPATCH(Decimal128)
|
||||
#undef DISPATCH
|
||||
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void RangeHashedDictionary::getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultGetter && get_default) const
|
||||
{
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
const auto null_value = std::get<AttributeType>(attribute.null_values);
|
||||
PaddedPODArray<Key> key_backup_storage;
|
||||
PaddedPODArray<RangeStorageType> range_backup_storage;
|
||||
|
||||
for (const auto i : ext::range(0, ids.size()))
|
||||
const PaddedPODArray<Key> & ids = getColumnDataAsPaddedPODArray(this, key_columns[0], key_backup_storage);
|
||||
const PaddedPODArray<RangeStorageType> & dates = getColumnDataAsPaddedPODArray(this, key_columns[1], range_backup_storage);
|
||||
|
||||
const auto & attr = *std::get<Ptr<AttributeType>>(attribute.maps);
|
||||
|
||||
for (const auto row : ext::range(0, ids.size()))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
const auto it = attr.find(ids[row]);
|
||||
if (it)
|
||||
{
|
||||
const auto date = dates[i];
|
||||
const auto date = dates[row];
|
||||
const auto & ranges_and_values = it->getMapped();
|
||||
const auto val_it
|
||||
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
|
||||
@ -404,11 +461,11 @@ void RangeHashedDictionary::getItemsImpl(
|
||||
return v.range.contains(date);
|
||||
});
|
||||
|
||||
out[i] = static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT
|
||||
set_value(row, static_cast<OutputType>(val_it != std::end(ranges_and_values) ? val_it->value : get_default(row))); // NOLINT
|
||||
}
|
||||
else
|
||||
{
|
||||
out[i] = static_cast<OutputType>(null_value); // NOLINT
|
||||
set_value(row, get_default(row));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,38 +52,18 @@ public:
|
||||
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
|
||||
}
|
||||
|
||||
typedef Int64 RangeStorageType;
|
||||
DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::range; }
|
||||
|
||||
template <typename T>
|
||||
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
|
||||
ColumnPtr getColumn(
|
||||
const std::string& attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr default_untyped) const override;
|
||||
|
||||
#define DECLARE_MULTIPLE_GETTER(TYPE) \
|
||||
void get##TYPE( \
|
||||
const std::string & attribute_name, \
|
||||
const PaddedPODArray<Key> & ids, \
|
||||
const PaddedPODArray<RangeStorageType> & dates, \
|
||||
ResultArrayType<TYPE> & out) const;
|
||||
DECLARE_MULTIPLE_GETTER(UInt8)
|
||||
DECLARE_MULTIPLE_GETTER(UInt16)
|
||||
DECLARE_MULTIPLE_GETTER(UInt32)
|
||||
DECLARE_MULTIPLE_GETTER(UInt64)
|
||||
DECLARE_MULTIPLE_GETTER(UInt128)
|
||||
DECLARE_MULTIPLE_GETTER(Int8)
|
||||
DECLARE_MULTIPLE_GETTER(Int16)
|
||||
DECLARE_MULTIPLE_GETTER(Int32)
|
||||
DECLARE_MULTIPLE_GETTER(Int64)
|
||||
DECLARE_MULTIPLE_GETTER(Float32)
|
||||
DECLARE_MULTIPLE_GETTER(Float64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal32)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal64)
|
||||
DECLARE_MULTIPLE_GETTER(Decimal128)
|
||||
#undef DECLARE_MULTIPLE_GETTER
|
||||
ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
void getString(
|
||||
const std::string & attribute_name,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
ColumnString * out) const;
|
||||
using RangeStorageType = Int64;
|
||||
|
||||
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
|
||||
|
||||
@ -130,7 +110,7 @@ private:
|
||||
Decimal128,
|
||||
Float32,
|
||||
Float64,
|
||||
String>
|
||||
StringRef>
|
||||
null_values;
|
||||
std::variant<
|
||||
Ptr<UInt8>,
|
||||
@ -166,21 +146,12 @@ private:
|
||||
|
||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||
|
||||
|
||||
template <typename OutputType>
|
||||
void getItems(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
template <typename AttributeType, typename OutputType>
|
||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||
void getItemsImpl(
|
||||
const Attribute & attribute,
|
||||
const PaddedPODArray<Key> & ids,
|
||||
const PaddedPODArray<RangeStorageType> & dates,
|
||||
PaddedPODArray<OutputType> & out) const;
|
||||
|
||||
const Columns & key_columns,
|
||||
ValueSetter && set_value,
|
||||
DefaultGetter && get_default) const;
|
||||
|
||||
template <typename T>
|
||||
void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value);
|
||||
|
@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionDictGetDateTimeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetUUIDOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetStringOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType>();
|
||||
factory.registerFunction<FunctionDictGetNoTypeOrDefault>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withoutDefault>>();
|
||||
factory.registerFunction<FunctionDictGetNoType<DictionaryGetFunctionType::withDefault>>();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -104,8 +104,8 @@ public:
|
||||
if (!sample_columns)
|
||||
return false;
|
||||
|
||||
if (sample_columns.columns() != 3 && sample_columns.columns() != 4)
|
||||
throw Exception{"Function dictGet... takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
if (sample_columns.columns() < 3)
|
||||
throw Exception{"Wrong arguments count", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
const auto * dict_name_col = checkAndGetColumnConst<ColumnString>(sample_columns.getByPosition(0).column.get());
|
||||
if (!dict_name_col)
|
||||
@ -121,7 +121,6 @@ public:
|
||||
private:
|
||||
const Context & context;
|
||||
const ExternalDictionariesLoader & external_loader;
|
||||
mutable std::shared_ptr<const IDictionaryBase> dictionary;
|
||||
/// If access is not granted, checkAccess() throws and access_checked is not updated, so a true value always means access was granted.
|
||||
std::atomic<bool> access_checked = false;
|
||||
};
|
||||
@ -178,19 +177,26 @@ private:
|
||||
auto dictionary = helper.getDictionary(arguments[0]);
|
||||
auto dictionary_identifier_type = dictionary->getIdentifierType();
|
||||
|
||||
const auto id_col_untyped = arguments[1].column;
|
||||
const ColumnWithTypeAndName & key_column_with_type = arguments[1];
|
||||
const auto key_column = key_column_with_type.column;
|
||||
|
||||
if (dictionary_identifier_type == DictionaryIdentifierType::simple)
|
||||
{
|
||||
return dictionary->has({ id_col_untyped }, { std::make_shared<DataTypeUInt64>() });
|
||||
return dictionary->has({key_column}, {std::make_shared<DataTypeUInt64>()});
|
||||
}
|
||||
else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
|
||||
{
|
||||
/// TODO: Check if column is tuple and pass
|
||||
return nullptr;
|
||||
/// Functions for external dictionaries only support full (non-constant) key columns.
|
||||
ColumnPtr key_column_full = key_column_with_type.column->convertToFullColumnIfConst();
|
||||
|
||||
const auto & key_columns = typeid_cast<const ColumnTuple &>(*key_column_full).getColumnsCopy();
|
||||
const auto & key_types = static_cast<const DataTypeTuple &>(*key_column_with_type.type).getElements();
|
||||
|
||||
return dictionary->has(key_columns, key_types);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// TODO: Add support for range
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
@ -198,13 +204,6 @@ private:
|
||||
mutable FunctionDictHelper helper;
|
||||
};
|
||||
|
||||
|
||||
/** For ColumnVector. Either returns a reference to the column's internal data,
|
||||
* or converts the values to type T, stores the result in backup_storage and returns a reference to it.
|
||||
* The result may refer to backup_storage, so backup_storage must outlive any use of the returned reference.
|
||||
*/
|
||||
template <typename T>
|
||||
static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray<T> & backup_storage);
|
||||
|
||||
enum class DictionaryGetFunctionType
|
||||
{
|
||||
withoutDefault,
|
||||
@ -232,12 +231,9 @@ public:
|
||||
String getName() const override { return name; }
|
||||
|
||||
private:
|
||||
size_t getNumberOfArguments() const override {
|
||||
/// TODO: Check if ranged dictionary is working
|
||||
return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault ? 0 : 4;
|
||||
}
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
bool isVariadic() const override { return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault; }
|
||||
bool isVariadic() const override { return true; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const final { return true; }
|
||||
|
||||
@ -245,38 +241,8 @@ private:
|
||||
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
|
||||
+ ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!isString(arguments[1]))
|
||||
throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
|
||||
+ ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[2]).isUInt64() &&
|
||||
!isTuple(arguments[2]))
|
||||
throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
|
||||
+ ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
|
||||
{
|
||||
if (!checkAndGetDataType<DataType>(arguments[3].get()))
|
||||
throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName()
|
||||
+ ", must be " + TypeName<Type>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
else
|
||||
{
|
||||
/// This is for the case of range dictionaries_loader.
|
||||
if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger())
|
||||
{
|
||||
throw Exception{"Illegal type " + arguments[3]->getName() +
|
||||
" of fourth argument of function " + getName() +
|
||||
" must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN};
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (IsDataTypeDecimal<DataType>)
|
||||
return std::make_shared<DataType>(DataType::maxPrecision(), decimal_scale);
|
||||
else
|
||||
@ -285,11 +251,21 @@ private:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
if (arguments.size() < 3)
|
||||
throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
if (input_rows_count == 0)
|
||||
return result_type->createColumn();
|
||||
|
||||
auto dictionary = helper.getDictionary(arguments[0]);
|
||||
ColumnPtr res;
|
||||
const auto * dictionary_name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get());
|
||||
if (!dictionary_name_col)
|
||||
throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
String dictionary_name = dictionary_name_col->getValue<String>();
|
||||
|
||||
auto dictionary = helper.getDictionary(dictionary_name);
|
||||
if (!dictionary)
|
||||
throw Exception("First argument of function " + getName() + " does not name a dictionary", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
|
||||
if (!attr_name_col)
|
||||
@ -297,21 +273,57 @@ private:
|
||||
|
||||
String attr_name = attr_name_col->getValue<String>();
|
||||
|
||||
const ColumnWithTypeAndName & key_col_with_type = arguments[2];
|
||||
const auto key_column = key_col_with_type.column;
|
||||
/// TODO: Use accurateCast if argument is integer
|
||||
if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type))
|
||||
throw Exception{
|
||||
"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
|
||||
+ ", must be UInt64 or tuple(...).",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
auto dictionary_identifier_type = dictionary->getIdentifierType();
|
||||
|
||||
size_t current_arguments_index = 3;
|
||||
|
||||
/// TODO: Add more information to error messages
|
||||
|
||||
ColumnPtr range_col = nullptr;
|
||||
DataTypePtr range_col_type = nullptr;
|
||||
|
||||
if (dictionary_identifier_type == DictionaryIdentifierType::range)
|
||||
{
|
||||
if (current_arguments_index >= arguments.size())
|
||||
throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
range_col = arguments[current_arguments_index].column;
|
||||
range_col_type = arguments[current_arguments_index].type;
|
||||
|
||||
if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64)))
|
||||
throw Exception{
|
||||
"Illegal type " + range_col_type->getName() + " of fourth argument of function " + getName()
|
||||
+ " must be convertible to Int64.",
|
||||
ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
++current_arguments_index;
|
||||
}
|
||||
|
||||
ColumnPtr default_col = nullptr;
|
||||
|
||||
if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
|
||||
{
|
||||
default_col = arguments[3].column;
|
||||
/// The default value is read from arguments[current_arguments_index]; fail if that argument was not passed.
if (current_arguments_index >= arguments.size())
|
||||
throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
default_col = arguments[current_arguments_index].column;
|
||||
}
|
||||
|
||||
ColumnPtr res;
|
||||
|
||||
const ColumnWithTypeAndName & key_col_with_type = arguments[2];
|
||||
const auto key_column = key_col_with_type.column;
|
||||
|
||||
if (dictionary_identifier_type == DictionaryIdentifierType::simple)
|
||||
{
|
||||
res = dictionary->getColumn(attr_name, result_type, { key_column }, { std::make_shared<DataTypeUInt64>() }, default_col);
|
||||
res = dictionary->getColumn(attr_name, result_type, {key_column}, {std::make_shared<DataTypeUInt64>()}, default_col);
|
||||
}
|
||||
else if (dictionary_identifier_type == DictionaryIdentifierType::complex)
|
||||
{
|
||||
@ -323,10 +335,13 @@ private:
|
||||
|
||||
res = dictionary->getColumn(attr_name, result_type, key_columns, key_types, default_col);
|
||||
}
|
||||
else
|
||||
else if (dictionary_identifier_type == DictionaryIdentifierType::range)
|
||||
{
|
||||
res = nullptr;
|
||||
res = dictionary->getColumn(
|
||||
attr_name, result_type, {key_column, range_col}, {std::make_shared<DataTypeUInt64>(), range_col_type}, default_col);
|
||||
}
|
||||
else
|
||||
throw Exception{"Unknown dictionary identifier type", ErrorCodes::BAD_ARGUMENTS};
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -416,10 +431,11 @@ using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault<DataTypeString,
|
||||
|
||||
/// TODO: Use new API
|
||||
/// This variant of function derives the result type automatically.
|
||||
template <DictionaryGetFunctionType dictionary_get_function_type>
|
||||
class FunctionDictGetNoType final : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "dictGet";
|
||||
static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::withDefault ? "dictGetOrDefault" : "dictGet";
|
||||
|
||||
static FunctionPtr create(const Context & context)
|
||||
{
|
||||
@ -437,6 +453,8 @@ private:
|
||||
bool useDefaultImplementationForConstants() const final { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
|
||||
{
|
||||
return helper.isDictGetFunctionInjective(sample_columns);
|
||||
@ -444,8 +462,8 @@ private:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (arguments.size() != 3 && arguments.size() != 4)
|
||||
throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
if (arguments.size() < 3)
|
||||
throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
|
||||
|
||||
String dict_name;
|
||||
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
|
||||
@ -465,29 +483,20 @@ private:
|
||||
throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
|
||||
+ ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[2].type).isUInt64() &&
|
||||
!isTuple(arguments[2].type))
|
||||
throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
|
||||
+ ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (arguments.size() == 4)
|
||||
{
|
||||
const auto * range_argument = arguments[3].type.get();
|
||||
if (!(range_argument->isValueRepresentedByInteger() &&
|
||||
range_argument->getSizeOfValueInMemory() <= sizeof(Int64)))
|
||||
throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName()
|
||||
+ ", must be convertible to " + TypeName<Int64>::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
}
|
||||
|
||||
auto dict = helper.getDictionary(dict_name);
|
||||
const DictionaryStructure & structure = dict->getStructure();
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
for (const auto& attribute : structure.attributes)
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (attribute.name == attr_name)
|
||||
if (attribute.name != attr_name)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
WhichDataType dt = attribute.type;
|
||||
|
||||
if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault)
|
||||
{
|
||||
WhichDataType dt = attribute.type;
|
||||
switch (dt.idx)
|
||||
{
|
||||
case TypeIndex::String:
|
||||
@ -545,90 +554,9 @@ private:
|
||||
default:
|
||||
throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE);
|
||||
}
|
||||
return attribute.type;
|
||||
}
|
||||
}
|
||||
throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return impl->executeImpl(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
const Context & context;
|
||||
mutable FunctionDictHelper helper;
|
||||
mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info
|
||||
};
|
||||
|
||||
|
||||
class FunctionDictGetNoTypeOrDefault final : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "dictGetOrDefault";
|
||||
|
||||
static FunctionPtr create(const Context & context)
|
||||
{
|
||||
return std::make_shared<FunctionDictGetNoTypeOrDefault>(context);
|
||||
}
|
||||
|
||||
explicit FunctionDictGetNoTypeOrDefault(const Context & context_) : context(context_), helper(context_) {}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
private:
|
||||
size_t getNumberOfArguments() const override { return 4; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const final { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
|
||||
|
||||
bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
|
||||
{
|
||||
return helper.isDictGetFunctionInjective(sample_columns);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
String dict_name;
|
||||
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
|
||||
{
|
||||
dict_name = name_col->getValue<String>();
|
||||
}
|
||||
else
|
||||
throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
|
||||
+ ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
String attr_name;
|
||||
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[1].column.get()))
|
||||
{
|
||||
attr_name = name_col->getValue<String>();
|
||||
}
|
||||
else
|
||||
throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
|
||||
+ ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
if (!WhichDataType(arguments[2].type).isUInt64() &&
|
||||
!isTuple(arguments[2].type))
|
||||
throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
|
||||
+ ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
auto dict = helper.getDictionary(dict_name);
|
||||
const DictionaryStructure & structure = dict->getStructure();
|
||||
|
||||
for (const auto idx : ext::range(0, structure.attributes.size()))
|
||||
{
|
||||
const DictionaryAttribute & attribute = structure.attributes[idx];
|
||||
if (attribute.name == attr_name)
|
||||
else
|
||||
{
|
||||
auto arg_type = arguments[3].type;
|
||||
WhichDataType dt = attribute.type;
|
||||
|
||||
if ((arg_type->getTypeId() != dt.idx) || (dt.isStringOrFixedString() && !isString(arg_type)))
|
||||
throw Exception{"Illegal type " + arg_type->getName() + " of fourth argument of function " + getName() +
|
||||
", must be " + getTypeName(dt.idx) + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
switch (dt.idx)
|
||||
{
|
||||
case TypeIndex::String:
|
||||
@ -685,15 +613,14 @@ private:
|
||||
default:
|
||||
throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE);
|
||||
}
|
||||
|
||||
return attribute.type;
|
||||
}
|
||||
|
||||
return attribute.type;
|
||||
}
|
||||
|
||||
throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
|
||||
}
|
||||
|
||||
bool isDeterministic() const override { return false; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
return impl->executeImpl(arguments, result_type, input_rows_count);
|
||||
@ -1004,27 +931,4 @@ private:
|
||||
mutable FunctionDictHelper helper;
|
||||
};
|
||||
|
||||
|
||||
/** Returns the values of `column` as a PaddedPODArray<T>.
  *
  * If `column` is a non-constant ColumnVector<T>, a reference to its own data is returned.
  * Otherwise the column is converted to a full column, every value is read with getUInt()
  * and written into `backup_storage`, and a reference to `backup_storage` is returned.
  *
  * The returned reference refers either to `column` or to `backup_storage`,
  * so both must stay alive while the result is in use.
  */
template <typename T>
|
||||
static const PaddedPODArray<T> & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray<T> & backup_storage)
|
||||
{
|
||||
// Fast path: a non-constant ColumnVector<T> already stores values of type T, so its data is returned as is.
if (!isColumnConst(column))
|
||||
{
|
||||
if (const auto vector_col = checkAndGetColumn<ColumnVector<T>>(&column))
|
||||
{
|
||||
return vector_col->getData();
|
||||
}
|
||||
}
|
||||
|
||||
// Slow path: materialize a constant column (a non-constant column of another type also ends up here).
const auto full_column = column.convertToFullColumnIfConst();
|
||||
|
||||
// With type conversion and const columns we need to use backup storage here
|
||||
const auto size = full_column->size();
|
||||
backup_storage.resize(size);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
// NOTE(review): values are always read through getUInt() regardless of T — confirm this is intended for signed T.
backup_storage[i] = full_column->getUInt(i);
|
||||
|
||||
return backup_storage;
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user