diff --git a/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 6a734a2f7ab..23d28bfe6ec 100644 --- a/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -48,7 +48,8 @@ ColumnPtr ComplexKeyHashedDictionary::getColumn( const auto & attribute = getAttribute(attribute_name); /// TODO: Check that attribute type is same as result type - + /// TODO: Check if const will work as expected + auto size = key_columns.front()->size(); auto type_call = [&](const auto &dictionary_attribute_type) @@ -433,8 +434,8 @@ ComplexKeyHashedDictionary::createAttributeWithType(const AttributeUnderlyingTyp auto type_call = [&](const auto &dictionary_attribute_type) { using Type = std::decay_t; - using AttributType = typename Type::AttributeType; - createAttributeImpl(attr, null_value); + using AttributeType = typename Type::AttributeType; + createAttributeImpl(attr, null_value); }; callOnDictionaryAttributeType(type, type_call); diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 0fa5e88ab3b..58e54931dee 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -116,12 +116,12 @@ ColumnPtr FlatDictionary::getColumn( ColumnPtr result; PaddedPODArray backup_storage; - - const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage); - + const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage); + const auto & attribute = getAttribute(attribute_name); /// TODO: Check that attribute type is same as result type + /// TODO: Check if const will work as expected auto type_call = [&](const auto &dictionary_attribute_type) { @@ -266,7 +266,7 @@ ColumnUInt8::Ptr FlatDictionary::has(const Columns & key_columns, const DataType assert(!key_columns.empty()); PaddedPODArray backup_storage; - const auto& ids = getColumnDataAsIdendifiers(*key_columns.front(), backup_storage); + const auto& ids = getColumnDataAsPaddedPODArray(this, key_columns.front(), backup_storage); auto result = ColumnUInt8::create(ext::size(ids)); auto& out = result->getData(); @@ -472,8 +472,8 @@ FlatDictionary::Attribute FlatDictionary::createAttributeWithType(const Attribut auto type_call = [&](const auto &dictionary_attribute_type) { using Type = std::decay_t; - using AttributType = typename Type::AttributeType; - createAttributeImpl(attr, null_value); + using AttributeType = typename Type::AttributeType; + createAttributeImpl(attr, null_value); }; callOnDictionaryAttributeType(type, type_call); @@ -595,29 +595,6 @@ const FlatDictionary::Attribute & FlatDictionary::getAttribute(const std::string return attributes[it->second]; } -const PaddedPODArray & FlatDictionary::getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray & backup_storage) const -{ - - if (const auto *id_col = checkAndGetColumn(&column)) - { - return id_col->getData(); - } - else if (const auto *id_col_const = checkAndGetColumnConst(&column)) - { - const auto full_column = id_col_const->convertToFullColumnIfConst(); - const auto size = full_column->size(); - backup_storage.resize(size); - for (size_t i = 0; i < size; ++i) - backup_storage[i] = full_column->getUInt(i); - - return backup_storage; - } - else - throw Exception{"Identifier column must be UInt64", ErrorCodes::ILLEGAL_COLUMN}; - - return backup_storage; -} - PaddedPODArray FlatDictionary::getIds() const { const auto ids_count = ext::size(loaded_ids); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index af211f2edda..72da13b4cd4 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -78,7 +78,6 @@ public: const DataTypes & key_types, const ColumnPtr default_untyped) const override; - ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; @@ -90,7 +89,6 @@ private: struct Attribute final { AttributeUnderlyingType type; - // bool is_array; std::variant< UInt8, @@ -127,7 +125,6 @@ private: ContainerType> arrays; - std::optional> array_offsets; std::unique_ptr string_arena; }; @@ -163,8 +160,6 @@ private: template void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray & out) const; - const PaddedPODArray & getColumnDataAsIdendifiers(const IColumn & column, PaddedPODArray & backup_storage) const; - PaddedPODArray getIds() const; const DictionaryStructure dict_struct; diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index ce613a26020..c619e85f523 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -155,6 +155,7 @@ struct IDictionary : IDictionaryBase virtual void toParent(const PaddedPODArray & ids, PaddedPODArray & out) const = 0; + /// TODO: Rewrite /// Methods for hierarchy. virtual void isInVectorVector( @@ -196,4 +197,34 @@ inline void checkAttributeType(const IDictionaryBase * dictionary, const std::st attribute_name, toString(attribute_type), toString(to)}; } +template +static const PaddedPODArray & +getColumnDataAsPaddedPODArray(const IDictionaryBase * dictionary, const ColumnPtr column, PaddedPODArray & backup_storage) +{ + bool is_const_column = isColumnConst(*column); + auto full_column = column->convertToFullColumnIfConst(); + auto vector_col = checkAndGetColumn>(full_column.get()); + + if (!vector_col) + { + throw Exception{ + ErrorCodes::TYPE_MISMATCH, + "{}: type mismatch: column has wrong type expected {}", + dictionary->getDictionaryID().getNameForLogs(), + "" /* TODO: Type name*/}; + } + + if (is_const_column) + { + // With type conversion and const columns we need to use backup storage here + auto & data = vector_col->getData(); + backup_storage.assign(data); + + return backup_storage; + } + else + { + return vector_col->getData(); + } +} } diff --git a/src/Dictionaries/RangeDictionaryBlockInputStream.h b/src/Dictionaries/RangeDictionaryBlockInputStream.h index a2353051e5d..aeb16f389b4 100644 --- a/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -37,26 +37,6 @@ protected: Block getBlock(size_t start, size_t length) const override; private: - template - using DictionaryGetter = void (DictionaryType::*)( - const std::string &, const PaddedPODArray &, const PaddedPODArray &, PaddedPODArray &) const; - - template - using DictionaryDecimalGetter = void (DictionaryType::*)( - const std::string &, const PaddedPODArray &, const PaddedPODArray &, DecimalPaddedPODArray &) const; - - template - ColumnPtr getColumnFromAttribute( - Getter getter, - const PaddedPODArray & ids_to_fill, - const PaddedPODArray & dates, - const DictionaryAttribute & attribute, - const DictionaryType & concrete_dictionary) const; - ColumnPtr getColumnFromAttributeString( - const PaddedPODArray & ids_to_fill, - const PaddedPODArray & dates, - const DictionaryAttribute & attribute, - const DictionaryType & concrete_dictionary) const; template ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) const; @@ -122,41 +102,6 @@ Block RangeDictionaryBlockInputStream::getBlock( return fillBlock(block_ids, block_start_dates, block_end_dates); } -template -template -ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttribute( - Getter getter, - const PaddedPODArray & ids_to_fill, - const PaddedPODArray & dates, - const DictionaryAttribute & attribute, - const DictionaryType & concrete_dictionary) const -{ - if constexpr (IsDecimalNumber) - { - auto column = ColumnDecimal::create(ids_to_fill.size(), 0); /// NOTE: There's wrong scale here, but it's unused. - (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column->getData()); - return column; - } - else - { - auto column_vector = ColumnVector::create(ids_to_fill.size()); - (concrete_dictionary.*getter)(attribute.name, ids_to_fill, dates, column_vector->getData()); - return column_vector; - } -} - -template -ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttributeString( - const PaddedPODArray & ids_to_fill, - const PaddedPODArray & dates, - const DictionaryAttribute & attribute, - const DictionaryType & concrete_dictionary) const -{ - auto column_string = ColumnString::create(); - concrete_dictionary.getString(attribute.name, ids_to_fill, dates, column_string.get()); - return column_string; -} - template template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromPODArray(const PaddedPODArray & array) const @@ -168,7 +113,6 @@ ColumnPtr RangeDictionaryBlockInputStream::getCo return column_vector; } - template template void RangeDictionaryBlockInputStream::addSpecialColumn( @@ -216,68 +160,24 @@ Block RangeDictionaryBlockInputStream::fillBlock std::unordered_set names(column_names.begin(), column_names.end()); addSpecialColumn(structure.id, std::make_shared(), "ID", names, ids_to_fill, columns); + auto ids_column = columns.back().column; addSpecialColumn(structure.range_min, structure.range_max->type, "Range Start", names, block_start_dates, columns); addSpecialColumn(structure.range_max, structure.range_max->type, "Range End", names, block_end_dates, columns); auto date_key = makeDateKey(block_start_dates, block_end_dates); + auto date_column = getColumnFromPODArray(date_key); for (const auto idx : ext::range(0, structure.attributes.size())) { const DictionaryAttribute & attribute = structure.attributes[idx]; if (names.find(attribute.name) != names.end()) { - ColumnPtr column; -#define GET_COLUMN_FORM_ATTRIBUTE(TYPE) \ - column = getColumnFromAttribute(&DictionaryType::get##TYPE, ids_to_fill, date_key, attribute, *dictionary) - switch (attribute.underlying_type) - { - case AttributeUnderlyingType::utUInt8: - GET_COLUMN_FORM_ATTRIBUTE(UInt8); - break; - case AttributeUnderlyingType::utUInt16: - GET_COLUMN_FORM_ATTRIBUTE(UInt16); - break; - case AttributeUnderlyingType::utUInt32: - GET_COLUMN_FORM_ATTRIBUTE(UInt32); - break; - case AttributeUnderlyingType::utUInt64: - GET_COLUMN_FORM_ATTRIBUTE(UInt64); - break; - case AttributeUnderlyingType::utUInt128: - GET_COLUMN_FORM_ATTRIBUTE(UInt128); - break; - case AttributeUnderlyingType::utInt8: - GET_COLUMN_FORM_ATTRIBUTE(Int8); - break; - case AttributeUnderlyingType::utInt16: - GET_COLUMN_FORM_ATTRIBUTE(Int16); - break; - case AttributeUnderlyingType::utInt32: - GET_COLUMN_FORM_ATTRIBUTE(Int32); - break; - case AttributeUnderlyingType::utInt64: - GET_COLUMN_FORM_ATTRIBUTE(Int64); - break; - case AttributeUnderlyingType::utFloat32: - GET_COLUMN_FORM_ATTRIBUTE(Float32); - break; - case AttributeUnderlyingType::utFloat64: - GET_COLUMN_FORM_ATTRIBUTE(Float64); - break; - case AttributeUnderlyingType::utDecimal32: - GET_COLUMN_FORM_ATTRIBUTE(Decimal32); - break; - case AttributeUnderlyingType::utDecimal64: - GET_COLUMN_FORM_ATTRIBUTE(Decimal64); - break; - case AttributeUnderlyingType::utDecimal128: - GET_COLUMN_FORM_ATTRIBUTE(Decimal128); - break; - case AttributeUnderlyingType::utString: - column = getColumnFromAttributeString(ids_to_fill, date_key, attribute, *dictionary); - break; - } -#undef GET_COLUMN_FORM_ATTRIBUTE + ColumnPtr column = dictionary->getColumn( + attribute.name, + attribute.type, + {ids_column, date_column}, + {std::make_shared(), structure.range_max->type}, + nullptr); columns.emplace_back(column, attribute.type, attribute.name); } } diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index eeed581c6f4..57299da43b8 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -50,6 +50,7 @@ namespace ErrorCodes extern const int DICTIONARY_IS_EMPTY; extern const int TYPE_MISMATCH; extern const int UNSUPPORTED_METHOD; + extern const int NOT_IMPLEMENTED; } bool RangeHashedDictionary::Range::isCorrectDate(const RangeStorageType & date) @@ -85,66 +86,163 @@ RangeHashedDictionary::RangeHashedDictionary( calculateBytesAllocated(); } - -#define DECLARE_MULTIPLE_GETTER(TYPE) \ - void RangeHashedDictionary::get##TYPE( \ - const std::string & attribute_name, \ - const PaddedPODArray & ids, \ - const PaddedPODArray & dates, \ - ResultArrayType & out) const \ - { \ - const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::ut##TYPE); \ - getItems(attribute, ids, dates, out); \ - } -DECLARE_MULTIPLE_GETTER(UInt8) -DECLARE_MULTIPLE_GETTER(UInt16) -DECLARE_MULTIPLE_GETTER(UInt32) -DECLARE_MULTIPLE_GETTER(UInt64) -DECLARE_MULTIPLE_GETTER(UInt128) -DECLARE_MULTIPLE_GETTER(Int8) -DECLARE_MULTIPLE_GETTER(Int16) -DECLARE_MULTIPLE_GETTER(Int32) -DECLARE_MULTIPLE_GETTER(Int64) -DECLARE_MULTIPLE_GETTER(Float32) -DECLARE_MULTIPLE_GETTER(Float64) -DECLARE_MULTIPLE_GETTER(Decimal32) -DECLARE_MULTIPLE_GETTER(Decimal64) -DECLARE_MULTIPLE_GETTER(Decimal128) -#undef DECLARE_MULTIPLE_GETTER - -void RangeHashedDictionary::getString( +ColumnPtr RangeHashedDictionary::getColumn( const std::string & attribute_name, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - ColumnString * out) const + const DataTypePtr &, + const Columns & key_columns, + const DataTypes &, + const ColumnPtr default_untyped) const { - const auto & attribute = getAttributeWithType(attribute_name, AttributeUnderlyingType::utString); - const auto & attr = *std::get>(attribute.maps); - const auto & null_value = std::get(attribute.null_values); + /// TODO: Validate input types - for (const auto i : ext::range(0, ids.size())) + ColumnPtr result; + + const auto & attribute = getAttribute(attribute_name); + + /// TODO: Check that attribute type is same as result type + + auto size = key_columns.front()->size(); + + auto type_call = [&](const auto &dictionary_attribute_type) { - const auto * it = attr.find(ids[i]); - if (it) + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + + if constexpr (std::is_same_v) { - const auto date = dates[i]; - const auto & ranges_and_values = it->getMapped(); - const auto val_it - = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value & v) - { - return v.range.contains(date); - }); + auto column_string = ColumnString::create(); + auto out = column_string.get(); - const auto string_ref = val_it != std::end(ranges_and_values) ? val_it->value : StringRef{null_value}; - out->insertData(string_ref.data, string_ref.size); + if (default_untyped != nullptr) + { + if (const auto default_col = checkAndGetColumn(*default_untyped)) + { + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t row) { return default_col->getDataAt(row); }); + } + else if (const auto default_col_const = checkAndGetColumnConst(default_untyped.get())) + { + const auto & def = default_col_const->template getValue(); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return def; }); + } + } + else + { + const auto & null_value = std::get(attribute.null_values); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t, const StringRef value) { out->insertData(value.data, value.size); }, + [&](const size_t) { return null_value; }); + } + + result = std::move(column_string); } - else - out->insertData(null_value.data(), null_value.size()); - } + else if constexpr (IsNumber) + { + auto column = ColumnVector::create(size); + auto& out = column->getData(); - query_count.fetch_add(ids.size(), std::memory_order_relaxed); + if (default_untyped != nullptr) + { + if (const auto default_col = checkAndGetColumn>(*default_untyped)) + { + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t row) { return default_col->getData()[row]; } + ); + } + else if (const auto default_col_const = checkAndGetColumnConst>(default_untyped.get())) + { + const auto & def = default_col_const->template getValue(); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t) { return def; } + ); + } + } + else + { + const auto null_value = std::get(attribute.null_values); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t) { return null_value; }); + } + + result = std::move(column); + } + else if constexpr (IsDecimalNumber) + { + // auto scale = getDecimalScale(*attribute.type); + auto column = ColumnDecimal::create(size, 0); + auto& out = column->getData(); + + if (default_untyped != nullptr) + { + if (const auto default_col = checkAndGetColumn>(*default_untyped)) + { + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t row) { return default_col->getData()[row]; } + ); + } + else if (const auto default_col_const = checkAndGetColumnConst>(default_untyped.get())) + { + const auto & def = default_col_const->template getValue(); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t) { return def; } + ); + } + } + else + { + const auto null_value = std::get(attribute.null_values); + + getItemsImpl( + attribute, + key_columns, + [&](const size_t row, const auto value) { return out[row] = value; }, + [&](const size_t) { return null_value; } + ); + } + + result = std::move(column); + } + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + return result; } +ColumnUInt8::Ptr RangeHashedDictionary::has(const Columns &, const DataTypes &) const +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Has not supported", getDictionaryID().getNameForLogs()); +} void RangeHashedDictionary::createAttributes() { @@ -220,66 +318,84 @@ void RangeHashedDictionary::addAttributeSize(const Attribute & attribute) bucket_count = map_ref->getBufferSizeInCells(); } +template <> +void RangeHashedDictionary::addAttributeSize(const Attribute & attribute) +{ + const auto & map_ref = std::get>(attribute.maps); + bytes_allocated += sizeof(Collection) + map_ref->getBufferSizeInBytes(); + bucket_count = map_ref->getBufferSizeInCells(); + bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); +} + void RangeHashedDictionary::calculateBytesAllocated() { bytes_allocated += attributes.size() * sizeof(attributes.front()); for (const auto & attribute : attributes) { - switch (attribute.type) + auto type_call = [&](const auto & dictionary_attribute_type) { - case AttributeUnderlyingType::utUInt8: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utUInt16: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utUInt32: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utUInt64: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utUInt128: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utInt8: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utInt16: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utInt32: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utInt64: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utFloat32: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utFloat64: - addAttributeSize(attribute); - break; + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + addAttributeSize(attribute); + }; - case AttributeUnderlyingType::utDecimal32: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utDecimal64: - addAttributeSize(attribute); - break; - case AttributeUnderlyingType::utDecimal128: - addAttributeSize(attribute); - break; + callOnDictionaryAttributeType(attribute.type, type_call); - case AttributeUnderlyingType::utString: - { - addAttributeSize(attribute); - bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); + // switch (attribute.type) + // { + // case AttributeUnderlyingType::utUInt8: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utUInt16: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utUInt32: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utUInt64: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utUInt128: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utInt8: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utInt16: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utInt32: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utInt64: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utFloat32: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utFloat64: + // addAttributeSize(attribute); + // break; - break; - } - } + // case AttributeUnderlyingType::utDecimal32: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utDecimal64: + // addAttributeSize(attribute); + // break; + // case AttributeUnderlyingType::utDecimal128: + // addAttributeSize(attribute); + // break; + + // case AttributeUnderlyingType::utString: + // { + // addAttributeSize(attribute); + // bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); + + // break; + // } + // } } } @@ -290,113 +406,54 @@ void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Fie attribute.maps = std::make_unique>(); } +template <> +void RangeHashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value) +{ + attribute.string_arena = std::make_unique(); + const String & string = null_value.get(); + const char * string_in_arena = attribute.string_arena->insert(string.data(), string.size()); + attribute.null_values.emplace(string_in_arena, string.size()); + attribute.maps = std::make_unique>(); +} + RangeHashedDictionary::Attribute RangeHashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { Attribute attr{type, {}, {}, {}}; - switch (type) + auto type_call = [&](const auto &dictionary_attribute_type) { - case AttributeUnderlyingType::utUInt8: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utUInt16: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utUInt32: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utUInt64: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utUInt128: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utInt8: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utInt16: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utInt32: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utInt64: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utFloat32: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utFloat64: - createAttributeImpl(attr, null_value); - break; + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + createAttributeImpl(attr, null_value); + }; - case AttributeUnderlyingType::utDecimal32: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utDecimal64: - createAttributeImpl(attr, null_value); - break; - case AttributeUnderlyingType::utDecimal128: - createAttributeImpl(attr, null_value); - break; - - case AttributeUnderlyingType::utString: - { - attr.null_values = null_value.get(); - attr.maps = std::make_unique>(); - attr.string_arena = std::make_unique(); - break; - } - } + callOnDictionaryAttributeType(type, type_call); return attr; } - -template -void RangeHashedDictionary::getItems( - const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - PaddedPODArray & out) const -{ - if (false) {} // NOLINT -#define DISPATCH(TYPE) else if (attribute.type == AttributeUnderlyingType::ut##TYPE) getItemsImpl(attribute, ids, dates, out); - DISPATCH(UInt8) - DISPATCH(UInt16) - DISPATCH(UInt32) - DISPATCH(UInt64) - DISPATCH(UInt128) - DISPATCH(Int8) - DISPATCH(Int16) - DISPATCH(Int32) - DISPATCH(Int64) - DISPATCH(Float32) - DISPATCH(Float64) - DISPATCH(Decimal32) - DISPATCH(Decimal64) - DISPATCH(Decimal128) -#undef DISPATCH - else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR); -} - -template +template void RangeHashedDictionary::getItemsImpl( const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - PaddedPODArray & out) const + const Columns & key_columns, + ValueSetter && set_value, + DefaultGetter && get_default) const { - const auto & attr = *std::get>(attribute.maps); - const auto null_value = std::get(attribute.null_values); + PaddedPODArray key_backup_storage; + PaddedPODArray range_backup_storage; - for (const auto i : ext::range(0, ids.size())) + const PaddedPODArray & ids = getColumnDataAsPaddedPODArray(this, key_columns[0], key_backup_storage); + const PaddedPODArray & dates = getColumnDataAsPaddedPODArray(this, key_columns[1], range_backup_storage); + + const auto & attr = *std::get>(attribute.maps); + + for (const auto row : ext::range(0, ids.size())) { - const auto it = attr.find(ids[i]); + const auto it = attr.find(ids[row]); if (it) { - const auto date = dates[i]; + const auto date = dates[row]; const auto & ranges_and_values = it->getMapped(); const auto val_it = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value & v) @@ -404,11 +461,11 @@ void RangeHashedDictionary::getItemsImpl( return v.range.contains(date); }); - out[i] = static_cast(val_it != std::end(ranges_and_values) ? val_it->value : null_value); // NOLINT + set_value(row, static_cast(val_it != std::end(ranges_and_values) ? val_it->value : get_default(row))); // NOLINT } else { - out[i] = static_cast(null_value); // NOLINT + set_value(row, get_default(row)); } } diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 46ae0390b6a..5588bdb3ced 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -52,38 +52,18 @@ public: return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; } - typedef Int64 RangeStorageType; + DictionaryIdentifierType getIdentifierType() const override { return DictionaryIdentifierType::range; } - template - using ResultArrayType = std::conditional_t, DecimalPaddedPODArray, PaddedPODArray>; + ColumnPtr getColumn( + const std::string& attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnPtr default_untyped) const override; -#define DECLARE_MULTIPLE_GETTER(TYPE) \ - void get##TYPE( \ - const std::string & attribute_name, \ - const PaddedPODArray & ids, \ - const PaddedPODArray & dates, \ - ResultArrayType & out) const; - DECLARE_MULTIPLE_GETTER(UInt8) - DECLARE_MULTIPLE_GETTER(UInt16) - DECLARE_MULTIPLE_GETTER(UInt32) - DECLARE_MULTIPLE_GETTER(UInt64) - DECLARE_MULTIPLE_GETTER(UInt128) - DECLARE_MULTIPLE_GETTER(Int8) - DECLARE_MULTIPLE_GETTER(Int16) - DECLARE_MULTIPLE_GETTER(Int32) - DECLARE_MULTIPLE_GETTER(Int64) - DECLARE_MULTIPLE_GETTER(Float32) - DECLARE_MULTIPLE_GETTER(Float64) - DECLARE_MULTIPLE_GETTER(Decimal32) - DECLARE_MULTIPLE_GETTER(Decimal64) - DECLARE_MULTIPLE_GETTER(Decimal128) -#undef DECLARE_MULTIPLE_GETTER + ColumnUInt8::Ptr has(const Columns & key_columns, const DataTypes & key_types) const override; - void getString( - const std::string & attribute_name, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - ColumnString * out) const; + using RangeStorageType = Int64; BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override; @@ -130,7 +110,7 @@ private: Decimal128, Float32, Float64, - String> + StringRef> null_values; std::variant< Ptr, @@ -166,21 +146,12 @@ private: Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); - - template - void getItems( - const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - PaddedPODArray & out) const; - - template + template void getItemsImpl( const Attribute & attribute, - const PaddedPODArray & ids, - const PaddedPODArray & dates, - PaddedPODArray & out) const; - + const Columns & key_columns, + ValueSetter && set_value, + DefaultGetter && get_default) const; template void setAttributeValueImpl(Attribute & attribute, const Key id, const Range & range, const T value); diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp index 3d536630d7a..dbdbaf0e22a 100644 --- a/src/Functions/FunctionsExternalDictionaries.cpp +++ b/src/Functions/FunctionsExternalDictionaries.cpp @@ -38,8 +38,8 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 06c8cd5d650..5b20a4dfc08 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -104,8 +104,8 @@ public: if (!sample_columns) return false; - if (sample_columns.columns() != 3 && sample_columns.columns() != 4) - throw Exception{"Function dictGet... takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + if (sample_columns.columns() < 3) + throw Exception{"Wrong arguments count", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; const auto * dict_name_col = checkAndGetColumnConst(sample_columns.getByPosition(0).column.get()); if (!dict_name_col) @@ -121,7 +121,6 @@ public: private: const Context & context; const ExternalDictionariesLoader & external_loader; - mutable std::shared_ptr dictionary; /// Access cannot be not granted, since in this case checkAccess() will throw and access_checked will not be updated. std::atomic access_checked = false; }; @@ -178,19 +177,26 @@ private: auto dictionary = helper.getDictionary(arguments[0]); auto dictionary_identifier_type = dictionary->getIdentifierType(); - const auto id_col_untyped = arguments[1].column; + const ColumnWithTypeAndName & key_column_with_type = arguments[1]; + const auto key_column = key_column_with_type.column; if (dictionary_identifier_type == DictionaryIdentifierType::simple) { - return dictionary->has({ id_col_untyped }, { std::make_shared() }); + return dictionary->has({key_column}, {std::make_shared()}); } else if (dictionary_identifier_type == DictionaryIdentifierType::complex) { - /// TODO: Check if column is tuple and pass - return nullptr; + /// Functions in external dictionaries_loader only support full-value (not constant) columns with keys. + ColumnPtr key_column_full = key_column_with_type.column->convertToFullColumnIfConst(); + + const auto & key_columns = typeid_cast(*key_column_full).getColumnsCopy(); + const auto & key_types = static_cast(*key_column_with_type.type).getElements(); + + return dictionary->has(key_columns, key_types); } else { + /// TODO: Add support for range return nullptr; } } @@ -198,13 +204,6 @@ private: mutable FunctionDictHelper helper; }; - -/** For ColumnVector. Either returns a reference to internal data, - * or convert it to T type, stores the result in backup_storage and returns a reference to it. - */ -template -static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray & backup_storage); - enum class DictionaryGetFunctionType { withoutDefault, @@ -232,12 +231,9 @@ public: String getName() const override { return name; } private: - size_t getNumberOfArguments() const override { - /// TODO: Check if ranged dictionary is working - return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault ? 0 : 4; - } + size_t getNumberOfArguments() const override { return 0; } - bool isVariadic() const override { return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault; } + bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const final { return true; } @@ -245,38 +241,8 @@ private: ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const DataTypes &) const override { - if (!isString(arguments[0])) - throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() - + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - if (!isString(arguments[1])) - throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() - + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - if (!WhichDataType(arguments[2]).isUInt64() && - !isTuple(arguments[2])) - throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName() - + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withDefault) - { - if (!checkAndGetDataType(arguments[3].get())) - throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName() - + ", must be " + TypeName::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - } - else - { - /// This is for the case of range dictionaries_loader. - if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger()) - { - throw Exception{"Illegal type " + arguments[3]->getName() + - " of fourth argument of function " + getName() + - " must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN}; - } - } - if constexpr (IsDataTypeDecimal) return std::make_shared(DataType::maxPrecision(), decimal_scale); else @@ -285,11 +251,21 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + if (arguments.size() < 3) + throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + if (input_rows_count == 0) return result_type->createColumn(); - auto dictionary = helper.getDictionary(arguments[0]); - ColumnPtr res; + const auto * dictionary_name_col = checkAndGetColumnConst(arguments[0].column.get()); + if (!dictionary_name_col) + throw Exception{"First argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN}; + + String dictionary_name = dictionary_name_col->getValue(); + + auto dictionary = helper.getDictionary(dictionary_name); + if (!dictionary) + throw Exception("First argument of function " + getName() + " does not name a dictionary", ErrorCodes::ILLEGAL_COLUMN); const auto * attr_name_col = checkAndGetColumnConst(arguments[1].column.get()); if (!attr_name_col) @@ -297,21 +273,57 @@ private: String attr_name = attr_name_col->getValue(); - const ColumnWithTypeAndName & key_col_with_type = arguments[2]; - const auto key_column = key_col_with_type.column; + /// TODO: Use accurateCast if argument is integer + if (!WhichDataType(arguments[2].type).isUInt64() && !isTuple(arguments[2].type)) + throw Exception{ + "Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() + + ", must be UInt64 or tuple(...).", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; auto dictionary_identifier_type = dictionary->getIdentifierType(); - + + size_t current_arguments_index = 3; + + /// TODO: Add more information to error messages + + ColumnPtr range_col = nullptr; + DataTypePtr range_col_type = nullptr; + + if (dictionary_identifier_type == DictionaryIdentifierType::range) + { + if (current_arguments_index >= arguments.size()) + throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + range_col = arguments[current_arguments_index].column; + range_col_type = arguments[current_arguments_index].type; + + if (!(range_col_type->isValueRepresentedByInteger() && range_col_type->getSizeOfValueInMemory() <= sizeof(Int64))) + throw Exception{ + "Illegal type " + range_col_type->getName() + " of fourth argument of function " + getName() + + " must be convertible to Int64.", + ErrorCodes::ILLEGAL_COLUMN}; + + ++current_arguments_index; + } + ColumnPtr default_col = nullptr; if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault) { - default_col = arguments[3].column; + if (current_arguments_index >= arguments.size()) + throw Exception{"Wrong argument count for function test " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + + default_col = arguments[current_arguments_index].column; } + ColumnPtr res; + + const ColumnWithTypeAndName & key_col_with_type = arguments[2]; + const auto key_column = key_col_with_type.column; + if (dictionary_identifier_type == DictionaryIdentifierType::simple) { - res = dictionary->getColumn(attr_name, result_type, { key_column }, { std::make_shared() }, default_col); + res = dictionary->getColumn(attr_name, result_type, {key_column}, {std::make_shared()}, default_col); } else if (dictionary_identifier_type == DictionaryIdentifierType::complex) { @@ -323,10 +335,13 @@ private: res = dictionary->getColumn(attr_name, result_type, key_columns, key_types, default_col); } - else + else if (dictionary_identifier_type == DictionaryIdentifierType::range) { - res = nullptr; + res = dictionary->getColumn( + attr_name, result_type, {key_column, range_col}, {std::make_shared(), range_col_type}, default_col); } + else + throw Exception{"Unknown dictionary identifier type", ErrorCodes::BAD_ARGUMENTS}; return res; } @@ -416,10 +431,11 @@ using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault class FunctionDictGetNoType final : public IFunction { public: - static constexpr auto name = "dictGet"; + static constexpr auto name = dictionary_get_function_type == DictionaryGetFunctionType::withDefault ? "dictGetOrDefault" : "dictGet"; static FunctionPtr create(const Context & context) { @@ -437,6 +453,8 @@ private: bool useDefaultImplementationForConstants() const final { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } + bool isDeterministic() const override { return false; } + bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override { return helper.isDictGetFunctionInjective(sample_columns); @@ -444,8 +462,8 @@ private: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() != 3 && arguments.size() != 4) - throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + if (arguments.size() < 3) + throw Exception{"Wrong argument count for function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; String dict_name; if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) @@ -465,29 +483,20 @@ private: throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - if (!WhichDataType(arguments[2].type).isUInt64() && - !isTuple(arguments[2].type)) - throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() - + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - if (arguments.size() == 4) - { - const auto * range_argument = arguments[3].type.get(); - if (!(range_argument->isValueRepresentedByInteger() && - range_argument->getSizeOfValueInMemory() <= sizeof(Int64))) - throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName() - + ", must be convertible to " + TypeName::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - } - auto dict = helper.getDictionary(dict_name); const DictionaryStructure & structure = dict->getStructure(); - for (const auto idx : ext::range(0, structure.attributes.size())) + for (const auto& attribute : structure.attributes) { - const DictionaryAttribute & attribute = structure.attributes[idx]; - if (attribute.name == attr_name) + if (attribute.name != attr_name) + { + continue; + } + + WhichDataType dt = attribute.type; + + if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault) { - WhichDataType dt = attribute.type; switch (dt.idx) { case TypeIndex::String: @@ -545,90 +554,9 @@ private: default: throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE); } - return attribute.type; } - } - throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS}; - } - - bool isDeterministic() const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - return impl->executeImpl(arguments, result_type, input_rows_count); - } - - const Context & context; - mutable FunctionDictHelper helper; - mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info -}; - - -class FunctionDictGetNoTypeOrDefault final : public IFunction -{ -public: - static constexpr auto name = "dictGetOrDefault"; - - static FunctionPtr create(const Context & context) - { - return std::make_shared(context); - } - - explicit FunctionDictGetNoTypeOrDefault(const Context & context_) : context(context_), helper(context_) {} - - String getName() const override { return name; } - -private: - size_t getNumberOfArguments() const override { return 4; } - - bool useDefaultImplementationForConstants() const final { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; } - - bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override - { - return helper.isDictGetFunctionInjective(sample_columns); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - String dict_name; - if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) - { - dict_name = name_col->getValue(); - } - else - throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() - + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - String attr_name; - if (const auto * name_col = checkAndGetColumnConst(arguments[1].column.get())) - { - attr_name = name_col->getValue(); - } - else - throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() - + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - if (!WhichDataType(arguments[2].type).isUInt64() && - !isTuple(arguments[2].type)) - throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() - + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - - auto dict = helper.getDictionary(dict_name); - const DictionaryStructure & structure = dict->getStructure(); - - for (const auto idx : ext::range(0, structure.attributes.size())) - { - const DictionaryAttribute & attribute = structure.attributes[idx]; - if (attribute.name == attr_name) + else { - auto arg_type = arguments[3].type; - WhichDataType dt = attribute.type; - - if ((arg_type->getTypeId() != dt.idx) || (dt.isStringOrFixedString() && !isString(arg_type))) - throw Exception{"Illegal type " + arg_type->getName() + " of fourth argument of function " + getName() + - ", must be " + getTypeName(dt.idx) + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; - switch (dt.idx) { case TypeIndex::String: @@ -685,15 +613,14 @@ private: default: throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE); } - - return attribute.type; } + + return attribute.type; } + throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS}; } - bool isDeterministic() const override { return false; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { return impl->executeImpl(arguments, result_type, input_rows_count); @@ -1004,27 +931,4 @@ private: mutable FunctionDictHelper helper; }; - -template -static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray & backup_storage) -{ - if (!isColumnConst(column)) - { - if (const auto vector_col = checkAndGetColumn>(&column)) - { - return vector_col->getData(); - } - } - - const auto full_column = column.convertToFullColumnIfConst(); - - // With type conversion and const columns we need to use backup storage here - const auto size = full_column->size(); - backup_storage.resize(size); - for (size_t i = 0; i < size; ++i) - backup_storage[i] = full_column->getUInt(i); - - return backup_storage; -} - }