#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(OS_LINUX) || defined(__FreeBSD__) #include #include #endif #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int UNSUPPORTED_METHOD; extern const int UNKNOWN_TYPE; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int TYPE_MISMATCH; extern const int ILLEGAL_COLUMN; extern const int BAD_ARGUMENTS; } /** Functions that use plug-ins (external) dictionaries_loader. * * Get the value of the attribute of the specified type. * dictGetType(dictionary, attribute, id), * Type - placeholder for the type name, any numeric and string types are currently supported. * The type must match the actual attribute type with which it was declared in the dictionary structure. * * Get an array of identifiers, consisting of the source and parents chain. * dictGetHierarchy(dictionary, id). * * Is the first identifier the child of the second. * dictIsIn(dictionary, child_id, parent_id). 
*/

/** Shared helper for the dict* functions below.
  * Resolves a dictionary name through DatabaseCatalog, fetches the dictionary from the
  * external dictionaries loader of the given context and performs the dictGet access check.
  */
class FunctionDictHelper
{
public:
    /// Keeps a reference to the context and to its external dictionaries loader.
    explicit FunctionDictHelper(const Context & context_) : context(context_), external_loader(context.getExternalDictionariesLoader()) {}

    /** Returns the dictionary with the given (possibly unresolved) name.
      * The access check is executed only while access_checked is false, i.e. on the first
      * successful call made through this helper; later calls skip checkAccess() even if
      * a different dictionary name is passed.
      */
    std::shared_ptr getDictionary(const String & dictionary_name)
    {
        String resolved_name = DatabaseCatalog::instance().resolveDictionaryName(dictionary_name);
        auto dict = external_loader.getDictionary(resolved_name);
        if (!access_checked)
        {
            context.checkAccess(AccessType::dictGet, dict->getDatabaseOrNoDatabaseTag(), dict->getDictionaryID().getTableName());
            access_checked = true;
        }
        return dict;
    }

    /** Same as above, taking the dictionary name from a constant column.
      * NOTE(review): the result of checkAndGetColumnConst is dereferenced without a null check here,
      * while isDictGetFunctionInjective checks the same kind of result before use - confirm that
      * callers always pass a constant string column.
      */
    std::shared_ptr getDictionary(const ColumnWithTypeAndName & column)
    {
        const auto * dict_name_col = checkAndGetColumnConst(column.column.get());
        return getDictionary(dict_name_col->getValue());
    }

    /** Tells whether dictGet for the (dictionary, attribute) pair named in sample_columns is injective.
      * Returns false for an empty block. Throws if the block does not have 3 or 4 columns
      * or if either of the first two columns is not a constant string.
      */
    bool isDictGetFunctionInjective(const Block & sample_columns)
    {
        /// Assume non-injective by default
        if (!sample_columns)
            return false;

        if (sample_columns.columns() != 3 && sample_columns.columns() != 4)
            throw Exception{"Function dictGet... takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

        const auto * dict_name_col = checkAndGetColumnConst(sample_columns.getByPosition(0).column.get());
        if (!dict_name_col)
            throw Exception{"First argument of function dictGet... must be a constant string", ErrorCodes::ILLEGAL_COLUMN};

        const auto * attr_name_col = checkAndGetColumnConst(sample_columns.getByPosition(1).column.get());
        if (!attr_name_col)
            throw Exception{"Second argument of function dictGet... must be a constant string", ErrorCodes::ILLEGAL_COLUMN};

        return getDictionary(dict_name_col->getValue())->isInjective(attr_name_col->getValue());
    }

private:
    const Context & context;
    const ExternalDictionariesLoader & external_loader;
    /// NOTE(review): not referenced anywhere in the visible part of this file.
    mutable std::shared_ptr dictionary;
    /// The flag can only become true after a successful check: if access is not granted,
    /// checkAccess() will throw and access_checked will not be updated.
    std::atomic access_checked = false;
};


/** dictHas(dictionary, key).
  * For every row produces a UInt8 value obtained from the dictionary's has() method.
  * The key is either a UInt64 identifier or a tuple (complex key).
  */
class FunctionDictHas final : public IFunction
{
public:
    static constexpr auto name = "dictHas";

    static FunctionPtr create(const Context & context)
    {
        return std::make_shared(context);
    }

    explicit FunctionDictHas(const Context & context_) : helper(context_) {}

    String getName() const override { return name; }

private:
    size_t getNumberOfArguments() const override { return 2; }

    bool useDefaultImplementationForConstants() const final { return true; }
    /// The dictionary name (argument 0) must be a constant.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }

    /// Validates argument types: a string, then UInt64 or a tuple.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isString(arguments[0]))
            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[1]).isUInt64() &&
            !isTuple(arguments[1]))
            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        return std::make_shared();
    }

    bool isDeterministic() const override { return false; }

    /// Tries every supported dictionary type in turn; the first dispatch that recognises
    /// the dictionary produces the result, otherwise UNKNOWN_TYPE is thrown.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        /** Do not require existence of the dictionary if the function is called for empty columns.
          * This is needed to allow successful query analysis on a server,
          *  that is the initiator of a distributed query,
          *  in the case when the function will be invoked for real data only at the remote servers.
          * This feature is controversial and implemented specially
          *  for backward compatibility with the case in Yandex Banner System.
          */
        if (input_rows_count == 0)
            return result_type->createColumn();

        auto dict = helper.getDictionary(arguments[0]);
        ColumnPtr res;

        if (!((res = executeDispatchSimple(arguments, dict))
            || (res = executeDispatchSimple(arguments, dict))
            || (res = executeDispatchSimple(arguments, dict))
            || (res = executeDispatchSimple(arguments, dict))
#if defined(OS_LINUX) || defined(__FreeBSD__)
            || (res = executeDispatchSimple(arguments, dict))
#endif
            || (res = executeDispatchComplex(arguments, dict))
            || (res = executeDispatchComplex(arguments, dict))
            || (res = executeDispatchComplex(arguments, dict))
#if defined(OS_LINUX) || defined(__FreeBSD__)
            || (res = executeDispatchComplex(arguments, dict))
#endif
#if !defined(ARCADIA_BUILD)
            || (res = executeDispatchComplex(arguments, dict))
#endif
            || (res = executeDispatchComplex(arguments, dict))
            || (res = executeDispatchComplex(arguments, dict))
            || (res = executeDispatchComplex(arguments, dict))))
            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};

        return res;
    }

    /** Handles dictionaries keyed by a single identifier.
      * Returns nullptr when dict_ptr is not of the dictionary type being tried,
      * so that the caller can go on to the next candidate.
      */
    template ColumnPtr executeDispatchSimple(
        const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const
    {
        const auto * dict = typeid_cast(dict_ptr.get());
        if (!dict)
            return nullptr;

        const auto * id_col_untyped = arguments[1].column.get();
        if (const auto * id_col = checkAndGetColumn(id_col_untyped))
        {
            const auto & ids = id_col->getData();
            auto out = ColumnUInt8::create(ext::size(ids));
            dict->has(ids, out->getData());
            return out;
        }
        else
            throw Exception{"Second argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};

        /// Unreachable: both branches above either return or throw.
        return nullptr;
    }

    /** Handles dictionaries with a complex (tuple) key.
      * Returns nullptr when dict_ptr is not of the dictionary type being tried.
      * Throws TYPE_MISMATCH when the key column does not pass the checkColumn test.
      */
    template ColumnPtr executeDispatchComplex(
        const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const
    {
        const auto * dict = typeid_cast(dict_ptr.get());
        if (!dict)
            return nullptr;

        const ColumnWithTypeAndName & key_col_with_type = arguments[1];
        const ColumnPtr & key_col = key_col_with_type.column;

        if (checkColumn(key_col.get()))
        {
            const auto & key_columns = assert_cast(*key_col).getColumnsCopy();
            const auto & key_types = static_cast(*key_col_with_type.type).getElements();

            /// One result value per row of the key column.
            auto out = ColumnUInt8::create(key_col_with_type.column->size());
            dict->has(key_columns, key_types, out->getData());
            return out;
        }
        else
            throw Exception{"Second argument of function " + getName() + " must be " + dict->getKeyDescription(), ErrorCodes::TYPE_MISMATCH};
    }

    mutable FunctionDictHelper helper;
};


/** For ColumnVector. Either returns a reference to internal data,
  *  or converts it to T type, stores the result in backup_storage and returns a reference to it.
  */
template static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray & backup_storage);

/// Selects between the dictGetT(...) and dictGetTOrDefault(...) flavours of FunctionDictGetImpl.
enum class DictionaryGetFunctionType
{
    withoutDefault,
    withDefault
};

/** Typed dictGet implementation (dictGetUInt8, dictGetStringOrDefault, ...).
  * Arguments: dictionary name, attribute name, key, and a fourth argument that is
  * the default value in the withDefault flavour or an optional integer-like value otherwise.
  */
template class FunctionDictGetImpl final : public IFunction
{
    using Type = typename DataType::FieldType;

public:
    static constexpr auto name = Name::name;

    /// dec_scale is stored as decimal_scale and used in getReturnTypeImpl for Decimal data types.
    static FunctionPtr create(const Context & context, UInt32 dec_scale = 0)
    {
        return std::make_shared(context, dec_scale);
    }

    explicit FunctionDictGetImpl(const Context & context_, UInt32 dec_scale = 0)
        : helper(context_)
        , decimal_scale(dec_scale)
    {}

    String getName() const override { return name; }

private:

    /// 0 together with isVariadic() == true for the withoutDefault flavour, exactly 4 otherwise.
    size_t getNumberOfArguments() const override
    {
        /// TODO: Check if ranged dictionary is working
        return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault ? 0 : 4;
    }

    bool isVariadic() const override { return dictionary_get_function_type == DictionaryGetFunctionType::withoutDefault; }

    bool useDefaultImplementationForConstants() const final { return true; }

    bool isDeterministic() const override { return false; }

    /// Dictionary name and attribute name must be constants.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }

    /** Validates argument types and builds the result type.
      * NOTE(review): in the withoutDefault (variadic) flavour nothing in this function checks
      * arguments.size() before arguments[0], [1] and [2] are accessed - confirm that the caller
      * guarantees at least three arguments (FunctionDictGetNoType performs such a check itself).
      */
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isString(arguments[0]))
            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!isString(arguments[1]))
            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[2]).isUInt64() &&
            !isTuple(arguments[2]))
            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
        {
            /// The default value must be of the function's own data type.
            if (!checkAndGetDataType(arguments[3].get()))
                throw Exception{"Illegal type " + arguments[3]->getName() + " of fourth argument of function " + getName()
                    + ", must be " + TypeName::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
        }
        else
        {
            /// This is for the case of range dictionaries.
            if (arguments.size() == 4 && !arguments[3]->isValueRepresentedByInteger())
            {
                throw Exception{"Illegal type " + arguments[3]->getName() +
                                " of fourth argument of function " + getName() +
                                " must be convertible to Int64.", ErrorCodes::ILLEGAL_COLUMN};
            }
        }

        if constexpr (IsDataTypeDecimal)
            return std::make_shared(DataType::maxPrecision(), decimal_scale);
        else
            return std::make_shared();
    }

    /// Tries every supported dictionary type in turn; throws UNKNOWN_TYPE if no dispatch
    /// returned a column. Returns an empty column without touching the dictionary for zero rows.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        if (input_rows_count == 0)
            return result_type->createColumn();

        auto dict = helper.getDictionary(arguments[0]);
        ColumnPtr res;

        if (!((res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
#if defined(OS_LINUX) || defined(__FreeBSD__)
            || (res = executeDispatch(arguments, result_type, dict))
#endif
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
#if defined(OS_LINUX) || defined(__FreeBSD__)
            || (res = executeDispatch(arguments, result_type, dict))
#endif
#if !defined(ARCADIA_BUILD)
            || (res = executeDispatch(arguments, result_type, dict))
#endif
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))))
            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};

        return res;
    }

    /** Attempts the lookup for one candidate dictionary type.
      * Returns nullptr when dict_ptr is not of that type.
      * NOTE(review): only the getByIdentifiers branch performs a lookup; the getByComplexKeys
      * branch and the fallback branch return nullptr, which makes executeImpl continue to the
      * next candidate and eventually throw "Unsupported dictionary type" - confirm this is intended.
      */
    template ColumnPtr executeDispatch(
        const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr & dict_ptr) const
    {
        const auto * dictionary = typeid_cast(dict_ptr.get());
        if (!dictionary)
            return nullptr;

        const auto * attr_name_col = checkAndGetColumnConst(arguments[1].column.get());
        if (!attr_name_col)
            throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN};

        String attr_name = attr_name_col->getValue();

        const auto id_col_untyped = arguments[2].column;

        constexpr auto dictionary_get_by_type = DictionaryType::get_by_type;

        if constexpr (dictionary_get_by_type == DictionaryGetByType::getByIdentifiers)
        {
            if (dictionary_get_function_type == DictionaryGetFunctionType::withDefault)
            {
                /// The fourth argument supplies the default values.
                const auto default_col_untyped = arguments[3].column;
                return dictionary->get(attr_name, result_type, id_col_untyped, default_col_untyped);
            }
            else
            {
                /// No default column is passed.
                return dictionary->get(attr_name, result_type, id_col_untyped, nullptr);
            }
        }
        else if constexpr (dictionary_get_by_type == DictionaryGetByType::getByComplexKeys)
        {
            return nullptr;
        }
        else
        {
            return nullptr;
        }
    }

    mutable FunctionDictHelper helper;
    UInt32 decimal_scale;
};

/// Typed dictGet functions without a default value.
template using FunctionDictGet = FunctionDictGetImpl;

/// SQL-visible names of the typed dictGet functions.
struct NameDictGetUInt8 { static constexpr auto name = "dictGetUInt8"; };
struct NameDictGetUInt16 { static constexpr auto name = "dictGetUInt16"; };
struct NameDictGetUInt32 { static constexpr auto name = "dictGetUInt32"; };
struct NameDictGetUInt64 { static constexpr auto name = "dictGetUInt64"; };
struct NameDictGetInt8 { static constexpr auto name = "dictGetInt8"; };
struct NameDictGetInt16 { static constexpr auto name = "dictGetInt16"; };
struct NameDictGetInt32 { static constexpr auto name = "dictGetInt32"; };
struct NameDictGetInt64 { static constexpr auto name = "dictGetInt64"; };
struct NameDictGetFloat32 { static constexpr auto name = "dictGetFloat32"; };
struct NameDictGetFloat64 { static constexpr auto name = "dictGetFloat64"; };
struct NameDictGetDate { static constexpr auto name = "dictGetDate"; };
struct NameDictGetDateTime { static constexpr auto name = "dictGetDateTime"; };
struct NameDictGetUUID { static constexpr auto name = "dictGetUUID"; };
struct NameDictGetDecimal32 { static constexpr auto name = "dictGetDecimal32"; };
struct NameDictGetDecimal64 { static constexpr auto name = "dictGetDecimal64"; };
struct NameDictGetDecimal128 { static constexpr auto name = "dictGetDecimal128"; };
struct NameDictGetString { static constexpr auto name = "dictGetString"; };

using FunctionDictGetUInt8 = FunctionDictGet;
using FunctionDictGetUInt16 = FunctionDictGet;
using FunctionDictGetUInt32 = FunctionDictGet;
using FunctionDictGetUInt64 = FunctionDictGet;
using FunctionDictGetInt8 = FunctionDictGet;
using FunctionDictGetInt16 = FunctionDictGet;
using FunctionDictGetInt32 = FunctionDictGet;
using FunctionDictGetInt64 = FunctionDictGet;
using FunctionDictGetFloat32 = FunctionDictGet;
using FunctionDictGetFloat64 = FunctionDictGet;
using FunctionDictGetDate = FunctionDictGet;
using FunctionDictGetDateTime = FunctionDictGet;
using FunctionDictGetUUID = FunctionDictGet;
using FunctionDictGetDecimal32 = FunctionDictGet, NameDictGetDecimal32>;
using FunctionDictGetDecimal64 = FunctionDictGet, NameDictGetDecimal64>;
using FunctionDictGetDecimal128 = FunctionDictGet, NameDictGetDecimal128>;
using FunctionDictGetString = FunctionDictGet;

/// Typed dictGet functions that take a default value as the fourth argument.
template using FunctionDictGetOrDefault = FunctionDictGetImpl;

/// SQL-visible names of the typed dictGet...OrDefault functions.
struct NameDictGetUInt8OrDefault { static constexpr auto name = "dictGetUInt8OrDefault"; };
struct NameDictGetUInt16OrDefault { static constexpr auto name = "dictGetUInt16OrDefault"; };
struct NameDictGetUInt32OrDefault { static constexpr auto name = "dictGetUInt32OrDefault"; };
struct NameDictGetUInt64OrDefault { static constexpr auto name = "dictGetUInt64OrDefault"; };
struct NameDictGetInt8OrDefault { static constexpr auto name = "dictGetInt8OrDefault"; };
struct NameDictGetInt16OrDefault { static constexpr auto name = "dictGetInt16OrDefault"; };
struct NameDictGetInt32OrDefault { static constexpr auto name = "dictGetInt32OrDefault"; };
struct NameDictGetInt64OrDefault { static constexpr auto name = "dictGetInt64OrDefault"; };
struct NameDictGetFloat32OrDefault { static constexpr auto name = "dictGetFloat32OrDefault"; };
struct NameDictGetFloat64OrDefault { static constexpr auto name = "dictGetFloat64OrDefault"; };
struct NameDictGetDateOrDefault { static constexpr auto name = "dictGetDateOrDefault"; };
struct NameDictGetDateTimeOrDefault { static constexpr auto name = "dictGetDateTimeOrDefault"; };
struct NameDictGetUUIDOrDefault { static constexpr auto name = "dictGetUUIDOrDefault"; };
struct NameDictGetDecimal32OrDefault { static constexpr auto name = "dictGetDecimal32OrDefault"; };
struct NameDictGetDecimal64OrDefault { static constexpr auto name = "dictGetDecimal64OrDefault"; };
struct NameDictGetDecimal128OrDefault { static constexpr auto name = "dictGetDecimal128OrDefault"; };
struct NameDictGetStringOrDefault { static constexpr auto name = "dictGetStringOrDefault"; };

using FunctionDictGetUInt8OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetUInt16OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetUInt32OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetUInt64OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetInt8OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetInt16OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetInt32OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetInt64OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetFloat32OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetFloat64OrDefault = FunctionDictGetOrDefault;
using FunctionDictGetDateOrDefault = FunctionDictGetOrDefault;
using FunctionDictGetDateTimeOrDefault = FunctionDictGetOrDefault;
using FunctionDictGetUUIDOrDefault = FunctionDictGetOrDefault;
using FunctionDictGetDecimal32OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal32OrDefault>;
using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal64OrDefault>;
using FunctionDictGetDecimal128OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal128OrDefault>;
using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault;


/// TODO: Use new API
/// This variant of function derives the result type automatically.
/// getReturnTypeImpl looks the attribute up in the dictionary structure, stores the matching
/// typed dictGet function in `impl` and returns the attribute type; executeImpl forwards to `impl`.
class FunctionDictGetNoType final : public IFunction
{
public:
    static constexpr auto name = "dictGet";

    static FunctionPtr create(const Context & context)
    {
        return std::make_shared(context);
    }

    explicit FunctionDictGetNoType(const Context & context_) : context(context_), helper(context_) {}

    String getName() const override { return name; }

private:
    /// Variadic: the argument count (3 or 4) is validated in getReturnTypeImpl.
    bool isVariadic() const override { return true; }
    size_t getNumberOfArguments() const override { return 0; }

    bool useDefaultImplementationForConstants() const final { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }

    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
    {
        return helper.isDictGetFunctionInjective(sample_columns);
    }

    /** Validates the arguments, selects the typed implementation and returns the attribute type.
      * Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH / ILLEGAL_TYPE_OF_ARGUMENT on bad arguments,
      * UNKNOWN_TYPE for an attribute type without a typed implementation,
      * BAD_ARGUMENTS when the dictionary has no attribute with the given name.
      */
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        if (arguments.size() != 3 && arguments.size() != 4)
            throw Exception{"Function " + getName() + " takes 3 or 4 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};

        String dict_name;
        if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get()))
        {
            dict_name = name_col->getValue();
        }
        else
            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        String attr_name;
        if (const auto * name_col = checkAndGetColumnConst(arguments[1].column.get()))
        {
            attr_name = name_col->getValue();
        }
        else
            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[2].type).isUInt64() &&
            !isTuple(arguments[2].type))
            throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (arguments.size() == 4)
        {
            /// The optional fourth argument must be integer-like and no wider than Int64.
            const auto * range_argument = arguments[3].type.get();
            if (!(range_argument->isValueRepresentedByInteger() &&
                   range_argument->getSizeOfValueInMemory() <= sizeof(Int64)))
                throw Exception{"Illegal type " + range_argument->getName() + " of fourth argument of function " + getName()
                    + ", must be convertible to " + TypeName::get() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
        }

        auto dict = helper.getDictionary(dict_name);
        const DictionaryStructure & structure = dict->getStructure();

        for (const auto idx : ext::range(0, structure.attributes.size()))
        {
            const DictionaryAttribute & attribute = structure.attributes[idx];
            if (attribute.name == attr_name)
            {
                WhichDataType dt = attribute.type;
                switch (dt.idx)
                {
                    /// String and FixedString attributes share the string implementation.
                    case TypeIndex::String:
                    case TypeIndex::FixedString:
                        impl = FunctionDictGetString::create(context);
                        break;
                    case TypeIndex::UInt8:
                        impl = FunctionDictGetUInt8::create(context);
                        break;
                    case TypeIndex::UInt16:
                        impl = FunctionDictGetUInt16::create(context);
                        break;
                    case TypeIndex::UInt32:
                        impl = FunctionDictGetUInt32::create(context);
                        break;
                    case TypeIndex::UInt64:
                        impl = FunctionDictGetUInt64::create(context);
                        break;
                    case TypeIndex::Int8:
                        impl = FunctionDictGetInt8::create(context);
                        break;
                    case TypeIndex::Int16:
                        impl = FunctionDictGetInt16::create(context);
                        break;
                    case TypeIndex::Int32:
                        impl = FunctionDictGetInt32::create(context);
                        break;
                    case TypeIndex::Int64:
                        impl = FunctionDictGetInt64::create(context);
                        break;
                    case TypeIndex::Float32:
                        impl = FunctionDictGetFloat32::create(context);
                        break;
                    case TypeIndex::Float64:
                        impl = FunctionDictGetFloat64::create(context);
                        break;
                    case TypeIndex::Date:
                        impl = FunctionDictGetDate::create(context);
                        break;
                    case TypeIndex::DateTime:
                        impl = FunctionDictGetDateTime::create(context);
                        break;
                    case TypeIndex::UUID:
                        impl = FunctionDictGetUUID::create(context);
                        break;
                    /// Decimal implementations additionally receive the scale of the attribute type.
                    case TypeIndex::Decimal32:
                        impl = FunctionDictGetDecimal32::create(context, getDecimalScale(*attribute.type));
                        break;
                    case TypeIndex::Decimal64:
                        impl = FunctionDictGetDecimal64::create(context, getDecimalScale(*attribute.type));
                        break;
                    case TypeIndex::Decimal128:
                        impl = FunctionDictGetDecimal128::create(context, getDecimalScale(*attribute.type));
                        break;
                    default:
                        throw Exception("Unknown dictGet type", ErrorCodes::UNKNOWN_TYPE);
                }
                return attribute.type;
            }
        }
        throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
    }

    bool isDeterministic() const override { return false; }

    /// Forwards to the typed implementation. `impl` is only assigned in getReturnTypeImpl and is
    /// dereferenced here without a check, so getReturnTypeImpl must have succeeded beforehand.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        return impl->executeImpl(arguments, result_type, input_rows_count);
    }

    const Context & context;
    mutable FunctionDictHelper helper;
    mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info
};


/** dictGetOrDefault(dictionary, attribute, key, default).
  * Like FunctionDictGetNoType, but always takes four arguments and selects
  * one of the typed ...OrDefault implementations.
  */
class FunctionDictGetNoTypeOrDefault final : public IFunction
{
public:
    static constexpr auto name = "dictGetOrDefault";

    static FunctionPtr create(const Context & context)
    {
        return std::make_shared(context);
    }

    explicit FunctionDictGetNoTypeOrDefault(const Context & context_) : context(context_), helper(context_) {}

    String getName() const override { return name; }

private:
    size_t getNumberOfArguments() const override { return 4; }

    bool useDefaultImplementationForConstants() const final { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }

    bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override
    {
        return helper.isDictGetFunctionInjective(sample_columns);
    }

    /** Validates the arguments, checks the default value type against the attribute type,
      * selects the typed implementation and returns the attribute type.
      * NOTE(review): unlike dictGet, the switch below has no FixedString case, and the type check
      * requires the default's type id to equal the attribute's while also requiring isString()
      * for string-like attributes - it looks like FixedString attributes cannot be used here; confirm.
      */
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        String dict_name;
        if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get()))
        {
            dict_name = name_col->getValue();
        }
        else
            throw Exception{"Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName()
                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        String attr_name;
        if (const auto * name_col = checkAndGetColumnConst(arguments[1].column.get()))
        {
            attr_name = name_col->getValue();
        }
        else
            throw Exception{"Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName()
                + ", expected a const string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[2].type).isUInt64() &&
            !isTuple(arguments[2].type))
            throw Exception{"Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName()
                + ", must be UInt64 or tuple(...).", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        auto dict = helper.getDictionary(dict_name);
        const DictionaryStructure & structure = dict->getStructure();

        for (const auto idx : ext::range(0, structure.attributes.size()))
        {
            const DictionaryAttribute & attribute = structure.attributes[idx];
            if (attribute.name == attr_name)
            {
                auto arg_type = arguments[3].type;
                WhichDataType dt = attribute.type;

                if ((arg_type->getTypeId() != dt.idx) || (dt.isStringOrFixedString() && !isString(arg_type)))
                    throw Exception{"Illegal type " + arg_type->getName() + " of fourth argument of function " + getName() +
                        ", must be " + getTypeName(dt.idx) + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

                switch (dt.idx)
                {
                    case TypeIndex::String:
                        impl = FunctionDictGetStringOrDefault::create(context);
                        break;
                    case TypeIndex::UInt8:
                        impl = FunctionDictGetUInt8OrDefault::create(context);
                        break;
                    case TypeIndex::UInt16:
                        impl = FunctionDictGetUInt16OrDefault::create(context);
                        break;
                    case TypeIndex::UInt32:
                        impl = FunctionDictGetUInt32OrDefault::create(context);
                        break;
                    case TypeIndex::UInt64:
                        impl = FunctionDictGetUInt64OrDefault::create(context);
                        break;
                    case TypeIndex::Int8:
                        impl = FunctionDictGetInt8OrDefault::create(context);
                        break;
                    case TypeIndex::Int16:
                        impl = FunctionDictGetInt16OrDefault::create(context);
                        break;
                    case TypeIndex::Int32:
                        impl = FunctionDictGetInt32OrDefault::create(context);
                        break;
                    case TypeIndex::Int64:
                        impl = FunctionDictGetInt64OrDefault::create(context);
                        break;
                    case TypeIndex::Float32:
                        impl = FunctionDictGetFloat32OrDefault::create(context);
                        break;
                    case TypeIndex::Float64:
                        impl = FunctionDictGetFloat64OrDefault::create(context);
                        break;
                    case TypeIndex::Date:
                        impl = FunctionDictGetDateOrDefault::create(context);
                        break;
                    case TypeIndex::DateTime:
                        impl = FunctionDictGetDateTimeOrDefault::create(context);
                        break;
                    case TypeIndex::UUID:
                        impl = FunctionDictGetUUIDOrDefault::create(context);
                        break;
                    /// Decimal implementations additionally receive the scale of the attribute type.
                    case TypeIndex::Decimal32:
                        impl = FunctionDictGetDecimal32OrDefault::create(context, getDecimalScale(*attribute.type));
                        break;
                    case TypeIndex::Decimal64:
                        impl = FunctionDictGetDecimal64OrDefault::create(context, getDecimalScale(*attribute.type));
                        break;
                    case TypeIndex::Decimal128:
                        impl = FunctionDictGetDecimal128OrDefault::create(context, getDecimalScale(*attribute.type));
                        break;
                    default:
                        throw Exception("Unknown dictGetOrDefault type", ErrorCodes::UNKNOWN_TYPE);
                }

                return attribute.type;
            }
        }
        throw Exception{"No such attribute '" + attr_name + "'", ErrorCodes::BAD_ARGUMENTS};
    }

    bool isDeterministic() const override { return false; }

    /// Forwards to the typed implementation. `impl` is only assigned in getReturnTypeImpl and is
    /// dereferenced here without a check, so getReturnTypeImpl must have succeeded beforehand.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        return impl->executeImpl(arguments, result_type, input_rows_count);
    }

    const Context & context;
    mutable FunctionDictHelper helper;
    mutable FunctionPtr impl; // underlying function used by dictGet function without explicit type info
};

/// Functions to work with hierarchies.
/** dictGetHierarchy(dictionary, id).
  * For every id returns an array that starts with the id itself and continues with the ids
  * obtained by repeatedly applying the dictionary's toParent(), until a zero id or a repeated id is met.
  */
class FunctionDictGetHierarchy final : public IFunction
{
public:
    static constexpr auto name = "dictGetHierarchy";

    static FunctionPtr create(const Context & context)
    {
        return std::make_shared(context);
    }

    explicit FunctionDictGetHierarchy(const Context & context_) : helper(context_) {}

    String getName() const override { return name; }

private:
    size_t getNumberOfArguments() const override { return 2; }
    bool isInjective(const ColumnsWithTypeAndName & /*sample_columns*/) const override { return true; }

    bool useDefaultImplementationForConstants() const final { return true; }
    /// The dictionary name (argument 0) must be a constant.
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }

    /// Validates argument types: a string, then UInt64.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isString(arguments[0]))
            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[1]).isUInt64())
            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
                + ", must be UInt64.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        return std::make_shared(std::make_shared());
    }

    bool isDeterministic() const override { return false; }

    /// Tries every supported dictionary type in turn; throws UNKNOWN_TYPE if no dispatch
    /// returned a column. Returns an empty column without touching the dictionary for zero rows.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        if (input_rows_count == 0)
            return result_type->createColumn();

        auto dict = helper.getDictionary(arguments[0]);
        ColumnPtr res;

        if (!((res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))
            || (res = executeDispatch(arguments, result_type, dict))))
            throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE};

        return res;
    }

    /** Computes the hierarchies for one candidate dictionary type.
      * Returns nullptr when dict_ptr is not of that type.
      * Throws UNSUPPORTED_METHOD if the dictionary has no hierarchy and
      * ILLEGAL_COLUMN if the id column is neither a full nor a constant column of the expected type.
      */
    template ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const std::shared_ptr & dict_ptr) const
    {
        const auto * dict = typeid_cast(dict_ptr.get());
        if (!dict)
            return nullptr;

        if (!dict->hasHierarchy())
            throw Exception{"Dictionary does not have a hierarchy", ErrorCodes::UNSUPPORTED_METHOD};

        /// Fills `out` with the concatenated hierarchies of all ids from `in`
        /// and `offsets` with the cumulative end position of each hierarchy inside `out`.
        const auto get_hierarchies = [&] (const PaddedPODArray & in, PaddedPODArray & out, PaddedPODArray & offsets)
        {
            const auto size = in.size();

            /// copy of `in` array
            auto in_array = std::make_unique>(std::begin(in), std::end(in));
            /// used for storing and handling result of ::toParent call
            auto out_array = std::make_unique>(size);
            /// resulting hierarchies
            std::vector> hierarchies(size);    /// TODO Bad code, poor performance.

            /// total number of non-zero elements, used for allocating all the required memory upfront
            size_t total_count = 0;

            /// Each pass appends the current id of every row to its hierarchy and then replaces
            /// all current ids with their parents. The loop ends after a pass that appended nothing.
            while (true)
            {
                auto all_zeroes = true;

                /// erase zeroed identifiers, store non-zeroed ones
                for (const auto i : ext::range(0, size))
                {
                    const auto id = (*in_array)[i];
                    if (0 == id)
                        continue;


                    auto & hierarchy = hierarchies[i];

                    /// Checking for loop
                    if (std::find(std::begin(hierarchy), std::end(hierarchy), id) != std::end(hierarchy))
                        continue;

                    all_zeroes = false;
                    /// place id at its corresponding place
                    hierarchy.push_back(id);

                    ++total_count;
                }

                if (all_zeroes)
                    break;

                /// translate all non-zero identifiers at once
                dict->toParent(*in_array, *out_array);

                /// we're going to use the `in_array` from this iteration as `out_array` on the next one
                std::swap(in_array, out_array);
            }

            out.reserve(total_count);
            offsets.resize(size);

            /// Flatten the per-row hierarchies; offsets[i] is the size of `out` after row i was appended.
            for (const auto i : ext::range(0, size))
            {
                const auto & ids = hierarchies[i];
                out.insert_assume_reserved(std::begin(ids), std::end(ids));
                offsets[i] = out.size();
            }
        };

        const auto * id_col_untyped = arguments[1].column.get();
        if (const auto * id_col = checkAndGetColumn(id_col_untyped))
        {
            const auto & in = id_col->getData();
            auto backend = ColumnUInt64::create();
            auto offsets = ColumnArray::ColumnOffsets::create();
            get_hierarchies(in, backend->getData(), offsets->getData());
            return ColumnArray::create(std::move(backend), std::move(offsets));
        }
        else if (const auto * id_col_const = checkAndGetColumnConst>(id_col_untyped))
        {
            /// Constant id: compute the hierarchy for a single value and
            /// wrap the resulting array into a constant column with the original number of rows.
            const PaddedPODArray in(1, id_col_const->getValue());
            auto backend = ColumnUInt64::create();
            auto offsets = ColumnArray::ColumnOffsets::create();
            get_hierarchies(in, backend->getData(), offsets->getData());
            auto array = ColumnArray::create(std::move(backend), std::move(offsets));
            return result_type->createColumnConst(id_col_const->size(), (*array)[0].get());
        }
        else
            throw Exception{"Second argument of function " + getName() + " must be UInt64", ErrorCodes::ILLEGAL_COLUMN};
    }

    mutable FunctionDictHelper helper;
};


class FunctionDictIsIn final : public IFunction
{
public:
    static constexpr auto name = "dictIsIn";

    static FunctionPtr create(const Context & context)
    {
        return std::make_shared(context);
    }

    explicit FunctionDictIsIn(const Context & context_) : helper(context_) {}

    String getName() const override { return name; }

private:
    size_t getNumberOfArguments() const override { return 3; }

    bool useDefaultImplementationForConstants() const final { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isString(arguments[0]))
            throw Exception{"Illegal type " + arguments[0]->getName() + " of first argument of function " + getName()
                + ", expected a string.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[1]).isUInt64())
            throw Exception{"Illegal type " + arguments[1]->getName() + " of second argument of function " + getName()
                + ", must be UInt64.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        if (!WhichDataType(arguments[2]).isUInt64())
            throw Exception{"Illegal type " + arguments[2]->getName() + " of third argument of function " + getName()
                + ", must be UInt64.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};

        return std::make_shared();
    }

    bool isDeterministic() const override { return false; }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName &
arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); auto dict = helper.getDictionary(arguments[0]); ColumnPtr res; if (!((res = executeDispatch(arguments, dict)) || (res = executeDispatch(arguments, dict)) || (res = executeDispatch(arguments, dict)) || (res = executeDispatch(arguments, dict)))) throw Exception{"Unsupported dictionary type " + dict->getTypeName(), ErrorCodes::UNKNOWN_TYPE}; return res; } template ColumnPtr executeDispatch(const ColumnsWithTypeAndName & arguments, const std::shared_ptr & dict_ptr) const { const auto * dict = typeid_cast(dict_ptr.get()); if (!dict) return nullptr; if (!dict->hasHierarchy()) throw Exception{"Dictionary does not have a hierarchy", ErrorCodes::UNSUPPORTED_METHOD}; const auto * child_id_col_untyped = arguments[1].column.get(); const auto * ancestor_id_col_untyped = arguments[2].column.get(); if (const auto * child_id_col = checkAndGetColumn(child_id_col_untyped)) return execute(dict, child_id_col, ancestor_id_col_untyped); else if (const auto * child_id_col_const = checkAndGetColumnConst>(child_id_col_untyped)) return execute(dict, child_id_col_const, ancestor_id_col_untyped); else throw Exception{"Illegal column " + child_id_col_untyped->getName() + " of second argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } template ColumnPtr execute(const DictionaryType * dict, const ColumnUInt64 * child_id_col, const IColumn * ancestor_id_col_untyped) const { if (const auto * ancestor_id_col = checkAndGetColumn(ancestor_id_col_untyped)) { auto out = ColumnUInt8::create(); const auto & child_ids = child_id_col->getData(); const auto & ancestor_ids = ancestor_id_col->getData(); auto & data = out->getData(); const auto size = child_id_col->size(); data.resize(size); dict->isInVectorVector(child_ids, ancestor_ids, data); return out; } else if (const auto * ancestor_id_col_const = 
checkAndGetColumnConst>(ancestor_id_col_untyped)) { auto out = ColumnUInt8::create(); const auto & child_ids = child_id_col->getData(); const auto ancestor_id = ancestor_id_col_const->getValue(); auto & data = out->getData(); const auto size = child_id_col->size(); data.resize(size); dict->isInVectorConstant(child_ids, ancestor_id, data); return out; } else { throw Exception{"Illegal column " + ancestor_id_col_untyped->getName() + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } } template ColumnPtr execute(const DictionaryType * dict, const ColumnConst * child_id_col, const IColumn * ancestor_id_col_untyped) const { if (const auto * ancestor_id_col = checkAndGetColumn(ancestor_id_col_untyped)) { auto out = ColumnUInt8::create(); const auto child_id = child_id_col->getValue(); const auto & ancestor_ids = ancestor_id_col->getData(); auto & data = out->getData(); const auto size = child_id_col->size(); data.resize(size); dict->isInConstantVector(child_id, ancestor_ids, data); return out; } else if (const auto * ancestor_id_col_const = checkAndGetColumnConst>(ancestor_id_col_untyped)) { const auto child_id = child_id_col->getValue(); const auto ancestor_id = ancestor_id_col_const->getValue(); UInt8 res = 0; dict->isInConstantConstant(child_id, ancestor_id, res); return DataTypeUInt8().createColumnConst(child_id_col->size(), res); } else throw Exception{"Illegal column " + ancestor_id_col_untyped->getName() + " of third argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; } mutable FunctionDictHelper helper; }; template static const PaddedPODArray & getColumnDataAsPaddedPODArray(const IColumn & column, PaddedPODArray & backup_storage) { if (!isColumnConst(column)) { if (const auto vector_col = checkAndGetColumn>(&column)) { return vector_col->getData(); } } const auto full_column = column.convertToFullColumnIfConst(); // With type conversion and const columns we need to use backup storage here const auto size = 
full_column->size(); backup_storage.resize(size); for (size_t i = 0; i < size; ++i) backup_storage[i] = full_column->getUInt(i); return backup_storage; } }