diff --git a/src/Functions/FunctionsExternalDictionaries.cpp b/src/Functions/FunctionsExternalDictionaries.cpp index 79236fb2cf4..6b83f761086 100644 --- a/src/Functions/FunctionsExternalDictionaries.cpp +++ b/src/Functions/FunctionsExternalDictionaries.cpp @@ -42,6 +42,7 @@ void registerFunctionsExternalDictionaries(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction>(); factory.registerFunction>(); + factory.registerFunction(); } } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 0bc22826f4e..1f8ef60b4af 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -141,7 +142,6 @@ public: String getName() const override { return name; } -private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } @@ -231,6 +231,7 @@ private: return dictionary->hasKeys({key_column, range_col}, {std::make_shared(), range_col_type}); } +private: mutable FunctionDictHelper helper; }; @@ -295,7 +296,7 @@ public: } if (types.size() > 1) - return std::make_shared(types); + return std::make_shared(types, attribute_names); else return types.front(); } @@ -694,6 +695,163 @@ using FunctionDictGetDecimal64OrDefault = FunctionDictGetOrDefault, NameDictGetDecimal128OrDefault>; using FunctionDictGetStringOrDefault = FunctionDictGetOrDefault; +class FunctionDictGetOrNull final : public IFunction +{ +public: + static constexpr auto name = "dictGetOrNull"; + + static FunctionPtr create(const Context &context) + { + return std::make_shared(context); + } + + explicit FunctionDictGetOrNull(const Context & context_) + : dictionary_get_func_impl(context_) + , dictionary_has_func_impl(context_) + {} + + String getName() const override { return name; } + +private: + + size_t getNumberOfArguments() const override { return 0; } + + bool 
isVariadic() const override { return true; } + + bool useDefaultImplementationForConstants() const override { return true; } + + bool useDefaultImplementationForNulls() const override { return false; } + + bool isDeterministic() const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; } + + bool isInjective(const ColumnsWithTypeAndName & sample_columns) const override + { + return dictionary_get_func_impl.isInjective(sample_columns); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + auto result_type = dictionary_get_func_impl.getReturnTypeImpl(arguments); + + WhichDataType result_data_type(result_type); + if (result_data_type.isTuple()) + { + const auto & data_type_tuple = static_cast(*result_type); + auto elements_types_copy = data_type_tuple.getElements(); + for (auto & element_type : elements_types_copy) + element_type = makeNullable(element_type); + + result_type = std::make_shared(elements_types_copy, data_type_tuple.getElementNames()); + } + else + result_type = makeNullable(result_type); + + return result_type; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + /** First we call dictHas to find out, for every row, whether its key is present in the dictionary. + For a key that is present in the dictionary the dictHas result at that row index is 1, otherwise 0. + We invert that result in place, so that a missing key gets 1 and a present key gets 0. + The inverted result is then used as the null map of the result column. + After that we call the dictGet implementation; by contract, for keys that are not present in the dictionary + it returns the default value. + Finally we create a nullable column from the dictGet result column and the null map. + + 2 additional implementation details: + 1. The dictGet result can be a tuple if the client requested multiple attributes; in that case we apply the same operation to each column of the tuple. + 2. 
If a result column is already nullable, we merge its own null map with the null map obtained from dictHas. + */ + + auto dict_has_arguments = filterAttributeNameArgumentForDictHas(arguments); + auto is_key_in_dictionary_column = dictionary_has_func_impl.executeImpl(dict_has_arguments, std::make_shared(), input_rows_count); + auto is_key_in_dictionary_column_mutable = is_key_in_dictionary_column->assumeMutable(); + ColumnVector & is_key_in_dictionary_column_typed = assert_cast &>(*is_key_in_dictionary_column_mutable); + PaddedPODArray & is_key_in_dictionary_data = is_key_in_dictionary_column_typed.getData(); + for (auto & key : is_key_in_dictionary_data) + key = !key; + + auto result_type = dictionary_get_func_impl.getReturnTypeImpl(arguments); + auto dictionary_get_result_column = dictionary_get_func_impl.executeImpl(arguments, result_type, input_rows_count); + + ColumnPtr result; + + WhichDataType result_data_type(result_type); + auto dictionary_get_result_column_mutable = dictionary_get_result_column->assumeMutable(); + + if (result_data_type.isTuple()) + { + ColumnTuple & column_tuple = assert_cast(*dictionary_get_result_column_mutable); + + const auto & columns = column_tuple.getColumns(); + size_t tuple_size = columns.size(); + + MutableColumns new_columns(tuple_size); + for (size_t tuple_column_index = 0; tuple_column_index < tuple_size; ++tuple_column_index) + { + auto nullable_column_map = ColumnVector::create(); + auto & nullable_column_map_data = nullable_column_map->getData(); + nullable_column_map_data.assign(is_key_in_dictionary_data); + + auto mutable_column = columns[tuple_column_index]->assumeMutable(); + if (ColumnNullable * nullable_column = typeid_cast(mutable_column.get())) + { + auto & null_map_data = nullable_column->getNullMapData(); + addNullMap(null_map_data, is_key_in_dictionary_data); + new_columns[tuple_column_index] = std::move(mutable_column); + } + else + new_columns[tuple_column_index] = ColumnNullable::create(std::move(mutable_column), 
std::move(nullable_column_map)); + } + + result = ColumnTuple::create(std::move(new_columns)); + } + else + { + if (ColumnNullable * nullable_column = typeid_cast(dictionary_get_result_column_mutable.get())) + { + auto & null_map_data = nullable_column->getNullMapData(); + addNullMap(null_map_data, is_key_in_dictionary_data); + result = std::move(dictionary_get_result_column); + } + else + result = ColumnNullable::create(std::move(dictionary_get_result_column), std::move(is_key_in_dictionary_column_mutable)); + } + + return result; + } + + static void addNullMap(PaddedPODArray & null_map, PaddedPODArray & null_map_to_add) + { + assert(null_map.size() == null_map_to_add.size()); + + for (size_t i = 0; i < null_map.size(); ++i) + null_map[i] = null_map[i] || null_map_to_add[i]; + } + + static ColumnsWithTypeAndName filterAttributeNameArgumentForDictHas(const ColumnsWithTypeAndName & arguments) + { + ColumnsWithTypeAndName dict_has_arguments; + dict_has_arguments.reserve(arguments.size() - 1); + size_t attribute_name_argument_index = 1; + + for (size_t i = 0; i < arguments.size(); ++i) + { + if (i == attribute_name_argument_index) + continue; + + dict_has_arguments.emplace_back(arguments[i]); + } + + return dict_has_arguments; + } + + const FunctionDictGetNoType dictionary_get_func_impl; + const FunctionDictHas dictionary_has_func_impl; +}; /// Functions to work with hierarchies. 
class FunctionDictGetHierarchy final : public IFunction diff --git a/tests/queries/0_stateless/01780_dict_get_or_null.reference b/tests/queries/0_stateless/01780_dict_get_or_null.reference new file mode 100644 index 00000000000..4baca9ec91b --- /dev/null +++ b/tests/queries/0_stateless/01780_dict_get_or_null.reference @@ -0,0 +1,18 @@ +Simple key dictionary dictGetOrNull +0 0 \N \N (NULL,NULL) +1 1 First First ('First','First') +2 1 Second \N ('Second',NULL) +3 1 Third Third ('Third','Third') +4 0 \N \N (NULL,NULL) +Complex key dictionary dictGetOrNull +(0,'key') 0 \N \N (NULL,NULL) +(1,'key') 1 First First ('First','First') +(2,'key') 1 Second \N ('Second',NULL) +(3,'key') 1 Third Third ('Third','Third') +(4,'key') 0 \N \N (NULL,NULL) +Range key dictionary dictGetOrNull +(0,'2019-05-20') 0 \N \N (NULL,NULL) +(1,'2019-05-20') 1 First First ('First','First') +(2,'2019-05-20') 1 Second \N ('Second',NULL) +(3,'2019-05-20') 1 Third Third ('Third','Third') +(4,'2019-05-20') 0 \N \N (NULL,NULL) diff --git a/tests/queries/0_stateless/01780_dict_get_or_null.sql b/tests/queries/0_stateless/01780_dict_get_or_null.sql new file mode 100644 index 00000000000..f13bcf57d27 --- /dev/null +++ b/tests/queries/0_stateless/01780_dict_get_or_null.sql @@ -0,0 +1,116 @@ +DROP TABLE IF EXISTS simple_key_dictionary_source_table; +CREATE TABLE simple_key_dictionary_source_table +( + id UInt64, + value String, + value_nullable Nullable(String) +) ENGINE = TinyLog; + +INSERT INTO simple_key_dictionary_source_table VALUES (1, 'First', 'First'); +INSERT INTO simple_key_dictionary_source_table VALUES (2, 'Second', NULL); +INSERT INTO simple_key_dictionary_source_table VALUES (3, 'Third', 'Third'); + +DROP DICTIONARY IF EXISTS simple_key_dictionary; +CREATE DICTIONARY simple_key_dictionary +( + id UInt64, + value String, + value_nullable Nullable(String) +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'simple_key_dictionary_source_table')) +LAYOUT(DIRECT()); + +SELECT 
'Simple key dictionary dictGetOrNull'; + +SELECT + number, + dictHas('simple_key_dictionary', number), + dictGetOrNull('simple_key_dictionary', 'value', number), + dictGetOrNull('simple_key_dictionary', 'value_nullable', number), + dictGetOrNull('simple_key_dictionary', ('value', 'value_nullable'), number) +FROM system.numbers LIMIT 5; + +DROP DICTIONARY simple_key_dictionary; +DROP TABLE simple_key_dictionary_source_table; + +DROP TABLE IF EXISTS complex_key_dictionary_source_table; +CREATE TABLE complex_key_dictionary_source_table +( + id UInt64, + id_key String, + value String, + value_nullable Nullable(String) +) ENGINE = TinyLog; + +INSERT INTO complex_key_dictionary_source_table VALUES (1, 'key', 'First', 'First'); +INSERT INTO complex_key_dictionary_source_table VALUES (2, 'key', 'Second', NULL); +INSERT INTO complex_key_dictionary_source_table VALUES (3, 'key', 'Third', 'Third'); + +DROP DICTIONARY IF EXISTS complex_key_dictionary; +CREATE DICTIONARY complex_key_dictionary +( + id UInt64, + id_key String, + value String, + value_nullable Nullable(String) +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'complex_key_dictionary_source_table')) +LAYOUT(COMPLEX_KEY_DIRECT()); + +SELECT 'Complex key dictionary dictGetOrNull'; + +SELECT + (number, 'key'), + dictHas('complex_key_dictionary', (number, 'key')), + dictGetOrNull('complex_key_dictionary', 'value', (number, 'key')), + dictGetOrNull('complex_key_dictionary', 'value_nullable', (number, 'key')), + dictGetOrNull('complex_key_dictionary', ('value', 'value_nullable'), (number, 'key')) +FROM system.numbers LIMIT 5; + +DROP DICTIONARY complex_key_dictionary; +DROP TABLE complex_key_dictionary_source_table; + +DROP TABLE IF EXISTS range_key_dictionary_source_table; +CREATE TABLE range_key_dictionary_source_table +( + key UInt64, + start_date Date, + end_date Date, + value String, + value_nullable Nullable(String) +) +ENGINE = TinyLog(); + +INSERT INTO 
range_key_dictionary_source_table VALUES(1, toDate('2019-05-20'), toDate('2019-05-20'), 'First', 'First'); +INSERT INTO range_key_dictionary_source_table VALUES(2, toDate('2019-05-20'), toDate('2019-05-20'), 'Second', NULL); +INSERT INTO range_key_dictionary_source_table VALUES(3, toDate('2019-05-20'), toDate('2019-05-20'), 'Third', 'Third'); + +DROP DICTIONARY IF EXISTS range_key_dictionary; +CREATE DICTIONARY range_key_dictionary +( + key UInt64, + start_date Date, + end_date Date, + value String, + value_nullable Nullable(String) +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'range_key_dictionary_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(RANGE_HASHED()) +RANGE(MIN start_date MAX end_date); + +SELECT 'Range key dictionary dictGetOrNull'; + +SELECT + (number, toDate('2019-05-20')), + dictHas('range_key_dictionary', number, toDate('2019-05-20')), + dictGetOrNull('range_key_dictionary', 'value', number, toDate('2019-05-20')), + dictGetOrNull('range_key_dictionary', 'value_nullable', number, toDate('2019-05-20')), + dictGetOrNull('range_key_dictionary', ('value', 'value_nullable'), number, toDate('2019-05-20')) +FROM system.numbers LIMIT 5; + +DROP DICTIONARY range_key_dictionary; +DROP TABLE range_key_dictionary_source_table;