diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index ef285659be2..037dbb06648 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -40,6 +40,7 @@ generate_function_register(Array FunctionCountEqual FunctionArrayEnumerate FunctionArrayEnumerateUniq + FunctionArrayEnumerateDense FunctionArrayUniq FunctionArrayDistinct FunctionEmptyArrayUInt8 diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index b4436ff0608..6414171a72f 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -1503,19 +1503,22 @@ void FunctionArrayDistinct::executeHashed( } } -/// Implementation of FunctionArrayEnumerateUniq. +/// Implementation of FunctionArrayEnumerateExtended. -FunctionPtr FunctionArrayEnumerateUniq::create(const Context &) +template +FunctionPtr FunctionArrayEnumerateExtended::create(const Context &) { - return std::make_shared(); + return std::make_shared(); } -String FunctionArrayEnumerateUniq::getName() const +template +String FunctionArrayEnumerateExtended::getName() const { - return name; + return Derived::name; } -DataTypePtr FunctionArrayEnumerateUniq::getReturnTypeImpl(const DataTypes & arguments) const +template +DataTypePtr FunctionArrayEnumerateExtended::getReturnTypeImpl(const DataTypes & arguments) const { if (arguments.size() == 0) throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " @@ -1533,7 +1536,8 @@ DataTypePtr FunctionArrayEnumerateUniq::getReturnTypeImpl(const DataTypes & argu return std::make_shared(std::make_shared()); } -void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) +template +void FunctionArrayEnumerateExtended::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { const ColumnArray::Offsets * offsets = nullptr; ColumnRawPtrs data_columns; @@ -1620,8 +1624,9 @@ void FunctionArrayEnumerateUniq::executeImpl(Block & block, const ColumnNumbers } +template template -bool FunctionArrayEnumerateUniq::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +bool FunctionArrayEnumerateExtended::executeNumber(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) { const IColumn * inner_col; @@ -1649,24 +1654,55 @@ bool FunctionArrayEnumerateUniq::executeNumber(const ColumnArray * array, const ValuesToIndices indices; size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + if constexpr (std::is_same_v) { - indices.clear(); - UInt32 null_count = 0; - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + // Unique + for (size_t i = 0; i < offsets.size(); ++i) { - if (null_map_data && ((*null_map_data)[j] == 1)) - res_values[j] = ++null_count; - else - res_values[j] = ++indices[values[j]]; + indices.clear(); + UInt32 null_count = 0; + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + if (null_map_data && ((*null_map_data)[j] == 1)) + res_values[j] = ++null_count; + else + res_values[j] = ++indices[values[j]]; + } + prev_off = off; + } + } else { + // Dense + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t rank = 0; + UInt32 null_index = 0; + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + if (null_map_data && ((*null_map_data)[j] == 1)) + { + if (!null_index) + null_index = ++rank; + res_values[j] = null_index; + } + else + { + auto & idx = indices[values[j]]; + if (!idx) + idx = ++rank; + res_values[j] = idx; + } + } + prev_off = off; } - prev_off = off; } return true; } -bool FunctionArrayEnumerateUniq::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) +template +bool FunctionArrayEnumerateExtended::executeString(const ColumnArray * array, const IColumn * null_map, ColumnUInt32::Container & res_values) { const IColumn * inner_col; @@ -1693,24 +1729,55 @@ bool FunctionArrayEnumerateUniq::executeString(const ColumnArray * array, const null_map_data = &static_cast(null_map)->getData(); ValuesToIndices indices; - for (size_t i = 0; i < offsets.size(); ++i) + if constexpr (std::is_same_v) { - indices.clear(); - UInt32 null_count = 0; - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + // Unique + for (size_t i = 0; i < offsets.size(); ++i) { - if (null_map_data && ((*null_map_data)[j] == 1)) - res_values[j] = ++null_count; - else - res_values[j] = ++indices[nested->getDataAt(j)]; + indices.clear(); + UInt32 null_count = 0; + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + if (null_map_data && ((*null_map_data)[j] == 1)) + res_values[j] = ++null_count; + else + res_values[j] = ++indices[nested->getDataAt(j)]; + } + prev_off = off; + } + } else { + // Dense + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t rank = 0; + UInt32 null_index = 0; + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + if (null_map_data && ((*null_map_data)[j] == 1)) + { + if (!null_index) + null_index = ++rank; + res_values[j] = null_index; + } + else + { + auto & idx = indices[nested->getDataAt(j)]; + if (!idx) + idx = ++rank; + res_values[j] = idx; + } + } + prev_off = off; } - prev_off = off; } return true; } -bool FunctionArrayEnumerateUniq::execute128bit( +template +bool FunctionArrayEnumerateExtended::execute128bit( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, const ColumnRawPtrs & null_maps, @@ -1739,41 +1806,87 @@ bool FunctionArrayEnumerateUniq::execute128bit( ValuesToIndices indices; size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + if constexpr (std::is_same_v) { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + // Unique + for (size_t i = 0; i < offsets.size(); ++i) { - if (has_nullable_columns) + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) { - KeysNullMap bitmap{}; - - for (size_t i = 0; i < columns.size(); ++i) + if (has_nullable_columns) { - if (null_maps[i]) + KeysNullMap bitmap{}; + + for (size_t i = 0; i < columns.size(); ++i) { - const auto & null_map = static_cast(*null_maps[i]).getData(); - if (null_map[j] == 1) + if (null_maps[i]) { - size_t bucket = i / 8; - size_t offset = i % 8; - bitmap[bucket] |= UInt8(1) << offset; + const auto & null_map = static_cast(*null_maps[i]).getData(); + if (null_map[j] == 1) + { + size_t bucket = i / 8; + size_t offset = i % 8; + bitmap[bucket] |= UInt8(1) << offset; + } } } + res_values[j] = ++indices[packFixed(j, count, columns, key_sizes, bitmap)]; } - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes, bitmap)]; + else + res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; } - else - res_values[j] = ++indices[packFixed(j, count, columns, key_sizes)]; + prev_off = off; + } + } else { + // Dense + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + size_t rank = 0; + for (size_t j = prev_off; j < off; ++j) + { + if (has_nullable_columns) + { + KeysNullMap bitmap{}; + + for (size_t i = 0; i < columns.size(); ++i) + { + if (null_maps[i]) + { + const auto & null_map = static_cast(*null_maps[i]).getData(); + if (null_map[j] == 1) + { + size_t bucket = i / 8; + size_t offset = i % 8; + bitmap[bucket] |= UInt8(1) << offset; + } + } + } + auto &idx = indices[packFixed(j, count, columns, key_sizes, bitmap)]; + if (!idx) + idx = ++rank; + res_values[j] = idx; + } + else + { + auto &idx = indices[packFixed(j, count, columns, key_sizes)];; + if (!idx) + idx = ++rank; + res_values[j] = idx; + } + } + prev_off = off; } - prev_off = off; } return true; } -void FunctionArrayEnumerateUniq::executeHashed( +template +void FunctionArrayEnumerateExtended::executeHashed( const ColumnArray::Offsets & offsets, const ColumnRawPtrs & columns, ColumnUInt32::Container & res_values) @@ -1785,18 +1898,43 @@ void FunctionArrayEnumerateUniq::executeHashed( ValuesToIndices indices; size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + if constexpr (std::is_same_v) { - indices.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + // Unique + for (size_t i = 0; i < offsets.size(); ++i) { - res_values[j] = ++indices[hash128(j, count, columns)]; + indices.clear(); + size_t off = offsets[i]; + for (size_t j = prev_off; j < off; ++j) + { + res_values[j] = ++indices[hash128(j, count, columns)]; + } + prev_off = off; + } + } + else + { + // Dense + for (size_t i = 0; i < offsets.size(); ++i) + { + indices.clear(); + size_t off = offsets[i]; + size_t rank = 0; + for (size_t j = prev_off; j < off; ++j) + { + auto & idx = indices[hash128(j, count, columns)]; + if (!idx) + idx = ++rank; + res_values[j] = idx; + } + prev_off = off; } - prev_off = off; } } +template class FunctionArrayEnumerateExtended; +template class FunctionArrayEnumerateExtended; + /// Implementation of FunctionEmptyArrayToSingle. FunctionPtr FunctionEmptyArrayToSingle::create(const Context &) { return std::make_shared(); } diff --git a/dbms/src/Functions/FunctionsArray.h b/dbms/src/Functions/FunctionsArray.h index c90f6a7c6bf..cc5681b9375 100644 --- a/dbms/src/Functions/FunctionsArray.h +++ b/dbms/src/Functions/FunctionsArray.h @@ -1260,10 +1260,10 @@ private: }; -class FunctionArrayEnumerateUniq : public IFunction +template +class FunctionArrayEnumerateExtended : public IFunction { public: - static constexpr auto name = "arrayEnumerateUniq"; static FunctionPtr create(const Context & context); String getName() const override; @@ -1298,6 +1298,21 @@ private: ColumnUInt32::Container & res_values); }; +class FunctionArrayEnumerateUniq : public FunctionArrayEnumerateExtended +{ + using Base = FunctionArrayEnumerateExtended; +public: + static constexpr auto name = "arrayEnumerateUniq"; + using Base::create; +}; + +class FunctionArrayEnumerateDense : public FunctionArrayEnumerateExtended +{ + using Base = FunctionArrayEnumerateExtended; +public: + static constexpr auto name = "arrayEnumerateDense"; + using Base::create; +}; template struct TypeToColumnType { using ColumnType = ColumnVector; }; template <> struct TypeToColumnType { using ColumnType = ColumnString; }; diff --git a/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.reference b/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.reference new file mode 100644 index 00000000000..f48ebd99f0f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.reference @@ -0,0 +1,2 @@ +[1,2,3,1] +[1,2,1] diff --git a/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.sql b/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.sql new file mode 100644 index 00000000000..194c941b72b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00710_array_enumerate_dense.sql @@ -0,0 +1,5 @@ +-- empty result set +SELECT a FROM (SELECT groupArray(intDiv(number, 54321)) AS a, arrayUniq(a) AS u, arrayEnumerateDense(a) AS arr FROM (SELECT number FROM system.numbers LIMIT 1000000) GROUP BY intHash32(number) % 100000) where u <> arrayReverseSort(arr)[1]; + +SELECT arrayEnumerateDense([[1], [2], [34], [1]]); +SELECT arrayEnumerateDense([(1, 2), (3, 4), (1, 2)]);