diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 921e9765080..862ecc42158 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -998,7 +998,7 @@ SELECT ## arrayJaccardIndex -Returns the jaccard similarity between two arrays. +Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays. **Example** diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 211680092b3..078687a6431 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -37,6 +37,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ + // XXX {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, }; @@ -45,7 +46,7 @@ public: } template - static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) + static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) { if constexpr (is_const_left) left_size = left_offsets[0]; @@ -58,7 +59,7 @@ public: } template - static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; @@ -71,14 +72,14 @@ public: } template - static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { getArraySize(left_offsets, right_offsets, left_size, right_size, i); - if (unlikely(!left_size && !right_size)) + if ((!left_size && !right_size)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); res[i] = 0; } diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index 000106e93b7..ba5a93f1658 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,31 +1,20 @@ -drop table if exists array_jaccard_index; +SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; +DROP TABLE IF EXISTS array_jaccard_index; -insert into array_jaccard_index values ([1,2,3]); +CREATE TABLE array_jaccard_index (arr Array(UInt8)) engine = MergeTree ORDER BY arr; +INSERT INTO array_jaccard_index values ([1,2,3]); +INSERT INTO array_jaccard_index values ([1,2]); +INSERT INTO array_jaccard_index values ([1]); -insert into array_jaccard_index values ([1,2]); - -insert into array_jaccard_index values ([1]); - -select arr as arr_1, [1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; +SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; drop table array_jaccard_index; - -select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);