This commit is contained in:
Robert Schulze 2023-07-03 18:33:59 +00:00
parent 91d091a806
commit 9660291ab3
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
3 changed files with 21 additions and 31 deletions

View File

@ -998,7 +998,7 @@ SELECT
## arrayJaccardIndex ## arrayJaccardIndex
Returns the jaccard similarity between two arrays. Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays.
**Example** **Example**

View File

@ -37,6 +37,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{ {
FunctionArgumentDescriptors args{ FunctionArgumentDescriptors args{
// XXX
{"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"},
{"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"},
}; };
@ -45,7 +46,7 @@ public:
} }
template <bool is_const_left, bool is_const_right> template <bool is_const_left, bool is_const_right>
static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i)
{ {
if constexpr (is_const_left) if constexpr (is_const_left)
left_size = left_offsets[0]; left_size = left_offsets[0];
@ -58,7 +59,7 @@ public:
} }
template <bool is_const_left, bool is_const_right> template <bool is_const_left, bool is_const_right>
static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res) static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res)
{ {
size_t left_size; size_t left_size;
size_t right_size; size_t right_size;
@ -71,14 +72,14 @@ public:
} }
template <bool is_const_left, bool is_const_right> template <bool is_const_left, bool is_const_right>
static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res) static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res)
{ {
size_t left_size; size_t left_size;
size_t right_size; size_t right_size;
for (size_t i = 0; i < res.size(); ++i) for (size_t i = 0; i < res.size(); ++i)
{ {
getArraySize<is_const_left, is_const_right>(left_offsets, right_offsets, left_size, right_size, i); getArraySize<is_const_left, is_const_right>(left_offsets, right_offsets, left_size, right_size, i);
if (unlikely(!left_size && !right_size)) if ((!left_size && !right_size))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays");
res[i] = 0; res[i] = 0;
} }

View File

@ -1,31 +1,20 @@
drop table if exists array_jaccard_index; SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; DROP TABLE IF EXISTS array_jaccard_index;
insert into array_jaccard_index values ([1,2,3]); CREATE TABLE array_jaccard_index (arr Array(UInt8)) engine = MergeTree ORDER BY arr;
INSERT INTO array_jaccard_index values ([1,2,3]);
INSERT INTO array_jaccard_index values ([1,2]);
INSERT INTO array_jaccard_index values ([1]);
insert into array_jaccard_index values ([1,2]); SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr;
SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr;
insert into array_jaccard_index values ([1]); SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr;
SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr;
select arr as arr_1, [1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr;
select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr;
select [] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr;
select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr;
select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr;
drop table array_jaccard_index; drop table array_jaccard_index;
select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);
select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2);