CLICKHOUSE-4179 Fix function arrayEnumerateUniqRanked (#5622)

* wip

* fix

* clean
This commit is contained in:
proller 2019-06-15 22:14:17 +03:00 committed by alexey-milovidov
parent e53a5a0c7d
commit 2a6c7cda0b
4 changed files with 145 additions and 3 deletions

View File

@ -11,6 +11,7 @@
#include <Common/ColumnsHashing.h>
#include <Common/HashTable/ClearableHashMap.h>
// for better debug: #include <Core/iostream_debug_helpers.h>
/** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths.
* This is a very unusual function made as a special order for Yandex.Metrica.
@ -335,7 +336,8 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
if (prev_off == off)
{
want_clear = true;
++indices_by_depth[0];
if (arrays_depths.max_array_depth > 1)
++indices_by_depth[0];
for (ssize_t depth = current_offset_depth - 1; depth >= 0; --depth)
{

View File

@ -171,6 +171,18 @@ arrayEnumerateUniq(a1, a2) =
2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]]
2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]]
---------END
a3,a4 1..n [[]]
a3,a4 1..n [[]]
a3,a4 1..n [[1,1]]
a3,a4 1..n [[1,1]]
a3,a4 1..n [[1,1]]
a3,a4 1..n [[1,1]]
a3,a4 1..1 [[]]
a3,a4 1..1 [[]]
a3,a4 1..1 [[1,2]]
a3,a4 1..1 [[3,4]]
a3,a4 1..1 [[1,2]]
a3,a4 1..1 [[3,4]]
---------BAD
[]
[]
@ -225,3 +237,44 @@ arrayEnumerateUniq(a1, a2) =
[[],[],[1,2,1,4]] [[],[],[1,1,2,1]]
[[],[],[3,4,5,4]] [[],[],[1,1,1,2]]
--
a1,a2 n []
a1,a2 n []
a1,a2 n [1]
a1,a2 n [1]
a1,a2 1 []
a1,a2 1 []
a1,a2 1 [1]
a1,a2 1 [1]
a1,a2 n2 []
a1,a2 n2 []
a1,a2 n2 [1,1]
a1,a2 n2 [1,1]
a1,a2 n2 [1,1]
a1,a2 n2 [1,1]
a1,a2 12 []
a1,a2 12 []
a1,a2 12 [1,2]
a1,a2 12 [1,2]
a1,a2 12 [1,2]
a1,a2 12 [1,2]
1 2019-06-06 1 1 1 1 1 1 [] [] []
1 2019-06-06 -1 1 1 1 1 1 [] [] []
1 2019-06-06 1 1 1 2 1 1 [] [] []
1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1]
1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1]
1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1]
1 2019-06-06 1 3 2 1 5 0 [] [] []
1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1]
1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1]
1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1]
--
1 2019-06-06 1 1 1 1 1 1 [] [] []
1 2019-06-06 -1 1 1 1 1 1 [] [] []
1 2019-06-06 1 1 1 2 1 1 [] [] []
1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1]
1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1]
1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1]
1 2019-06-06 1 3 2 1 5 0 [] [] []
1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1]
1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1]
1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1]

View File

@ -1,3 +1,4 @@
-- env SQL_FUZZY_FUNCTIONS=arrayEnumerateUniqRanked,arrayEnumerateDenseRanked SQL_FUZZY_RUNS=1000 clickhouse-test fuzzy
-- Baseline: plain arrayEnumerateUniq over two flat arrays of equal length.
SELECT arrayEnumerateUniq(
    [1, 1, 2, 2, 1, 1],
    [1, 2, 1, 2, 2, 2]
);

-- The same two arrays passed to the ranked variant, with integer arguments
-- placed before, between and after the arrays.
SELECT arrayEnumerateUniqRanked(
    1,
    [1, 1, 2, 2, 1, 1],
    1,
    [1, 2, 1, 2, 2, 2],
    1
);
@ -140,6 +141,16 @@ SELECT '2,a3,1,a4,2', arrayEnumerateUniqRanked(2,a3,1,a4,2) FROM arrays_test ORD
SELECT '---------END';
DROP TABLE arrays_test;

-- Two-level arrays: rows holding a single empty inner array are interleaved
-- with rows holding a single two-element inner array.
CREATE TABLE arrays_test
(
    a3 Array(Array(UInt8)),
    a4 Array(Array(UInt32))
)
ENGINE = Memory;

-- Case 1: every non-empty row carries values that appear nowhere else.
-- NOTE: keep the whole VALUES list on one physical line.
INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,2]], [[3,4]]), ([[5,6]], [[7,8]]), ([[]], [[]]), ([[9,10]], [[11,12]]), ([[13,14]], [[15,16]]);
SELECT
    'a3,a4 1..n',
    arrayEnumerateUniqRanked(a3, a4)
FROM arrays_test
ORDER BY
    a3,
    a4;

TRUNCATE TABLE arrays_test;

-- Case 2: every non-empty row carries the same repeated value.
-- NOTE: keep the whole VALUES list on one physical line.
INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]), ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]);
SELECT
    'a3,a4 1..1',
    arrayEnumerateUniqRanked(a3, a4)
FROM arrays_test
ORDER BY
    a3,
    a4;

DROP TABLE arrays_test;

SELECT '---------BAD';
SELECT arrayEnumerateUniqRanked(); -- { serverError 42 }
SELECT arrayEnumerateUniqRanked([]);
@ -221,3 +232,76 @@ SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[], [], [1,
SELECT '-- ';

-- Two three-element nested arrays (two empty inner arrays followed by a
-- four-element one), combined with UNION ALL and sorted before enumeration.
SELECT
    a,
    arrayEnumerateUniqRanked(a)
FROM
(
    SELECT *
    FROM
    (
        SELECT [[], [], [1, 2, 1, 4]] AS a
        UNION ALL
        SELECT [[], [], [3, 4, 5, 4]] AS a
    )
    ORDER BY a ASC
);

SELECT '-- ';
-- One-level arrays: empty rows interleaved with non-empty rows, in four variants
-- (one or two elements per row; distinct or repeated values).
DROP TABLE IF EXISTS arrays_test;
CREATE TABLE arrays_test
(
    a1 Array(UInt8),
    a2 Array(UInt32)
)
ENGINE = Memory;

-- Single-element rows, distinct values.
-- NOTE: keep each VALUES list in this block on one physical line.
INSERT INTO arrays_test VALUES ([], []),([10], [11]), ([], []), ([12], [13]);
SELECT
    'a1,a2 n',
    arrayEnumerateUniqRanked(a1, a2)
FROM arrays_test
ORDER BY
    a1,
    a2;

TRUNCATE TABLE arrays_test;

-- Single-element rows, the same value in every non-empty row.
INSERT INTO arrays_test VALUES ([], []),([1], [1]), ([], []), ([1], [1]);
SELECT
    'a1,a2 1',
    arrayEnumerateUniqRanked(a1, a2)
FROM arrays_test
ORDER BY
    a1,
    a2;

TRUNCATE TABLE arrays_test;

-- Two-element rows, distinct values.
INSERT INTO arrays_test VALUES ([], []), ([1,2], [3,4]), ([5,6], [7,8]), ([], []), ([9,10], [11,12]), ([13,14], [15,16]);
SELECT
    'a1,a2 n2',
    arrayEnumerateUniqRanked(a1, a2)
FROM arrays_test
ORDER BY
    a1,
    a2;

TRUNCATE TABLE arrays_test;

-- Two-element rows, the same repeated value in every non-empty row.
INSERT INTO arrays_test VALUES ([], []), ([1,1], [1,1]), ([1,1], [1,1]), ([], []), ([1,1], [1,1]), ([1,1], [1,1]);
SELECT
    'a1,a2 12',
    arrayEnumerateUniqRanked(a1, a2)
FROM arrays_test
ORDER BY
    a1,
    a2;

DROP TABLE arrays_test;
-- MergeTree table with four parallel `Test.*` array columns; the two Puid columns
-- are arrays of arrays and are unrolled one level by ARRAY JOIN below.
DROP TABLE IF EXISTS arr_tests_visits;
CREATE TABLE arr_tests_visits
(
    CounterID UInt32,
    StartDate Date,
    Sign Int8,
    VisitID UInt64,
    UserID UInt64,
    VisitVersion UInt16,
    `Test.BannerID` Array(UInt64),
    `Test.Load` Array(UInt8),
    `Test.PuidKey` Array(Array(UInt8)),
    `Test.PuidVal` Array(Array(UInt32))
)
ENGINE = MergeTree()
PARTITION BY toMonday(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID)
SETTINGS index_granularity = 8192;

TRUNCATE TABLE arr_tests_visits;

-- NOTE: keep the whole VALUES list on one physical line.
INSERT INTO arr_tests_visits (CounterID, StartDate, Sign, VisitID, UserID, VisitVersion, `Test.BannerID`, `Test.Load`, `Test.PuidKey`, `Test.PuidVal`)
VALUES (1, toDate('2019-06-06'), 1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), -1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), 1, 1, 1, 2, [1,2], [1,1], [[],[1,2,3,4]], [[],[1001, 1002, 1003, 1004]]), (1, toDate('2019-06-06'), 1, 2, 1, 1, [3], [1], [[3,4,5]], [[2001, 2002, 2003]]), (1, toDate('2019-06-06'), 1, 3, 2, 1, [4, 5], [1, 0], [[5,6],[]], [[3001, 3002],[]]), (1, toDate('2019-06-06'), 1, 4, 2, 1, [5, 5, 6], [1, 0, 0], [[1,2], [1, 2], [3]], [[1001, 1002],[1002, 1003], [2001]]);

-- Unroll the four Test.* arrays in parallel and enumerate each resulting
-- (PuidKeyArr, PuidValArr) pair of inner arrays.
SELECT
    CounterID,
    StartDate,
    Sign,
    VisitID,
    UserID,
    VisitVersion,
    BannerID,
    Load,
    PuidKeyArr,
    PuidValArr,
    arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) AS uniqTestPuid
FROM arr_tests_visits
ARRAY JOIN
    Test.BannerID AS BannerID,
    Test.Load AS Load,
    Test.PuidKey AS PuidKeyArr,
    Test.PuidVal AS PuidValArr;

SELECT '--';

-- The same query issued a second time.
SELECT
    CounterID, StartDate, Sign, VisitID, UserID, VisitVersion,
    BannerID, Load, PuidKeyArr, PuidValArr,
    arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) AS uniqTestPuid
FROM arr_tests_visits
ARRAY JOIN
    Test.BannerID AS BannerID,
    Test.Load AS Load,
    Test.PuidKey AS PuidKeyArr,
    Test.PuidVal AS PuidValArr;

DROP TABLE arr_tests_visits;

View File

@ -150,11 +150,14 @@ Out & dump(Out & out, const char * name, T && x)
/// Fixed-arity dump helpers. Each one expands to DUMPHEAD, one DUMPVAR per argument and DUMPTAIL,
/// wrapped in do { ... } while(0) so that the expansion behaves as a single statement.
#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0)
#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0)
#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0)
#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0)
#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0)
#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0)
/// Argument counting for variadic macros, see:
/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ
/// VA_NUM_ARGS appends a descending list of numbers after the caller's arguments; the caller's
/// arguments shift that list to the right, so the N parameter of VA_NUM_ARGS_IMPL receives
/// the number equal to the argument count.
/// NOTE(review): VA_NUM_ARGS_IMPL and VA_NUM_ARGS are each listed twice below (counting up to 6 and
/// up to 9 arguments). Presumably the first pair is the pre-change text and the second pair replaces
/// it - confirm that only one pair is present in the actual header.
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, N, ...) N
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
/// Token-pastes PREFIX and NUM_ARGS into a single identifier.
#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS
/// Extra level of indirection so that both arguments are macro-expanded before being pasted.
#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)