mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
CLICKHOUSE-4179 Fix function arrayEnumerateUniqRanked (#5622)
* wip * fix * clean
This commit is contained in:
parent
e53a5a0c7d
commit
2a6c7cda0b
@ -11,6 +11,7 @@
|
||||
#include <Common/ColumnsHashing.h>
|
||||
#include <Common/HashTable/ClearableHashMap.h>
|
||||
|
||||
// for better debug: #include <Core/iostream_debug_helpers.h>
|
||||
|
||||
/** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths.
|
||||
* This is a very unusual function made as a special order for Yandex.Metrica.
|
||||
@ -335,7 +336,8 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
|
||||
if (prev_off == off)
|
||||
{
|
||||
want_clear = true;
|
||||
++indices_by_depth[0];
|
||||
if (arrays_depths.max_array_depth > 1)
|
||||
++indices_by_depth[0];
|
||||
|
||||
for (ssize_t depth = current_offset_depth - 1; depth >= 0; --depth)
|
||||
{
|
||||
|
@ -171,6 +171,18 @@ arrayEnumerateUniq(a1, a2) =
|
||||
2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]]
|
||||
2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]]
|
||||
---------END
|
||||
a3,a4 1..n [[]]
|
||||
a3,a4 1..n [[]]
|
||||
a3,a4 1..n [[1,1]]
|
||||
a3,a4 1..n [[1,1]]
|
||||
a3,a4 1..n [[1,1]]
|
||||
a3,a4 1..n [[1,1]]
|
||||
a3,a4 1..1 [[]]
|
||||
a3,a4 1..1 [[]]
|
||||
a3,a4 1..1 [[1,2]]
|
||||
a3,a4 1..1 [[3,4]]
|
||||
a3,a4 1..1 [[1,2]]
|
||||
a3,a4 1..1 [[3,4]]
|
||||
---------BAD
|
||||
[]
|
||||
[]
|
||||
@ -225,3 +237,44 @@ arrayEnumerateUniq(a1, a2) =
|
||||
[[],[],[1,2,1,4]] [[],[],[1,1,2,1]]
|
||||
[[],[],[3,4,5,4]] [[],[],[1,1,1,2]]
|
||||
--
|
||||
a1,a2 n []
|
||||
a1,a2 n []
|
||||
a1,a2 n [1]
|
||||
a1,a2 n [1]
|
||||
a1,a2 1 []
|
||||
a1,a2 1 []
|
||||
a1,a2 1 [1]
|
||||
a1,a2 1 [1]
|
||||
a1,a2 n2 []
|
||||
a1,a2 n2 []
|
||||
a1,a2 n2 [1,1]
|
||||
a1,a2 n2 [1,1]
|
||||
a1,a2 n2 [1,1]
|
||||
a1,a2 n2 [1,1]
|
||||
a1,a2 12 []
|
||||
a1,a2 12 []
|
||||
a1,a2 12 [1,2]
|
||||
a1,a2 12 [1,2]
|
||||
a1,a2 12 [1,2]
|
||||
a1,a2 12 [1,2]
|
||||
1 2019-06-06 1 1 1 1 1 1 [] [] []
|
||||
1 2019-06-06 -1 1 1 1 1 1 [] [] []
|
||||
1 2019-06-06 1 1 1 2 1 1 [] [] []
|
||||
1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1]
|
||||
1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1]
|
||||
1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1]
|
||||
1 2019-06-06 1 3 2 1 5 0 [] [] []
|
||||
1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1]
|
||||
1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1]
|
||||
1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1]
|
||||
--
|
||||
1 2019-06-06 1 1 1 1 1 1 [] [] []
|
||||
1 2019-06-06 -1 1 1 1 1 1 [] [] []
|
||||
1 2019-06-06 1 1 1 2 1 1 [] [] []
|
||||
1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1]
|
||||
1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1]
|
||||
1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1]
|
||||
1 2019-06-06 1 3 2 1 5 0 [] [] []
|
||||
1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1]
|
||||
1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1]
|
||||
1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1]
|
||||
|
@ -1,3 +1,4 @@
|
||||
-- env SQL_FUZZY_FUNCTIONS=arrayEnumerateUniqRanked,arrayEnumerateDenseRanked SQL_FUZZY_RUNS=1000 clickhouse-test fuzzy
|
||||
|
||||
SELECT arrayEnumerateUniq( [1,1,2,2,1,1], [1,2,1,2,2,2]);
|
||||
SELECT arrayEnumerateUniqRanked(1, [1,1,2,2,1,1], 1, [1,2,1,2,2,2],1);
|
||||
@ -140,6 +141,16 @@ SELECT '2,a3,1,a4,2', arrayEnumerateUniqRanked(2,a3,1,a4,2) FROM arrays_test ORD
|
||||
select '---------END';
|
||||
DROP TABLE arrays_test;
|
||||
|
||||
CREATE TABLE arrays_test (a3 Array(Array(UInt8)), a4 Array(Array(UInt32)) ) ENGINE = Memory;
|
||||
INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,2]], [[3,4]]), ([[5,6]], [[7,8]]), ([[]], [[]]), ([[9,10]], [[11,12]]), ([[13,14]], [[15,16]]);
|
||||
SELECT 'a3,a4 1..n', arrayEnumerateUniqRanked(a3,a4) FROM arrays_test ORDER BY a3, a4;
|
||||
TRUNCATE TABLE arrays_test;
|
||||
INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]), ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]);
|
||||
SELECT 'a3,a4 1..1', arrayEnumerateUniqRanked(a3,a4) FROM arrays_test ORDER BY a3, a4;
|
||||
DROP TABLE arrays_test;
|
||||
|
||||
|
||||
|
||||
select '---------BAD';
|
||||
SELECT arrayEnumerateUniqRanked(); -- { serverError 42 }
|
||||
SELECT arrayEnumerateUniqRanked([]);
|
||||
@ -221,3 +232,76 @@ SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[], [], [1,
|
||||
select '-- ';
|
||||
SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[], [], [1, 2, 1, 4]] AS a UNION ALL SELECT [[], [], [3, 4, 5, 4]] AS a ) ORDER BY a ASC );
|
||||
select '-- ';
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS arrays_test;
|
||||
CREATE TABLE arrays_test (a1 Array(UInt8), a2 Array(UInt32) ) ENGINE = Memory;
|
||||
INSERT INTO arrays_test VALUES ([], []),([10], [11]), ([], []), ([12], [13]);
|
||||
SELECT 'a1,a2 n', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2;
|
||||
|
||||
TRUNCATE TABLE arrays_test;
|
||||
INSERT INTO arrays_test VALUES ([], []),([1], [1]), ([], []), ([1], [1]);
|
||||
SELECT 'a1,a2 1', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2;
|
||||
|
||||
TRUNCATE TABLE arrays_test;
|
||||
INSERT INTO arrays_test VALUES ([], []), ([1,2], [3,4]), ([5,6], [7,8]), ([], []), ([9,10], [11,12]), ([13,14], [15,16]);
|
||||
SELECT 'a1,a2 n2', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2;
|
||||
|
||||
TRUNCATE TABLE arrays_test;
|
||||
INSERT INTO arrays_test VALUES ([], []), ([1,1], [1,1]), ([1,1], [1,1]), ([], []), ([1,1], [1,1]), ([1,1], [1,1]);
|
||||
SELECT 'a1,a2 12', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2;
|
||||
|
||||
DROP TABLE arrays_test;
|
||||
|
||||
|
||||
|
||||
DROP TABLE IF EXISTS arr_tests_visits;
|
||||
|
||||
CREATE TABLE arr_tests_visits
|
||||
(
|
||||
CounterID UInt32,
|
||||
StartDate Date,
|
||||
Sign Int8,
|
||||
VisitID UInt64,
|
||||
UserID UInt64,
|
||||
VisitVersion UInt16,
|
||||
`Test.BannerID` Array(UInt64),
|
||||
`Test.Load` Array(UInt8),
|
||||
`Test.PuidKey` Array(Array(UInt8)),
|
||||
`Test.PuidVal` Array(Array(UInt32))
|
||||
) ENGINE = MergeTree() PARTITION BY toMonday(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192;
|
||||
|
||||
truncate table arr_tests_visits;
|
||||
insert into arr_tests_visits (CounterID, StartDate, Sign, VisitID, UserID, VisitVersion, `Test.BannerID`, `Test.Load`, `Test.PuidKey`, `Test.PuidVal`)
|
||||
values (1, toDate('2019-06-06'), 1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), -1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), 1, 1, 1, 2, [1,2], [1,1], [[],[1,2,3,4]], [[],[1001, 1002, 1003, 1004]]), (1, toDate('2019-06-06'), 1, 2, 1, 1, [3], [1], [[3,4,5]], [[2001, 2002, 2003]]), (1, toDate('2019-06-06'), 1, 3, 2, 1, [4, 5], [1, 0], [[5,6],[]], [[3001, 3002],[]]), (1, toDate('2019-06-06'), 1, 4, 2, 1, [5, 5, 6], [1, 0, 0], [[1,2], [1, 2], [3]], [[1001, 1002],[1002, 1003], [2001]]);
|
||||
|
||||
select CounterID, StartDate, Sign, VisitID, UserID, VisitVersion, BannerID, Load, PuidKeyArr, PuidValArr, arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) as uniqTestPuid
|
||||
from arr_tests_visits
|
||||
array join
|
||||
Test.BannerID as BannerID,
|
||||
Test.Load as Load,
|
||||
Test.PuidKey as PuidKeyArr,
|
||||
Test.PuidVal as PuidValArr;
|
||||
|
||||
select '--';
|
||||
|
||||
SELECT
|
||||
CounterID,
|
||||
StartDate,
|
||||
Sign,
|
||||
VisitID,
|
||||
UserID,
|
||||
VisitVersion,
|
||||
BannerID,
|
||||
Load,
|
||||
PuidKeyArr,
|
||||
PuidValArr,
|
||||
arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) AS uniqTestPuid
|
||||
FROM arr_tests_visits
|
||||
ARRAY JOIN
|
||||
Test.BannerID AS BannerID,
|
||||
Test.Load AS Load,
|
||||
Test.PuidKey AS PuidKeyArr,
|
||||
Test.PuidVal AS PuidValArr;
|
||||
|
||||
DROP TABLE arr_tests_visits;
|
||||
|
@ -150,11 +150,14 @@ Out & dump(Out & out, const char * name, T && x)
|
||||
#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0)
|
||||
#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0)
|
||||
#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0)
|
||||
#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0)
|
||||
#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0)
|
||||
#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0)
|
||||
|
||||
/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ
|
||||
|
||||
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, N, ...) N
|
||||
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 6, 5, 4, 3, 2, 1)
|
||||
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N
|
||||
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
|
||||
|
||||
#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS
|
||||
#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)
|
||||
|
Loading…
Reference in New Issue
Block a user