From 2a6c7cda0b309ea6bb0da3d227f54b960cecf0a1 Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 15 Jun 2019 22:14:17 +0300 Subject: [PATCH] CLICKHOUSE-4179 Fix function arrayEnumerateUniqRanked (#5622) * wip * fix * clean --- dbms/src/Functions/arrayEnumerateRanked.h | 4 +- .../00909_arrayEnumerateUniq.reference | 53 ++++++++++++ .../0_stateless/00909_arrayEnumerateUniq.sql | 84 +++++++++++++++++++ .../include/common/iostream_debug_helpers.h | 7 +- 4 files changed, 145 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/arrayEnumerateRanked.h b/dbms/src/Functions/arrayEnumerateRanked.h index 0d8449776a0..ed7f0d647d1 100644 --- a/dbms/src/Functions/arrayEnumerateRanked.h +++ b/dbms/src/Functions/arrayEnumerateRanked.h @@ -11,6 +11,7 @@ #include #include +// for better debug: #include /** The function will enumerate distinct values of the passed multidimensional arrays looking inside at the specified depths. * This is very unusual function made as a special order for Yandex.Metrica. @@ -335,7 +336,8 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( if (prev_off == off) { want_clear = true; - ++indices_by_depth[0]; + if (arrays_depths.max_array_depth > 1) + ++indices_by_depth[0]; for (ssize_t depth = current_offset_depth - 1; depth >= 0; --depth) { diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference index a73b01f211d..5065b782f54 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.reference @@ -171,6 +171,18 @@ arrayEnumerateUniq(a1, a2) = 2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]] 2,a3,1,a4,2 [[1,1,1,2],[1,2,1],[1]] ---------END +a3,a4 1..n [[]] +a3,a4 1..n [[]] +a3,a4 1..n [[1,1]] +a3,a4 1..n [[1,1]] +a3,a4 1..n [[1,1]] +a3,a4 1..n [[1,1]] +a3,a4 1..1 [[]] +a3,a4 1..1 [[]] +a3,a4 1..1 [[1,2]] +a3,a4 1..1 [[3,4]] +a3,a4 1..1 [[1,2]] +a3,a4 1..1 [[3,4]] ---------BAD [] [] @@ -225,3 +237,44 @@ arrayEnumerateUniq(a1, a2) = [[],[],[1,2,1,4]] [[],[],[1,1,2,1]] [[],[],[3,4,5,4]] [[],[],[1,1,1,2]] -- +a1,a2 n [] +a1,a2 n [] +a1,a2 n [1] +a1,a2 n [1] +a1,a2 1 [] +a1,a2 1 [] +a1,a2 1 [1] +a1,a2 1 [1] +a1,a2 n2 [] +a1,a2 n2 [] +a1,a2 n2 [1,1] +a1,a2 n2 [1,1] +a1,a2 n2 [1,1] +a1,a2 n2 [1,1] +a1,a2 12 [] +a1,a2 12 [] +a1,a2 12 [1,2] +a1,a2 12 [1,2] +a1,a2 12 [1,2] +a1,a2 12 [1,2] +1 2019-06-06 1 1 1 1 1 1 [] [] [] +1 2019-06-06 -1 1 1 1 1 1 [] [] [] +1 2019-06-06 1 1 1 2 1 1 [] [] [] +1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1] +1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1] +1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1] +1 2019-06-06 1 3 2 1 5 0 [] [] [] +1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1] +1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1] +1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1] +-- +1 2019-06-06 1 1 1 1 1 1 [] [] [] +1 2019-06-06 -1 1 1 1 1 1 [] [] [] +1 2019-06-06 1 1 1 2 1 1 [] [] [] +1 2019-06-06 1 1 1 2 2 1 [1,2,3,4] [1001,1002,1003,1004] [1,1,1,1] +1 2019-06-06 1 2 1 1 3 1 [3,4,5] [2001,2002,2003] [1,1,1] +1 2019-06-06 1 3 2 1 4 1 [5,6] [3001,3002] [1,1] +1 2019-06-06 1 3 2 1 5 0 [] [] [] +1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1] +1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1] +1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1] diff --git a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql index bb9238be441..5f4b12e1988 100644 --- a/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql +++ b/dbms/tests/queries/0_stateless/00909_arrayEnumerateUniq.sql @@ -1,3 +1,4 @@ +-- env SQL_FUZZY_FUNCTIONS=arrayEnumerateUniqRanked,arrayEnumerateDenseRanked SQL_FUZZY_RUNS=1000 clickhouse-test fuzzy SELECT arrayEnumerateUniq( [1,1,2,2,1,1], [1,2,1,2,2,2]); SELECT arrayEnumerateUniqRanked(1, [1,1,2,2,1,1], 1, [1,2,1,2,2,2],1); @@ -140,6 +141,16 @@ SELECT '2,a3,1,a4,2', arrayEnumerateUniqRanked(2,a3,1,a4,2) FROM arrays_test ORD select '---------END'; DROP TABLE arrays_test; +CREATE TABLE arrays_test (a3 Array(Array(UInt8)), a4 Array(Array(UInt32)) ) ENGINE = Memory; +INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,2]], [[3,4]]), ([[5,6]], [[7,8]]), ([[]], [[]]), ([[9,10]], [[11,12]]), ([[13,14]], [[15,16]]); +SELECT 'a3,a4 1..n', arrayEnumerateUniqRanked(a3,a4) FROM arrays_test ORDER BY a3, a4; +TRUNCATE TABLE arrays_test; +INSERT INTO arrays_test VALUES ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]), ([[]], [[]]), ([[1,1]], [[1,1]]), ([[1,1]], [[1,1]]); +SELECT 'a3,a4 1..1', arrayEnumerateUniqRanked(a3,a4) FROM arrays_test ORDER BY a3, a4; +DROP TABLE arrays_test; + + + select '---------BAD'; SELECT arrayEnumerateUniqRanked(); -- { serverError 42 } SELECT arrayEnumerateUniqRanked([]); @@ -221,3 +232,76 @@ SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[], [], [1, select '-- '; SELECT a, arrayEnumerateUniqRanked(a) FROM ( SELECT * FROM ( SELECT [[], [], [1, 2, 1, 4]] AS a UNION ALL SELECT [[], [], [3, 4, 5, 4]] AS a ) ORDER BY a ASC ); select '-- '; + + +DROP TABLE IF EXISTS arrays_test; +CREATE TABLE arrays_test (a1 Array(UInt8), a2 Array(UInt32) ) ENGINE = Memory; +INSERT INTO arrays_test VALUES ([], []),([10], [11]), ([], []), ([12], [13]); +SELECT 'a1,a2 n', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2; + +TRUNCATE TABLE arrays_test; +INSERT INTO arrays_test VALUES ([], []),([1], [1]), ([], []), ([1], [1]); +SELECT 'a1,a2 1', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2; + +TRUNCATE TABLE arrays_test; +INSERT INTO arrays_test VALUES ([], []), ([1,2], [3,4]), ([5,6], [7,8]), ([], []), ([9,10], [11,12]), ([13,14], [15,16]); +SELECT 'a1,a2 n2', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2; + +TRUNCATE TABLE arrays_test; +INSERT INTO arrays_test VALUES ([], []), ([1,1], [1,1]), ([1,1], [1,1]), ([], []), ([1,1], [1,1]), ([1,1], [1,1]); +SELECT 'a1,a2 12', arrayEnumerateUniqRanked(a1,a2) FROM arrays_test ORDER BY a1, a2; + +DROP TABLE arrays_test; + + + +DROP TABLE IF EXISTS arr_tests_visits; + +CREATE TABLE arr_tests_visits +( + CounterID UInt32, + StartDate Date, + Sign Int8, + VisitID UInt64, + UserID UInt64, + VisitVersion UInt16, + `Test.BannerID` Array(UInt64), + `Test.Load` Array(UInt8), + `Test.PuidKey` Array(Array(UInt8)), + `Test.PuidVal` Array(Array(UInt32)) +) ENGINE = MergeTree() PARTITION BY toMonday(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192; + +truncate table arr_tests_visits; +insert into arr_tests_visits (CounterID, StartDate, Sign, VisitID, UserID, VisitVersion, `Test.BannerID`, `Test.Load`, `Test.PuidKey`, `Test.PuidVal`) +values (1, toDate('2019-06-06'), 1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), -1, 1, 1, 1, [1], [1], [[]], [[]]), (1, toDate('2019-06-06'), 1, 1, 1, 2, [1,2], [1,1], [[],[1,2,3,4]], [[],[1001, 1002, 1003, 1004]]), (1, toDate('2019-06-06'), 1, 2, 1, 1, [3], [1], [[3,4,5]], [[2001, 2002, 2003]]), (1, toDate('2019-06-06'), 1, 3, 2, 1, [4, 5], [1, 0], [[5,6],[]], [[3001, 3002],[]]), (1, toDate('2019-06-06'), 1, 4, 2, 1, [5, 5, 6], [1, 0, 0], [[1,2], [1, 2], [3]], [[1001, 1002],[1002, 1003], [2001]]); + +select CounterID, StartDate, Sign, VisitID, UserID, VisitVersion, BannerID, Load, PuidKeyArr, PuidValArr, arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) as uniqTestPuid + from arr_tests_visits + array join + Test.BannerID as BannerID, + Test.Load as Load, + Test.PuidKey as PuidKeyArr, + Test.PuidVal as PuidValArr; + +select '--'; + +SELECT + CounterID, + StartDate, + Sign, + VisitID, + UserID, + VisitVersion, + BannerID, + Load, + PuidKeyArr, + PuidValArr, + arrayEnumerateUniqRanked(PuidKeyArr, PuidValArr) AS uniqTestPuid +FROM arr_tests_visits +ARRAY JOIN + Test.BannerID AS BannerID, + Test.Load AS Load, + Test.PuidKey AS PuidKeyArr, + Test.PuidVal AS PuidValArr; + +DROP TABLE arr_tests_visits; diff --git a/libs/libcommon/include/common/iostream_debug_helpers.h b/libs/libcommon/include/common/iostream_debug_helpers.h index 2c41742e455..9149ffb5ed0 100644 --- a/libs/libcommon/include/common/iostream_debug_helpers.h +++ b/libs/libcommon/include/common/iostream_debug_helpers.h @@ -150,11 +150,14 @@ Out & dump(Out & out, const char * name, T && x) #define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0) #define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0) #define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0) +#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0) +#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0) +#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0) /// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ -#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, N, ...) N -#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 6, 5, 4, 3, 2, 1) +#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N +#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1) #define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS #define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)