From 7897a5bac7b8d187e198868767f4ddd362758f12 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 10:18:24 +0200 Subject: [PATCH 1/4] Perf test for Norm and Distance functions for arrays and tuples --- tests/performance/norm_distance.xml | 69 +++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 tests/performance/norm_distance.xml diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml new file mode 100644 index 00000000000..bd9db76554c --- /dev/null +++ b/tests/performance/norm_distance.xml @@ -0,0 +1,69 @@ + + + CREATE TABLE vecs_d ( + v Array(Float32) + ) ENGINE=Memory; + + + + + + INSERT INTO vecs_d + SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), + rand(n*10+1), + rand(n*10+2), + rand(n*10+3), + rand(n*10+4), + rand(n*10+5), + rand(n*10+6), + rand(n*10+7), + rand(n*10+8), + rand(n*10+9) + ] AS v + FROM system.numbers + LIMIT 30000000 + ); + + + + + + CREATE TABLE tuples_d ( + t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32) + ) ENGINE=Memory; + + + + INSERT INTO tuples_d + SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_d; + + + + + select sum(dist) from (select L1Norm(t) as dist from tuples_d) + select sum(dist) from (select L2Norm(t) as dist from tuples_d) + select sum(dist) from (select LinfNorm(t) as dist from tuples_d) + + select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + select sum(dist) from (select cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + + + + select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d) + select sum(dist) from (select arrayL2Norm(v) as dist 
from vecs_d) + select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d) + + select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + + DROP TABLE vecs_d + DROP TABLE tuples_d + From 70cc27ecac6ac5999b09b8436aec707aa51ee084 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 14:08:15 +0200 Subject: [PATCH 2/4] Test with different element types --- tests/performance/norm_distance.xml | 84 +++++++++++++++++++---------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index bd9db76554c..804c3cac4d4 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -1,14 +1,29 @@ + + + + element_type + + UInt8 + Int16 + Int32 + Int64 + Float32 + Float64 + + + + - CREATE TABLE vecs_d ( - v Array(Float32) + CREATE TABLE vecs_{element_type} ( + v Array({element_type}) ) ENGINE=Memory; - - INSERT INTO vecs_d + + INSERT INTO vecs_{element_type} SELECT v FROM ( SELECT number AS n, @@ -25,45 +40,58 @@ rand(n*10+9) ] AS v FROM system.numbers - LIMIT 30000000 + LIMIT 10000000 ); - + - CREATE TABLE tuples_d ( - t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32) + CREATE TABLE tuples_{element_type} ( + t Tuple( + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type}, + {element_type} + ) ) ENGINE=Memory; - - INSERT INTO tuples_d - SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], 
v[9], v[10]) FROM vecs_d; - + + INSERT INTO tuples_{element_type} + SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; + + - select sum(dist) from (select L1Norm(t) as dist from tuples_d) - select sum(dist) from (select L2Norm(t) as dist from tuples_d) - select sum(dist) from (select LinfNorm(t) as dist from tuples_d) + SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type}) - select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) - select sum(dist) from (select cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d) - select sum(dist) from (select arrayL2Norm(v) as dist from vecs_d) - select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d) + SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS 
dist FROM vecs_{element_type}) - select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) - select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type}) + + DROP TABLE vecs_{element_type} + DROP TABLE tuples_{element_type} - DROP TABLE vecs_d - DROP TABLE tuples_d From 2658a9eeebf58ebefd2f1f5d53097d1a5284134d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 17:01:19 +0200 Subject: [PATCH 3/4] Test with max_threads=1 --- tests/performance/norm_distance.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 804c3cac4d4..5311ee194ed 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -68,6 +68,9 @@ SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type}; + + 1 + From d0f5551c9f836860a8f0646ecf8df237a4d92015 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 23 May 2022 18:27:41 +0200 Subject: [PATCH 4/4] Parameterized with norm kind --- 
tests/performance/norm_distance.xml | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 5311ee194ed..b6a7f9724c2 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -72,26 +72,26 @@ 1 + + + + norm + + L1 + L2 + Linf + + + + - - SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type}) - SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type}) - - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type}) - WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type}) + SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type}) + WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type}) WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type}) - - SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type}) - SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM vecs_{element_type}) - SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS dist FROM vecs_{element_type}) - - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type}) - WITH (SELECT v FROM 
vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type}) + SELECT sum(dist) FROM (SELECT array{norm}Norm(v) AS dist FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT array{norm}Distance(a, v) AS dist FROM vecs_{element_type}) WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type}) DROP TABLE vecs_{element_type}