From 7897a5bac7b8d187e198868767f4ddd362758f12 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Mon, 23 May 2022 10:18:24 +0200
Subject: [PATCH 1/4] Perf test for Norm and Distance functions for arrays and
tuples
---
tests/performance/norm_distance.xml | 69 +++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
create mode 100644 tests/performance/norm_distance.xml
diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml
new file mode 100644
index 00000000000..bd9db76554c
--- /dev/null
+++ b/tests/performance/norm_distance.xml
@@ -0,0 +1,69 @@
+
+
+ CREATE TABLE vecs_d (
+ v Array(Float32)
+ ) ENGINE=Memory;
+
+
+
+
+
+ INSERT INTO vecs_d
+ SELECT v FROM (
+ SELECT
+ number AS n,
+ [
+ rand(n*10),
+ rand(n*10+1),
+ rand(n*10+2),
+ rand(n*10+3),
+ rand(n*10+4),
+ rand(n*10+5),
+ rand(n*10+6),
+ rand(n*10+7),
+ rand(n*10+8),
+ rand(n*10+9)
+ ] AS v
+ FROM system.numbers
+ LIMIT 30000000
+ );
+
+
+
+
+
+ CREATE TABLE tuples_d (
+ t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32)
+ ) ENGINE=Memory;
+
+
+
+ INSERT INTO tuples_d
+ SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_d;
+
+
+
+
+ select sum(dist) from (select L1Norm(t) as dist from tuples_d)
+ select sum(dist) from (select L2Norm(t) as dist from tuples_d)
+ select sum(dist) from (select LinfNorm(t) as dist from tuples_d)
+
+ select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
+ select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
+ select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
+ select sum(dist) from (select cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
+
+
+
+ select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d)
+ select sum(dist) from (select arrayL2Norm(v) as dist from vecs_d)
+ select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d)
+
+ select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
+ select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
+ select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
+ select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
+
+ DROP TABLE vecs_d
+ DROP TABLE tuples_d
+
From 70cc27ecac6ac5999b09b8436aec707aa51ee084 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Mon, 23 May 2022 14:08:15 +0200
Subject: [PATCH 2/4] Test with different element types
---
tests/performance/norm_distance.xml | 84 +++++++++++++++++++----------
1 file changed, 56 insertions(+), 28 deletions(-)
diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml
index bd9db76554c..804c3cac4d4 100644
--- a/tests/performance/norm_distance.xml
+++ b/tests/performance/norm_distance.xml
@@ -1,14 +1,29 @@
+
+
+
+ element_type
+
+ UInt8
+ Int16
+ Int32
+ Int64
+ Float32
+ Float64
+
+
+
+
- CREATE TABLE vecs_d (
- v Array(Float32)
+ CREATE TABLE vecs_{element_type} (
+ v Array({element_type})
) ENGINE=Memory;
-
- INSERT INTO vecs_d
+
+ INSERT INTO vecs_{element_type}
SELECT v FROM (
SELECT
number AS n,
@@ -25,45 +40,58 @@
rand(n*10+9)
] AS v
FROM system.numbers
- LIMIT 30000000
+ LIMIT 10000000
);
-
+
- CREATE TABLE tuples_d (
- t Tuple(Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32, Float32)
+ CREATE TABLE tuples_{element_type} (
+ t Tuple(
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type},
+ {element_type}
+ )
) ENGINE=Memory;
-
- INSERT INTO tuples_d
- SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_d;
-
+
+ INSERT INTO tuples_{element_type}
+ SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type};
+
+
- select sum(dist) from (select L1Norm(t) as dist from tuples_d)
- select sum(dist) from (select L2Norm(t) as dist from tuples_d)
- select sum(dist) from (select LinfNorm(t) as dist from tuples_d)
+ SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type})
+ SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type})
+ SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type})
- select sum(dist) from (select L1Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
- select sum(dist) from (select L2Distance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
- select sum(dist) from (select LinfDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
- select sum(dist) from (select cosineDistance((1.0,2.0,3.0,4.0,5,6,7,8,9,0), t) as dist from tuples_d)
+ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type})
+ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type})
+ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type})
+ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type})
- select sum(dist) from (select arrayL1Norm(v) as dist from vecs_d)
- select sum(dist) from (select arrayL2Norm(v) as dist from vecs_d)
- select sum(dist) from (select arrayLinfNorm(v) as dist from vecs_d)
+ SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type})
+ SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM vecs_{element_type})
+ SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS dist FROM vecs_{element_type})
- select sum(dist) from (select arrayL1Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
- select sum(dist) from (select arrayL2Distance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
- select sum(dist) from (select arrayLinfDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
- select sum(dist) from (select arrayCosineDistance([1.0,2.0,3.0,4.0,5,6,7,8,9,0], v) as dist from vecs_d)
+ WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type})
+ WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type})
+ WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type})
+ WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type})
+
+ DROP TABLE vecs_{element_type}
+ DROP TABLE tuples_{element_type}
- DROP TABLE vecs_d
- DROP TABLE tuples_d
From 2658a9eeebf58ebefd2f1f5d53097d1a5284134d Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Mon, 23 May 2022 17:01:19 +0200
Subject: [PATCH 3/4] Test with max_threads=1
---
tests/performance/norm_distance.xml | 3 +++
1 file changed, 3 insertions(+)
diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml
index 804c3cac4d4..5311ee194ed 100644
--- a/tests/performance/norm_distance.xml
+++ b/tests/performance/norm_distance.xml
@@ -68,6 +68,9 @@
SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type};
+
+ 1
+
From d0f5551c9f836860a8f0646ecf8df237a4d92015 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Mon, 23 May 2022 18:27:41 +0200
Subject: [PATCH 4/4] Parameterized with norm kind
---
tests/performance/norm_distance.xml | 32 ++++++++++++++---------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml
index 5311ee194ed..b6a7f9724c2 100644
--- a/tests/performance/norm_distance.xml
+++ b/tests/performance/norm_distance.xml
@@ -72,26 +72,26 @@
1
+
+
+
+ norm
+
+ L1
+ L2
+ Linf
+
+
+
+
-
- SELECT sum(dist) FROM (SELECT L1Norm(t) AS dist FROM tuples_{element_type})
- SELECT sum(dist) FROM (SELECT L2Norm(t) AS dist FROM tuples_{element_type})
- SELECT sum(dist) FROM (SELECT LinfNorm(t) AS dist FROM tuples_{element_type})
-
- WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L1Distance(a, t) AS dist FROM tuples_{element_type})
- WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT L2Distance(a, t) AS dist FROM tuples_{element_type})
- WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT LinfDistance(a, t) AS dist FROM tuples_{element_type})
+ SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type})
+ WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type})
WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type})
-
- SELECT sum(dist) FROM (SELECT arrayL1Norm(v) AS dist FROM vecs_{element_type})
- SELECT sum(dist) FROM (SELECT arrayL2Norm(v) AS dist FROM vecs_{element_type})
- SELECT sum(dist) FROM (SELECT arrayLinfNorm(v) AS dist FROM vecs_{element_type})
-
- WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL1Distance(a, v) AS dist FROM vecs_{element_type})
- WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayL2Distance(a, v) AS dist FROM vecs_{element_type})
- WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayLinfDistance(a, v) AS dist FROM vecs_{element_type})
+ SELECT sum(dist) FROM (SELECT array{norm}Norm(v) AS dist FROM vecs_{element_type})
+ WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT array{norm}Distance(a, v) AS dist FROM vecs_{element_type})
WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type})
DROP TABLE vecs_{element_type}