Merge pull request #37437 from ClickHouse/norm_distance_perf

Perf test for Norm and Distance functions for arrays and tuples
This commit is contained in:
Alexander Gololobov 2022-05-24 10:17:35 +02:00 committed by GitHub
commit 6094000d7c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -0,0 +1,100 @@
<test>
<substitutions>
<substitution>
<name>element_type</name>
<values>
<value>UInt8</value>
<value>Int16</value>
<value>Int32</value>
<value>Int64</value>
<value>Float32</value>
<value>Float64</value>
</values>
</substitution>
</substitutions>
<create_query>
CREATE TABLE vecs_{element_type} (
v Array({element_type})
) ENGINE=Memory;
</create_query>
<!-- Gererate arrays with random data -->
<fill_query>
INSERT INTO vecs_{element_type}
SELECT v FROM (
SELECT
number AS n,
[
rand(n*10),
rand(n*10+1),
rand(n*10+2),
rand(n*10+3),
rand(n*10+4),
rand(n*10+5),
rand(n*10+6),
rand(n*10+7),
rand(n*10+8),
rand(n*10+9)
] AS v
FROM system.numbers
LIMIT 10000000
);
</fill_query>
<!-- The same data in the form of tuples -->
<create_query>
CREATE TABLE tuples_{element_type} (
t Tuple(
{element_type},
{element_type},
{element_type},
{element_type},
{element_type},
{element_type},
{element_type},
{element_type},
{element_type},
{element_type}
)
) ENGINE=Memory;
</create_query>
<fill_query>
INSERT INTO tuples_{element_type}
SELECT (v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]) FROM vecs_{element_type};
</fill_query>
<settings>
<max_threads>1</max_threads>
</settings>
<!-- Norm kinds-->
<substitutions>
<substitution>
<name>norm</name>
<values>
<value>L1</value>
<value>L2</value>
<value>Linf</value>
</values>
</substitution>
</substitutions>
<!-- Tuples -->
<query>SELECT sum(dist) FROM (SELECT {norm}Norm(t) AS dist FROM tuples_{element_type})</query>
<query>WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT {norm}Distance(a, t) AS dist FROM tuples_{element_type})</query>
<query>WITH (SELECT t FROM tuples_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT cosineDistance(a, t) AS dist FROM tuples_{element_type})</query>
<!-- Arrays -->
<query>SELECT sum(dist) FROM (SELECT array{norm}Norm(v) AS dist FROM vecs_{element_type})</query>
<query>WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT array{norm}Distance(a, v) AS dist FROM vecs_{element_type})</query>
<query>WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dist) FROM (SELECT arrayCosineDistance(a, v) AS dist FROM vecs_{element_type})</query>
<drop_query>DROP TABLE vecs_{element_type}</drop_query>
<drop_query>DROP TABLE tuples_{element_type}</drop_query>
</test>