Merge pull request #9420 from ClickHouse/aku/faster-perf-tests

Make some performance tests faster
This commit is contained in:
alexey-milovidov 2020-03-03 21:40:03 +03:00 committed by GitHub
commit a943618748
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 32 deletions

View File

@ -14,6 +14,19 @@
</stop_conditions>
<substitutions>
<!-- Values for {func_slow}; the {func_slow} query in this file reads numbers(10000000). -->
<substitution>
<name>func_slow</name>
<values>
<value>atan</value>
<value>cbrt</value>
<value>cos</value>
<value>lgamma</value>
<value>tgamma</value>
<value>log10</value>
<value>sin</value>
<value>tan</value>
</values>
</substitution>
<substitution>
<name>func</name>
<values>
@ -22,26 +35,25 @@
<value>exp2</value>
<value>log2</value>
<value>exp10</value>
<value>log10</value>
<value>sqrt</value>
<value>cbrt</value>
<value>erf</value>
<value>erfc</value>
<value>lgamma</value>
<value>tgamma</value>
<value>sin</value>
<value>cos</value>
<value>tan</value>
<value>asin</value>
<value>acos</value>
<value>atan</value>
<value>sigmoid</value>
<value>tanh</value>
</values>
</substitution>
<!-- Values for {arg}: the number column as Float64, as Float32, and passed without conversion. -->
<substitution>
<name>arg</name>
<values>
<value>toFloat64(number)</value>
<value>toFloat32(number)</value>
<value>number</value>
</values>
</substitution>
</substitutions>
<!-- Every query returns count() over rows filtered by NOT ignore(f(x)).
     The {func} queries read numbers(100000000); the {func_slow} query reads numbers(10000000). -->
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat64(number)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(toFloat32(number)))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}(number))</query>
<query>SELECT count() FROM numbers(100000000) WHERE NOT ignore({func}({arg}))</query>
<query>SELECT count() FROM numbers(10000000) WHERE NOT ignore({func_slow}({arg}))</query>
</test>

View File

@ -25,22 +25,33 @@
</any_of>
</stop_conditions>
<query>SELECT DISTINCT URL,Title, ngramDistance(Title, URL) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistance(Title, SearchPhrase) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'what is love') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistance(Title, 'no more') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL,Title, ngramDistanceUTF8(Title, URL) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT SearchPhrase,Title, ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT Title, ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'как дела') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM hits_10m_single FORMAT Null</query>
<substitutions>
<substitution>
<name>small_table</name>
<values>
<!-- For now this is the same table that the queries without {small_table} read. -->
<value>hits_10m_single</value>
</values>
</substitution>
</substitutions>
<query>SELECT DISTINCT Title, ngramDistanceCaseInsensitiveUTF8(Title, 'Метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceCaseInsensitiveUTF8(URL, 'как дЕлА') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT DISTINCT URL, ngramDistanceCaseInsensitiveUTF8(URL, 'Чем зАнимаешЬся') AS distance FROM hits_10m_single FORMAT Null</query>
<!-- Queries that compare Title with a constant string read hits_10m_single;
     all the others (two columns, or URL as the first argument) read {small_table}. -->
<query>SELECT ngramDistance(Title, URL) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistance(Title, SearchPhrase) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'what is love') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'baby dont hurt me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistance(Title, 'no more') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'wHAt Is lovE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'BABY DonT hUrT me') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitive(Title, 'nO MOrE') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, URL) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, SearchPhrase) AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(Title, 'метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(URL, 'как дела') AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceUTF8(URL, 'чем занимаешься') AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitiveUTF8(Title, 'Метрика') AS distance FROM hits_10m_single FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitiveUTF8(URL, 'как дЕлА') AS distance FROM {small_table} FORMAT Null</query>
<query>SELECT ngramDistanceCaseInsensitiveUTF8(URL, 'Чем зАнимаешЬся') AS distance FROM {small_table} FORMAT Null</query>
</test>

View File

@ -21,5 +21,5 @@
<max_threads>1</max_threads>
</settings>
<query>SELECT Title, URL FROM hits_10m_single PREWHERE WatchID % 2 = 1 WHERE UserID = 10000 FORMAT Null</query>
<query>SELECT Title FROM hits_10m_single PREWHERE WatchID % 2 = 1 WHERE UserID = 10000 FORMAT Null</query>
</test>

View File

@ -1,8 +1,15 @@
<test>
<type>loop</type>
<create_query>CREATE TABLE IF NOT EXISTS whitespaces(value String) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY tuple()</create_query>
<fill_query>INSERT INTO whitespaces SELECT value FROM (SELECT arrayStringConcat(groupArray(' ')) AS spaces, concat(spaces, toString(any(number)), spaces) AS value FROM numbers(100000000) GROUP BY pow(number, intHash32(number) % 4) % 12345678)</fill_query>
<!-- Creates and populates whitespaces in a single CREATE ... AS SELECT over numbers_mt(100000000).
     Each row's value is toString(number) with the same run of spaces on both sides;
     the run length is 32 - log2(intHash32(number)) + 1, converted with toUInt32. -->
<create_query>
create table if not exists whitespaces
engine = MergeTree() partition by tuple() order by tuple()
as
with 32 - log2(intHash32(number)) + 1 as num_spaces,
repeat(' ', toUInt32(num_spaces)) as spaces
select spaces || toString(number) || spaces value
from numbers_mt(100000000);
</create_query>
<stop_conditions>
<all_of>
@ -15,10 +22,14 @@
<!-- Values for {func}: the value column as-is, plus its trimLeft, trimRight and trimBoth forms. -->
<substitution>
<name>func</name>
<values>
<value>value</value>
<value>trimLeft(value)</value>
<value>trimRight(value)</value>
<value>trimBoth(value)</value>
</values>
</substitution>
<substitution>
<name>func_slow</name>
<values>
<value>replaceRegexpOne(value, '^ *', '')</value>
<value>replaceRegexpOne(value, ' *$', '')</value>
<value>replaceRegexpAll(value, '^ *| *$', '')</value>
@ -26,7 +37,8 @@
</substitution>
</substitutions>
<!-- {func} expressions are evaluated over the whole whitespaces table;
     the {func_slow} query is capped with LIMIT 10000000. The table is dropped afterwards. -->
<query>SELECT count() FROM whitespaces WHERE NOT ignore({func})</query>
<query>SELECT {func} FROM whitespaces FORMAT Null</query>
<query>SELECT {func_slow} FROM whitespaces LIMIT 10000000 FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS whitespaces</drop_query>
</test>