<!-- mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-22 07:31:57 +00:00 (67 lines, 2.9 KiB, XML) -->
<!-- Simple benchmark to verify the caching of compiled re2 patterns (e.g. LIKE/MATCH) -->

<test>
    <substitutions>
        <!-- Table expression every query below selects its rows from (referenced as {numbers}). -->
        <substitution>
            <name>numbers</name>
            <values>
                <value>numbers_mt(1500000)</value>
            </values>
        </substitution>

        <!-- Non-constant needles for like(): one pattern expression per row, built from `number`. -->
        <substitution>
            <name>needle_like</name>
            <values>
                <!-- simple patterns, all unique -->
                <value>'%' || toString(number) || '_'</value>
                <!-- simple patterns, low distinctness (10 patterns) -->
                <value>'%' || toString(number % 10) || '_'</value>
            </values>
        </substitution>

        <!-- Non-constant needles for match(): one regular expression per row, built from `number`. -->
        <substitution>
            <name>needle_match</name>
            <values>
                <!-- simple patterns, all unique -->
                <value>'.*' || toString(number) || '.'</value>
                <!-- simple patterns, low distinctness (10 patterns) -->
                <value>'.*' || toString(number % 10) || '.'</value>
                <!-- complex patterns, all unique - this is very slow (from 2 to 15 seconds) -->
                <!-- <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value> -->
                <!-- complex patterns, low distinctness -->
                <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10)</value>
                <!-- Note: for this benchmark, we are only interested in compilation time, not correctness, evaluation time or the result.
                     Therefore, the complex pattern is a maximally expensive to compile "pattern from hell": a concatenation of
                     url || email || date || ip4 || number (% 10), see http://lh3lh3.users.sourceforge.net/reb.shtml and
                     https://github.com/mariomka/regex-benchmark -->
            </values>
        </substitution>
    </substitutions>

    <!-- const needle (just for reference): the pattern is a string literal, identical for every row -->
    <query>
        select toString(number) as haystack, like(haystack, '%x_')
        from (select * from {numbers})
        format Null
    </query>

    <query>
        select toString(number) as haystack, match(haystack, '.*x.')
        from (select * from {numbers})
        format Null
    </query>

    <!-- non-const needle: the pattern is an expression over `number`, taken from the substitutions above -->
    <query>
        select toString(number) as haystack, {needle_like} as needle, like(haystack, needle)
        from (select * from {numbers})
        format Null
    </query>

    <query>
        select toString(number) as haystack, {needle_match} as needle, match(haystack, needle)
        from (select * from {numbers})
        format Null
    </query>
</test>
|