ClickHouse/tests/performance/re2_regex_caching.xml
2023-07-09 02:21:48 +02:00

67 lines
2.9 KiB
XML

<!-- Simple benchmark to verify the caching of compiled re2 patterns (e.g. LIKE/MATCH) -->
<test>
<substitutions>
<!-- Row source referenced as {numbers} by every query in this test: numbers_mt(1500000) -->
<substitution>
<name>numbers</name>
<values>
<value>numbers_mt(1500000)</value>
</values>
</substitution>
<!-- Non-constant needles for like(), referenced as {needle_like}; each value is an expression evaluated per row from `number` -->
<substitution>
<name>needle_like</name>
<values>
<!-- simple patterns, all unique (the pattern embeds the row's own number) -->
<value>'%' || toString(number) || '_'</value>
<!-- simple patterns, low distinctness (number % 10 gives only 10 different patterns) -->
<value>'%' || toString(number % 10) || '_'</value>
</values>
</substitution>
<!-- Non-constant needles for match(), referenced as {needle_match}; each value is an expression evaluated per row from `number` -->
<substitution>
<name>needle_match</name>
<values>
<!-- simple patterns, all unique (the pattern embeds the row's own number) -->
<value>'.*' || toString(number) || '.'</value>
<!-- simple patterns, low distinctness (number % 10 gives only 10 different patterns) -->
<value>'.*' || toString(number % 10) || '.'</value>
<!-- complex patterns, all unique - this is very slow (from 2 to 15 seconds) -->
<!-- <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value> -->
<!-- complex patterns, low distinctness (number % 10 gives only 10 different patterns) -->
<value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10)</value>
<!-- Note: for this benchmark, we are only interested in compilation time, not in correctness, evaluation time or the result.
Therefore, the complex pattern is a maximally expensive to compile "pattern from hell": a concatenation of
URL || email || date || IPv4 || number (% 10) patterns,
see http://lh3lh3.users.sourceforge.net/reb.shtml and https://github.com/mariomka/regex-benchmark -->
</values>
</substitution>
</substitutions>
<!-- const needle (just for reference) -->
<!-- Reference measurement: like() with the constant pattern '%x_' applied to every stringified number -->
<query>
    select
        toString(number) as haystack,
        like(haystack, '%x_')
    from (
        select *
        from {numbers}
    )
    format Null
</query>
<!-- Reference measurement: match() with the constant pattern '.*x.' applied to every stringified number -->
<query>
    select
        toString(number) as haystack,
        match(haystack, '.*x.')
    from (
        select *
        from {numbers}
    )
    format Null
</query>
<!-- non-const needle -->
<!-- like() where the pattern is a per-row expression; run once for each {needle_like} value -->
<query>
    select
        toString(number) as haystack,
        {needle_like} as needle,
        like(haystack, needle)
    from (
        select *
        from {numbers}
    )
    format Null
</query>
<!-- match() where the pattern is a per-row expression; run once for each {needle_match} value -->
<query>
    select
        toString(number) as haystack,
        {needle_match} as needle,
        match(haystack, needle)
    from (
        select *
        from {numbers}
    )
    format Null
</query>
</test>