Fix performance test for regexp cache

This commit is contained in:
Alexey Milovidov 2023-07-09 02:21:48 +02:00
parent 508a9c1800
commit 62bfa4ed93
2 changed files with 5 additions and 3 deletions

View File

@@ -11,6 +11,7 @@
#include <Common/OptimizedRegularExpression.h>
#include <Common/ProfileEvents.h>
#include <Common/likePatternToRegexp.h>
+#include <Common/HashTable/Hash.h>
#include <base/defines.h>
#include <base/StringRef.h>
#include <boost/container_hash/hash.hpp>
@@ -21,6 +22,7 @@
# include <hs.h>
#endif
namespace ProfileEvents
{
extern const Event RegexpCreated;
@@ -86,7 +88,7 @@ public:
private:
constexpr static size_t CACHE_SIZE = 100; /// collision probability
-std::hash<String> hasher;
+DefaultHash<String> hasher;
struct Bucket
{
String pattern; /// key

View File

@@ -24,8 +24,8 @@
<value>'.*' || toString(number) || '.'</value>
<!-- simple patterns, low distinctness (10 patterns) -->
<value>'.*' || toString(number % 10) || '.'</value>
-<!-- complex patterns, all unique -->
-<value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value>
+<!-- complex patterns, all unique - this is very slow (from 2 to 15 seconds) -->
+<!-- <value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number)</value> -->
<!-- complex patterns, low distinctness -->
<value>'([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10)</value>
<!-- Note: for this benchmark, we are only interested in compilation time, not correctness, evaluation time or the result.