Replace hyperscan by vectorscan

This commit migrates ClickHouse from Hyperscan to Vectorscan. The first
10 minutes of the talk linked at [0] explain the reasons for the migration.

(*) Addresses (but does not resolve) #38046

(*) Config parameter names (e.g. "max_hyperscan_regexp_length") are
    preserved for compatibility. Likewise, error codes (e.g.
    "ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT") and function/class names (e.g.
    "HyperscanDeleter") are preserved as vectorscan aims to be a drop-in
    replacement.

[0] https://www.youtube.com/watch?v=KlZWmmflW6M
This commit is contained in:
Robert Schulze 2022-06-17 12:15:19 +02:00
parent 8a5ada8ccd
commit 2c828338f4
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
24 changed files with 6383 additions and 99 deletions

6
.gitmodules vendored
View File

@ -86,9 +86,6 @@
[submodule "contrib/h3"]
path = contrib/h3
url = https://github.com/ClickHouse/h3
[submodule "contrib/hyperscan"]
path = contrib/hyperscan
url = https://github.com/ClickHouse/hyperscan.git
[submodule "contrib/libunwind"]
path = contrib/libunwind
url = https://github.com/ClickHouse/libunwind.git
@ -268,6 +265,9 @@
[submodule "contrib/hashidsxx"]
path = contrib/hashidsxx
url = https://github.com/schoentoon/hashidsxx.git
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan.git
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing.git

View File

@ -58,7 +58,7 @@ add_contrib (boost-cmake boost)
add_contrib (cctz-cmake cctz)
add_contrib (consistent-hashing)
add_contrib (dragonbox-cmake dragonbox)
add_contrib (hyperscan-cmake hyperscan)
add_contrib (vectorscan-cmake vectorscan)
add_contrib (jemalloc-cmake jemalloc)
add_contrib (libcpuid-cmake libcpuid)
add_contrib (libdivide)

1
contrib/hyperscan vendored

@ -1 +0,0 @@
Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c

1
contrib/vectorscan vendored Submodule

@ -0,0 +1 @@
Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d

View File

@ -1,54 +1,65 @@
if (HAVE_SSSE3)
option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES})
elseif(ENABLE_HYPERSCAN)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use hyperscan without SSSE3")
set (ENABLE_HYPERSCAN OFF)
endif ()
# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan.
if (NOT ENABLE_HYPERSCAN)
message (STATUS "Not using hyperscan")
# Vectorscan is only offered on the architectures this directory ships a platform-specific
# config.h for (x86_64/ and aarch64/). On any other architecture an explicit
# -DENABLE_VECTORSCAN=ON must be rejected here, otherwise the build would continue without
# arch-specific sources and without a config.h and fail much later with confusing errors.
if (ARCH_AMD64 OR ARCH_AARCH64)
    option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES})
elseif (ENABLE_VECTORSCAN)
    message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use vectorscan on this architecture, only x86-64 and AArch64 are supported")
    set (ENABLE_VECTORSCAN OFF)
endif()

# TODO PPC should generally work but needs manual generation of ppc/config.h file on a PPC machine

# Nothing to build when the library is disabled (by the user or by the architecture check above).
if (NOT ENABLE_VECTORSCAN)
    message (STATUS "Not using vectorscan")
    return()
endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hyperscan")
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/vectorscan")
# Gobble up all c/cpp files in vectorscan/src/, omit *dump*.c/cpp files as we don't use the dump feature (DUMP_SUPPORT is left undefined, see x86_64/config.h)
set (SRCS
"${LIBRARY_DIR}/src/alloc.c"
"${LIBRARY_DIR}/src/crc32.c"
"${LIBRARY_DIR}/src/database.c"
# "${LIBRARY_DIR}/src/dispatcher.c" # the linker's wrath be upon those who include dispatcher.c.
"${LIBRARY_DIR}/src/grey.cpp"
"${LIBRARY_DIR}/src/hs.cpp"
"${LIBRARY_DIR}/src/hs_valid_platform.c"
"${LIBRARY_DIR}/src/hs_version.c"
"${LIBRARY_DIR}/src/runtime.c"
"${LIBRARY_DIR}/src/scratch.c"
"${LIBRARY_DIR}/src/stream_compress.c"
"${LIBRARY_DIR}/src/compiler/asserts.cpp"
"${LIBRARY_DIR}/src/compiler/compiler.cpp"
"${LIBRARY_DIR}/src/compiler/error.cpp"
"${LIBRARY_DIR}/src/crc32.c"
"${LIBRARY_DIR}/src/database.c"
"${LIBRARY_DIR}/src/fdr/engine_description.cpp"
"${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp"
"${LIBRARY_DIR}/src/fdr/fdr.c"
"${LIBRARY_DIR}/src/fdr/fdr_compile.cpp"
"${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp"
"${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp"
"${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp"
"${LIBRARY_DIR}/src/fdr/fdr.c"
"${LIBRARY_DIR}/src/fdr/flood_compile.cpp"
"${LIBRARY_DIR}/src/fdr/teddy.c"
"${LIBRARY_DIR}/src/fdr/teddy_avx2.c"
"${LIBRARY_DIR}/src/fdr/teddy_compile.cpp"
"${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp"
"${LIBRARY_DIR}/src/fdr/teddy.c"
"${LIBRARY_DIR}/src/grey.cpp"
"${LIBRARY_DIR}/src/hs_valid_platform.c"
"${LIBRARY_DIR}/src/hs_version.c"
"${LIBRARY_DIR}/src/hs.cpp"
"${LIBRARY_DIR}/src/hwlm/hwlm.c"
"${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp"
"${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp"
"${LIBRARY_DIR}/src/hwlm/hwlm.c"
"${LIBRARY_DIR}/src/hwlm/noodle_build.cpp"
"${LIBRARY_DIR}/src/hwlm/noodle_engine.c"
"${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp"
"${LIBRARY_DIR}/src/hwlm/noodle_engine.cpp"
"${LIBRARY_DIR}/src/nfa/accel.c"
"${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp"
"${LIBRARY_DIR}/src/nfa/accelcompile.cpp"
"${LIBRARY_DIR}/src/nfa/castle.c"
"${LIBRARY_DIR}/src/nfa/castlecompile.cpp"
"${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp"
"${LIBRARY_DIR}/src/nfa/dfa_min.cpp"
"${LIBRARY_DIR}/src/nfa/gough.c"
"${LIBRARY_DIR}/src/nfa/goughcompile.cpp"
"${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp"
"${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp"
"${LIBRARY_DIR}/src/nfa/goughcompile.cpp"
"${LIBRARY_DIR}/src/nfa/lbr.c"
"${LIBRARY_DIR}/src/nfa/limex_64.c"
"${LIBRARY_DIR}/src/nfa/limex_accel.c"
@ -59,28 +70,32 @@ set (SRCS
"${LIBRARY_DIR}/src/nfa/limex_simd384.c"
"${LIBRARY_DIR}/src/nfa/limex_simd512.c"
"${LIBRARY_DIR}/src/nfa/mcclellan.c"
"${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp"
"${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp"
"${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp"
"${LIBRARY_DIR}/src/nfa/mcsheng.c"
"${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp"
"${LIBRARY_DIR}/src/nfa/mcsheng_data.c"
"${LIBRARY_DIR}/src/nfa/mcsheng.c"
"${LIBRARY_DIR}/src/nfa/mpv.c"
"${LIBRARY_DIR}/src/nfa/mpvcompile.cpp"
"${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c"
"${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp"
"${LIBRARY_DIR}/src/nfa/rdfa.cpp"
"${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp"
"${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp"
"${LIBRARY_DIR}/src/nfa/rdfa.cpp"
"${LIBRARY_DIR}/src/nfa/repeat.c"
"${LIBRARY_DIR}/src/nfa/repeatcompile.cpp"
"${LIBRARY_DIR}/src/nfa/sheng.c"
"${LIBRARY_DIR}/src/nfa/shengcompile.cpp"
"${LIBRARY_DIR}/src/nfa/shufti.c"
"${LIBRARY_DIR}/src/nfa/shufti.cpp"
"${LIBRARY_DIR}/src/nfa/shufticompile.cpp"
"${LIBRARY_DIR}/src/nfa/tamarama.c"
"${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp"
"${LIBRARY_DIR}/src/nfa/truffle.c"
"${LIBRARY_DIR}/src/nfa/truffle.cpp"
"${LIBRARY_DIR}/src/nfa/trufflecompile.cpp"
"${LIBRARY_DIR}/src/nfa/vermicelli_simd.cpp"
"${LIBRARY_DIR}/src/nfa/vermicellicompile.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp"
@ -100,8 +115,8 @@ set (SRCS
"${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp"
@ -112,17 +127,17 @@ set (SRCS
"${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_region.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_som.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_som.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_split.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp"
@ -132,10 +147,8 @@ set (SRCS
"${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng_width.cpp"
"${LIBRARY_DIR}/src/nfagraph/ng.cpp"
"${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp"
"${LIBRARY_DIR}/src/parser/buildstate.cpp"
"${LIBRARY_DIR}/src/parser/check_refs.cpp"
"${LIBRARY_DIR}/src/parser/Component.cpp"
"${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp"
"${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp"
@ -145,31 +158,34 @@ set (SRCS
"${LIBRARY_DIR}/src/parser/ComponentByte.cpp"
"${LIBRARY_DIR}/src/parser/ComponentClass.cpp"
"${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp"
"${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp"
"${LIBRARY_DIR}/src/parser/ComponentEUS.cpp"
"${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp"
"${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp"
"${LIBRARY_DIR}/src/parser/ComponentSequence.cpp"
"${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp"
"${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp"
"${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp"
"${LIBRARY_DIR}/src/parser/control_verbs.cpp"
"${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp"
"${LIBRARY_DIR}/src/parser/buildstate.cpp"
"${LIBRARY_DIR}/src/parser/buildstate.cpp"
"${LIBRARY_DIR}/src/parser/check_refs.cpp"
"${LIBRARY_DIR}/src/parser/check_refs.cpp"
"${LIBRARY_DIR}/src/parser/logical_combination.cpp"
"${LIBRARY_DIR}/src/parser/parse_error.cpp"
"${LIBRARY_DIR}/src/parser/parser_util.cpp"
"${LIBRARY_DIR}/src/parser/Parser.cpp"
"${LIBRARY_DIR}/src/parser/prefilter.cpp"
"${LIBRARY_DIR}/src/parser/shortcut_literal.cpp"
"${LIBRARY_DIR}/src/parser/ucp_table.cpp"
"${LIBRARY_DIR}/src/parser/unsupported.cpp"
"${LIBRARY_DIR}/src/parser/utf8_validate.cpp"
"${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp"
"${LIBRARY_DIR}/src/rose/block.c"
"${LIBRARY_DIR}/src/rose/catchup.c"
"${LIBRARY_DIR}/src/rose/init.c"
"${LIBRARY_DIR}/src/rose/match.c"
"${LIBRARY_DIR}/src/rose/program_runtime.c"
"${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_add.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_castle.cpp"
@ -187,53 +203,95 @@ set (SRCS
"${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_merge.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_misc.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_misc.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_program.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp"
"${LIBRARY_DIR}/src/rose/rose_build_width.cpp"
"${LIBRARY_DIR}/src/rose/rose_in_util.cpp"
"${LIBRARY_DIR}/src/rose/stream.c"
"${LIBRARY_DIR}/src/runtime.c"
"${LIBRARY_DIR}/src/scratch.c"
"${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp"
"${LIBRARY_DIR}/src/som/slot_manager.cpp"
"${LIBRARY_DIR}/src/som/som_runtime.c"
"${LIBRARY_DIR}/src/som/som_stream.c"
"${LIBRARY_DIR}/src/stream_compress.c"
"${LIBRARY_DIR}/src/util/alloc.cpp"
"${LIBRARY_DIR}/src/util/charreach.cpp"
"${LIBRARY_DIR}/src/util/clique.cpp"
"${LIBRARY_DIR}/src/util/compile_context.cpp"
"${LIBRARY_DIR}/src/util/compile_error.cpp"
"${LIBRARY_DIR}/src/util/cpuid_flags.c"
"${LIBRARY_DIR}/src/util/depth.cpp"
"${LIBRARY_DIR}/src/util/fatbit_build.cpp"
"${LIBRARY_DIR}/src/util/multibit_build.cpp"
"${LIBRARY_DIR}/src/util/multibit.c"
"${LIBRARY_DIR}/src/util/multibit_build.cpp"
"${LIBRARY_DIR}/src/util/report_manager.cpp"
"${LIBRARY_DIR}/src/util/simd_utils.c"
"${LIBRARY_DIR}/src/util/state_compress.c"
"${LIBRARY_DIR}/src/util/target_info.cpp"
"${LIBRARY_DIR}/src/util/ue2string.cpp"
)
add_library (_hyperscan ${SRCS})
# Upstream generates src/parser/Parser.cpp and src/parser/control_verbs.cpp from .rl files with
# ragel at build time (plus a few more .rl files which we do not need). To keep ragel out of our
# build dependencies (neither via contrib/ nor via find_program()), pre-generated copies of the
# ragel output are kept next to this file and compiled instead.
# Please regenerate these files if you update vectorscan.
set (RAGELED_FILES_DIR "${LIBRARY_DIR}/../vectorscan-cmake/rageled_files")

list (APPEND SRCS
    "${RAGELED_FILES_DIR}/Parser.cpp"
    "${RAGELED_FILES_DIR}/control_verbs.cpp"
)
target_compile_options (_hyperscan
PRIVATE -g0 # Library has too much debug information
-mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally
-march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system
-fno-sanitize=undefined # Assume the library takes care of itself
)
target_include_directories (_hyperscan
PRIVATE
common
"${LIBRARY_DIR}/include"
)
target_include_directories (_hyperscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
# Platform-dependent files
if (ARCH_AMD64)
target_include_directories (_hyperscan PRIVATE x86_64)
endif ()
target_link_libraries (_hyperscan PRIVATE boost::headers_only)
list(APPEND SRCS
"${LIBRARY_DIR}/src/util/arch/x86/cpuid_flags.c"
"${LIBRARY_DIR}/src/util/arch/x86/masked_move.c"
"${LIBRARY_DIR}/src/util/supervector/arch/x86/impl.cpp"
)
endif()
add_library (ch_contrib::hyperscan ALIAS _hyperscan)
if (ARCH_AARCH64)
list(APPEND SRCS
"${LIBRARY_DIR}/src/util/arch/arm/cpuid_flags.c"
"${LIBRARY_DIR}/src/util/supervector/arch/arm/impl.cpp"
)
endif()
# TODO
# if (ARCH_PPC64LE)
# list(APPEND SRCS
# "${LIBRARY_DIR}/src/util/supervector/arch/ppc64el/impl.cpp"
# )
# endif()
# The vectorscan library target and the namespaced alias that consumers link against.
add_library (_vectorscan ${SRCS})
add_library (ch_contrib::vectorscan ALIAS _vectorscan)

target_link_libraries (_vectorscan PRIVATE boost::headers_only)

target_compile_options (_vectorscan
    PRIVATE
        # library has too much debug information
        -g0
        # assume the library takes care of itself
        -fno-sanitize=undefined
        # options from original build system
        -O2
        -fno-strict-aliasing
        -fno-omit-frame-pointer
        -fvisibility=hidden
)

target_include_directories (_vectorscan SYSTEM
    PRIVATE
        # Version header, manually generated by running the original build system.
        common
        # vectorscan inherited some patched in-source versions of boost headers to fix a bug in
        # boost 1.69. This bug has been solved long ago but vectorscan's source code still
        # points to the patched versions, so include it here.
        "${LIBRARY_DIR}/include"
    PUBLIC
        "${LIBRARY_DIR}/src"
)

# Platform-specific config header, generated by manually running the original build system.
# Please regenerate these files if you update vectorscan.
if (ARCH_AMD64)
    target_include_directories (_vectorscan PRIVATE x86_64)
endif ()

if (ARCH_AARCH64)
    target_include_directories (_vectorscan PRIVATE aarch64)
endif ()

View File

@ -0,0 +1,142 @@
/* used by cmake */
#ifndef CONFIG_H_
#define CONFIG_H_
/* "Define if the build is 32 bit" */
/* #undef ARCH_32_BIT */
/* "Define if the build is 64 bit" */
#define ARCH_64_BIT
/* "Define if building for IA32" */
/* #undef ARCH_IA32 */
/* "Define if building for EM64T" */
/* #undef ARCH_X86_64 */
/* "Define if building for ARM32" */
/* #undef ARCH_ARM32 */
/* "Define if building for AARCH64" */
#define ARCH_AARCH64
/* "Define if building for PPC64EL" */
/* #undef ARCH_PPC64EL */
/* "Define if cross compiling for AARCH64" */
/* #undef CROSS_COMPILE_AARCH64 */
/* Define if building SVE for AARCH64. */
/* #undef BUILD_SVE */
/* Define if building SVE2 for AARCH64. */
/* #undef BUILD_SVE2 */
/* Define if building SVE2+BITPERM for AARCH64. */
/* #undef BUILD_SVE2_BITPERM */
/* internal build, switch on dump support. */
/* #undef DUMP_SUPPORT */
/* Define if building "fat" runtime. */
/* #undef FAT_RUNTIME */
/* Define if building AVX2 in the fat runtime. */
/* #undef BUILD_AVX2 */
/* Define if building AVX-512 in the fat runtime. */
/* #undef BUILD_AVX512 */
/* Define if building AVX512VBMI in the fat runtime. */
/* #undef BUILD_AVX512VBMI */
/* Define to 1 if `backtrace' works. */
#define HAVE_BACKTRACE
/* C compiler has __builtin_assume_aligned */
#define HAVE_CC_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has __builtin_assume_aligned */
#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has x86intrin.h */
/* #undef HAVE_CXX_X86INTRIN_H */
/* C compiler has x86intrin.h */
/* #undef HAVE_C_X86INTRIN_H */
/* C++ compiler has intrin.h */
/* #undef HAVE_CXX_INTRIN_H */
/* C compiler has intrin.h */
/* #undef HAVE_C_INTRIN_H */
/* C compiler has arm_neon.h */
#define HAVE_C_ARM_NEON_H
/* C compiler has arm_sve.h */
/* #undef HAVE_C_ARM_SVE_H */
/* C compiler has altivec.h */
/* #undef HAVE_C_PPC64EL_ALTIVEC_H */
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */
/* #undef HAVE_PTHREAD_NP_H */
/* Define to 1 if you have the `malloc_info' function. */
/* #undef HAVE_MALLOC_INFO */
/* Define to 1 if you have the `memmem' function. */
/* #undef HAVE_MEMMEM */
/* Define to 1 if you have a working `mmap' system call. */
#define HAVE_MMAP
/* Define to 1 if `posix_memalign' works. */
#define HAVE_POSIX_MEMALIGN
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT
/* Define to 1 if you have the `shmget' function. */
/* #undef HAVE_SHMGET */
/* Define to 1 if you have the `sigaction' function. */
#define HAVE_SIGACTION
/* Define to 1 if you have the `sigaltstack' function. */
#define HAVE_SIGALTSTACK
/* Define if the sqlite3_open_v2 call is available */
/* #undef HAVE_SQLITE3_OPEN_V2 */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the `_aligned_malloc' function. */
/* #undef HAVE__ALIGNED_MALLOC */
/* Define if compiler has __builtin_constant_p */
/* #undef HAVE__BUILTIN_CONSTANT_P */
/* Optimize, inline critical functions */
#define HS_OPTIMIZE
#define HS_VERSION
#define HS_MAJOR_VERSION
#define HS_MINOR_VERSION
#define HS_PATCH_VERSION
#define BUILD_DATE
/* define if this is a release build. */
#define RELEASE_BUILD
/* define if reverse_graph requires patch for boost 1.62.0 */
/* #undef BOOST_REVGRAPH_PATCH */
#endif /* CONFIG_H_ */

View File

@ -32,9 +32,8 @@
/**
* A version string to identify this release of Hyperscan.
*/
#define HS_VERSION_STRING "5.1.1 2000-01-01"
#define HS_VERSION_STRING "5.4.7 2022-06-20"
#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0)
/* Numeric form of the version: (major << 24) | (minor << 16) | (patch << 8).
 * Must agree with HS_VERSION_STRING, i.e. 5.4.7. */
#define HS_VERSION_32BIT ((5 << 24) | (4 << 16) | (7 << 8) | 0)
#endif /* HS_VERSION_H_C6428FAF8E3713 */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,443 @@
#line 1 "control_verbs.rl"
/*
* Copyright (c) 2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* \file
* \brief Parser for control verbs that can occur at the beginning of a pattern.
*/
#include "parser/control_verbs.h"
#include "parser/Parser.h"
#include "parser/parse_error.h"
#include <cstring>
#include <sstream>
using namespace std;
namespace ue2 {
/**
 * \brief Scan the control verbs at the start of a pattern and apply them to \p mode.
 *
 * This is the output of ragel for control_verbs.rl (see the #line directives); it is a
 * table-driven scanner. Do not edit the tables or the driver loop by hand, regenerate the
 * file from the .rl source instead.
 *
 * \param ptr   First character to scan.
 * \param end   One past the last character to scan; also used as the scanner's EOF marker.
 * \param start Offset added to the position reported when a parse error is located.
 * \param mode  Parse mode; the scanner actions set mode.utf8 and mode.ucp to true.
 *
 * \return The scanner position \c p at the point where the state machine stopped.
 *
 * \throws LocatedParseError for an unsupported or unknown control verb; the error is given
 *         a location before it is rethrown.
 */
const char *read_control_verbs(const char *ptr, const char *end, size_t start,
ParseMode &mode) {
// Scanner state in ragel's naming: p = cursor, pe = end of input, eof = end-of-file marker,
// ts/te = start/end of the current token, cs = current state, act = last matched action id.
const char *p = ptr;
const char *pe = end;
const char *eof = pe;
const char *ts, *te;
int cs;
UNUSED int act;
// Generated state machine tables; they are only read by the driver loop further down.
#line 59 "control_verbs.cpp"
static const char _ControlVerbs_actions[] = {
0, 1, 0, 1, 1, 1, 2, 1,
3, 1, 4, 1, 5, 1, 6, 1,
7, 1, 8, 1, 9
};
static const unsigned char _ControlVerbs_key_offsets[] = {
0, 7, 8, 10, 12, 14, 16, 18,
20, 21, 23, 25, 27, 30, 32, 34,
36, 38, 40, 42, 44, 46, 48, 50,
52, 55, 57, 59, 61, 63, 66, 68,
70, 72, 74, 76, 79, 82, 84, 86,
88, 90, 92, 94, 96, 98, 100, 102,
105, 107, 109, 111, 113, 115, 117, 119,
121, 123, 125, 127, 129, 131, 133, 135,
137, 139, 141, 143, 146, 148, 149, 151,
155, 157, 159, 160, 161
};
static const char _ControlVerbs_trans_keys[] = {
41, 65, 66, 67, 76, 78, 85, 41,
41, 78, 41, 89, 41, 67, 41, 82,
41, 76, 41, 70, 41, 41, 83, 41,
82, 41, 95, 41, 65, 85, 41, 78,
41, 89, 41, 67, 41, 78, 41, 73,
41, 67, 41, 79, 41, 68, 41, 69,
41, 82, 41, 76, 41, 70, 73, 41,
77, 41, 73, 41, 84, 41, 95, 41,
77, 82, 41, 65, 41, 84, 41, 67,
41, 72, 41, 61, 41, 48, 57, 41,
48, 57, 41, 69, 41, 67, 41, 85,
41, 82, 41, 83, 41, 73, 41, 79,
41, 78, 41, 79, 41, 95, 41, 65,
83, 41, 85, 41, 84, 41, 79, 41,
95, 41, 80, 41, 79, 41, 83, 41,
83, 41, 69, 41, 83, 41, 83, 41,
84, 41, 65, 41, 82, 41, 84, 41,
95, 41, 79, 41, 80, 41, 84, 41,
67, 84, 41, 80, 41, 41, 70, 41,
49, 51, 56, 41, 54, 41, 50, 41,
40, 42, 0
};
static const char _ControlVerbs_single_lengths[] = {
7, 1, 2, 2, 2, 2, 2, 2,
1, 2, 2, 2, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3, 2, 2, 2, 2, 3, 2, 2,
2, 2, 2, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 3,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 3, 2, 1, 2, 4,
2, 2, 1, 1, 1
};
static const char _ControlVerbs_range_lengths[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0
};
static const short _ControlVerbs_index_offsets[] = {
0, 8, 10, 13, 16, 19, 22, 25,
28, 30, 33, 36, 39, 43, 46, 49,
52, 55, 58, 61, 64, 67, 70, 73,
76, 80, 83, 86, 89, 92, 96, 99,
102, 105, 108, 111, 114, 117, 120, 123,
126, 129, 132, 135, 138, 141, 144, 147,
151, 154, 157, 160, 163, 166, 169, 172,
175, 178, 181, 184, 187, 190, 193, 196,
199, 202, 205, 208, 212, 215, 217, 220,
225, 228, 231, 233, 235
};
static const char _ControlVerbs_indicies[] = {
0, 2, 3, 4, 5, 6, 7, 1,
8, 1, 8, 9, 1, 8, 10, 1,
11, 12, 1, 8, 13, 1, 8, 14,
1, 8, 15, 1, 11, 1, 8, 16,
1, 8, 17, 1, 8, 18, 1, 8,
19, 20, 1, 8, 21, 1, 8, 22,
1, 8, 12, 1, 8, 23, 1, 8,
24, 1, 8, 25, 1, 8, 26, 1,
8, 27, 1, 8, 15, 1, 8, 28,
1, 11, 14, 1, 8, 15, 29, 1,
8, 30, 1, 8, 31, 1, 8, 32,
1, 8, 33, 1, 8, 34, 35, 1,
8, 36, 1, 8, 37, 1, 8, 38,
1, 8, 39, 1, 8, 40, 1, 8,
41, 1, 11, 41, 1, 8, 42, 1,
8, 43, 1, 8, 44, 1, 8, 45,
1, 8, 46, 1, 8, 47, 1, 8,
48, 1, 8, 39, 1, 8, 49, 1,
8, 50, 1, 8, 51, 52, 1, 8,
53, 1, 8, 54, 1, 8, 55, 1,
8, 56, 1, 8, 57, 1, 8, 58,
1, 8, 59, 1, 8, 60, 1, 8,
61, 1, 8, 62, 1, 8, 15, 1,
8, 63, 1, 8, 64, 1, 8, 65,
1, 8, 66, 1, 8, 67, 1, 8,
68, 1, 8, 69, 1, 8, 15, 1,
8, 70, 71, 1, 8, 72, 1, 73,
1, 8, 74, 1, 75, 76, 77, 78,
1, 8, 15, 1, 8, 15, 1, 75,
1, 80, 79, 82, 81, 0
};
static const char _ControlVerbs_trans_targs[] = {
75, 1, 2, 9, 22, 24, 45, 67,
75, 3, 4, 75, 5, 6, 7, 8,
10, 11, 12, 13, 16, 14, 15, 17,
18, 19, 20, 21, 23, 25, 26, 27,
28, 29, 30, 37, 31, 32, 33, 34,
35, 36, 38, 39, 40, 41, 42, 43,
44, 46, 47, 48, 59, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 60,
61, 62, 63, 64, 65, 66, 68, 70,
69, 75, 71, 75, 72, 73, 74, 75,
76, 75, 0
};
static const char _ControlVerbs_trans_actions[] = {
19, 0, 0, 0, 0, 0, 0, 0,
13, 0, 0, 11, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 9, 0, 7, 0, 0, 0, 15,
5, 17, 0
};
static const char _ControlVerbs_to_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0
};
static const char _ControlVerbs_from_state_actions[] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 3, 0
};
static const short _ControlVerbs_eof_trans[] = {
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 82
};
static const int ControlVerbs_start = 75;
static const int ControlVerbs_first_final = 75;
static const int ControlVerbs_error = -1;
static const int ControlVerbs_en_main = 75;
// Initialise the scanner: begin in the start state with no token in progress.
#line 249 "control_verbs.cpp"
{
cs = ControlVerbs_start;
ts = 0;
te = 0;
act = 0;
}
#line 105 "control_verbs.rl"
// The driver loop runs inside a try block so that errors thrown by the actions can be given
// a location in the catch handler below.
try {
#line 262 "control_verbs.cpp"
{
int _klen;
unsigned int _trans;
const char *_acts;
unsigned int _nacts;
const char *_keys;
if ( p == pe )
goto _test_eof;
_resume:
// Run the from-state actions of the current state (action 1 records the token start).
_acts = _ControlVerbs_actions + _ControlVerbs_from_state_actions[cs];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 ) {
switch ( *_acts++ ) {
case 1:
#line 1 "NONE"
{ts = p;}
break;
#line 281 "control_verbs.cpp"
}
}
// Look up the transition for *p: binary search over the state's single-character keys
// first, then over its [low, high] range keys; falling through both selects the default.
_keys = _ControlVerbs_trans_keys + _ControlVerbs_key_offsets[cs];
_trans = _ControlVerbs_index_offsets[cs];
_klen = _ControlVerbs_single_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_mid;
const char *_upper = _keys + _klen - 1;
while (1) {
if ( _upper < _lower )
break;
_mid = _lower + ((_upper-_lower) >> 1);
if ( (*p) < *_mid )
_upper = _mid - 1;
else if ( (*p) > *_mid )
_lower = _mid + 1;
else {
_trans += (unsigned int)(_mid - _keys);
goto _match;
}
}
_keys += _klen;
_trans += _klen;
}
_klen = _ControlVerbs_range_lengths[cs];
if ( _klen > 0 ) {
const char *_lower = _keys;
const char *_mid;
const char *_upper = _keys + (_klen<<1) - 2;
while (1) {
if ( _upper < _lower )
break;
_mid = _lower + (((_upper-_lower) >> 1) & ~1);
if ( (*p) < _mid[0] )
_upper = _mid - 2;
else if ( (*p) > _mid[1] )
_lower = _mid + 2;
else {
_trans += (unsigned int)((_mid - _keys)>>1);
goto _match;
}
}
_trans += _klen;
}
_match:
_trans = _ControlVerbs_indicies[_trans];
_eof_trans:
// Take the transition and execute the actions attached to it, if any.
cs = _ControlVerbs_trans_targs[_trans];
if ( _ControlVerbs_trans_actions[_trans] == 0 )
goto _again;
_acts = _ControlVerbs_actions + _ControlVerbs_trans_actions[_trans];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 )
{
switch ( *_acts++ )
{
case 2:
#line 1 "NONE"
{te = p+1;}
break;
case 3: // switch the parse mode to UTF-8
#line 76 "control_verbs.rl"
{te = p+1;{
mode.utf8 = true;
}}
break;
case 4: // switch the parse mode to UCP
#line 80 "control_verbs.rl"
{te = p+1;{
mode.ucp = true;
}}
break;
case 5: // verb that is recognised but not supported: report the token text
#line 84 "control_verbs.rl"
{te = p+1;{
ostringstream str;
str << "Unsupported control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}}
break;
case 6: // verb that is not recognised at all: report the token text
#line 90 "control_verbs.rl"
{te = p+1;{
ostringstream str;
str << "Unknown control verb " << string(ts, te - ts);
throw LocatedParseError(str.str());
}}
break;
// Actions 7-9 all leave the scanner via _out; they differ only in how te and p are
// adjusted before leaving.
case 7:
#line 97 "control_verbs.rl"
{te = p+1;{
p--;
{p++; goto _out; }
}}
break;
case 8:
#line 97 "control_verbs.rl"
{te = p;p--;{
p--;
{p++; goto _out; }
}}
break;
case 9:
#line 97 "control_verbs.rl"
{{p = ((te))-1;}{
p--;
{p++; goto _out; }
}}
break;
#line 400 "control_verbs.cpp"
}
}
_again:
// Run the to-state actions of the new state (action 0 clears the token start).
_acts = _ControlVerbs_actions + _ControlVerbs_to_state_actions[cs];
_nacts = (unsigned int) *_acts++;
while ( _nacts-- > 0 ) {
switch ( *_acts++ ) {
case 0:
#line 1 "NONE"
{ts = 0;}
break;
#line 413 "control_verbs.cpp"
}
}
if ( ++p != pe )
goto _resume;
_test_eof: {}
// At end of input, take the current state's EOF transition if it has one.
if ( p == eof )
{
if ( _ControlVerbs_eof_trans[cs] > 0 ) {
_trans = _ControlVerbs_eof_trans[cs] - 1;
goto _eof_trans;
}
}
_out: {}
}
#line 109 "control_verbs.rl"
} catch (LocatedParseError &error) {
// Give the error a location before rethrowing: the token start relative to ptr plus the
// caller-supplied start offset when ts lies within the scanned range, otherwise 0.
if (ts >= ptr && ts <= pe) {
error.locate(ts - ptr + start);
} else {
error.locate(0);
}
throw;
}
return p;
}
} // namespace ue2

View File

@ -15,15 +15,42 @@
/* "Define if building for EM64T" */
#define ARCH_X86_64
/* "Define if building for ARM32" */
/* #undef ARCH_ARM32 */
/* "Define if building for AARCH64" */
/* #undef ARCH_AARCH64 */
/* "Define if building for PPC64EL" */
/* #undef ARCH_PPC64EL */
/* "Define if cross compiling for AARCH64" */
/* #undef CROSS_COMPILE_AARCH64 */
/* Define if building SVE for AARCH64. */
/* #undef BUILD_SVE */
/* Define if building SVE2 for AARCH64. */
/* #undef BUILD_SVE2 */
/* Define if building SVE2+BITPERM for AARCH64. */
/* #undef BUILD_SVE2_BITPERM */
/* internal build, switch on dump support. */
/* #undef DUMP_SUPPORT */
/* Define if building "fat" runtime. */
/* #undef FAT_RUNTIME */
/* Define if building AVX2 in the fat runtime. */
/* #undef BUILD_AVX2 */
/* Define if building AVX-512 in the fat runtime. */
/* #undef BUILD_AVX512 */
/* Define if building AVX512VBMI in the fat runtime. */
/* #undef BUILD_AVX512VBMI */
/* Define to 1 if `backtrace' works. */
#define HAVE_BACKTRACE
@ -45,6 +72,15 @@
/* C compiler has intrin.h */
/* #undef HAVE_C_INTRIN_H */
/* C compiler has arm_neon.h */
/* #undef HAVE_C_ARM_NEON_H */
/* C compiler has arm_sve.h */
/* #undef HAVE_C_ARM_SVE_H */
/* C compiler has altivec.h */
/* #undef HAVE_C_PPC64EL_ALTIVEC_H */
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */
@ -85,7 +121,7 @@
/* #undef HAVE__ALIGNED_MALLOC */
/* Define if compiler has __builtin_constant_p */
#define HAVE__BUILTIN_CONSTANT_P
/* #undef HAVE__BUILTIN_CONSTANT_P */
/* Optimize, inline critical functions */
#define HS_OPTIMIZE

View File

@ -40,7 +40,7 @@ The list of third-party libraries:
| googletest | [BSD 3-clause](https://github.com/google/googletest/blob/e7e591764baba0a0c3c9ad0014430e7a27331d16/LICENSE) |
| grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) |
| h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) |
| hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) |
| vectorscan | [Boost](https://github.com/VectorCamp/vectorscan/blob/73695e419c27af7fe2a099c7aa57931cc02aea5d/LICENSE) |
| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) |
| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) |
| jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) |

View File

@ -86,8 +86,8 @@ if (TARGET ch_contrib::h3)
target_link_libraries (clickhouse_functions PRIVATE ch_contrib::h3)
endif()
if (TARGET ch_contrib::hyperscan)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::hyperscan)
if (TARGET ch_contrib::vectorscan)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::vectorscan)
endif()
if (TARGET ch_contrib::simdjson)

View File

@ -9,7 +9,7 @@
#include "config_functions.h"
#include <Common/config.h>
#if USE_HYPERSCAN
#if USE_VECTORSCAN
# include <hs.h>
#endif
@ -60,7 +60,7 @@ struct MultiMatchAllIndicesImpl
[[maybe_unused]] std::optional<UInt32> edit_distance)
{
offsets.resize(haystack_offsets.size());
#if USE_HYPERSCAN
#if USE_VECTORSCAN
const auto & hyperscan_regex = MultiRegexps::get</*SaveIndices=*/true, MultiSearchDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
@ -97,7 +97,7 @@ struct MultiMatchAllIndicesImpl
on_match,
&res);
if (err != HS_SUCCESS)
throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
offsets[i] = res.size();
offset = haystack_offsets[i];
}
@ -108,9 +108,9 @@ struct MultiMatchAllIndicesImpl
(void)res;
(void)offsets;
throw Exception(
"multi-search all indices is not implemented when hyperscan is off (is it x86 processor?)",
"multi-search all indices is not implemented when vectorscan is off",
ErrorCodes::NOT_IMPLEMENTED);
#endif // USE_HYPERSCAN
#endif // USE_VECTORSCAN
}
};

View File

@ -8,7 +8,7 @@
#include "config_functions.h"
#include <Common/config.h>
#if USE_HYPERSCAN
#if USE_VECTORSCAN
# include <hs.h>
#else
# include "MatchImpl.h"
@ -64,13 +64,13 @@ struct MultiMatchAnyImpl
(void)FindAny;
(void)FindAnyIndex;
res.resize(haystack_offsets.size());
#if USE_HYPERSCAN
#if USE_VECTORSCAN
const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex, MultiSearchDistance>(needles, edit_distance);
hs_scratch_t * scratch = nullptr;
hs_error_t err = hs_clone_scratch(hyperscan_regex->getScratch(), &scratch);
if (err != HS_SUCCESS)
throw Exception("Could not clone scratch space for hyperscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
MultiRegexps::ScratchPtr smart_scratch(scratch);
@ -92,7 +92,7 @@ struct MultiMatchAnyImpl
for (size_t i = 0; i < haystack_offsets_size; ++i)
{
UInt64 length = haystack_offsets[i] - offset - 1;
/// Hyperscan restriction.
/// Vectorscan restriction.
if (length > std::numeric_limits<UInt32>::max())
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
/// Zero the result, scan, check, update the offset.
@ -106,14 +106,14 @@ struct MultiMatchAnyImpl
on_match,
&res[i]);
if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED)
throw Exception("Failed to scan with hyperscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
offset = haystack_offsets[i];
}
#else
/// Fallback if do not use hyperscan
/// Fallback if do not use vectorscan
if constexpr (MultiSearchDistance)
throw Exception(
"Edit distance multi-search is not implemented when hyperscan is off (is it x86 processor?)",
"Edit distance multi-search is not implemented when vectorscan is off",
ErrorCodes::NOT_IMPLEMENTED);
PaddedPODArray<UInt8> accum(res.size());
memset(res.data(), 0, res.size() * sizeof(res.front()));
@ -129,7 +129,7 @@ struct MultiMatchAnyImpl
res[i] = j + 1;
}
}
#endif // USE_HYPERSCAN
#endif // USE_VECTORSCAN
}
};

View File

@ -16,7 +16,7 @@
#include "config_functions.h"
#if USE_HYPERSCAN
#if USE_VECTORSCAN
# include <hs.h>
#endif
@ -103,7 +103,7 @@ private:
}
#if USE_HYPERSCAN
#if USE_VECTORSCAN
namespace MultiRegexps
{
@ -312,6 +312,6 @@ inline Regexps * get(const std::vector<StringRef> & patterns, std::optional<UInt
}
#endif // USE_HYPERSCAN
#endif // USE_VECTORSCAN
}

View File

@ -8,8 +8,8 @@ if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
endif()
# TODO: move Functions/Regexps.h to some lib and use here
if (TARGET ch_contrib::hyperscan)
target_link_libraries(clickhouse_functions_url PRIVATE ch_contrib::hyperscan)
if (TARGET ch_contrib::vectorscan)
target_link_libraries(clickhouse_functions_url PRIVATE ch_contrib::vectorscan)
endif()
if (USE_GPERF)

View File

@ -9,4 +9,4 @@
#cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_NLP
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 USE_VECTORSCAN

View File

@ -16,6 +16,6 @@ endif()
if (TARGET ch_contrib::h3)
set(USE_H3 1)
endif()
if (TARGET ch_contrib::hyperscan)
set(USE_HYPERSCAN 1)
if (TARGET ch_contrib::vectorscan)
set(USE_VECTORSCAN 1)
endif()

View File

@ -40,7 +40,7 @@ const char * auto_config_build[]
"USE_SSL", "@USE_SSL@",
"OPENSSL_VERSION", "@OPENSSL_VERSION@",
"OPENSSL_IS_BORING_SSL", "@OPENSSL_IS_BORING_SSL@",
"USE_HYPERSCAN", "@ENABLE_HYPERSCAN@",
"USE_VECTORSCAN", "@ENABLE_VECTORSCAN@",
"USE_SIMDJSON", "@USE_SIMDJSON@",
"USE_ODBC", "@USE_ODBC@",
"USE_GRPC", "@USE_GRPC@",

View File

@ -1,4 +1,4 @@
-- Tags: no-fasttest, use-hyperscan
-- Tags: no-fasttest, use-vectorscan
select 0 = multiMatchAny(materialize('mpnsguhwsitzvuleiwebwjfitmsg'), ['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin', 'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan', 'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc', 'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu', 'ytdixvasrorhripzfhjdmlhqksmctyycwp']) from system.numbers limit 10;
select 0 = multiMatchAny(materialize('qjjzqexjpgkglgxpzrbqbnskq'), ['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq', 'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv', 'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg', 'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh', 'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap']) from system.numbers limit 10;

View File

@ -1,4 +1,4 @@
-- Tags: no-fasttest, use-hyperscan
-- Tags: no-fasttest, use-vectorscan
SET send_logs_level = 'fatal';

View File

@ -1,4 +1,4 @@
-- Tags: no-debug, no-fasttest, use-hyperscan
-- Tags: no-debug, no-fasttest, use-vectorscan
set max_hyperscan_regexp_length = 1;
set max_hyperscan_regexp_total_length = 1;

View File

@ -1,4 +1,4 @@
-- Tags: no-debug, use-hyperscan
-- Tags: no-debug, use-vectorscan
-- Check that server does not get segfault due to bad stack unwinding from Hyperscan

View File

@ -468,6 +468,7 @@ uuid
variadic
varint
vectorized
vectorscan
wchc
wchs
webpage