diff --git a/CHANGELOG.draft.md b/CHANGELOG.draft.md deleted file mode 100644 index 8b137891791..00000000000 --- a/CHANGELOG.draft.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/CHANGELOG.md b/CHANGELOG.md index 87d478a62df..72071111672 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,114 @@ +## ClickHouse release 19.1.6, 2019-01-24 + +### New Features + +* Custom per column compression codecs for tables. [#3899](https://github.com/yandex/ClickHouse/pull/3899) [#4111](https://github.com/yandex/ClickHouse/pull/4111) ([alesapin](https://github.com/alesapin), [Winter Zhang](https://github.com/zhang2014), [Anatoly](https://github.com/Sindbag)) +* Added compression codec `Delta`. [#4052](https://github.com/yandex/ClickHouse/pull/4052) ([alesapin](https://github.com/alesapin)) +* Allow to `ALTER` compression codecs. [#4054](https://github.com/yandex/ClickHouse/pull/4054) ([alesapin](https://github.com/alesapin)) +* Added functions `left`, `right`, `trim`, `ltrim`, `rtrim`, `timestampadd`, `timestampsub` for SQL standard compatibility. [#3826](https://github.com/yandex/ClickHouse/pull/3826) ([Ivan Blinkov](https://github.com/blinkov)) +* Support for write in `HDFS` tables and `hdfs` table function. [#4084](https://github.com/yandex/ClickHouse/pull/4084) ([alesapin](https://github.com/alesapin)) +* Added functions to search for multiple constant strings from big haystack: `multiPosition`, `multiSearch` ,`firstMatch` also with `-UTF8`, `-CaseInsensitive`, and `-CaseInsensitiveUTF8` variants. [#4053](https://github.com/yandex/ClickHouse/pull/4053) ([Danila Kutenin](https://github.com/danlark1)) +* Pruning of unused shards if `SELECT` query filters by sharding key (setting `distributed_optimize_skip_select_on_unused_shards`). [#3851](https://github.com/yandex/ClickHouse/pull/3851) ([Ivan](https://github.com/abyss7)) +* Allow `Kafka` engine to ignore some number of parsing errors per block. 
[#4094](https://github.com/yandex/ClickHouse/pull/4094) ([Ivan](https://github.com/abyss7)) +* Added support for `CatBoost` multiclass models evaluation. Function `modelEvaluate` returns tuple with per-class raw predictions for multiclass models. `libcatboostmodel.so` should be built with [#607](https://github.com/catboost/catboost/pull/607). [#3959](https://github.com/yandex/ClickHouse/pull/3959) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Added functions `filesystemAvailable`, `filesystemFree`, `filesystemCapacity`. [#4097](https://github.com/yandex/ClickHouse/pull/4097) ([Boris Granveaud](https://github.com/bgranvea)) +* Added hashing functions `xxHash64` and `xxHash32`. [#3905](https://github.com/yandex/ClickHouse/pull/3905) ([filimonov](https://github.com/filimonov)) +* Added `gccMurmurHash` hashing function (GCC flavoured Murmur hash) which uses the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191) [#4000](https://github.com/yandex/ClickHouse/pull/4000) ([sundyli](https://github.com/sundy-li)) +* Added hashing functions `javaHash`, `hiveHash`. [#3811](https://github.com/yandex/ClickHouse/pull/3811) ([shangshujie365](https://github.com/shangshujie365)) +* Added table function `remoteSecure`. Function works as `remote`, but uses secure connection. [#4088](https://github.com/yandex/ClickHouse/pull/4088) ([proller](https://github.com/proller)) + + +### Experimental features + +* Added multiple JOINs emulation (`allow_experimental_multiple_joins_emulation` setting). [#3946](https://github.com/yandex/ClickHouse/pull/3946) ([Artem Zuikov](https://github.com/4ertus2)) + + +### Bug Fixes + +* Make `compiled_expression_cache_size` setting limited by default to lower memory consumption. 
[#4041](https://github.com/yandex/ClickHouse/pull/4041) ([alesapin](https://github.com/alesapin)) +* Fix a bug that led to hangups in threads that perform ALTERs of Replicated tables and in the thread that updates configuration from ZooKeeper. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3891](https://github.com/yandex/ClickHouse/issues/3891) [#3934](https://github.com/yandex/ClickHouse/pull/3934) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fixed a race condition when executing a distributed ALTER task. The race condition led to more than one replica trying to execute the task and all replicas except one failing with a ZooKeeper error. [#3904](https://github.com/yandex/ClickHouse/pull/3904) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fix a bug when `from_zk` config elements weren't refreshed after a request to ZooKeeper timed out. [#2947](https://github.com/yandex/ClickHouse/issues/2947) [#3947](https://github.com/yandex/ClickHouse/pull/3947) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fix bug with wrong prefix for IPv4 subnet masks. [#3945](https://github.com/yandex/ClickHouse/pull/3945) ([alesapin](https://github.com/alesapin)) +* Fixed crash (`std::terminate`) in rare cases when a new thread cannot be created due to exhausted resources. [#3956](https://github.com/yandex/ClickHouse/pull/3956) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix bug when in `remote` table function execution when wrong restrictions were used for in `getStructureOfRemoteTable`. [#4009](https://github.com/yandex/ClickHouse/pull/4009) ([alesapin](https://github.com/alesapin)) +* Fix a leak of netlink sockets. They were placed in a pool where they were never deleted and new sockets were created at the start of a new thread when all current sockets were in use. 
[#4017](https://github.com/yandex/ClickHouse/pull/4017) ([Alex Zatelepin](https://github.com/ztlpn)) +* Fix bug with closing `/proc/self/fd` directory earlier than all fds were read from `/proc` after forking `odbc-bridge` subprocess. [#4120](https://github.com/yandex/ClickHouse/pull/4120) ([alesapin](https://github.com/alesapin)) +* Fixed String to UInt monotonic conversion in case of usage String in primary key. [#3870](https://github.com/yandex/ClickHouse/pull/3870) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed error in calculation of integer conversion function monotonicity. [#3921](https://github.com/yandex/ClickHouse/pull/3921) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed segfault in `arrayEnumerateUniq`, `arrayEnumerateDense` functions in case of some invalid arguments. [#3909](https://github.com/yandex/ClickHouse/pull/3909) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fix UB in StorageMerge. [#3910](https://github.com/yandex/ClickHouse/pull/3910) ([Amos Bird](https://github.com/amosbird)) +* Fixed segfault in functions `addDays`, `subtractDays`. [#3913](https://github.com/yandex/ClickHouse/pull/3913) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error: functions `round`, `floor`, `trunc`, `ceil` may return bogus result when executed on integer argument and large negative scale. [#3914](https://github.com/yandex/ClickHouse/pull/3914) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed a bug induced by 'kill query sync' which leads to a core dump. [#3916](https://github.com/yandex/ClickHouse/pull/3916) ([muVulDeePecker](https://github.com/fancyqlx)) +* Fix bug with long delay after empty replication queue. [#3928](https://github.com/yandex/ClickHouse/pull/3928) [#3932](https://github.com/yandex/ClickHouse/pull/3932) ([alesapin](https://github.com/alesapin)) +* Fixed excessive memory usage in case of inserting into table with `LowCardinality` primary key. 
[#3955](https://github.com/yandex/ClickHouse/pull/3955) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed `LowCardinality` serialization for `Native` format in case of empty arrays. [#3907](https://github.com/yandex/ClickHouse/issues/3907) [#4011](https://github.com/yandex/ClickHouse/pull/4011) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed incorrect result while using distinct by single LowCardinality numeric column. [#3895](https://github.com/yandex/ClickHouse/issues/3895) [#4012](https://github.com/yandex/ClickHouse/pull/4012) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fixed specialized aggregation with LowCardinality key (in case when `compile` setting is enabled). [#3886](https://github.com/yandex/ClickHouse/pull/3886) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Fix user and password forwarding for replicated tables queries. [#3957](https://github.com/yandex/ClickHouse/pull/3957) ([alesapin](https://github.com/alesapin)) ([小路](https://github.com/nicelulu)) +* Fixed very rare race condition that can happen when listing tables in Dictionary database while reloading dictionaries. [#3970](https://github.com/yandex/ClickHouse/pull/3970) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed incorrect result when HAVING was used with ROLLUP or CUBE. [#3756](https://github.com/yandex/ClickHouse/issues/3756) [#3837](https://github.com/yandex/ClickHouse/pull/3837) ([Sam Chou](https://github.com/reflection)) +* Fixed column aliases for query with `JOIN ON` syntax and distributed tables. [#3980](https://github.com/yandex/ClickHouse/pull/3980) ([Winter Zhang](https://github.com/zhang2014)) +* Fixed error in internal implementation of `quantileTDigest` (found by Artem Vakhrushev). This error never happens in ClickHouse and was relevant only for those who use ClickHouse codebase as a library directly. 
[#3935](https://github.com/yandex/ClickHouse/pull/3935) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### Improvements + +* Support for `IF NOT EXISTS` in `ALTER TABLE ADD COLUMN` statements along with `IF EXISTS` in `DROP/MODIFY/CLEAR/COMMENT COLUMN`. [#3900](https://github.com/yandex/ClickHouse/pull/3900) ([Boris Granveaud](https://github.com/bgranvea)) +* Function `parseDateTimeBestEffort`: support for formats `DD.MM.YYYY`, `DD.MM.YY`, `DD-MM-YYYY`, `DD-Mon-YYYY`, `DD/Month/YYYY` and similar. [#3922](https://github.com/yandex/ClickHouse/pull/3922) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* `CapnProtoInputStream` now support jagged structures. [#4063](https://github.com/yandex/ClickHouse/pull/4063) ([Odin Hultgren Van Der Horst](https://github.com/Miniwoffer)) +* Usability improvement: added a check that server process is started from the data directory's owner. Do not allow to start server from root if the data belongs to non-root user. [#3785](https://github.com/yandex/ClickHouse/pull/3785) ([sergey-v-galtsev](https://github.com/sergey-v-galtsev)) +* Better logic of checking required columns during analysis of queries with JOINs. [#3930](https://github.com/yandex/ClickHouse/pull/3930) ([Artem Zuikov](https://github.com/4ertus2)) +* Decreased the number of connections in case of large number of Distributed tables in a single server. [#3726](https://github.com/yandex/ClickHouse/pull/3726) ([Winter Zhang](https://github.com/zhang2014)) +* Supported totals row for `WITH TOTALS` query for ODBC driver. [#3836](https://github.com/yandex/ClickHouse/pull/3836) ([Maksim Koritckiy](https://github.com/nightweb)) +* Allowed to use `Enum`s as integers inside if function. [#3875](https://github.com/yandex/ClickHouse/pull/3875) ([Ivan](https://github.com/abyss7)) +* Added `low_cardinality_allow_in_native_format` setting. If disabled, do not use `LowCadrinality` type in `Native` format. 
[#3879](https://github.com/yandex/ClickHouse/pull/3879) ([KochetovNicolai](https://github.com/KochetovNicolai)) +* Removed some redundant objects from compiled expressions cache to lower memory usage. [#4042](https://github.com/yandex/ClickHouse/pull/4042) ([alesapin](https://github.com/alesapin)) +* Add check that `SET send_logs_level = 'value'` query accept appropriate value. [#3873](https://github.com/yandex/ClickHouse/pull/3873) ([Sabyanin Maxim](https://github.com/s-mx)) +* Fixed data type check in type conversion functions. [#3896](https://github.com/yandex/ClickHouse/pull/3896) ([Winter Zhang](https://github.com/zhang2014)) + +### Performance Improvements + +* Add a MergeTree setting `use_minimalistic_part_header_in_zookeeper`. If enabled, Replicated tables will store compact part metadata in a single part znode. This can dramatically reduce ZooKeeper snapshot size (especially if the tables have a lot of columns). Note that after enabling this setting you will not be able to downgrade to a version that doesn't support it. [#3960](https://github.com/yandex/ClickHouse/pull/3960) ([Alex Zatelepin](https://github.com/ztlpn)) +* Add an DFA-based implementation for functions `sequenceMatch` and `sequenceCount` in case pattern doesn't contain time. [#4004](https://github.com/yandex/ClickHouse/pull/4004) ([Léo Ercolanelli](https://github.com/ercolanelli-leo)) +* Performance improvement for integer numbers serialization. [#3968](https://github.com/yandex/ClickHouse/pull/3968) ([Amos Bird](https://github.com/amosbird)) +* Zero left padding PODArray so that -1 element is always valid and zeroed. It's used for branchless calculation of offsets. [#3920](https://github.com/yandex/ClickHouse/pull/3920) ([Amos Bird](https://github.com/amosbird)) +* Reverted `jemalloc` version which lead to performance degradation. 
[#4018](https://github.com/yandex/ClickHouse/pull/4018) ([alexey-milovidov](https://github.com/alexey-milovidov)) + +### Backward Incompatible Changes + +* Removed undocumented feature `ALTER MODIFY PRIMARY KEY` because it was superseded by the `ALTER MODIFY ORDER BY` command. [#3887](https://github.com/yandex/ClickHouse/pull/3887) ([Alex Zatelepin](https://github.com/ztlpn)) +* Removed function `shardByHash`. [#3833](https://github.com/yandex/ClickHouse/pull/3833) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Forbid using scalar subqueries with result of type `AggregateFunction`. [#3865](https://github.com/yandex/ClickHouse/pull/3865) ([Ivan](https://github.com/abyss7)) + +### Build/Testing/Packaging Improvements + +* Added support for PowerPC (`ppc64le`) build. [#4132](https://github.com/yandex/ClickHouse/pull/4132) ([Danila Kutenin](https://github.com/danlark1)) +* Stateful functional tests are run on public available dataset. [#3969](https://github.com/yandex/ClickHouse/pull/3969) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed error when the server cannot start with the `bash: /usr/bin/clickhouse-extract-from-config: Operation not permitted` message within Docker or systemd-nspawn. [#4136](https://github.com/yandex/ClickHouse/pull/4136) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Updated `rdkafka` library to v1.0.0-RC5. Used cppkafka instead of raw C interface. [#4025](https://github.com/yandex/ClickHouse/pull/4025) ([Ivan](https://github.com/abyss7)) +* Updated `mariadb-client` library. Fixed one of issues found by UBSan. [#3924](https://github.com/yandex/ClickHouse/pull/3924) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Some fixes for UBSan builds. 
[#3926](https://github.com/yandex/ClickHouse/pull/3926) [#3021](https://github.com/yandex/ClickHouse/pull/3021) [#3948](https://github.com/yandex/ClickHouse/pull/3948) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added per-commit runs of tests with UBSan build. +* Added per-commit runs of PVS-Studio static analyzer. +* Fixed bugs found by PVS-Studio. [#4013](https://github.com/yandex/ClickHouse/pull/4013) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed glibc compatibility issues. [#4100](https://github.com/yandex/ClickHouse/pull/4100) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Move Docker images to 18.10 and add compatibility file for glibc >= 2.28 [#3965](https://github.com/yandex/ClickHouse/pull/3965) ([alesapin](https://github.com/alesapin)) +* Add env variable if user don't want to chown directories in server Docker image. [#3967](https://github.com/yandex/ClickHouse/pull/3967) ([alesapin](https://github.com/alesapin)) +* Enabled most of the warnings from `-Weverything` in clang. Enabled `-Wpedantic`. [#3986](https://github.com/yandex/ClickHouse/pull/3986) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Added a few more warnings that are available only in clang 8. [#3993](https://github.com/yandex/ClickHouse/pull/3993) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Link to `libLLVM` rather than to individual LLVM libs when using shared linking. [#3989](https://github.com/yandex/ClickHouse/pull/3989) ([Orivej Desh](https://github.com/orivej)) +* Added sanitizer variables for test images. [#4072](https://github.com/yandex/ClickHouse/pull/4072) ([alesapin](https://github.com/alesapin)) +* `clickhouse-server` debian package will recommend `libcap2-bin` package to use `setcap` tool for setting capabilities. This is optional. [#4093](https://github.com/yandex/ClickHouse/pull/4093) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Improved compilation time, fixed includes. 
[#3898](https://github.com/yandex/ClickHouse/pull/3898) ([proller](https://github.com/proller)) +* Added performance tests for hash functions. [#3918](https://github.com/yandex/ClickHouse/pull/3918) ([filimonov](https://github.com/filimonov)) +* Fixed cyclic library dependences. [#3958](https://github.com/yandex/ClickHouse/pull/3958) ([proller](https://github.com/proller)) +* Improved compilation with low available memory. [#4030](https://github.com/yandex/ClickHouse/pull/4030) ([proller](https://github.com/proller)) +* Added test script to reproduce performance degradation in `jemalloc`. [#4036](https://github.com/yandex/ClickHouse/pull/4036) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Fixed misspells in comments and string literals under `dbms`. [#4122](https://github.com/yandex/ClickHouse/pull/4122) ([maiha](https://github.com/maiha)) +* Fixed typos in comments. [#4089](https://github.com/yandex/ClickHouse/pull/4089) ([Evgenii Pravda](https://github.com/kvinty)) + + ## ClickHouse release 18.16.1, 2018-12-21 ### Bug fixes: diff --git a/CMakeLists.txt b/CMakeLists.txt index 4265cc126f1..e75eecc4e6d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,21 @@ cmake_minimum_required (VERSION 3.3) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") +option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+ +if(ENABLE_IPO) + cmake_policy(SET CMP0069 NEW) + include(CheckIPOSupported) + check_ipo_supported(RESULT IPO_SUPPORTED OUTPUT IPO_NOT_SUPPORTED) + if(IPO_SUPPORTED) + message(STATUS "IPO/LTO is supported, enabling") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(STATUS "IPO/LTO is not supported: <${IPO_NOT_SUPPORTED}>") + endif() +else() + message(STATUS "IPO/LTO not enabled.") +endif() + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # Require at least gcc 7 if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7 AND NOT CMAKE_VERSION VERSION_LESS 2.8.9) @@ -81,7 +96,7 
@@ option (ENABLE_TESTS "Enables tests" ON) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) - if (OS_LINUX AND NOT UNBUNDLED) + if (OS_LINUX AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) if (GLIBC_COMPATIBILITY) message (STATUS "Some symbols from glibc will be replaced for compatibility") @@ -120,7 +135,9 @@ else() message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)") endif() -include (cmake/test_cpu.cmake) +if(NOT DISABLE_CPU_OPTIMIZE) + include(cmake/test_cpu.cmake) +endif() if(NOT COMPILER_CLANG) # clang: error: the clang compiler does not support '-march=native' option(ARCH_NATIVE "Enable -march=native compiler flag" ${ARCH_ARM}) @@ -229,9 +246,13 @@ include (cmake/find_re2.cmake) include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) -include (cmake/find_cpuid.cmake) +include (cmake/find_cpuid.cmake) # Freebsd, bundled +if (NOT USE_CPUID) + include (cmake/find_cpuinfo.cmake) # Debian +endif() include (cmake/find_libgsasl.cmake) include (cmake/find_libxml2.cmake) +include (cmake/find_protobuf.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) diff --git a/cmake/Modules/Findmetrohash.cmake b/cmake/Modules/Findmetrohash.cmake index 9efc1ed2db8..c51665795bd 100644 --- a/cmake/Modules/Findmetrohash.cmake +++ b/cmake/Modules/Findmetrohash.cmake @@ -28,7 +28,7 @@ find_library(METROHASH_LIBRARIES find_path(METROHASH_INCLUDE_DIR NAMES metrohash.h - PATHS ${METROHASH_ROOT_DIR}/include ${METROHASH_INCLUDE_PATHS} + PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash 
${METROHASH_INCLUDE_PATHS} ) include(FindPackageHandleStandardArgs) diff --git a/cmake/find_cpuid.cmake b/cmake/find_cpuid.cmake index cda88433a1c..bc88626405d 100644 --- a/cmake/find_cpuid.cmake +++ b/cmake/find_cpuid.cmake @@ -2,11 +2,11 @@ if (NOT ARCH_ARM) option (USE_INTERNAL_CPUID_LIBRARY "Set to FALSE to use system cpuid library instead of bundled" ${NOT_UNBUNDLED}) endif () -#if (USE_INTERNAL_CPUID_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include/cpuid/libcpuid.h") -# message (WARNING "submodule contrib/libcpuid is missing. to fix try run: \n git submodule update --init --recursive") -# set (USE_INTERNAL_CPUID_LIBRARY 0) -# set (MISSING_INTERNAL_CPUID_LIBRARY 1) -#endif () +if (USE_INTERNAL_CPUID_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcpuid/CMakeLists.txt") + message (WARNING "submodule contrib/libcpuid is missing. to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_CPUID_LIBRARY 0) + set (MISSING_INTERNAL_CPUID_LIBRARY 1) +endif () if (NOT USE_INTERNAL_CPUID_LIBRARY) find_library (CPUID_LIBRARY cpuid) @@ -20,10 +20,12 @@ if (CPUID_LIBRARY AND CPUID_INCLUDE_DIR) add_definitions(-DHAVE_STDINT_H) # TODO: make virtual target cpuid:cpuid with COMPILE_DEFINITIONS property endif () + set (USE_CPUID 1) elseif (NOT MISSING_INTERNAL_CPUID_LIBRARY) set (CPUID_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include) set (USE_INTERNAL_CPUID_LIBRARY 1) set (CPUID_LIBRARY cpuid) + set (USE_CPUID 1) endif () -message (STATUS "Using cpuid: ${CPUID_INCLUDE_DIR} : ${CPUID_LIBRARY}") +message (STATUS "Using cpuid=${USE_CPUID}: ${CPUID_INCLUDE_DIR} : ${CPUID_LIBRARY}") diff --git a/cmake/find_cpuinfo.cmake b/cmake/find_cpuinfo.cmake new file mode 100644 index 00000000000..c12050c4396 --- /dev/null +++ b/cmake/find_cpuinfo.cmake @@ -0,0 +1,17 @@ +option(USE_INTERNAL_CPUINFO_LIBRARY "Set to FALSE to use system cpuinfo library instead of bundled" ${NOT_UNBUNDLED}) + +if(NOT 
USE_INTERNAL_CPUINFO_LIBRARY) + find_library(CPUINFO_LIBRARY cpuinfo) + find_path(CPUINFO_INCLUDE_DIR NAMES cpuinfo.h PATHS ${CPUINFO_INCLUDE_PATHS}) +endif() + +if(CPUINFO_LIBRARY AND CPUINFO_INCLUDE_DIR) + set(USE_CPUINFO 1) +elseif(NOT MISSING_INTERNAL_CPUINFO_LIBRARY) + set(CPUINFO_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcpuinfo/include) + set(USE_INTERNAL_CPUINFO_LIBRARY 1) + set(CPUINFO_LIBRARY cpuinfo) + set(USE_CPUINFO 1) +endif() + +message(STATUS "Using cpuinfo=${USE_CPUINFO}: ${CPUINFO_INCLUDE_DIR} : ${CPUINFO_LIBRARY}") diff --git a/cmake/find_gtest.cmake b/cmake/find_gtest.cmake index ce0a3d32785..fa7b4f4828a 100644 --- a/cmake/find_gtest.cmake +++ b/cmake/find_gtest.cmake @@ -8,13 +8,22 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeList set (MISSING_INTERNAL_GTEST_LIBRARY 1) endif () -if (NOT USE_INTERNAL_GTEST_LIBRARY) - find_package (GTest) -endif () -if (NOT GTEST_INCLUDE_DIRS AND NOT MISSING_INTERNAL_GTEST_LIBRARY) +if(NOT USE_INTERNAL_GTEST_LIBRARY) + # TODO: autodetect of GTEST_SRC_DIR by EXISTS /usr/src/googletest/CMakeLists.txt + if(NOT GTEST_SRC_DIR) + find_package(GTest) + endif() +endif() + +if (NOT GTEST_SRC_DIR AND NOT GTEST_INCLUDE_DIRS AND NOT MISSING_INTERNAL_GTEST_LIBRARY) set (USE_INTERNAL_GTEST_LIBRARY 1) set (GTEST_MAIN_LIBRARIES gtest_main) + set (GTEST_INCLUDE_DIRS ${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest) endif () -message (STATUS "Using gtest: ${GTEST_INCLUDE_DIRS} : ${GTEST_MAIN_LIBRARIES}") +if((GTEST_INCLUDE_DIRS AND GTEST_MAIN_LIBRARIES) OR GTEST_SRC_DIR) + set(USE_GTEST 1) +endif() + +message (STATUS "Using gtest=${USE_GTEST}: ${GTEST_INCLUDE_DIRS} : ${GTEST_MAIN_LIBRARIES} : ${GTEST_SRC_DIR}") diff --git a/cmake/find_protobuf.cmake b/cmake/find_protobuf.cmake index 5daf5a0c186..e2fe9ca2fcd 100644 --- a/cmake/find_protobuf.cmake +++ b/cmake/find_protobuf.cmake @@ -1,18 +1,35 @@ -option (USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" 
ON) +option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ${NOT_UNBUNDLED}) -if (NOT USE_INTERNAL_PROTOBUF_LIBRARY) +if(OS_FREEBSD AND SANITIZE STREQUAL "address") + # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found + set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) + set(USE_INTERNAL_PROTOBUF_LIBRARY 0) +endif() + +if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") + if(USE_INTERNAL_PROTOBUF_LIBRARY) + message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive") + set(USE_INTERNAL_PROTOBUF_LIBRARY 0) + endif() + set(MISSING_INTERNAL_PROTOBUF_LIBRARY 1) +endif() + +if(NOT USE_INTERNAL_PROTOBUF_LIBRARY) find_package(Protobuf) -endif () +endif() if (Protobuf_LIBRARY AND Protobuf_INCLUDE_DIR) -else () - set(Protobuf_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/contrib/protobuf/src) + set(USE_PROTOBUF 1) +elseif(NOT MISSING_INTERNAL_PROTOBUF_LIBRARY) + set(Protobuf_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/protobuf/src) + set(USE_PROTOBUF 1) + set(USE_INTERNAL_PROTOBUF_LIBRARY 1) set(Protobuf_LIBRARY libprotobuf) set(Protobuf_PROTOC_LIBRARY libprotoc) set(Protobuf_LITE_LIBRARY libprotobuf-lite) - set(Protobuf_PROTOC_EXECUTABLE ${CMAKE_BINARY_DIR}/contrib/protobuf/cmake/protoc) + set(Protobuf_PROTOC_EXECUTABLE ${ClickHouse_BINARY_DIR}/contrib/protobuf/cmake/protoc) if(NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH) set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) @@ -77,4 +94,4 @@ else () endfunction() endif() -message (STATUS "Using protobuf: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}") +message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}") diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index 52a865cd50b..793bcd0d2d0 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -2,6 +2,11 @@ if (NOT ARCH_ARM AND NOT ARCH_32 AND 
NOT APPLE) option (ENABLE_RDKAFKA "Enable kafka" ON) endif () +if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt") + message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init --recursive") + set (ENABLE_RDKAFKA 0) +endif () + if (ENABLE_RDKAFKA) if (OS_LINUX AND NOT ARCH_ARM) diff --git a/cmake/find_re2.cmake b/cmake/find_re2.cmake index cfc701fac2c..c0136a6cc21 100644 --- a/cmake/find_re2.cmake +++ b/cmake/find_re2.cmake @@ -5,13 +5,24 @@ if (NOT USE_INTERNAL_RE2_LIBRARY) find_path (RE2_INCLUDE_DIR NAMES re2/re2.h PATHS ${RE2_INCLUDE_PATHS}) endif () +string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) +if(_have_space GREATER 0) + message(WARNING "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.") + set (MISSING_INTERNAL_RE2_ST_LIBRARY 1) +endif() + if (RE2_LIBRARY AND RE2_INCLUDE_DIR) set (RE2_ST_LIBRARY ${RE2_LIBRARY}) -else () +elseif (NOT MISSING_INTERNAL_RE2_LIBRARY) set (USE_INTERNAL_RE2_LIBRARY 1) set (RE2_LIBRARY re2) - set (RE2_ST_LIBRARY re2_st) - set (USE_RE2_ST 1) + set (RE2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/re2) + if (NOT MISSING_INTERNAL_RE2_ST_LIBRARY) + set (RE2_ST_LIBRARY re2_st) + set (USE_RE2_ST 1) + else () + set (RE2_ST_LIBRARY ${RE2_LIBRARY}) + endif () endif () message (STATUS "Using re2: ${RE2_INCLUDE_DIR} : ${RE2_LIBRARY}; ${RE2_ST_INCLUDE_DIR} : ${RE2_ST_LIBRARY}") diff --git a/cmake/find_zstd.cmake b/cmake/find_zstd.cmake index 0e6db94a7c0..24bc851ed57 100644 --- a/cmake/find_zstd.cmake +++ b/cmake/find_zstd.cmake @@ -14,6 +14,7 @@ if (ZSTD_LIBRARY AND ZSTD_INCLUDE_DIR) else () set (USE_INTERNAL_ZSTD_LIBRARY 1) set (ZSTD_LIBRARY zstd) + set (ZSTD_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/zstd/lib) endif () message (STATUS "Using zstd: ${ZSTD_INCLUDE_DIR} : ${ZSTD_LIBRARY}") diff --git a/cmake/lib_name.cmake b/cmake/lib_name.cmake index 5c919b263e6..847efb15fc5 100644 --- 
a/cmake/lib_name.cmake +++ b/cmake/lib_name.cmake @@ -2,4 +2,5 @@ set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide) set(COMMON_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/libs/libcommon/include ${ClickHouse_BINARY_DIR}/libs/libcommon/include) set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/dbms/src ${ClickHouse_BINARY_DIR}/dbms/src) set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion) +set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) set(PCG_RANDOM_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpcg-random/include) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 7b66bfce063..fcc2cc75817 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -107,6 +107,11 @@ if (USE_INTERNAL_SSL_LIBRARY) if (NOT MAKE_STATIC_LIBRARIES) set (BUILD_SHARED 1) endif () + + # By default, ${CMAKE_INSTALL_PREFIX}/etc/ssl is selected - that is not what we need. + # We need to use system wide ssl directory. 
+ set (OPENSSLDIR "/etc/ssl") + set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "") add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR}) @@ -166,13 +171,16 @@ if (USE_INTERNAL_POCO_LIBRARY) endif () endif () -if (USE_INTERNAL_GTEST_LIBRARY) +if(USE_INTERNAL_GTEST_LIBRARY) # Google Test from sources add_subdirectory(${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest) # avoid problems with target_compile_definitions (gtest INTERFACE GTEST_HAS_POSIX_RE=0) target_include_directories (gtest SYSTEM INTERFACE ${ClickHouse_SOURCE_DIR}/contrib/googletest/include) -endif () +elseif(GTEST_SRC_DIR) + add_subdirectory(${GTEST_SRC_DIR}/googletest ${CMAKE_CURRENT_BINARY_DIR}/googletest) + target_compile_definitions(gtest INTERFACE GTEST_HAS_POSIX_RE=0) +endif() if (USE_INTERNAL_LLVM_LIBRARY) file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/empty.cpp CONTENT " ") @@ -207,14 +215,14 @@ if (USE_INTERNAL_LIBXML2_LIBRARY) add_subdirectory(libxml2-cmake) endif () +if (USE_INTERNAL_PROTOBUF_LIBRARY) + set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) + set(protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) + set(protobuf_WITH_ZLIB 0 CACHE INTERNAL "" FORCE) # actually will use zlib, but skip find + add_subdirectory(protobuf/cmake) +endif () + if (USE_INTERNAL_HDFS3_LIBRARY) - include(${ClickHouse_SOURCE_DIR}/cmake/find_protobuf.cmake) - if (USE_INTERNAL_PROTOBUF_LIBRARY) - set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) - set(protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) - set(protobuf_WITH_ZLIB 0 CACHE INTERNAL "" FORCE) # actually will use zlib, but skip find - add_subdirectory(protobuf/cmake) - endif () add_subdirectory(libhdfs3-cmake) endif () diff --git a/contrib/jemalloc b/contrib/jemalloc index 41b7372eade..cd2931ad9bb 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit 41b7372eadee941b9164751b8d4963f915d3ceae +Subproject 
commit cd2931ad9bbd78208565716ab102e86d858c2fff diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index 2bd5628d0f8..d71a5432715 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ -1,5 +1,5 @@ if (HAVE_SSE42) # Not used. Pretty easy to port. - set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp) + set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h) endif () add_library(metrohash diff --git a/contrib/libmetrohash/LICENSE b/contrib/libmetrohash/LICENSE index 0765a504e62..261eeb9e9f8 100644 --- a/contrib/libmetrohash/LICENSE +++ b/contrib/libmetrohash/LICENSE @@ -1,22 +1,201 @@ -The MIT License (MIT) + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Copyright (c) 2015 J. Andrew Rogers + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: + 1. Definitions. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/libmetrohash/README.md b/contrib/libmetrohash/README.md index a8851cdb2d8..2ac16b1437c 100644 --- a/contrib/libmetrohash/README.md +++ b/contrib/libmetrohash/README.md @@ -5,12 +5,44 @@ MetroHash is a set of state-of-the-art hash functions for *non-cryptographic* us * Fastest general-purpose functions for bulk hashing. * Fastest general-purpose functions for small, variable length keys. * Robust statistical bias profile, similar to the MD5 cryptographic hash. +* Hashes can be constructed incrementally (**new**) * 64-bit, 128-bit, and 128-bit CRC variants currently available. * Optimized for modern x86-64 microarchitectures. * Elegant, compact, readable functions. You can read more about the design and history [here](http://www.jandrewrogers.com/2015/05/27/metrohash/). 
+## News + +### 23 October 2018 + +The project has been re-licensed under Apache License v2.0. The purpose of this license change is consistency with the imminent release of MetroHash v2.0, which is also licensed under the Apache license. + +### 27 July 2015 + +Two new 64-bit and 128-bit algorithms add the ability to construct hashes incrementally. In addition to supporting incremental construction, the algorithms are slightly superior to the prior versions. + +A big change is that these new algorithms are implemented as C++ classes that support both incremental and stateless hashing. These classes also have a static method for verifying the implementation against the test vectors built into the classes. Implementations are now fully contained by their respective headers e.g. "metrohash128.h". + +*Note: an incremental version of the 128-bit CRC version is on its way but is not included in this push.* + +**Usage Example For Stateless Hashing** + +`MetroHash128::Hash(key, key_length, hash_ptr, seed)` + +**Usage Example For Incremental Hashing** + +`MetroHash128 hasher;` +`hasher.Update(partial_key, partial_key_length);` +`...` +`hasher.Update(partial_key, partial_key_length);` +`hasher.Finalize(hash_ptr);` + +An `Initialize(seed)` method allows the hasher objects to be reused. + + +### 27 May 2015 + Six hash functions have been included in the initial release: * 64-bit hash functions, "metrohash64_1" and "metrohash64_2" diff --git a/contrib/libmetrohash/VERSION b/contrib/libmetrohash/VERSION index 211ea847416..43012d2e31c 100644 --- a/contrib/libmetrohash/VERSION +++ b/contrib/libmetrohash/VERSION @@ -1,7 +1,4 @@ -origin: git@github.com:jandrewrogers/MetroHash.git -commit d9dee18a54a8a6766e24c1950b814ac7ca9d1a89 -Merge: 761e8a4 3d06b24 +origin: https://github.com/jandrewrogers/MetroHash.git +commit 690a521d9beb2e1050cc8f273fdabc13b31bf8f6 tag: v1.1.3 Author: J. 
Andrew Rogers -Date: Sat Jun 6 16:12:06 2015 -0700 - - modified README +Date: Tue Oct 23 09:49:53 2018 -0700 diff --git a/contrib/libmetrohash/src/metrohash.h b/contrib/libmetrohash/src/metrohash.h index 0d9b76c99cf..ffab03216b7 100644 --- a/contrib/libmetrohash/src/metrohash.h +++ b/contrib/libmetrohash/src/metrohash.h @@ -1,73 +1,24 @@ // metrohash.h // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
+// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef METROHASH_METROHASH_H #define METROHASH_METROHASH_H -#include -#include - -// MetroHash 64-bit hash functions -void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - -// MetroHash 128-bit hash functions -void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - -// MetroHash 128-bit hash functions using CRC instruction -void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - - -/* rotate right idiom recognized by compiler*/ -inline static uint64_t rotate_right(uint64_t v, unsigned k) -{ - return (v >> k) | (v << (64 - k)); -} - -// unaligned reads, fast and safe on Nehalem and later microarchitectures -inline static uint64_t read_u64(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u32(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u16(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u8 (const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - +#include "metrohash64.h" +#include "metrohash128.h" +#include "metrohash128crc.h" #endif // #ifndef METROHASH_METROHASH_H diff --git a/contrib/libmetrohash/src/metrohash128.cpp 
b/contrib/libmetrohash/src/metrohash128.cpp index 6370412046e..5c143db9cbe 100644 --- a/contrib/libmetrohash/src/metrohash128.cpp +++ b/contrib/libmetrohash/src/metrohash128.cpp @@ -1,29 +1,260 @@ // metrohash128.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "platform.h" +#include "metrohash128.h" + +const char * MetroHash128::test_string = "012345678901234567890123456789012345678901234567890123456789012"; + +const uint8_t MetroHash128::test_seed_0[16] = { + 0xC7, 0x7C, 0xE2, 0xBF, 0xA4, 0xED, 0x9F, 0x9B, + 0x05, 0x48, 0xB2, 0xAC, 0x50, 0x74, 0xA2, 0x97 + }; + +const uint8_t MetroHash128::test_seed_1[16] = { + 0x45, 0xA3, 0xCD, 0xB8, 0x38, 0x19, 0x9D, 0x7F, + 0xBD, 0xD6, 0x8D, 0x86, 0x7A, 0x14, 0xEC, 0xEF + }; + + + +MetroHash128::MetroHash128(const uint64_t seed) +{ + Initialize(seed); +} + + +void MetroHash128::Initialize(const uint64_t seed) +{ + // initialize internal hash registers + state.v[0] = (static_cast(seed) - k0) * k3; + state.v[1] = (static_cast(seed) + k1) * k2; + state.v[2] = (static_cast(seed) + k0) * k2; + state.v[3] = (static_cast(seed) - k1) * k3; + + // initialize total length of input + bytes = 0; +} + + +void MetroHash128::Update(const uint8_t * const buffer, const uint64_t length) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + // input buffer may be partially filled + if (bytes % 32) + { + uint64_t fill = 32 - (bytes % 32); + if (fill > length) + fill = length; + + memcpy(input.b + (bytes % 32), ptr, static_cast(fill)); + ptr += fill; + bytes += fill; + + // input buffer is still partially filled + if ((bytes % 32) != 0) return; + + // process full input buffer + state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // bulk update + bytes += (end - ptr); + while (ptr <= (end - 32)) + { + // process directly from the source, bypassing the input buffer + state.v[0] += 
read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // store remaining bytes in input buffer + if (ptr < end) + memcpy(input.b, ptr, end - ptr); +} + + +void MetroHash128::Finalize(uint8_t * const hash) +{ + // finalize bulk loop, if used + if (bytes >= 32) + { + state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 21) * k1; + state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 21) * k0; + state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 21) * k1; + state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 21) * k0; + } + + // process any bytes remaining in the input buffer + const uint8_t * ptr = reinterpret_cast(input.b); + const uint8_t * const end = ptr + (bytes % 32); + + if ((end - ptr) >= 16) + { + state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; + state.v[1] += read_u64(ptr) * k2; ptr += 8; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 45) * k1; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 45) * k0; + } + + if ((end - ptr) >= 8) + { + state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; + state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 27) * k1; + } + + if ((end - ptr) >= 4) + { + state.v[1] += read_u32(ptr) * k2; ptr += 4; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 46) * k0; + } + + if ((end - ptr) >= 2) + { + state.v[0] += read_u16(ptr) * k2; ptr += 2; state.v[0] = rotate_right(state.v[0],33) * k3; + 
state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 22) * k1; + } + + if ((end - ptr) >= 1) + { + state.v[1] += read_u8 (ptr) * k2; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 58) * k0; + } + + state.v[0] += rotate_right((state.v[0] * k0) + state.v[1], 13); + state.v[1] += rotate_right((state.v[1] * k1) + state.v[0], 37); + state.v[0] += rotate_right((state.v[0] * k2) + state.v[1], 13); + state.v[1] += rotate_right((state.v[1] * k3) + state.v[0], 37); + + bytes = 0; + + // do any endian conversion here + + memcpy(hash, state.v, 16); +} + + +void MetroHash128::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + uint64_t v[4]; + + v[0] = (static_cast(seed) - k0) * k3; + v[1] = (static_cast(seed) + k1) * k2; + + if (length >= 32) + { + v[2] = (static_cast(seed) + k0) * k2; + v[3] = (static_cast(seed) - k1) * k3; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 21) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 21) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 21) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 21) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 45) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 45) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 
8; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 27) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 46) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 58) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 13); + v[1] += rotate_right((v[1] * k1) + v[0], 37); + v[0] += rotate_right((v[0] * k2) + v[1], 13); + v[1] += rotate_right((v[1] * k3) + v[0], 37); + + // do any endian conversion here + + memcpy(hash, v, 16); +} + + +bool MetroHash128::ImplementationVerified() +{ + uint8_t hash[16]; + const uint8_t * key = reinterpret_cast(MetroHash128::test_string); + + // verify one-shot implementation + MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 0); + if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; + + MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 1); + if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; + + // verify incremental implementation + MetroHash128 metro; + + metro.Initialize(0); + metro.Update(reinterpret_cast(MetroHash128::test_string), strlen(MetroHash128::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; + + metro.Initialize(1); + metro.Update(reinterpret_cast(MetroHash128::test_string), strlen(MetroHash128::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; + + return true; +} -#include "metrohash.h" void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) { @@ -97,6 +328,8 @@ void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t 
seed, uint8_t * v[0] += rotate_right((v[0] * k2) + v[1], 13); v[1] += rotate_right((v[1] * k3) + v[0], 37); + // do any endian conversion here + memcpy(out, v, 16); } @@ -173,6 +406,8 @@ void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * v[0] += rotate_right((v[0] * k2) + v[1], 33); v[1] += rotate_right((v[1] * k3) + v[0], 33); + // do any endian conversion here + memcpy(out, v, 16); } diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h new file mode 100644 index 00000000000..639a4fa97e3 --- /dev/null +++ b/contrib/libmetrohash/src/metrohash128.h @@ -0,0 +1,72 @@ +// metrohash128.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef METROHASH_METROHASH_128_H +#define METROHASH_METROHASH_128_H + +#include + +class MetroHash128 +{ +public: + static const uint32_t bits = 128; + + // Constructor initializes the same as Initialize() + MetroHash128(const uint64_t seed=0); + + // Initializes internal state for new hash with optional seed + void Initialize(const uint64_t seed=0); + + // Update the hash state with a string of bytes. If the length + // is sufficiently long, the implementation switches to a bulk + // hashing algorithm directly on the argument buffer for speed. + void Update(const uint8_t * buffer, const uint64_t length); + + // Constructs the final hash and writes it to the argument buffer. 
+ // After a hash is finalized, this instance must be Initialized()-ed + // again or the behavior of Update() and Finalize() is undefined. + void Finalize(uint8_t * const hash); + + // A non-incremental function implementation. This can be significantly + // faster than the incremental implementation for some usage patterns. + static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); + + // Does implementation correctly execute test vectors? + static bool ImplementationVerified(); + + // test vectors -- Hash(test_string, seed=0) => test_seed_0 + static const char * test_string; + static const uint8_t test_seed_0[16]; + static const uint8_t test_seed_1[16]; + +private: + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + struct { uint64_t v[4]; } state; + struct { uint8_t b[32]; } input; + uint64_t bytes; +}; + + +// Legacy 128-bit hash functions -- do not use +void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_128_H diff --git a/contrib/libmetrohash/src/metrohash128crc.cpp b/contrib/libmetrohash/src/metrohash128crc.cpp index c04cf5a6b23..775a9a944bf 100644 --- a/contrib/libmetrohash/src/metrohash128crc.cpp +++ b/contrib/libmetrohash/src/metrohash128crc.cpp @@ -1,31 +1,24 @@ // metrohash128crc.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -#include "metrohash.h" #include +#include +#include "metrohash.h" +#include "platform.h" void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) diff --git a/contrib/libmetrohash/src/metrohash128crc.h b/contrib/libmetrohash/src/metrohash128crc.h new file mode 100644 index 00000000000..f151fd4200d --- /dev/null +++ b/contrib/libmetrohash/src/metrohash128crc.h @@ -0,0 +1,27 @@ +// metrohash128crc.h +// +// Copyright 2015-2018 J. 
Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef METROHASH_METROHASH_128_CRC_H +#define METROHASH_METROHASH_128_CRC_H + +#include + +// Legacy 128-bit hash functions +void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_128_CRC_H diff --git a/contrib/libmetrohash/src/metrohash64.cpp b/contrib/libmetrohash/src/metrohash64.cpp index bc4b41eb8f2..7b5ec7f1a42 100644 --- a/contrib/libmetrohash/src/metrohash64.cpp +++ b/contrib/libmetrohash/src/metrohash64.cpp @@ -1,29 +1,257 @@ // metrohash64.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "platform.h" +#include "metrohash64.h" + +#include + +const char * MetroHash64::test_string = "012345678901234567890123456789012345678901234567890123456789012"; + +const uint8_t MetroHash64::test_seed_0[8] = { 0x6B, 0x75, 0x3D, 0xAE, 0x06, 0x70, 0x4B, 0xAD }; +const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0xB9, 0xB8, 0xDF }; + + + +MetroHash64::MetroHash64(const uint64_t seed) +{ + Initialize(seed); +} + + +void MetroHash64::Initialize(const uint64_t seed) +{ + vseed = (static_cast(seed) + k2) * k0; + + // initialize internal hash registers + state.v[0] = vseed; + state.v[1] = vseed; + state.v[2] = vseed; + state.v[3] = vseed; + + // initialize total length of input + bytes = 0; +} + + +void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + // input buffer may be partially filled + if (bytes % 32) + { + uint64_t fill = 32 - (bytes % 32); + if (fill > length) + fill = length; + + memcpy(input.b + (bytes % 32), ptr, static_cast(fill)); + ptr += fill; + bytes += fill; + + // input buffer is still partially filled + if ((bytes % 32) != 0) return; + + // process full input buffer + state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // bulk update + bytes += static_cast(end - ptr); + while (ptr <= (end - 32)) + { + // process directly from the source, bypassing the input buffer + state.v[0] += read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = 
rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // store remaining bytes in input buffer + if (ptr < end) + memcpy(input.b, ptr, static_cast(end - ptr)); +} + + +void MetroHash64::Finalize(uint8_t * const hash) +{ + // finalize bulk loop, if used + if (bytes >= 32) + { + state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 37) * k1; + state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 37) * k0; + state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 37) * k1; + state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 37) * k0; + + state.v[0] = vseed + (state.v[0] ^ state.v[1]); + } + + // process any bytes remaining in the input buffer + const uint8_t * ptr = reinterpret_cast(input.b); + const uint8_t * const end = ptr + (bytes % 32); + + if ((end - ptr) >= 16) + { + state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3; + state.v[2] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[2] = rotate_right(state.v[2],29) * k3; + state.v[1] ^= rotate_right(state.v[1] * k0, 21) + state.v[2]; + state.v[2] ^= rotate_right(state.v[2] * k3, 21) + state.v[1]; + state.v[0] += state.v[2]; + } + + if ((end - ptr) >= 8) + { + state.v[0] += read_u64(ptr) * k3; ptr += 8; + state.v[0] ^= rotate_right(state.v[0], 55) * k1; + } + + if ((end - ptr) >= 4) + { + state.v[0] += read_u32(ptr) * k3; ptr += 4; + state.v[0] ^= rotate_right(state.v[0], 26) * k1; + } + + if ((end - ptr) >= 2) + { + state.v[0] += read_u16(ptr) * k3; ptr += 2; + state.v[0] ^= rotate_right(state.v[0], 48) * k1; + } + + if ((end - ptr) >= 1) + { + state.v[0] += read_u8 (ptr) * k3; + state.v[0] ^= rotate_right(state.v[0], 37) * k1; + } + + state.v[0] ^= rotate_right(state.v[0], 28); + 
state.v[0] *= k0; + state.v[0] ^= rotate_right(state.v[0], 29); + + bytes = 0; + + // do any endian conversion here + + memcpy(hash, state.v, 8); +} + + +void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + uint64_t h = (static_cast(seed) + k2) * k0; + + if (length >= 32) + { + uint64_t v[4]; + v[0] = h; + v[1] = h; + v[2] = h; + v[3] = h; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 37) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 37) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 37) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 37) * k0; + h += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0 = h + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3; + uint64_t v1 = h + (read_u64(ptr) * k2); ptr += 8; v1 = rotate_right(v1,29) * k3; + v0 ^= rotate_right(v0 * k0, 21) + v1; + v1 ^= rotate_right(v1 * k3, 21) + v0; + h += v1; + } + + if ((end - ptr) >= 8) + { + h += read_u64(ptr) * k3; ptr += 8; + h ^= rotate_right(h, 55) * k1; + } + + if ((end - ptr) >= 4) + { + h += read_u32(ptr) * k3; ptr += 4; + h ^= rotate_right(h, 26) * k1; + } + + if ((end - ptr) >= 2) + { + h += read_u16(ptr) * k3; ptr += 2; + h ^= rotate_right(h, 48) * k1; + } + + if ((end - ptr) >= 1) + { + h += read_u8 (ptr) * k3; + h ^= rotate_right(h, 37) * k1; + } + + h ^= rotate_right(h, 28); + h *= k0; + h ^= rotate_right(h, 29); + + memcpy(hash, &h, 8); +} + + +bool MetroHash64::ImplementationVerified() +{ + uint8_t hash[8]; + 
const uint8_t * key = reinterpret_cast(MetroHash64::test_string); + + // verify one-shot implementation + MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 0); + if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false; + + MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 1); + if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false; + + // verify incremental implementation + MetroHash64 metro; + + metro.Initialize(0); + metro.Update(reinterpret_cast(MetroHash64::test_string), strlen(MetroHash64::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false; + + metro.Initialize(1); + metro.Update(reinterpret_cast(MetroHash64::test_string), strlen(MetroHash64::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false; + + return true; +} -#include "metrohash.h" void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) { diff --git a/contrib/libmetrohash/src/metrohash64.h b/contrib/libmetrohash/src/metrohash64.h new file mode 100644 index 00000000000..d58898b117d --- /dev/null +++ b/contrib/libmetrohash/src/metrohash64.h @@ -0,0 +1,73 @@ +// metrohash64.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef METROHASH_METROHASH_64_H +#define METROHASH_METROHASH_64_H + +#include + +class MetroHash64 +{ +public: + static const uint32_t bits = 64; + + // Constructor initializes the same as Initialize() + MetroHash64(const uint64_t seed=0); + + // Initializes internal state for new hash with optional seed + void Initialize(const uint64_t seed=0); + + // Update the hash state with a string of bytes. If the length + // is sufficiently long, the implementation switches to a bulk + // hashing algorithm directly on the argument buffer for speed. + void Update(const uint8_t * buffer, const uint64_t length); + + // Constructs the final hash and writes it to the argument buffer. + // After a hash is finalized, this instance must be Initialized()-ed + // again or the behavior of Update() and Finalize() is undefined. + void Finalize(uint8_t * const hash); + + // A non-incremental function implementation. This can be significantly + // faster than the incremental implementation for some usage patterns. + static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); + + // Does implementation correctly execute test vectors? 
+ static bool ImplementationVerified(); + + // test vectors -- Hash(test_string, seed=0) => test_seed_0 + static const char * test_string; + static const uint8_t test_seed_0[8]; + static const uint8_t test_seed_1[8]; + +private: + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + struct { uint64_t v[4]; } state; + struct { uint8_t b[32]; } input; + uint64_t bytes; + uint64_t vseed; +}; + + +// Legacy 64-bit hash functions -- do not use +void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_64_H diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h new file mode 100644 index 00000000000..31291b94b33 --- /dev/null +++ b/contrib/libmetrohash/src/platform.h @@ -0,0 +1,50 @@ +// platform.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef METROHASH_PLATFORM_H +#define METROHASH_PLATFORM_H + +#include + +// rotate right idiom recognized by most compilers +inline static uint64_t rotate_right(uint64_t v, unsigned k) +{ + return (v >> k) | (v << (64 - k)); +} + +// unaligned reads, fast and safe on Nehalem and later microarchitectures +inline static uint64_t read_u64(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u32(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u16(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u8 (const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + + +#endif // #ifndef METROHASH_PLATFORM_H diff --git a/contrib/libmetrohash/src/testvector.h b/contrib/libmetrohash/src/testvector.h index 8c7967453e9..e4006182e4f 100644 --- a/contrib/libmetrohash/src/testvector.h +++ b/contrib/libmetrohash/src/testvector.h @@ -1,27 +1,18 @@ // testvector.h // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef METROHASH_TESTVECTOR_H #define METROHASH_TESTVECTOR_H @@ -46,6 +37,8 @@ struct TestVectorData static const char * test_key_63 = "012345678901234567890123456789012345678901234567890123456789012"; +// The hash assumes a little-endian architecture. Treating the hash results +// as an array of uint64_t should enable conversion for big-endian implementations. 
const TestVectorData TestVector [] = { // seed = 0 diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 115c916e9f4..62337f60fb5 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -2,6 +2,7 @@ set(RDKAFKA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/librdkafka/src) set(SRCS ${RDKAFKA_SOURCE_DIR}/crc32c.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_zstd.c ${RDKAFKA_SOURCE_DIR}/rdaddr.c ${RDKAFKA_SOURCE_DIR}/rdavl.c ${RDKAFKA_SOURCE_DIR}/rdbuf.c @@ -59,5 +60,6 @@ set(SRCS add_library(rdkafka ${LINK_MODE} ${SRCS}) target_include_directories(rdkafka SYSTEM PUBLIC include) -target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) -target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) +target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used. +target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used. 
+target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/contrib/librdkafka-cmake/config.h b/contrib/librdkafka-cmake/config.h index ac732dd0f58..266baae8dae 100644 --- a/contrib/librdkafka-cmake/config.h +++ b/contrib/librdkafka-cmake/config.h @@ -51,6 +51,8 @@ //#define WITH_PLUGINS 1 // zlib #define WITH_ZLIB 1 +// zstd +#define WITH_ZSTD 1 // WITH_SNAPPY #define WITH_SNAPPY 1 // WITH_SOCKEM diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 3ce29e9d65e..42a1b342a49 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -200,11 +200,20 @@ target_link_libraries (clickhouse_common_io ${Boost_SYSTEM_LIBRARY} PRIVATE apple_rt + PUBLIC + Threads::Threads + PRIVATE ${CMAKE_DL_LIBS} ) -if (NOT ARCH_ARM AND CPUID_LIBRARY) - target_link_libraries (clickhouse_common_io PRIVATE ${CPUID_LIBRARY}) +target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR}) + +if(CPUID_LIBRARY) + target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY}) +endif() + +if(CPUINFO_LIBRARY) + target_link_libraries(clickhouse_common_io PRIVATE ${CPUINFO_LIBRARY}) endif() target_link_libraries (dbms @@ -225,11 +234,9 @@ target_link_libraries (dbms ${Boost_PROGRAM_OPTIONS_LIBRARY} PUBLIC ${Boost_SYSTEM_LIBRARY} + Threads::Threads ) -if (NOT USE_INTERNAL_RE2_LIBRARY) - target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RE2_INCLUDE_DIR}) -endif () if (NOT USE_INTERNAL_BOOST_LIBRARY) target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) @@ -249,7 +256,6 @@ if (USE_POCO_SQLODBC) endif() endif() -#if (Poco_Data_FOUND AND NOT USE_INTERNAL_POCO_LIBRARY) if (Poco_Data_FOUND) target_include_directories (clickhouse_common_io SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) target_include_directories (dbms SYSTEM PRIVATE ${Poco_Data_INCLUDE_DIR}) @@ -276,6 +282,7 @@ target_link_libraries (dbms PRIVATE ${Poco_Foundation_LIBRARY}) if (USE_ICU) 
target_link_libraries (dbms PRIVATE ${ICU_LIBRARIES}) + target_include_directories (dbms SYSTEM PRIVATE ${ICU_INCLUDE_DIRS}) endif () if (USE_CAPNP) @@ -298,6 +305,11 @@ target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) +if (USE_PROTOBUF) + target_link_libraries (dbms PRIVATE ${Protobuf_LIBRARY}) + target_include_directories (dbms SYSTEM BEFORE PRIVATE ${Protobuf_INCLUDE_DIR}) +endif () + if (USE_HDFS) target_link_libraries (clickhouse_common_io PRIVATE ${HDFS3_LIBRARY}) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR}) @@ -318,7 +330,7 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE add_subdirectory (programs) add_subdirectory (tests) -if (ENABLE_TESTS) +if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories file(GLOB_RECURSE "${DST_VAR}" RELATIVE "${BASE_DIR}" "gtest*.cpp") diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 9d7c6f2cda1..44befd634f9 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -28,11 +28,18 @@ add_subdirectory (copier) add_subdirectory (format) add_subdirectory (clang) add_subdirectory (obfuscator) -add_subdirectory (odbc-bridge) + +if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + add_subdirectory (odbc-bridge) +endif () if (CLICKHOUSE_SPLIT_BINARY) set (CLICKHOUSE_ALL_TARGETS clickhouse-server clickhouse-client clickhouse-local clickhouse-benchmark clickhouse-performance-test - clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier clickhouse-odbc-bridge) + clickhouse-extract-from-config clickhouse-compressor clickhouse-format clickhouse-copier) + + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) + list (APPEND CLICKHOUSE_ALL_TARGETS 
clickhouse-odbc-bridge) + endif () if (USE_EMBEDDED_COMPILER) list (APPEND CLICKHOUSE_ALL_TARGETS clickhouse-clang clickhouse-lld) @@ -85,9 +92,6 @@ else () if (USE_EMBEDDED_COMPILER) target_link_libraries (clickhouse PRIVATE clickhouse-compiler-lib) endif () - if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - target_link_libraries (clickhouse PRIVATE clickhouse-odbc-bridge-lib) - endif() set (CLICKHOUSE_BUNDLE) if (ENABLE_CLICKHOUSE_SERVER) @@ -135,15 +139,14 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-format DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-format) endif () - if (ENABLE_CLICKHOUSE_COPIER) + if (ENABLE_CLICKHOUSE_OBFUSCATOR) add_custom_target (clickhouse-obfuscator ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-obfuscator DEPENDS clickhouse) install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-obfuscator DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-obfuscator) endif () if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) - add_custom_target (clickhouse-odbc-bridge ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-odbc-bridge DEPENDS clickhouse) - install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-odbc-bridge DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) + # just to be able to run integration tests + add_custom_target (clickhouse-odbc-bridge-copy ALL COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_BINARY_DIR}/odbc-bridge/clickhouse-odbc-bridge clickhouse-odbc-bridge DEPENDS clickhouse-odbc-bridge) endif () diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 9bd3bda825a..b366add0ba5 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/dbms/programs/benchmark/CMakeLists.txt b/dbms/programs/benchmark/CMakeLists.txt index af11c600b2d..9814fac9875 100644 --- a/dbms/programs/benchmark/CMakeLists.txt +++ b/dbms/programs/benchmark/CMakeLists.txt @@ -5,4 +5,5 @@ target_include_directories (clickhouse-benchmark-lib SYSTEM PRIVATE ${PCG_RANDOM if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-benchmark clickhouse-benchmark.cpp) target_link_libraries (clickhouse-benchmark PRIVATE clickhouse-benchmark-lib clickhouse_aggregate_functions) + install (TARGETS clickhouse-benchmark ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index c5c5cdc664f..462720dea0e 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -7,6 +7,7 @@ endif () if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-client clickhouse-client.cpp) target_link_libraries (clickhouse-client PRIVATE clickhouse-client-lib) + install (TARGETS clickhouse-client ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () install (FILES clickhouse-client.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse-client RENAME config.xml) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 0b9ae4718e9..bde143a392f 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -219,6 +220,9 @@ private: APPLY_FOR_SETTINGS(EXTRACT_SETTING) #undef EXTRACT_SETTING + /// Set path for format schema files + if (config().has("format_schema_path")) + context.setFormatSchemaPath(Poco::Path(config().getString("format_schema_path")).toString()); } @@ -1206,6 +1210,10 @@ private: const auto & id = typeid_cast(*query_with_output->format); current_format = id.name; } + if 
(query_with_output->settings_ast) + { + InterpreterSetQuery(query_with_output->settings_ast, context).executeForCurrentContext(); + } } if (has_vertical_output_suffix) diff --git a/dbms/programs/client/ConnectionParameters.h b/dbms/programs/client/ConnectionParameters.h index 5df52e09b30..e4d9e4e3507 100644 --- a/dbms/programs/client/ConnectionParameters.h +++ b/dbms/programs/client/ConnectionParameters.h @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -59,7 +59,7 @@ struct ConnectionParameters SetTerminalEcho(false); SCOPE_EXIT({ - SetTerminalEcho(true); + setTerminalEcho(true); }); std::getline(std::cin, password); std::cout << std::endl; diff --git a/dbms/programs/compressor/CMakeLists.txt b/dbms/programs/compressor/CMakeLists.txt index bf3accfb8af..a76986173a5 100644 --- a/dbms/programs/compressor/CMakeLists.txt +++ b/dbms/programs/compressor/CMakeLists.txt @@ -5,4 +5,5 @@ if (CLICKHOUSE_SPLIT_BINARY) # Also in utils add_executable (clickhouse-compressor clickhouse-compressor.cpp) target_link_libraries (clickhouse-compressor PRIVATE clickhouse-compressor-lib) + install (TARGETS clickhouse-compressor ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/copier/CMakeLists.txt b/dbms/programs/copier/CMakeLists.txt index ed3e55208aa..158080ffce6 100644 --- a/dbms/programs/copier/CMakeLists.txt +++ b/dbms/programs/copier/CMakeLists.txt @@ -4,4 +4,5 @@ target_link_libraries (clickhouse-copier-lib PRIVATE clickhouse-server-lib click if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-copier clickhouse-copier.cpp) target_link_libraries (clickhouse-copier clickhouse-copier-lib) + install (TARGETS clickhouse-copier ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 4f285c83f17..59635e8cd95 100644 --- 
a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/dbms/programs/extract-from-config/CMakeLists.txt b/dbms/programs/extract-from-config/CMakeLists.txt index 62253649368..9d2ddcd7c2a 100644 --- a/dbms/programs/extract-from-config/CMakeLists.txt +++ b/dbms/programs/extract-from-config/CMakeLists.txt @@ -4,4 +4,5 @@ target_link_libraries (clickhouse-extract-from-config-lib PRIVATE clickhouse_com if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-extract-from-config clickhouse-extract-from-config.cpp) target_link_libraries (clickhouse-extract-from-config PRIVATE clickhouse-extract-from-config-lib) + install (TARGETS clickhouse-extract-from-config ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/format/CMakeLists.txt b/dbms/programs/format/CMakeLists.txt index 53d09e82621..67033730b07 100644 --- a/dbms/programs/format/CMakeLists.txt +++ b/dbms/programs/format/CMakeLists.txt @@ -3,4 +3,5 @@ target_link_libraries (clickhouse-format-lib PRIVATE dbms clickhouse_common_io c if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-format clickhouse-format.cpp) target_link_libraries (clickhouse-format PRIVATE clickhouse-format-lib) + install (TARGETS clickhouse-format ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/local/CMakeLists.txt b/dbms/programs/local/CMakeLists.txt index 07729d68563..5df54fd4e7a 100644 --- a/dbms/programs/local/CMakeLists.txt +++ b/dbms/programs/local/CMakeLists.txt @@ -4,4 +4,5 @@ target_link_libraries (clickhouse-local-lib PRIVATE clickhouse_common_io clickho if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-local clickhouse-local.cpp) target_link_libraries (clickhouse-local PRIVATE clickhouse-local-lib) + install (TARGETS clickhouse-local ${CLICKHOUSE_ALL_TARGETS} 
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 0acdae801ac..8ee23b987bb 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ int LocalServer::main(const std::vector & /*args*/) try { Logger * log = &logger(); - + ThreadStatus thread_status; UseSSL use_ssl; if (!config().has("query") && !config().has("table-structure")) /// Nothing to process diff --git a/dbms/programs/main.cpp b/dbms/programs/main.cpp index e8b8cd365d6..2b88a5b7b0f 100644 --- a/dbms/programs/main.cpp +++ b/dbms/programs/main.cpp @@ -56,9 +56,6 @@ int mainEntryClickHouseClusterCopier(int argc, char ** argv); #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) -int mainEntryClickHouseODBCBridge(int argc, char ** argv); -#endif #if USE_EMBEDDED_COMPILER @@ -105,9 +102,6 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) {"obfuscator", mainEntryClickHouseObfuscator}, #endif -#if ENABLE_CLICKHOUSE_ODBC_BRIDGE || !defined(ENABLE_CLICKHOUSE_ODBC_BRIDGE) - {"odbc-bridge", mainEntryClickHouseODBCBridge}, -#endif #if USE_EMBEDDED_COMPILER {"clang", mainEntryClickHouseClang}, diff --git a/dbms/programs/obfuscator/CMakeLists.txt b/dbms/programs/obfuscator/CMakeLists.txt index 73c3f01e9cb..77096c2a169 100644 --- a/dbms/programs/obfuscator/CMakeLists.txt +++ b/dbms/programs/obfuscator/CMakeLists.txt @@ -5,4 +5,5 @@ if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-obfuscator clickhouse-obfuscator.cpp) set_target_properties(clickhouse-obfuscator PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
target_link_libraries (clickhouse-obfuscator PRIVATE clickhouse-obfuscator-lib) + install (TARGETS clickhouse-obfuscator ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index d57a41ebfc6..12062b5a939 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) @@ -33,7 +33,11 @@ if (ENABLE_TESTS) add_subdirectory (tests) endif () -if (CLICKHOUSE_SPLIT_BINARY) - add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) - target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) -endif () +# clickhouse-odbc-bridge is always a separate binary. +# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. +# For this reason, we disabling -rdynamic linker flag. 
But we do it in strange way: +SET(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "") + +add_executable (clickhouse-odbc-bridge odbc-bridge.cpp) +target_link_libraries (clickhouse-odbc-bridge PRIVATE clickhouse-odbc-bridge-lib) +install (TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/dbms/programs/performance-test/CMakeLists.txt b/dbms/programs/performance-test/CMakeLists.txt index f1a08172009..974c64ef859 100644 --- a/dbms/programs/performance-test/CMakeLists.txt +++ b/dbms/programs/performance-test/CMakeLists.txt @@ -1,8 +1,21 @@ -add_library (clickhouse-performance-test-lib ${LINK_MODE} PerformanceTest.cpp) +add_library (clickhouse-performance-test-lib ${LINK_MODE} + JSONString.cpp + StopConditionsSet.cpp + TestStopConditions.cpp + TestStats.cpp + ConfigPreprocessor.cpp + PerformanceTest.cpp + PerformanceTestInfo.cpp + executeQuery.cpp + applySubstitutions.cpp + ReportBuilder.cpp + PerformanceTestSuite.cpp +) target_link_libraries (clickhouse-performance-test-lib PRIVATE dbms clickhouse_common_io clickhouse_common_config ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-performance-test-lib SYSTEM PRIVATE ${PCG_RANDOM_INCLUDE_DIR}) if (CLICKHOUSE_SPLIT_BINARY) add_executable (clickhouse-performance-test clickhouse-performance-test.cpp) target_link_libraries (clickhouse-performance-test PRIVATE clickhouse-performance-test-lib) + install (TARGETS clickhouse-performance-test ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () diff --git a/dbms/programs/performance-test/ConfigPreprocessor.cpp b/dbms/programs/performance-test/ConfigPreprocessor.cpp new file mode 100644 index 00000000000..c448d84bc88 --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.cpp @@ -0,0 +1,90 @@ +#include "ConfigPreprocessor.h" +#include +#include +#include +namespace DB +{ +std::vector ConfigPreprocessor::processConfig( + const Strings & tests_tags, + 
const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const +{ + + std::vector result; + for (const auto & path : paths) + { + result.emplace_back(new XMLConfiguration(path)); + result.back()->setString("path", Poco::Path(path).absolute().toString()); + } + + /// Leave tests: + removeConfigurationsIf(result, FilterType::Tag, tests_tags, true); + removeConfigurationsIf(result, FilterType::Name, tests_names, true); + removeConfigurationsIf(result, FilterType::Name_regexp, tests_names_regexp, true); + + /// Skip tests + removeConfigurationsIf(result, FilterType::Tag, skip_tags, false); + removeConfigurationsIf(result, FilterType::Name, skip_names, false); + removeConfigurationsIf(result, FilterType::Name_regexp, skip_names_regexp, false); + return result; +} + +void ConfigPreprocessor::removeConfigurationsIf( + std::vector & configs, + ConfigPreprocessor::FilterType filter_type, + const Strings & values, + bool leave) const +{ + auto checker = [&filter_type, &values, &leave] (XMLConfigurationPtr & config) + { + if (values.size() == 0) + return false; + + bool remove_or_not = false; + + if (filter_type == FilterType::Tag) + { + Strings tags_keys; + config->keys("tags", tags_keys); + + Strings tags(tags_keys.size()); + for (size_t i = 0; i != tags_keys.size(); ++i) + tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); + + for (const std::string & config_tag : tags) + { + if (std::find(values.begin(), values.end(), config_tag) != values.end()) + remove_or_not = true; + } + } + + if (filter_type == FilterType::Name) + { + remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); + } + + if (filter_type == FilterType::Name_regexp) + { + std::string config_name = config->getString("name", ""); + auto regex_checker = [&config_name](const std::string & name_regexp) + { + std::regex pattern(name_regexp); + 
return std::regex_search(config_name, pattern); + }; + + remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; + } + + if (leave) + remove_or_not = !remove_or_not; + return remove_or_not; + }; + + auto new_end = std::remove_if(configs.begin(), configs.end(), checker); + configs.erase(new_end, configs.end()); +} + +} diff --git a/dbms/programs/performance-test/ConfigPreprocessor.h b/dbms/programs/performance-test/ConfigPreprocessor.h new file mode 100644 index 00000000000..375bf9503cb --- /dev/null +++ b/dbms/programs/performance-test/ConfigPreprocessor.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using XMLDocumentPtr = Poco::AutoPtr; + +class ConfigPreprocessor +{ +public: + ConfigPreprocessor(const Strings & paths_) + : paths(paths_) + {} + + std::vector processConfig( + const Strings & tests_tags, + const Strings & tests_names, + const Strings & tests_names_regexp, + const Strings & skip_tags, + const Strings & skip_names, + const Strings & skip_names_regexp) const; + +private: + + enum class FilterType + { + Tag, + Name, + Name_regexp + }; + + /// Removes configurations that has a given value. + /// If leave is true, the logic is reversed. 
+ void removeConfigurationsIf( + std::vector & configs, + FilterType filter_type, + const Strings & values, + bool leave = false) const; + + const Strings paths; +}; +} diff --git a/dbms/programs/performance-test/JSONString.cpp b/dbms/programs/performance-test/JSONString.cpp new file mode 100644 index 00000000000..d25e190be50 --- /dev/null +++ b/dbms/programs/performance-test/JSONString.cpp @@ -0,0 +1,66 @@ +#include "JSONString.h" + +#include +#include +namespace DB +{ + +namespace +{ +std::string pad(size_t padding) +{ + return std::string(padding * 4, ' '); +} + +const std::regex NEW_LINE{"\n"}; +} + +void JSONString::set(const std::string & key, std::string value, bool wrap) +{ + if (value.empty()) + value = "null"; + + bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); + if (!reserved && wrap) + value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; + + content[key] = value; +} + +void JSONString::set(const std::string & key, const std::vector & run_infos) +{ + std::ostringstream value; + value << "[\n"; + + for (size_t i = 0; i < run_infos.size(); ++i) + { + value << pad(padding + 1) + run_infos[i].asString(padding + 2); + if (i != run_infos.size() - 1) + value << ','; + + value << "\n"; + } + + value << pad(padding) << ']'; + content[key] = value.str(); +} + +std::string JSONString::asString(size_t cur_padding) const +{ + std::ostringstream repr; + repr << "{"; + + for (auto it = content.begin(); it != content.end(); ++it) + { + if (it != content.begin()) + repr << ','; + /// construct "key": "value" string with padding + repr << "\n" << pad(cur_padding) << '"' << it->first << '"' << ": " << it->second; + } + + repr << "\n" << pad(cur_padding - 1) << '}'; + return repr.str(); +} + + +} diff --git a/dbms/programs/performance-test/JSONString.h b/dbms/programs/performance-test/JSONString.h new file mode 100644 index 00000000000..5695145442e --- /dev/null +++ b/dbms/programs/performance-test/JSONString.h @@ -0,0 +1,40 @@ +#pragma 
once +#include + +#include +#include +#include +#include + +namespace DB +{ + +/// NOTE The code is totally wrong. +class JSONString +{ +private: + std::map content; + size_t padding; + +public: + explicit JSONString(size_t padding_ = 1) : padding(padding_) {} + + void set(const std::string & key, std::string value, bool wrap = true); + + template + std::enable_if_t> set(const std::string key, T value) + { + set(key, std::to_string(value), /*wrap= */ false); + } + + void set(const std::string & key, const std::vector & run_infos); + + std::string asString() const + { + return asString(padding); + } + + std::string asString(size_t cur_padding) const; +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index e91365aeade..eb8d0ccbfda 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -1,1523 +1,237 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "PerformanceTest.h" + #include -#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") -#endif +#include - -/** Tests launcher for ClickHouse. - * The tool walks through given or default folder in order to find files with - * tests' descriptions and launches it. 
- */ -namespace fs = boost::filesystem; -using String = std::string; -const String FOUR_SPACES = " "; -const std::regex QUOTE_REGEX{"\""}; -const std::regex NEW_LINE{"\n"}; +#include "executeQuery.h" namespace DB { + namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; - extern const int FILE_DOESNT_EXIST; +extern const int NOT_IMPLEMENTED; } -static String pad(size_t padding) +namespace fs = boost::filesystem; + +PerformanceTest::PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const PerformanceTestInfo & test_info_, + Context & context_) + : config(config_) + , connection(connection_) + , interrupt_listener(interrupt_listener_) + , test_info(test_info_) + , context(context_) + , log(&Poco::Logger::get("PerformanceTest")) { - return String(padding * 4, ' '); } - -/// NOTE The code is totally wrong. -class JSONString +bool PerformanceTest::checkPreconditions() const { -private: - std::map content; - size_t padding; - -public: - explicit JSONString(size_t padding_ = 1) : padding(padding_) {} - - void set(const String key, String value, bool wrap = true) - { - if (value.empty()) - value = "null"; - - bool reserved = (value[0] == '[' || value[0] == '{' || value == "null"); - if (!reserved && wrap) - value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"'; - - content[key] = value; - } - - template - std::enable_if_t> set(const String key, T value) - { - set(key, std::to_string(value), /*wrap= */ false); - } - - void set(const String key, const std::vector & run_infos) - { - String value = "[\n"; - - for (size_t i = 0; i < run_infos.size(); ++i) - { - value += pad(padding + 1) + run_infos[i].asString(padding + 2); - if (i != run_infos.size() - 1) - value += ','; - - value += "\n"; - } - - value += pad(padding) + ']'; - content[key] = value; - } - - String asString() const - { - return asString(padding); - } - - 
String asString(size_t cur_padding) const - { - String repr = "{"; - - for (auto it = content.begin(); it != content.end(); ++it) - { - if (it != content.begin()) - repr += ','; - /// construct "key": "value" string with padding - repr += "\n" + pad(cur_padding) + '"' + it->first + '"' + ": " + it->second; - } - - repr += "\n" + pad(cur_padding - 1) + '}'; - return repr; - } -}; - - -using ConfigurationPtr = Poco::AutoPtr; - -/// A set of supported stop conditions. -struct StopConditionsSet -{ - void loadFromConfig(const ConfigurationPtr & stop_conditions_view) - { - using Keys = std::vector; - Keys keys; - stop_conditions_view->keys(keys); - - for (const String & key : keys) - { - if (key == "total_time_ms") - total_time_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "rows_read") - rows_read.value = stop_conditions_view->getUInt64(key); - else if (key == "bytes_read_uncompressed") - bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); - else if (key == "iterations") - iterations.value = stop_conditions_view->getUInt64(key); - else if (key == "min_time_not_changing_for_ms") - min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "max_speed_not_changing_for_ms") - max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else if (key == "average_speed_not_changing_for_ms") - average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); - else - throw DB::Exception("Met unkown stop condition: " + key, DB::ErrorCodes::LOGICAL_ERROR); - - ++initialized_count; - } - } - - void reset() - { - total_time_ms.fulfilled = false; - rows_read.fulfilled = false; - bytes_read_uncompressed.fulfilled = false; - iterations.fulfilled = false; - min_time_not_changing_for_ms.fulfilled = false; - max_speed_not_changing_for_ms.fulfilled = false; - average_speed_not_changing_for_ms.fulfilled = false; - - fulfilled_count = 0; - } - - /// Note: only conditions with UInt64 
minimal thresholds are supported. - /// I.e. condition is fulfilled when value is exceeded. - struct StopCondition - { - UInt64 value = 0; - bool fulfilled = false; - }; - - void report(UInt64 value, StopCondition & condition) - { - if (condition.value && !condition.fulfilled && value >= condition.value) - { - condition.fulfilled = true; - ++fulfilled_count; - } - } - - StopCondition total_time_ms; - StopCondition rows_read; - StopCondition bytes_read_uncompressed; - StopCondition iterations; - StopCondition min_time_not_changing_for_ms; - StopCondition max_speed_not_changing_for_ms; - StopCondition average_speed_not_changing_for_ms; - - size_t initialized_count = 0; - size_t fulfilled_count = 0; -}; - -/// Stop conditions for a test run. The running test will be terminated in either of two conditions: -/// 1. All conditions marked 'all_of' are fulfilled -/// or -/// 2. Any condition marked 'any_of' is fulfilled -class TestStopConditions -{ -public: - void loadFromConfig(ConfigurationPtr & stop_conditions_config) - { - if (stop_conditions_config->has("all_of")) - { - ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); - conditions_all_of.loadFromConfig(config_all_of); - } - if (stop_conditions_config->has("any_of")) - { - ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); - conditions_any_of.loadFromConfig(config_any_of); - } - } - - bool empty() const - { - return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; - } - -#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ - void FUNC_NAME(UInt64 value) \ - { \ - conditions_all_of.report(value, conditions_all_of.CONDITION); \ - conditions_any_of.report(value, conditions_any_of.CONDITION); \ - } - - DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) - DEFINE_REPORT_FUNC(reportRowsRead, rows_read) - DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) - DEFINE_REPORT_FUNC(reportIterations, iterations) - 
DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) - DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) - -#undef REPORT - - bool areFulfilled() const - { - return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) - || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); - } - - void reset() - { - conditions_all_of.reset(); - conditions_any_of.reset(); - } - -private: - StopConditionsSet conditions_all_of; - StopConditionsSet conditions_any_of; -}; - -struct Stats -{ - Stopwatch watch; - Stopwatch watch_per_query; - Stopwatch min_time_watch; - Stopwatch max_rows_speed_watch; - Stopwatch max_bytes_speed_watch; - Stopwatch avg_rows_speed_watch; - Stopwatch avg_bytes_speed_watch; - - bool last_query_was_cancelled = false; - - size_t queries = 0; - - size_t total_rows_read = 0; - size_t total_bytes_read = 0; - - size_t last_query_rows_read = 0; - size_t last_query_bytes_read = 0; - - using Sampler = ReservoirSampler; - Sampler sampler{1 << 16}; - - /// min_time in ms - UInt64 min_time = std::numeric_limits::max(); - double total_time = 0; - - double max_rows_speed = 0; - double max_bytes_speed = 0; - - double avg_rows_speed_value = 0; - double avg_rows_speed_first = 0; - static double avg_rows_speed_precision; - - double avg_bytes_speed_value = 0; - double avg_bytes_speed_first = 0; - static double avg_bytes_speed_precision; - - size_t number_of_rows_speed_info_batches = 0; - size_t number_of_bytes_speed_info_batches = 0; - - bool ready = false; // check if a query wasn't interrupted by SIGINT - String exception; - - String getStatisticByName(const String & statistic_name) - { - if (statistic_name == "min_time") - { - return std::to_string(min_time) + "ms"; - } - if (statistic_name == "quantiles") - { - String result = "\n"; - - for (double 
percent = 10; percent <= 90; percent += 10) - { - result += FOUR_SPACES + std::to_string((percent / 100)); - result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); - result += "\n"; - } - result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; - result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; - result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); - - return result; - } - if (statistic_name == "total_time") - { - return std::to_string(total_time) + "s"; - } - if (statistic_name == "queries_per_second") - { - return std::to_string(queries / total_time); - } - if (statistic_name == "rows_per_second") - { - return std::to_string(total_rows_read / total_time); - } - if (statistic_name == "bytes_per_second") - { - return std::to_string(total_bytes_read / total_time); - } - - if (statistic_name == "max_rows_per_second") - { - return std::to_string(max_rows_speed); - } - if (statistic_name == "max_bytes_per_second") - { - return std::to_string(max_bytes_speed); - } - if (statistic_name == "avg_rows_per_second") - { - return std::to_string(avg_rows_speed_value); - } - if (statistic_name == "avg_bytes_per_second") - { - return std::to_string(avg_bytes_speed_value); - } - - return ""; - } - - void update_min_time(const UInt64 min_time_candidate) - { - if (min_time_candidate < min_time) - { - min_time = min_time_candidate; - min_time_watch.restart(); - } - } - - void update_average_speed(const double new_speed_info, - Stopwatch & avg_speed_watch, - size_t & number_of_info_batches, - double precision, - double & avg_speed_first, - double & avg_speed_value) - { - avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); - ++number_of_info_batches; - avg_speed_value /= number_of_info_batches; 
- - if (avg_speed_first == 0) - { - avg_speed_first = avg_speed_value; - } - - if (std::abs(avg_speed_value - avg_speed_first) >= precision) - { - avg_speed_first = avg_speed_value; - avg_speed_watch.restart(); - } - } - - void update_max_speed(const size_t max_speed_candidate, Stopwatch & max_speed_watch, double & max_speed) - { - if (max_speed_candidate > max_speed) - { - max_speed = max_speed_candidate; - max_speed_watch.restart(); - } - } - - void add(size_t rows_read_inc, size_t bytes_read_inc) - { - total_rows_read += rows_read_inc; - total_bytes_read += bytes_read_inc; - last_query_rows_read += rows_read_inc; - last_query_bytes_read += bytes_read_inc; - - double new_rows_speed = last_query_rows_read / watch_per_query.elapsedSeconds(); - double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); - - /// Update rows speed - update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); - update_average_speed(new_rows_speed, - avg_rows_speed_watch, - number_of_rows_speed_info_batches, - avg_rows_speed_precision, - avg_rows_speed_first, - avg_rows_speed_value); - /// Update bytes speed - update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); - update_average_speed(new_bytes_speed, - avg_bytes_speed_watch, - number_of_bytes_speed_info_batches, - avg_bytes_speed_precision, - avg_bytes_speed_first, - avg_bytes_speed_value); - } - - void updateQueryInfo() - { - ++queries; - sampler.insert(watch_per_query.elapsedSeconds()); - update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms - } - - void setTotalTime() - { - total_time = watch.elapsedSeconds(); - } - - void clear() - { - watch.restart(); - watch_per_query.restart(); - min_time_watch.restart(); - max_rows_speed_watch.restart(); - max_bytes_speed_watch.restart(); - avg_rows_speed_watch.restart(); - avg_bytes_speed_watch.restart(); - - last_query_was_cancelled = false; - - sampler.clear(); - - queries = 0; - total_rows_read = 0; - 
total_bytes_read = 0; - last_query_rows_read = 0; - last_query_bytes_read = 0; - - min_time = std::numeric_limits::max(); - total_time = 0; - max_rows_speed = 0; - max_bytes_speed = 0; - avg_rows_speed_value = 0; - avg_bytes_speed_value = 0; - avg_rows_speed_first = 0; - avg_bytes_speed_first = 0; - avg_rows_speed_precision = 0.001; - avg_bytes_speed_precision = 0.001; - number_of_rows_speed_info_batches = 0; - number_of_bytes_speed_info_batches = 0; - } -}; - -double Stats::avg_rows_speed_precision = 0.001; -double Stats::avg_bytes_speed_precision = 0.001; - -class PerformanceTest : public Poco::Util::Application -{ -public: - using Strings = std::vector; - - PerformanceTest(const String & host_, - const UInt16 port_, - const bool secure_, - const String & default_database_, - const String & user_, - const String & password_, - const bool lite_output_, - const String & profiles_file_, - Strings && input_files_, - Strings && tests_tags_, - Strings && skip_tags_, - Strings && tests_names_, - Strings && skip_names_, - Strings && tests_names_regexp_, - Strings && skip_names_regexp_, - const ConnectionTimeouts & timeouts) - : connection(host_, port_, default_database_, user_, password_, timeouts, "performance-test", Protocol::Compression::Enable, secure_ ? 
Protocol::Secure::Enable : Protocol::Secure::Disable), - gotSIGINT(false), - lite_output(lite_output_), - profiles_file(profiles_file_), - input_files(input_files_), - tests_tags(std::move(tests_tags_)), - skip_tags(std::move(skip_tags_)), - tests_names(std::move(tests_names_)), - skip_names(std::move(skip_names_)), - tests_names_regexp(std::move(tests_names_regexp_)), - skip_names_regexp(std::move(skip_names_regexp_)) - { - if (input_files.size() < 1) - { - throw DB::Exception("No tests were specified", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void initialize(Poco::Util::Application & self [[maybe_unused]]) - { - std::string home_path; - const char * home_path_cstr = getenv("HOME"); - if (home_path_cstr) - home_path = home_path_cstr; - configReadClient(Poco::Util::Application::instance().config(), home_path); - } - - int main(const std::vector < std::string > & /* args */) - { - std::string name; - UInt64 version_major; - UInt64 version_minor; - UInt64 version_patch; - UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); - - std::stringstream ss; - ss << version_major << "." << version_minor << "." 
<< version_patch; - server_version = ss.str(); - - processTestsConfigurations(input_files); - - return 0; - } - -private: - String test_name; - - using Query = String; - using Queries = std::vector; - using QueriesWithIndexes = std::vector>; - Queries queries; - - Connection connection; - std::string server_version; - - using Keys = std::vector; - - Settings settings; - Context global_context = Context::createGlobal(); - - InterruptListener interrupt_listener; - - using XMLConfiguration = Poco::Util::XMLConfiguration; - using XMLConfigurationPtr = Poco::AutoPtr; - - using Paths = std::vector; - using StringToVector = std::map>; - using StringToMap = std::map; - StringToMap substitutions; - - using StringKeyValue = std::map; - std::vector substitutions_maps; - - bool gotSIGINT; - std::vector stop_conditions_by_run; - String main_metric; - bool lite_output; - String profiles_file; - - Strings input_files; - std::vector tests_configurations; - - Strings tests_tags; - Strings skip_tags; - Strings tests_names; - Strings skip_names; - Strings tests_names_regexp; - Strings skip_names_regexp; - - enum class ExecutionType - { - Loop, - Once - }; - ExecutionType exec_type; - - enum class FilterType - { - Tag, - Name, - Name_regexp - }; - - size_t times_to_run = 1; - std::vector statistics_by_run; - - /// Removes configurations that has a given value. If leave is true, the logic is reversed. 
- void removeConfigurationsIf( - std::vector & configs, FilterType filter_type, const Strings & values, bool leave = false) - { - auto checker = [&filter_type, &values, &leave](XMLConfigurationPtr & config) - { - if (values.size() == 0) - return false; - - bool remove_or_not = false; - - if (filter_type == FilterType::Tag) - { - Keys tags_keys; - config->keys("tags", tags_keys); - - Strings tags(tags_keys.size()); - for (size_t i = 0; i != tags_keys.size(); ++i) - tags[i] = config->getString("tags.tag[" + std::to_string(i) + "]"); - - for (const String & config_tag : tags) - { - if (std::find(values.begin(), values.end(), config_tag) != values.end()) - remove_or_not = true; - } - } - - if (filter_type == FilterType::Name) - { - remove_or_not = (std::find(values.begin(), values.end(), config->getString("name", "")) != values.end()); - } - - if (filter_type == FilterType::Name_regexp) - { - String config_name = config->getString("name", ""); - auto regex_checker = [&config_name](const String & name_regexp) - { - std::regex pattern(name_regexp); - return std::regex_search(config_name, pattern); - }; - - remove_or_not = config->has("name") ? (std::find_if(values.begin(), values.end(), regex_checker) != values.end()) : false; - } - - if (leave) - remove_or_not = !remove_or_not; - return remove_or_not; - }; - - auto new_end = std::remove_if(configs.begin(), configs.end(), checker); - configs.erase(new_end, configs.end()); - } - - /// Filter tests by tags, names, regexp matching, etc. 
- void filterConfigurations() - { - /// Leave tests: - removeConfigurationsIf(tests_configurations, FilterType::Tag, tests_tags, true); - removeConfigurationsIf(tests_configurations, FilterType::Name, tests_names, true); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, tests_names_regexp, true); - - - /// Skip tests - removeConfigurationsIf(tests_configurations, FilterType::Tag, skip_tags, false); - removeConfigurationsIf(tests_configurations, FilterType::Name, skip_names, false); - removeConfigurationsIf(tests_configurations, FilterType::Name_regexp, skip_names_regexp, false); - } - - /// Checks specified preconditions per test (process cache, table existence, etc.) - bool checkPreconditions(const XMLConfigurationPtr & config) - { - if (!config->has("preconditions")) - return true; - - Keys preconditions; - config->keys("preconditions", preconditions); - size_t table_precondition_index = 0; - - for (const String & precondition : preconditions) - { - if (precondition == "flush_disk_cache") - { - if (system( - "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) - { - std::cerr << "Failed to flush disk cache" << std::endl; - return false; - } - } - - if (precondition == "ram_size") - { - size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); - size_t actual_ram = getMemoryAmount(); - if (!actual_ram) - throw DB::Exception("ram_size precondition not available on this platform", DB::ErrorCodes::NOT_IMPLEMENTED); - - if (ram_size_needed > actual_ram) - { - std::cerr << "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram << std::endl; - return false; - } - } - - if (precondition == "table_exists") - { - String precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; - String table_to_check = config->getString(precondition_key); - String query = "EXISTS TABLE " + table_to_check + ";"; - - size_t exist = 
0; - - connection.sendQuery(query, "", QueryProcessingStage::Complete, &settings, nullptr, false); - - while (true) - { - Connection::Packet packet = connection.receivePacket(); - - if (packet.type == Protocol::Server::Data) - { - for (const ColumnWithTypeAndName & column : packet.block) - { - if (column.name == "result" && column.column->size() > 0) - { - exist = column.column->get64(0); - if (exist) - break; - } - } - } - - if (packet.type == Protocol::Server::Exception || packet.type == Protocol::Server::EndOfStream) - break; - } - - if (!exist) - { - std::cerr << "Table " << table_to_check << " doesn't exist" << std::endl; - return false; - } - } - } - + if (!config->has("preconditions")) return true; - } - void processTestsConfigurations(const Paths & paths) + Strings preconditions; + config->keys("preconditions", preconditions); + size_t table_precondition_index = 0; + + for (const std::string & precondition : preconditions) { - tests_configurations.resize(paths.size()); - - for (size_t i = 0; i != paths.size(); ++i) + if (precondition == "flush_disk_cache") { - const String path = paths[i]; - tests_configurations[i] = XMLConfigurationPtr(new XMLConfiguration(path)); + if (system( + "(>&2 echo 'Flushing disk cache...') && (sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches') && (>&2 echo 'Flushed.')")) + { + LOG_WARNING(log, "Failed to flush disk cache"); + return false; + } } - filterConfigurations(); - - if (tests_configurations.size()) + if (precondition == "ram_size") { - Strings outputs; + size_t ram_size_needed = config->getUInt64("preconditions.ram_size"); + size_t actual_ram = getMemoryAmount(); + if (!actual_ram) + throw Exception("ram_size precondition not available on this platform", ErrorCodes::NOT_IMPLEMENTED); - for (auto & test_config : tests_configurations) + if (ram_size_needed > actual_ram) { - if (!checkPreconditions(test_config)) + LOG_WARNING(log, "Not enough RAM: need = " << ram_size_needed << ", present = " << actual_ram); + return false; + 
} + } + + if (precondition == "table_exists") + { + std::string precondition_key = "preconditions.table_exists[" + std::to_string(table_precondition_index++) + "]"; + std::string table_to_check = config->getString(precondition_key); + std::string query = "EXISTS TABLE " + table_to_check + ";"; + + size_t exist = 0; + + connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + + while (true) + { + Connection::Packet packet = connection.receivePacket(); + + if (packet.type == Protocol::Server::Data) { - std::cerr << "Preconditions are not fulfilled for test '" + test_config->getString("name", "") + "' "; - continue; - } - - String output = runTest(test_config); - if (lite_output) - std::cout << output; - else - outputs.push_back(output); - } - - if (!lite_output && outputs.size()) - { - std::cout << "[" << std::endl; - - for (size_t i = 0; i != outputs.size(); ++i) - { - std::cout << outputs[i]; - if (i != outputs.size() - 1) - std::cout << ","; - - std::cout << std::endl; - } - - std::cout << "]" << std::endl; - } - } - } - - void extractSettings( - const XMLConfigurationPtr & config, const String & key, const Strings & settings_list, std::map & settings_to_apply) - { - for (const String & setup : settings_list) - { - if (setup == "profile") - continue; - - String value = config->getString(key + "." 
+ setup); - if (value.empty()) - value = "true"; - - settings_to_apply[setup] = value; - } - } - - String runTest(XMLConfigurationPtr & test_config) - { - queries.clear(); - - test_name = test_config->getString("name"); - std::cerr << "Running: " << test_name << "\n"; - - if (test_config->has("settings")) - { - std::map settings_to_apply; - Keys config_settings; - test_config->keys("settings", config_settings); - - /// Preprocess configuration file - if (std::find(config_settings.begin(), config_settings.end(), "profile") != config_settings.end()) - { - if (!profiles_file.empty()) - { - String profile_name = test_config->getString("settings.profile"); - XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); - - Keys profile_settings; - profiles_config->keys("profiles." + profile_name, profile_settings); - - extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply); - } - } - - extractSettings(test_config, "settings", config_settings, settings_to_apply); - - /// This macro goes through all settings in the Settings.h - /// and, if found any settings in test's xml configuration - /// with the same name, sets its value to settings - std::map::iterator it; -#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ - it = settings_to_apply.find(#NAME); \ - if (it != settings_to_apply.end()) \ - settings.set(#NAME, settings_to_apply[#NAME]); - - APPLY_FOR_SETTINGS(EXTRACT_SETTING) - -#undef EXTRACT_SETTING - - if (std::find(config_settings.begin(), config_settings.end(), "average_rows_speed_precision") != config_settings.end()) - { - Stats::avg_rows_speed_precision = test_config->getDouble("settings.average_rows_speed_precision"); - } - - if (std::find(config_settings.begin(), config_settings.end(), "average_bytes_speed_precision") != config_settings.end()) - { - Stats::avg_bytes_speed_precision = test_config->getDouble("settings.average_bytes_speed_precision"); - } - } - - if (!test_config->has("query") && 
!test_config->has("query_file")) - { - throw DB::Exception("Missing query fields in test's config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query") && test_config->has("query_file")) - { - throw DB::Exception("Found both query and query_file fields. Choose only one", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("query")) - { - queries = DB::getMultipleValuesFromConfig(*test_config, "", "query"); - } - - if (test_config->has("query_file")) - { - const String filename = test_config->getString("query_file"); - if (filename.empty()) - throw DB::Exception("Empty file name", DB::ErrorCodes::BAD_ARGUMENTS); - - bool tsv = fs::path(filename).extension().string() == ".tsv"; - - ReadBufferFromFile query_file(filename); - Query query; - - if (tsv) - { - while (!query_file.eof()) - { - readEscapedString(query, query_file); - assertChar('\n', query_file); - queries.push_back(query); - } - } - else - { - readStringUntilEOF(query, query_file); - queries.push_back(query); - } - } - - if (queries.empty()) - { - throw DB::Exception("Did not find any query to execute: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (test_config->has("substitutions")) - { - /// Make "subconfig" of inner xml block - ConfigurationPtr substitutions_view(test_config->createView("substitutions")); - constructSubstitutions(substitutions_view, substitutions[test_name]); - - auto queries_pre_format = queries; - queries.clear(); - for (const auto & query : queries_pre_format) - { - auto formatted = formatQueries(query, substitutions[test_name]); - queries.insert(queries.end(), formatted.begin(), formatted.end()); - } - } - - if (!test_config->has("type")) - { - throw DB::Exception("Missing type property in config: " + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - } - - String config_exec_type = test_config->getString("type"); - if (config_exec_type == "loop") - exec_type = ExecutionType::Loop; - else if (config_exec_type == "once") - exec_type = 
ExecutionType::Once; - else - throw DB::Exception("Unknown type " + config_exec_type + " in :" + test_name, DB::ErrorCodes::BAD_ARGUMENTS); - - times_to_run = test_config->getUInt("times_to_run", 1); - - stop_conditions_by_run.clear(); - TestStopConditions stop_conditions_template; - if (test_config->has("stop_conditions")) - { - ConfigurationPtr stop_conditions_config(test_config->createView("stop_conditions")); - stop_conditions_template.loadFromConfig(stop_conditions_config); - } - - if (stop_conditions_template.empty()) - throw DB::Exception("No termination conditions were found in config", DB::ErrorCodes::BAD_ARGUMENTS); - - for (size_t i = 0; i < times_to_run * queries.size(); ++i) - stop_conditions_by_run.push_back(stop_conditions_template); - - - ConfigurationPtr metrics_view(test_config->createView("metrics")); - Keys metrics; - metrics_view->keys(metrics); - - main_metric.clear(); - if (test_config->has("main_metric")) - { - Keys main_metrics; - test_config->keys("main_metric", main_metrics); - if (main_metrics.size()) - main_metric = main_metrics[0]; - } - - if (!main_metric.empty()) - { - if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) - metrics.push_back(main_metric); - } - else - { - if (metrics.empty()) - throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS); - main_metric = metrics[0]; - if (lite_output) - throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (metrics.size() > 0) - checkMetricsInput(metrics); - - statistics_by_run.resize(times_to_run * queries.size()); - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - QueriesWithIndexes queries_with_indexes; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - size_t statistic_index = number_of_launch * queries.size() + query_index; - stop_conditions_by_run[statistic_index].reset(); - - 
queries_with_indexes.push_back({queries[query_index], statistic_index}); - } - - if (interrupt_listener.check()) - gotSIGINT = true; - - if (gotSIGINT) - break; - - runQueries(queries_with_indexes); - } - - if (lite_output) - return minOutput(); - else - return constructTotalInfo(metrics); - } - - void checkMetricsInput(const Strings & metrics) const - { - std::vector loop_metrics - = {"min_time", "quantiles", "total_time", "queries_per_second", "rows_per_second", "bytes_per_second"}; - - std::vector non_loop_metrics - = {"max_rows_per_second", "max_bytes_per_second", "avg_rows_per_second", "avg_bytes_per_second"}; - - if (exec_type == ExecutionType::Loop) - { - for (const String & metric : metrics) - if (std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric) != non_loop_metrics.end()) - throw DB::Exception("Wrong type of metric for loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - { - for (const String & metric : metrics) - if (std::find(loop_metrics.begin(), loop_metrics.end(), metric) != loop_metrics.end()) - throw DB::Exception("Wrong type of metric for non-loop execution type (" + metric + ")", DB::ErrorCodes::BAD_ARGUMENTS); - } - } - - void runQueries(const QueriesWithIndexes & queries_with_indexes) - { - for (const auto & [query, run_index] : queries_with_indexes) - { - TestStopConditions & stop_conditions = stop_conditions_by_run[run_index]; - Stats & statistics = statistics_by_run[run_index]; - - statistics.clear(); - try - { - execute(query, statistics, stop_conditions); - - if (exec_type == ExecutionType::Loop) - { - for (size_t iteration = 1; !gotSIGINT; ++iteration) + for (const ColumnWithTypeAndName & column : packet.block) { - stop_conditions.reportIterations(iteration); - if (stop_conditions.areFulfilled()) - break; - - execute(query, statistics, stop_conditions); - } - } - } - catch (const DB::Exception & e) - { - statistics.exception = e.what() + String(", ") + e.displayText(); - } - - if 
(!gotSIGINT) - { - statistics.ready = true; - } - } - } - - void execute(const Query & query, Stats & statistics, TestStopConditions & stop_conditions) - { - statistics.watch_per_query.restart(); - statistics.last_query_was_cancelled = false; - statistics.last_query_rows_read = 0; - statistics.last_query_bytes_read = 0; - - RemoteBlockInputStream stream(connection, query, {}, global_context, &settings); - - stream.setProgressCallback( - [&](const Progress & value) { this->checkFulfilledConditionsAndUpdate(value, stream, statistics, stop_conditions); }); - - stream.readPrefix(); - while (Block block = stream.read()) - ; - stream.readSuffix(); - - if (!statistics.last_query_was_cancelled) - statistics.updateQueryInfo(); - - statistics.setTotalTime(); - } - - void checkFulfilledConditionsAndUpdate( - const Progress & progress, RemoteBlockInputStream & stream, Stats & statistics, TestStopConditions & stop_conditions) - { - statistics.add(progress.rows, progress.bytes); - - stop_conditions.reportRowsRead(statistics.total_rows_read); - stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); - stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); - stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); - - if (stop_conditions.areFulfilled()) - { - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } - - if (interrupt_listener.check()) - { - gotSIGINT = true; - statistics.last_query_was_cancelled = true; - stream.cancel(false); - } - } - - void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) - { - Keys xml_substitutions; - substitutions_view->keys(xml_substitutions); - - for (size_t i = 0; i != 
xml_substitutions.size(); ++i) - { - const ConfigurationPtr xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); - - /// Property values for substitution will be stored in a vector - /// accessible by property name - std::vector xml_values; - xml_substitution->keys("values", xml_values); - - String name = xml_substitution->getString("name"); - - for (size_t j = 0; j != xml_values.size(); ++j) - { - out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); - } - } - } - - std::vector formatQueries(const String & query, StringToVector substitutions_to_generate) - { - std::vector queries_res; - runThroughAllOptionsAndPush(substitutions_to_generate.begin(), substitutions_to_generate.end(), query, queries_res); - return queries_res; - } - - /// Recursive method which goes through all substitution blocks in xml - /// and replaces property {names} by their values - void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, - StringToVector::iterator substitutions_right, - const String & template_query, - std::vector & out_queries) - { - if (substitutions_left == substitutions_right) - { - out_queries.push_back(template_query); /// completely substituted query - return; - } - - String substitution_mask = "{" + substitutions_left->first + "}"; - - if (template_query.find(substitution_mask) == String::npos) /// nothing to substitute here - { - runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); - return; - } - - for (const String & value : substitutions_left->second) - { - /// Copy query string for each unique permutation - Query query = template_query; - size_t substr_pos = 0; - - while (substr_pos != String::npos) - { - substr_pos = query.find(substitution_mask); - - if (substr_pos != String::npos) - query.replace(substr_pos, substitution_mask.length(), value); - } - - 
runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); - } - } - -public: - String constructTotalInfo(Strings metrics) - { - JSONString json_output; - - json_output.set("hostname", getFQDNOrHostName()); - json_output.set("num_cores", getNumberOfPhysicalCPUCores()); - json_output.set("num_threads", std::thread::hardware_concurrency()); - json_output.set("ram", getMemoryAmount()); - json_output.set("server_version", server_version); - json_output.set("time", DateLUT::instance().timeToString(time(nullptr))); - json_output.set("test_name", test_name); - json_output.set("main_metric", main_metric); - - if (substitutions[test_name].size()) - { - JSONString json_parameters(2); /// here, 2 is the size of \t padding - - for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it) - { - String parameter = it->first; - std::vector values = it->second; - - String array_string = "["; - for (size_t i = 0; i != values.size(); ++i) - { - array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"'; - if (i != values.size() - 1) - { - array_string += ", "; - } - } - array_string += ']'; - - json_parameters.set(parameter, array_string); - } - - json_output.set("parameters", json_parameters.asString()); - } - - std::vector run_infos; - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) - { - Stats & statistics = statistics_by_run[number_of_launch * queries.size() + query_index]; - - if (!statistics.ready) - continue; - - JSONString runJSON; - - runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\"")); - if (!statistics.exception.empty()) - runJSON.set("exception", statistics.exception); - - if (substitutions_maps.size()) - { - JSONString parameters(4); - - for (auto it = substitutions_maps[query_index].begin(); it != 
substitutions_maps[query_index].end(); ++it) - { - parameters.set(it->first, it->second); - } - - runJSON.set("parameters", parameters.asString()); - } - - - if (exec_type == ExecutionType::Loop) - { - /// in seconds - if (std::find(metrics.begin(), metrics.end(), "min_time") != metrics.end()) - runJSON.set("min_time", statistics.min_time / double(1000)); - - if (std::find(metrics.begin(), metrics.end(), "quantiles") != metrics.end()) - { - JSONString quantiles(4); /// here, 4 is the size of \t padding - for (double percent = 10; percent <= 90; percent += 10) + if (column.name == "result" && column.column->size() > 0) { - String quantile_key = std::to_string(percent / 100.0); - while (quantile_key.back() == '0') - quantile_key.pop_back(); - - quantiles.set(quantile_key, statistics.sampler.quantileInterpolated(percent / 100.0)); + exist = column.column->get64(0); + if (exist) + break; } - quantiles.set("0.95", statistics.sampler.quantileInterpolated(95 / 100.0)); - quantiles.set("0.99", statistics.sampler.quantileInterpolated(99 / 100.0)); - quantiles.set("0.999", statistics.sampler.quantileInterpolated(99.9 / 100.0)); - quantiles.set("0.9999", statistics.sampler.quantileInterpolated(99.99 / 100.0)); - - runJSON.set("quantiles", quantiles.asString()); } - - if (std::find(metrics.begin(), metrics.end(), "total_time") != metrics.end()) - runJSON.set("total_time", statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "queries_per_second") != metrics.end()) - runJSON.set("queries_per_second", double(statistics.queries) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "rows_per_second") != metrics.end()) - runJSON.set("rows_per_second", double(statistics.total_rows_read) / statistics.total_time); - - if (std::find(metrics.begin(), metrics.end(), "bytes_per_second") != metrics.end()) - runJSON.set("bytes_per_second", double(statistics.total_bytes_read) / statistics.total_time); - } - else - { - if (std::find(metrics.begin(), 
metrics.end(), "max_rows_per_second") != metrics.end()) - runJSON.set("max_rows_per_second", statistics.max_rows_speed); - - if (std::find(metrics.begin(), metrics.end(), "max_bytes_per_second") != metrics.end()) - runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); - - if (std::find(metrics.begin(), metrics.end(), "avg_rows_per_second") != metrics.end()) - runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); - - if (std::find(metrics.begin(), metrics.end(), "avg_bytes_per_second") != metrics.end()) - runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); } - run_infos.push_back(runJSON); + if (packet.type == Protocol::Server::Exception + || packet.type == Protocol::Server::EndOfStream) + break; } - } - json_output.set("runs", run_infos); - - return json_output.asString(); - } - - String minOutput() - { - String output; - - for (size_t query_index = 0; query_index < queries.size(); ++query_index) - { - for (size_t number_of_launch = 0; number_of_launch < times_to_run; ++number_of_launch) + if (!exist) { - if (queries.size() > 1) - { - output += "query \"" + queries[query_index] + "\", "; - } - - if (substitutions_maps.size()) - { - for (auto it = substitutions_maps[query_index].begin(); it != substitutions_maps[query_index].end(); ++it) - { - output += it->first + " = " + it->second + ", "; - } - } - - output += "run " + std::to_string(number_of_launch + 1) + ": "; - output += main_metric + " = "; - output += statistics_by_run[number_of_launch * queries.size() + query_index].getStatisticByName(main_metric); - output += "\n"; + LOG_WARNING(log, "Table " << table_to_check << " doesn't exist"); + return false; } } - - return output; } -}; -} -static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) -{ - if (dir.extension().string() == ".xml") - std::cerr << "Warning: '" + dir.string() + "' is a directory, but has .xml extension" << std::endl; - - fs::directory_iterator end; 
- for (fs::directory_iterator it(dir); it != end; ++it) - { - const fs::path file = (*it); - if (recursive && fs::is_directory(file)) - getFilesFromDir(file, input_files, recursive); - else if (!fs::is_directory(file) && file.extension().string() == ".xml") - input_files.push_back(file.string()); - } + return true; } -int mainEntryClickHousePerformanceTest(int argc, char ** argv) -try +UInt64 PerformanceTest::calculateMaxExecTime() const { - using boost::program_options::value; - using Strings = std::vector; - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help", "produce help message") - ("lite", "use lite version of output") - ("profiles-file", value()->default_value(""), "Specify a file with global profiles") - ("host,h", value()->default_value("localhost"), "") - ("port", value()->default_value(9000), "") - ("secure,s", "Use TLS connection") - ("database", value()->default_value("default"), "") - ("user", value()->default_value("default"), "") - ("password", value()->default_value(""), "") - ("tags", value()->multitoken(), "Run only tests with tag") - ("skip-tags", value()->multitoken(), "Do not run tests with tag") - ("names", value()->multitoken(), "Run tests with specific name") - ("skip-names", value()->multitoken(), "Do not run tests with name") - ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") - ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") - ("recursive,r", "Recurse in directories to find all xml's"); - - /// These options will not be displayed in --help - boost::program_options::options_description hidden("Hidden options"); - hidden.add_options() - ("input-files", value>(), ""); - - /// But they will be legit, though. 
And they must be given without name - boost::program_options::positional_options_description positional; - positional.add("input-files", -1); - - boost::program_options::options_description cmdline_options; - cmdline_options.add(desc).add(hidden); - - boost::program_options::variables_map options; - boost::program_options::store( - boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(positional).run(), options); - boost::program_options::notify(options); - - if (options.count("help")) + UInt64 result = 0; + for (const auto & stop_conditions : test_info.stop_conditions_by_run) { - std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; - std::cout << desc << "\n"; - return 0; + UInt64 condition_max_time = stop_conditions.getMaxExecTime(); + if (condition_max_time == 0) + return 0; + result += condition_max_time; + } + return result; +} + + +void PerformanceTest::prepare() const +{ + for (const auto & query : test_info.create_queries) + { + LOG_INFO(log, "Executing create query '" << query << "'"); + connection.sendQuery(query); } - Strings input_files; - bool recursive = options.count("recursive"); - - if (!options.count("input-files")) + for (const auto & query : test_info.fill_queries) { - std::cerr << "Trying to find test scenario files in the current folder..."; - fs::path curr_dir("."); - - getFilesFromDir(curr_dir, input_files, recursive); - - if (input_files.empty()) - { - std::cerr << std::endl; - throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); - } - else - std::cerr << " found " << input_files.size() << " files." 
<< std::endl; + LOG_INFO(log, "Executing fill query '" << query << "'"); + connection.sendQuery(query); } + +} + +void PerformanceTest::finish() const +{ + for (const auto & query : test_info.drop_queries) + { + LOG_INFO(log, "Executing drop query '" << query << "'"); + connection.sendQuery(query); + } +} + +std::vector PerformanceTest::execute() +{ + std::vector statistics_by_run; + size_t total_runs = test_info.times_to_run * test_info.queries.size(); + statistics_by_run.resize(total_runs); + LOG_INFO(log, "Totally will run cases " << total_runs << " times"); + UInt64 max_exec_time = calculateMaxExecTime(); + if (max_exec_time != 0) + LOG_INFO(log, "Test will be executed for a maximum of " << max_exec_time / 1000. << " seconds"); else + LOG_INFO(log, "Test execution time cannot be determined"); + + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) { - input_files = options["input-files"].as(); - Strings collected_files; + QueriesWithIndexes queries_with_indexes; - for (const String & filename : input_files) + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) { - fs::path file(filename); + size_t statistic_index = number_of_launch * test_info.queries.size() + query_index; - if (!fs::exists(file)) - throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); - - if (fs::is_directory(file)) - { - getFilesFromDir(file, collected_files, recursive); - } - else - { - if (file.extension().string() != ".xml") - throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); - collected_files.push_back(filename); - } + queries_with_indexes.push_back({test_info.queries[query_index], statistic_index}); } - input_files = std::move(collected_files); + if (got_SIGINT) + break; + + runQueries(queries_with_indexes, statistics_by_run); } - - Strings tests_tags = options.count("tags") ? 
options["tags"].as() : Strings({}); - Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); - Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); - Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); - Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); - Strings skip_names_regexp = options.count("skip-names-regexp") ? options["skip-names-regexp"].as() : Strings({}); - - auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); - - DB::UseSSL use_ssl; - - DB::PerformanceTest performance_test( - options["host"].as(), - options["port"].as(), - options.count("secure"), - options["database"].as(), - options["user"].as(), - options["password"].as(), - options.count("lite") > 0, - options["profiles-file"].as(), - std::move(input_files), - std::move(tests_tags), - std::move(skip_tags), - std::move(tests_names), - std::move(skip_names), - std::move(tests_names_regexp), - std::move(skip_names_regexp), - timeouts); - return performance_test.run(); + return statistics_by_run; } -catch (...) + +void PerformanceTest::runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run) { - std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; - int code = DB::getCurrentExceptionCode(); - return code ? 
code : 1; + for (const auto & [query, run_index] : queries_with_indexes) + { + LOG_INFO(log, "[" << run_index<< "] Run query '" << query << "'"); + TestStopConditions & stop_conditions = test_info.stop_conditions_by_run[run_index]; + TestStats & statistics = statistics_by_run[run_index]; + statistics.startWatches(); + try + { + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); + + if (test_info.exec_type == ExecutionType::Loop) + { + LOG_INFO(log, "Will run query in loop"); + for (size_t iteration = 1; !statistics.got_SIGINT; ++iteration) + { + stop_conditions.reportIterations(iteration); + if (stop_conditions.areFulfilled()) + { + LOG_INFO(log, "Stop conditions fullfilled"); + break; + } + + executeQuery(connection, query, statistics, stop_conditions, interrupt_listener, context); + } + } + } + catch (const Exception & e) + { + statistics.exception = "Code: " + std::to_string(e.code()) + ", e.displayText() = " + e.displayText(); + LOG_WARNING(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText() + << ", Stack trace:\n\n" << e.getStackTrace().toString()); + } + + if (!statistics.got_SIGINT) + statistics.ready = true; + else + { + got_SIGINT = true; + LOG_INFO(log, "Got SIGINT, will terminate as soon as possible"); + break; + } + } +} + + } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h new file mode 100644 index 00000000000..107c1bb6963 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -0,0 +1,62 @@ +#pragma once + +#include +#include +#include +#include + +#include "PerformanceTestInfo.h" + +namespace DB +{ + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using QueriesWithIndexes = std::vector>; + +class PerformanceTest +{ +public: + PerformanceTest( + const XMLConfigurationPtr & config_, + Connection & connection_, + InterruptListener & interrupt_listener_, + const 
PerformanceTestInfo & test_info_, + Context & context_); + + bool checkPreconditions() const; + void prepare() const; + std::vector execute(); + void finish() const; + + const PerformanceTestInfo & getTestInfo() const + { + return test_info; + } + + bool checkSIGINT() const + { + return got_SIGINT; + } + +private: + void runQueries( + const QueriesWithIndexes & queries_with_indexes, + std::vector & statistics_by_run); + + UInt64 calculateMaxExecTime() const; + +private: + XMLConfigurationPtr config; + Connection & connection; + InterruptListener & interrupt_listener; + + PerformanceTestInfo test_info; + Context & context; + + Poco::Logger * log; + + bool got_SIGINT = false; +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.cpp b/dbms/programs/performance-test/PerformanceTestInfo.cpp new file mode 100644 index 00000000000..e10fd1e915f --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.cpp @@ -0,0 +1,285 @@ +#include "PerformanceTestInfo.h" +#include +#include +#include +#include +#include +#include "applySubstitutions.h" +#include + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace +{ + +void extractSettings( + const XMLConfigurationPtr & config, + const std::string & key, + const Strings & settings_list, + std::map & settings_to_apply) +{ + for (const std::string & setup : settings_list) + { + if (setup == "profile") + continue; + + std::string value = config->getString(key + "." 
+ setup); + if (value.empty()) + value = "true"; + + settings_to_apply[setup] = value; + } +} + +void checkMetricsInput(const Strings & metrics, ExecutionType exec_type) +{ + Strings loop_metrics = { + "min_time", "quantiles", "total_time", + "queries_per_second", "rows_per_second", + "bytes_per_second"}; + + Strings non_loop_metrics = { + "max_rows_per_second", "max_bytes_per_second", + "avg_rows_per_second", "avg_bytes_per_second"}; + + if (exec_type == ExecutionType::Loop) + { + for (const std::string & metric : metrics) + { + auto non_loop_pos = + std::find(non_loop_metrics.begin(), non_loop_metrics.end(), metric); + + if (non_loop_pos != non_loop_metrics.end()) + throw Exception("Wrong type of metric for loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } + else + { + for (const std::string & metric : metrics) + { + auto loop_pos = std::find(loop_metrics.begin(), loop_metrics.end(), metric); + if (loop_pos != loop_metrics.end()) + throw Exception( + "Wrong type of metric for non-loop execution type (" + metric + ")", + ErrorCodes::BAD_ARGUMENTS); + } + } +} + +} + + +namespace fs = boost::filesystem; + +PerformanceTestInfo::PerformanceTestInfo( + XMLConfigurationPtr config, + const std::string & profiles_file_) + : profiles_file(profiles_file_) +{ + test_name = config->getString("name"); + path = config->getString("path"); + applySettings(config); + extractQueries(config); + processSubstitutions(config); + getExecutionType(config); + getStopConditions(config); + getMetrics(config); + extractAuxiliaryQueries(config); +} + +void PerformanceTestInfo::applySettings(XMLConfigurationPtr config) +{ + if (config->has("settings")) + { + std::map settings_to_apply; + Strings config_settings; + config->keys("settings", config_settings); + + auto settings_contain = [&config_settings] (const std::string & setting) + { + auto position = std::find(config_settings.begin(), config_settings.end(), setting); + return position != config_settings.end(); + 
+ }; + /// Preprocess configuration file + if (settings_contain("profile")) + { + if (!profiles_file.empty()) + { + std::string profile_name = config->getString("settings.profile"); + XMLConfigurationPtr profiles_config(new XMLConfiguration(profiles_file)); + + Strings profile_settings; + profiles_config->keys("profiles." + profile_name, profile_settings); + + extractSettings(profiles_config, "profiles." + profile_name, profile_settings, settings_to_apply); + } + } + + extractSettings(config, "settings", config_settings, settings_to_apply); + + /// This macro goes through all settings in the Settings.h + /// and, if found any settings in test's xml configuration + /// with the same name, sets its value to settings + std::map::iterator it; +#define EXTRACT_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION) \ + it = settings_to_apply.find(#NAME); \ + if (it != settings_to_apply.end()) \ + settings.set(#NAME, settings_to_apply[#NAME]); + + APPLY_FOR_SETTINGS(EXTRACT_SETTING) + +#undef EXTRACT_SETTING + + if (settings_contain("average_rows_speed_precision")) + TestStats::avg_rows_speed_precision = + config->getDouble("settings.average_rows_speed_precision"); + + if (settings_contain("average_bytes_speed_precision")) + TestStats::avg_bytes_speed_precision = + config->getDouble("settings.average_bytes_speed_precision"); + } +} + +void PerformanceTestInfo::extractQueries(XMLConfigurationPtr config) +{ + if (config->has("query")) + queries = getMultipleValuesFromConfig(*config, "", "query"); + + if (config->has("query_file")) + { + const std::string filename = config->getString("query_file"); + if (filename.empty()) + throw Exception("Empty file name", ErrorCodes::BAD_ARGUMENTS); + + bool tsv = fs::path(filename).extension().string() == ".tsv"; + + ReadBufferFromFile query_file(filename); + std::string query; + + if (tsv) + { + while (!query_file.eof()) + { + readEscapedString(query, query_file); + assertChar('\n', query_file); + queries.push_back(query); + } + } + else + { + 
readStringUntilEOF(query, query_file); + queries.push_back(query); + } + } + + if (queries.empty()) + throw Exception("Did not find any query to execute: " + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + +void PerformanceTestInfo::processSubstitutions(XMLConfigurationPtr config) +{ + if (config->has("substitutions")) + { + /// Make "subconfig" of inner xml block + ConfigurationPtr substitutions_view(config->createView("substitutions")); + constructSubstitutions(substitutions_view, substitutions); + + auto queries_pre_format = queries; + queries.clear(); + for (const auto & query : queries_pre_format) + { + auto formatted = formatQueries(query, substitutions); + queries.insert(queries.end(), formatted.begin(), formatted.end()); + } + } +} + +void PerformanceTestInfo::getExecutionType(XMLConfigurationPtr config) +{ + if (!config->has("type")) + throw Exception("Missing type property in config: " + test_name, + ErrorCodes::BAD_ARGUMENTS); + + std::string config_exec_type = config->getString("type"); + if (config_exec_type == "loop") + exec_type = ExecutionType::Loop; + else if (config_exec_type == "once") + exec_type = ExecutionType::Once; + else + throw Exception("Unknown type " + config_exec_type + " in :" + test_name, + ErrorCodes::BAD_ARGUMENTS); +} + + +void PerformanceTestInfo::getStopConditions(XMLConfigurationPtr config) +{ + TestStopConditions stop_conditions_template; + if (config->has("stop_conditions")) + { + ConfigurationPtr stop_conditions_config(config->createView("stop_conditions")); + stop_conditions_template.loadFromConfig(stop_conditions_config); + } + + if (stop_conditions_template.empty()) + throw Exception("No termination conditions were found in config", + ErrorCodes::BAD_ARGUMENTS); + + times_to_run = config->getUInt("times_to_run", 1); + + for (size_t i = 0; i < times_to_run * queries.size(); ++i) + stop_conditions_by_run.push_back(stop_conditions_template); + +} + + +void PerformanceTestInfo::getMetrics(XMLConfigurationPtr config) +{ + 
ConfigurationPtr metrics_view(config->createView("metrics")); + metrics_view->keys(metrics); + + if (config->has("main_metric")) + { + Strings main_metrics; + config->keys("main_metric", main_metrics); + if (main_metrics.size()) + main_metric = main_metrics[0]; + } + + if (!main_metric.empty()) + { + if (std::find(metrics.begin(), metrics.end(), main_metric) == metrics.end()) + metrics.push_back(main_metric); + } + else + { + if (metrics.empty()) + throw Exception("You shoud specify at least one metric", + ErrorCodes::BAD_ARGUMENTS); + main_metric = metrics[0]; + } + + if (metrics.size() > 0) + checkMetricsInput(metrics, exec_type); +} + +void PerformanceTestInfo::extractAuxiliaryQueries(XMLConfigurationPtr config) +{ + if (config->has("create_query")) + create_queries = getMultipleValuesFromConfig(*config, "", "create_query"); + + if (config->has("fill_query")) + fill_queries = getMultipleValuesFromConfig(*config, "", "fill_query"); + + if (config->has("drop_query")) + drop_queries = getMultipleValuesFromConfig(*config, "", "drop_query"); +} + +} diff --git a/dbms/programs/performance-test/PerformanceTestInfo.h b/dbms/programs/performance-test/PerformanceTestInfo.h new file mode 100644 index 00000000000..9b84a885de0 --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestInfo.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include "StopConditionsSet.h" +#include "TestStopConditions.h" +#include "TestStats.h" + +namespace DB +{ +enum class ExecutionType +{ + Loop, + Once +}; + +using XMLConfiguration = Poco::Util::XMLConfiguration; +using XMLConfigurationPtr = Poco::AutoPtr; +using StringToVector = std::map; + +/// Class containing all info to run performance test +class PerformanceTestInfo +{ +public: + PerformanceTestInfo(XMLConfigurationPtr config, const std::string & profiles_file_); + + std::string test_name; + std::string path; + std::string main_metric; + + Strings queries; + Strings metrics; + + 
Settings settings; + ExecutionType exec_type; + StringToVector substitutions; + size_t times_to_run; + + std::string profiles_file; + std::vector stop_conditions_by_run; + + Strings create_queries; + Strings fill_queries; + Strings drop_queries; + +private: + void applySettings(XMLConfigurationPtr config); + void extractQueries(XMLConfigurationPtr config); + void processSubstitutions(XMLConfigurationPtr config); + void getExecutionType(XMLConfigurationPtr config); + void getStopConditions(XMLConfigurationPtr config); + void getMetrics(XMLConfigurationPtr config); + void extractAuxiliaryQueries(XMLConfigurationPtr config); +}; + +} diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp new file mode 100644 index 00000000000..d26d182fc2a --- /dev/null +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -0,0 +1,389 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "TestStopConditions.h" +#include "TestStats.h" +#include "ConfigPreprocessor.h" +#include "PerformanceTest.h" +#include "ReportBuilder.h" + + +namespace fs = boost::filesystem; +namespace po = boost::program_options; + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int FILE_DOESNT_EXIST; +} + +/** Tests launcher for ClickHouse. + * The tool walks through given or default folder in order to find files with + * tests' descriptions and launches it. 
+ */ +class PerformanceTestSuite +{ +public: + + PerformanceTestSuite(const std::string & host_, + const UInt16 port_, + const bool secure_, + const std::string & default_database_, + const std::string & user_, + const std::string & password_, + const bool lite_output_, + const std::string & profiles_file_, + Strings && input_files_, + Strings && tests_tags_, + Strings && skip_tags_, + Strings && tests_names_, + Strings && skip_names_, + Strings && tests_names_regexp_, + Strings && skip_names_regexp_, + const ConnectionTimeouts & timeouts) + : connection(host_, port_, default_database_, user_, + password_, timeouts, "performance-test", Protocol::Compression::Enable, + secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable) + , tests_tags(std::move(tests_tags_)) + , tests_names(std::move(tests_names_)) + , tests_names_regexp(std::move(tests_names_regexp_)) + , skip_tags(std::move(skip_tags_)) + , skip_names(std::move(skip_names_)) + , skip_names_regexp(std::move(skip_names_regexp_)) + , lite_output(lite_output_) + , profiles_file(profiles_file_) + , input_files(input_files_) + , log(&Poco::Logger::get("PerformanceTestSuite")) + { + if (input_files.size() < 1) + throw Exception("No tests were specified", ErrorCodes::BAD_ARGUMENTS); + } + + /// This functionality seems strange. + //void initialize(Poco::Util::Application & self [[maybe_unused]]) + //{ + // std::string home_path; + // const char * home_path_cstr = getenv("HOME"); + // if (home_path_cstr) + // home_path = home_path_cstr; + // configReadClient(Poco::Util::Application::instance().config(), home_path); + //} + + int run() + { + std::string name; + UInt64 version_major; + UInt64 version_minor; + UInt64 version_patch; + UInt64 version_revision; + connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + + std::stringstream ss; + ss << version_major << "." << version_minor << "." 
<< version_patch; + server_version = ss.str(); + + report_builder = std::make_shared(server_version); + + processTestsConfigurations(input_files); + + return 0; + } + +private: + Connection connection; + + const Strings & tests_tags; + const Strings & tests_names; + const Strings & tests_names_regexp; + const Strings & skip_tags; + const Strings & skip_names; + const Strings & skip_names_regexp; + + Context global_context = Context::createGlobal(); + std::shared_ptr report_builder; + + std::string server_version; + + InterruptListener interrupt_listener; + + using XMLConfiguration = Poco::Util::XMLConfiguration; + using XMLConfigurationPtr = Poco::AutoPtr; + + bool lite_output; + std::string profiles_file; + + Strings input_files; + std::vector tests_configurations; + Poco::Logger * log; + + void processTestsConfigurations(const Strings & paths) + { + LOG_INFO(log, "Preparing test configurations"); + ConfigPreprocessor config_prep(paths); + tests_configurations = config_prep.processConfig( + tests_tags, + tests_names, + tests_names_regexp, + skip_tags, + skip_names, + skip_names_regexp); + + LOG_INFO(log, "Test configurations prepared"); + + if (tests_configurations.size()) + { + Strings outputs; + + for (auto & test_config : tests_configurations) + { + auto [output, signal] = runTest(test_config); + if (lite_output) + std::cout << output; + else + outputs.push_back(output); + + if (signal) + break; + } + + if (!lite_output && outputs.size()) + { + std::cout << "[" << std::endl; + + for (size_t i = 0; i != outputs.size(); ++i) + { + std::cout << outputs[i]; + if (i != outputs.size() - 1) + std::cout << ","; + + std::cout << std::endl; + } + + std::cout << "]" << std::endl; + } + } + } + + std::pair runTest(XMLConfigurationPtr & test_config) + { + PerformanceTestInfo info(test_config, profiles_file); + LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); + PerformanceTest current(test_config, connection, interrupt_listener, info, global_context); + + 
current.checkPreconditions(); + LOG_INFO(log, "Preconditions for test '" << info.test_name << "' are fullfilled"); + LOG_INFO(log, "Preparing for run, have " << info.create_queries.size() + << " create queries and " << info.fill_queries.size() << " fill queries"); + current.prepare(); + LOG_INFO(log, "Prepared"); + LOG_INFO(log, "Running test '" << info.test_name << "'"); + auto result = current.execute(); + LOG_INFO(log, "Test '" << info.test_name << "' finished"); + + LOG_INFO(log, "Running post run queries"); + current.finish(); + LOG_INFO(log, "Postqueries finished"); + + if (lite_output) + return {report_builder->buildCompactReport(info, result), current.checkSIGINT()}; + else + return {report_builder->buildFullReport(info, result), current.checkSIGINT()}; + } + +}; + +} + +static void getFilesFromDir(const fs::path & dir, std::vector & input_files, const bool recursive = false) +{ + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (dir.extension().string() == ".xml") + LOG_WARNING(log, dir.string() + "' is a directory, but has .xml extension"); + + fs::directory_iterator end; + for (fs::directory_iterator it(dir); it != end; ++it) + { + const fs::path file = (*it); + if (recursive && fs::is_directory(file)) + getFilesFromDir(file, input_files, recursive); + else if (!fs::is_directory(file) && file.extension().string() == ".xml") + input_files.push_back(file.string()); + } +} + +static std::vector getInputFiles(const po::variables_map & options, Poco::Logger * log) +{ + std::vector input_files; + bool recursive = options.count("recursive"); + + if (!options.count("input-files")) + { + LOG_INFO(log, "Trying to find test scenario files in the current folder..."); + fs::path curr_dir("."); + + getFilesFromDir(curr_dir, input_files, recursive); + + if (input_files.empty()) + throw DB::Exception("Did not find any xml files", DB::ErrorCodes::BAD_ARGUMENTS); + else + LOG_INFO(log, "Found " << input_files.size() << " files"); + } + else + { + 
input_files = options["input-files"].as>(); + LOG_INFO(log, "Found " + std::to_string(input_files.size()) + " input files"); + std::vector collected_files; + + for (const std::string & filename : input_files) + { + fs::path file(filename); + + if (!fs::exists(file)) + throw DB::Exception("File '" + filename + "' does not exist", DB::ErrorCodes::FILE_DOESNT_EXIST); + + if (fs::is_directory(file)) + { + getFilesFromDir(file, collected_files, recursive); + } + else + { + if (file.extension().string() != ".xml") + throw DB::Exception("File '" + filename + "' does not have .xml extension", DB::ErrorCodes::BAD_ARGUMENTS); + collected_files.push_back(filename); + } + } + + input_files = std::move(collected_files); + } + std::sort(input_files.begin(), input_files.end()); + return input_files; +} + +int mainEntryClickHousePerformanceTest(int argc, char ** argv) +try +{ + using po::value; + using Strings = DB::Strings; + + + po::options_description desc("Allowed options"); + desc.add_options() + ("help", "produce help message") + ("lite", "use lite version of output") + ("profiles-file", value()->default_value(""), "Specify a file with global profiles") + ("host,h", value()->default_value("localhost"), "") + ("port", value()->default_value(9000), "") + ("secure,s", "Use TLS connection") + ("database", value()->default_value("default"), "") + ("user", value()->default_value("default"), "") + ("password", value()->default_value(""), "") + ("log-level", value()->default_value("information"), "Set log level") + ("tags", value()->multitoken(), "Run only tests with tag") + ("skip-tags", value()->multitoken(), "Do not run tests with tag") + ("names", value()->multitoken(), "Run tests with specific name") + ("skip-names", value()->multitoken(), "Do not run tests with name") + ("names-regexp", value()->multitoken(), "Run tests with names matching regexp") + ("skip-names-regexp", value()->multitoken(), "Do not run tests with names matching regexp") + ("recursive,r", "Recurse in 
directories to find all xml's"); + + /// These options will not be displayed in --help + po::options_description hidden("Hidden options"); + hidden.add_options() + ("input-files", value>(), ""); + + /// But they will be legit, though. And they must be given without name + po::positional_options_description positional; + positional.add("input-files", -1); + + po::options_description cmdline_options; + cmdline_options.add(desc).add(hidden); + + po::variables_map options; + po::store( + po::command_line_parser(argc, argv). + options(cmdline_options).positional(positional).run(), options); + po::notify(options); + + Poco::AutoPtr formatter(new Poco::PatternFormatter("%Y.%m.%d %H:%M:%S.%F <%p> %s: %t")); + Poco::AutoPtr console_chanel(new Poco::ConsoleChannel); + Poco::AutoPtr channel(new Poco::FormattingChannel(formatter, console_chanel)); + + Poco::Logger::root().setLevel(options["log-level"].as()); + Poco::Logger::root().setChannel(channel); + + Poco::Logger * log = &Poco::Logger::get("PerformanceTestSuite"); + if (options.count("help")) + { + std::cout << "Usage: " << argv[0] << " [options] [test_file ...] [tests_folder]\n"; + std::cout << desc << "\n"; + return 0; + } + + Strings input_files = getInputFiles(options, log); + + Strings tests_tags = options.count("tags") ? options["tags"].as() : Strings({}); + Strings skip_tags = options.count("skip-tags") ? options["skip-tags"].as() : Strings({}); + Strings tests_names = options.count("names") ? options["names"].as() : Strings({}); + Strings skip_names = options.count("skip-names") ? options["skip-names"].as() : Strings({}); + Strings tests_names_regexp = options.count("names-regexp") ? options["names-regexp"].as() : Strings({}); + Strings skip_names_regexp = options.count("skip-names-regexp") ? 
options["skip-names-regexp"].as() : Strings({}); + + auto timeouts = DB::ConnectionTimeouts::getTCPTimeoutsWithoutFailover(DB::Settings()); + + DB::UseSSL use_ssl; + + DB::PerformanceTestSuite performance_test_suite( + options["host"].as(), + options["port"].as(), + options.count("secure"), + options["database"].as(), + options["user"].as(), + options["password"].as(), + options.count("lite") > 0, + options["profiles-file"].as(), + std::move(input_files), + std::move(tests_tags), + std::move(skip_tags), + std::move(tests_names), + std::move(skip_names), + std::move(tests_names_regexp), + std::move(skip_names_regexp), + timeouts); + return performance_test_suite.run(); +} +catch (...) +{ + std::cout << DB::getCurrentExceptionMessage(/*with stacktrace = */ true) << std::endl; + int code = DB::getCurrentExceptionCode(); + return code ? code : 1; +} diff --git a/dbms/programs/performance-test/ReportBuilder.cpp b/dbms/programs/performance-test/ReportBuilder.cpp new file mode 100644 index 00000000000..766184bd114 --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.cpp @@ -0,0 +1,196 @@ +#include "ReportBuilder.h" + +#include +#include +#include +#include + +#include +#include +#include + +#include "JSONString.h" + +namespace DB +{ + +namespace +{ +const std::regex QUOTE_REGEX{"\""}; +} + +ReportBuilder::ReportBuilder(const std::string & server_version_) + : server_version(server_version_) + , hostname(getFQDNOrHostName()) + , num_cores(getNumberOfPhysicalCPUCores()) + , num_threads(std::thread::hardware_concurrency()) + , ram(getMemoryAmount()) +{ +} + +std::string ReportBuilder::getCurrentTime() const +{ + return DateLUT::instance().timeToString(time(nullptr)); +} + +std::string ReportBuilder::buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + JSONString json_output; + + json_output.set("hostname", hostname); + json_output.set("num_cores", num_cores); + json_output.set("num_threads", num_threads); + 
json_output.set("ram", ram); + json_output.set("server_version", server_version); + json_output.set("time", getCurrentTime()); + json_output.set("test_name", test_info.test_name); + json_output.set("path", test_info.path); + json_output.set("main_metric", test_info.main_metric); + + auto has_metric = [&test_info] (const std::string & metric_name) + { + return std::find(test_info.metrics.begin(), + test_info.metrics.end(), metric_name) != test_info.metrics.end(); + }; + + if (test_info.substitutions.size()) + { + JSONString json_parameters(2); /// here, 2 is the size of \t padding + + for (auto it = test_info.substitutions.begin(); it != test_info.substitutions.end(); ++it) + { + std::string parameter = it->first; + Strings values = it->second; + + std::ostringstream array_string; + array_string << "["; + for (size_t i = 0; i != values.size(); ++i) + { + array_string << '"' << std::regex_replace(values[i], QUOTE_REGEX, "\\\"") << '"'; + if (i != values.size() - 1) + { + array_string << ", "; + } + } + array_string << ']'; + + json_parameters.set(parameter, array_string.str()); + } + + json_output.set("parameters", json_parameters.asString()); + } + + std::vector run_infos; + for (size_t query_index = 0; query_index < test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + size_t stat_index = number_of_launch * test_info.queries.size() + query_index; + TestStats & statistics = stats[stat_index]; + + if (!statistics.ready) + continue; + + JSONString runJSON; + + auto query = std::regex_replace(test_info.queries[query_index], QUOTE_REGEX, "\\\""); + runJSON.set("query", query); + if (!statistics.exception.empty()) + runJSON.set("exception", statistics.exception); + + if (test_info.exec_type == ExecutionType::Loop) + { + /// in seconds + if (has_metric("min_time")) + runJSON.set("min_time", statistics.min_time / double(1000)); + + if (has_metric("quantiles")) + { + JSONString 
quantiles(4); /// here, 4 is the size of \t padding + for (double percent = 10; percent <= 90; percent += 10) + { + std::string quantile_key = std::to_string(percent / 100.0); + while (quantile_key.back() == '0') + quantile_key.pop_back(); + + quantiles.set(quantile_key, + statistics.sampler.quantileInterpolated(percent / 100.0)); + } + quantiles.set("0.95", + statistics.sampler.quantileInterpolated(95 / 100.0)); + quantiles.set("0.99", + statistics.sampler.quantileInterpolated(99 / 100.0)); + quantiles.set("0.999", + statistics.sampler.quantileInterpolated(99.9 / 100.0)); + quantiles.set("0.9999", + statistics.sampler.quantileInterpolated(99.99 / 100.0)); + + runJSON.set("quantiles", quantiles.asString()); + } + + if (has_metric("total_time")) + runJSON.set("total_time", statistics.total_time); + + if (has_metric("queries_per_second")) + runJSON.set("queries_per_second", + double(statistics.queries) / statistics.total_time); + + if (has_metric("rows_per_second")) + runJSON.set("rows_per_second", + double(statistics.total_rows_read) / statistics.total_time); + + if (has_metric("bytes_per_second")) + runJSON.set("bytes_per_second", + double(statistics.total_bytes_read) / statistics.total_time); + } + else + { + if (has_metric("max_rows_per_second")) + runJSON.set("max_rows_per_second", statistics.max_rows_speed); + + if (has_metric("max_bytes_per_second")) + runJSON.set("max_bytes_per_second", statistics.max_bytes_speed); + + if (has_metric("avg_rows_per_second")) + runJSON.set("avg_rows_per_second", statistics.avg_rows_speed_value); + + if (has_metric("avg_bytes_per_second")) + runJSON.set("avg_bytes_per_second", statistics.avg_bytes_speed_value); + } + + run_infos.push_back(runJSON); + } + } + + json_output.set("runs", run_infos); + + return json_output.asString(); +} + +std::string ReportBuilder::buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const +{ + + std::ostringstream output; + + for (size_t query_index = 0; query_index 
< test_info.queries.size(); ++query_index) + { + for (size_t number_of_launch = 0; number_of_launch < test_info.times_to_run; ++number_of_launch) + { + if (test_info.queries.size() > 1) + output << "query \"" << test_info.queries[query_index] << "\", "; + + output << "run " << std::to_string(number_of_launch + 1) << ": "; + output << test_info.main_metric << " = "; + size_t index = number_of_launch * test_info.queries.size() + query_index; + output << stats[index].getStatisticByName(test_info.main_metric); + output << "\n"; + } + } + return output.str(); +} + +} diff --git a/dbms/programs/performance-test/ReportBuilder.h b/dbms/programs/performance-test/ReportBuilder.h new file mode 100644 index 00000000000..9bc1e809f55 --- /dev/null +++ b/dbms/programs/performance-test/ReportBuilder.h @@ -0,0 +1,32 @@ +#pragma once +#include "PerformanceTestInfo.h" +#include +#include + +namespace DB +{ + +class ReportBuilder +{ +public: + explicit ReportBuilder(const std::string & server_version_); + std::string buildFullReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; + + std::string buildCompactReport( + const PerformanceTestInfo & test_info, + std::vector & stats) const; +private: + std::string server_version; + std::string hostname; + size_t num_cores; + size_t num_threads; + size_t ram; + +private: + std::string getCurrentTime() const; + +}; + +} diff --git a/dbms/programs/performance-test/StopConditionsSet.cpp b/dbms/programs/performance-test/StopConditionsSet.cpp new file mode 100644 index 00000000000..45ae65f3600 --- /dev/null +++ b/dbms/programs/performance-test/StopConditionsSet.cpp @@ -0,0 +1,63 @@ +#include "StopConditionsSet.h" +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +void StopConditionsSet::loadFromConfig(const ConfigurationPtr & stop_conditions_view) +{ + Strings keys; + stop_conditions_view->keys(keys); + + for (const std::string & key : keys) + { + if (key == "total_time_ms") + 
total_time_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "rows_read") + rows_read.value = stop_conditions_view->getUInt64(key); + else if (key == "bytes_read_uncompressed") + bytes_read_uncompressed.value = stop_conditions_view->getUInt64(key); + else if (key == "iterations") + iterations.value = stop_conditions_view->getUInt64(key); + else if (key == "min_time_not_changing_for_ms") + min_time_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "max_speed_not_changing_for_ms") + max_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else if (key == "average_speed_not_changing_for_ms") + average_speed_not_changing_for_ms.value = stop_conditions_view->getUInt64(key); + else + throw Exception("Met unkown stop condition: " + key, ErrorCodes::LOGICAL_ERROR); + } + ++initialized_count; +} + +void StopConditionsSet::reset() +{ + total_time_ms.fulfilled = false; + rows_read.fulfilled = false; + bytes_read_uncompressed.fulfilled = false; + iterations.fulfilled = false; + min_time_not_changing_for_ms.fulfilled = false; + max_speed_not_changing_for_ms.fulfilled = false; + average_speed_not_changing_for_ms.fulfilled = false; + + fulfilled_count = 0; +} + +void StopConditionsSet::report(UInt64 value, StopConditionsSet::StopCondition & condition) +{ + if (condition.value && !condition.fulfilled && value >= condition.value) + { + condition.fulfilled = true; + ++fulfilled_count; + } +} + + + +} diff --git a/dbms/programs/performance-test/StopConditionsSet.h b/dbms/programs/performance-test/StopConditionsSet.h new file mode 100644 index 00000000000..ad29c748a76 --- /dev/null +++ b/dbms/programs/performance-test/StopConditionsSet.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ + +using ConfigurationPtr = Poco::AutoPtr; + +/// A set of supported stop conditions. 
+struct StopConditionsSet +{ + void loadFromConfig(const ConfigurationPtr & stop_conditions_view); + void reset(); + + /// Note: only conditions with UInt64 minimal thresholds are supported. + /// I.e. condition is fulfilled when value is exceeded. + struct StopCondition + { + UInt64 value = 0; + bool fulfilled = false; + }; + + void report(UInt64 value, StopCondition & condition); + + StopCondition total_time_ms; + StopCondition rows_read; + StopCondition bytes_read_uncompressed; + StopCondition iterations; + StopCondition min_time_not_changing_for_ms; + StopCondition max_speed_not_changing_for_ms; + StopCondition average_speed_not_changing_for_ms; + + size_t initialized_count = 0; + size_t fulfilled_count = 0; +}; + +} diff --git a/dbms/programs/performance-test/TestStats.cpp b/dbms/programs/performance-test/TestStats.cpp new file mode 100644 index 00000000000..100c7a84391 --- /dev/null +++ b/dbms/programs/performance-test/TestStats.cpp @@ -0,0 +1,165 @@ +#include "TestStats.h" +namespace DB +{ + +namespace +{ +const std::string FOUR_SPACES = " "; +} + +std::string TestStats::getStatisticByName(const std::string & statistic_name) +{ + if (statistic_name == "min_time") + return std::to_string(min_time) + "ms"; + + if (statistic_name == "quantiles") + { + std::string result = "\n"; + + for (double percent = 10; percent <= 90; percent += 10) + { + result += FOUR_SPACES + std::to_string((percent / 100)); + result += ": " + std::to_string(sampler.quantileInterpolated(percent / 100.0)); + result += "\n"; + } + result += FOUR_SPACES + "0.95: " + std::to_string(sampler.quantileInterpolated(95 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.99: " + std::to_string(sampler.quantileInterpolated(99 / 100.0)) + "\n"; + result += FOUR_SPACES + "0.999: " + std::to_string(sampler.quantileInterpolated(99.9 / 100.)) + "\n"; + result += FOUR_SPACES + "0.9999: " + std::to_string(sampler.quantileInterpolated(99.99 / 100.)); + + return result; + } + if (statistic_name == "total_time") + 
return std::to_string(total_time) + "s"; + + if (statistic_name == "queries_per_second") + return std::to_string(queries / total_time); + + if (statistic_name == "rows_per_second") + return std::to_string(total_rows_read / total_time); + + if (statistic_name == "bytes_per_second") + return std::to_string(total_bytes_read / total_time); + + if (statistic_name == "max_rows_per_second") + return std::to_string(max_rows_speed); + + if (statistic_name == "max_bytes_per_second") + return std::to_string(max_bytes_speed); + + if (statistic_name == "avg_rows_per_second") + return std::to_string(avg_rows_speed_value); + + if (statistic_name == "avg_bytes_per_second") + return std::to_string(avg_bytes_speed_value); + + return ""; +} + + +void TestStats::update_min_time(UInt64 min_time_candidate) +{ + if (min_time_candidate < min_time) + { + min_time = min_time_candidate; + min_time_watch.restart(); + } +} + +void TestStats::update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + UInt64 & max_speed) +{ + if (max_speed_candidate > max_speed) + { + max_speed = max_speed_candidate; + max_speed_watch.restart(); + } +} + + +void TestStats::update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value) +{ + avg_speed_value = ((avg_speed_value * number_of_info_batches) + new_speed_info); + ++number_of_info_batches; + avg_speed_value /= number_of_info_batches; + + if (avg_speed_first == 0) + { + avg_speed_first = avg_speed_value; + } + + if (std::abs(avg_speed_value - avg_speed_first) >= precision) + { + avg_speed_first = avg_speed_value; + avg_speed_watch.restart(); + } +} + +void TestStats::add(size_t rows_read_inc, size_t bytes_read_inc) +{ + total_rows_read += rows_read_inc; + total_bytes_read += bytes_read_inc; + last_query_rows_read += rows_read_inc; + last_query_bytes_read += bytes_read_inc; + + double new_rows_speed = 
last_query_rows_read / watch_per_query.elapsedSeconds(); + double new_bytes_speed = last_query_bytes_read / watch_per_query.elapsedSeconds(); + + /// Update rows speed + update_max_speed(new_rows_speed, max_rows_speed_watch, max_rows_speed); + update_average_speed(new_rows_speed, + avg_rows_speed_watch, + number_of_rows_speed_info_batches, + avg_rows_speed_precision, + avg_rows_speed_first, + avg_rows_speed_value); + /// Update bytes speed + update_max_speed(new_bytes_speed, max_bytes_speed_watch, max_bytes_speed); + update_average_speed(new_bytes_speed, + avg_bytes_speed_watch, + number_of_bytes_speed_info_batches, + avg_bytes_speed_precision, + avg_bytes_speed_first, + avg_bytes_speed_value); +} + +void TestStats::updateQueryInfo() +{ + ++queries; + sampler.insert(watch_per_query.elapsedSeconds()); + update_min_time(watch_per_query.elapsed() / (1000 * 1000)); /// ns to ms +} + + +TestStats::TestStats() +{ + watch.reset(); + watch_per_query.reset(); + min_time_watch.reset(); + max_rows_speed_watch.reset(); + max_bytes_speed_watch.reset(); + avg_rows_speed_watch.reset(); + avg_bytes_speed_watch.reset(); +} + + +void TestStats::startWatches() +{ + watch.start(); + watch_per_query.start(); + min_time_watch.start(); + max_rows_speed_watch.start(); + max_bytes_speed_watch.start(); + avg_rows_speed_watch.start(); + avg_bytes_speed_watch.start(); +} + +} diff --git a/dbms/programs/performance-test/TestStats.h b/dbms/programs/performance-test/TestStats.h new file mode 100644 index 00000000000..84880b7b189 --- /dev/null +++ b/dbms/programs/performance-test/TestStats.h @@ -0,0 +1,87 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ +struct TestStats +{ + TestStats(); + Stopwatch watch; + Stopwatch watch_per_query; + Stopwatch min_time_watch; + Stopwatch max_rows_speed_watch; + Stopwatch max_bytes_speed_watch; + Stopwatch avg_rows_speed_watch; + Stopwatch avg_bytes_speed_watch; + + bool last_query_was_cancelled = false; + + size_t queries = 0; + + 
size_t total_rows_read = 0; + size_t total_bytes_read = 0; + + size_t last_query_rows_read = 0; + size_t last_query_bytes_read = 0; + + using Sampler = ReservoirSampler; + Sampler sampler{1 << 16}; + + /// min_time in ms + UInt64 min_time = std::numeric_limits::max(); + double total_time = 0; + + UInt64 max_rows_speed = 0; + UInt64 max_bytes_speed = 0; + + double avg_rows_speed_value = 0; + double avg_rows_speed_first = 0; + static inline double avg_rows_speed_precision = 0.001; + + double avg_bytes_speed_value = 0; + double avg_bytes_speed_first = 0; + static inline double avg_bytes_speed_precision = 0.001; + + size_t number_of_rows_speed_info_batches = 0; + size_t number_of_bytes_speed_info_batches = 0; + + bool ready = false; // check if a query wasn't interrupted by SIGINT + std::string exception; + + /// Hack, actually this field doesn't required for statistics + bool got_SIGINT = false; + + std::string getStatisticByName(const std::string & statistic_name); + + void update_min_time(UInt64 min_time_candidate); + + void update_average_speed( + double new_speed_info, + Stopwatch & avg_speed_watch, + size_t & number_of_info_batches, + double precision, + double & avg_speed_first, + double & avg_speed_value); + + void update_max_speed( + size_t max_speed_candidate, + Stopwatch & max_speed_watch, + UInt64 & max_speed); + + void add(size_t rows_read_inc, size_t bytes_read_inc); + + void updateQueryInfo(); + + void setTotalTime() + { + total_time = watch.elapsedSeconds(); + } + + void startWatches(); +}; + +} diff --git a/dbms/programs/performance-test/TestStopConditions.cpp b/dbms/programs/performance-test/TestStopConditions.cpp new file mode 100644 index 00000000000..b88526b0261 --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.cpp @@ -0,0 +1,38 @@ +#include "TestStopConditions.h" + +namespace DB +{ + +void TestStopConditions::loadFromConfig(ConfigurationPtr & stop_conditions_config) +{ + if (stop_conditions_config->has("all_of")) + { + 
ConfigurationPtr config_all_of(stop_conditions_config->createView("all_of")); + conditions_all_of.loadFromConfig(config_all_of); + } + if (stop_conditions_config->has("any_of")) + { + ConfigurationPtr config_any_of(stop_conditions_config->createView("any_of")); + conditions_any_of.loadFromConfig(config_any_of); + } +} + +bool TestStopConditions::areFulfilled() const +{ + return (conditions_all_of.initialized_count && conditions_all_of.fulfilled_count >= conditions_all_of.initialized_count) + || (conditions_any_of.initialized_count && conditions_any_of.fulfilled_count); +} + +UInt64 TestStopConditions::getMaxExecTime() const +{ + UInt64 all_of_time = conditions_all_of.total_time_ms.value; + if (all_of_time == 0 && conditions_all_of.initialized_count != 0) /// max time is not set in all conditions + return 0; + else if(all_of_time != 0 && conditions_all_of.initialized_count > 1) /// max time is set, but we have other conditions + return 0; + + UInt64 any_of_time = conditions_any_of.total_time_ms.value; + return std::max(all_of_time, any_of_time); +} + +} diff --git a/dbms/programs/performance-test/TestStopConditions.h b/dbms/programs/performance-test/TestStopConditions.h new file mode 100644 index 00000000000..2dcbcce4674 --- /dev/null +++ b/dbms/programs/performance-test/TestStopConditions.h @@ -0,0 +1,57 @@ +#pragma once +#include "StopConditionsSet.h" +#include + +namespace DB +{ +/// Stop conditions for a test run. The running test will be terminated in either of two conditions: +/// 1. All conditions marked 'all_of' are fulfilled +/// or +/// 2. 
Any condition marked 'any_of' is fulfilled + +using ConfigurationPtr = Poco::AutoPtr; + +class TestStopConditions +{ +public: + void loadFromConfig(ConfigurationPtr & stop_conditions_config); + inline bool empty() const + { + return !conditions_all_of.initialized_count && !conditions_any_of.initialized_count; + } + +#define DEFINE_REPORT_FUNC(FUNC_NAME, CONDITION) \ + void FUNC_NAME(UInt64 value) \ + { \ + conditions_all_of.report(value, conditions_all_of.CONDITION); \ + conditions_any_of.report(value, conditions_any_of.CONDITION); \ + } + + DEFINE_REPORT_FUNC(reportTotalTime, total_time_ms) + DEFINE_REPORT_FUNC(reportRowsRead, rows_read) + DEFINE_REPORT_FUNC(reportBytesReadUncompressed, bytes_read_uncompressed) + DEFINE_REPORT_FUNC(reportIterations, iterations) + DEFINE_REPORT_FUNC(reportMinTimeNotChangingFor, min_time_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportMaxSpeedNotChangingFor, max_speed_not_changing_for_ms) + DEFINE_REPORT_FUNC(reportAverageSpeedNotChangingFor, average_speed_not_changing_for_ms) + +#undef REPORT + + bool areFulfilled() const; + + void reset() + { + conditions_all_of.reset(); + conditions_any_of.reset(); + } + + /// Return max exec time for these conditions + /// Return zero if max time cannot be determined + UInt64 getMaxExecTime() const; + +private: + StopConditionsSet conditions_all_of; + StopConditionsSet conditions_any_of; +}; + +} diff --git a/dbms/programs/performance-test/applySubstitutions.cpp b/dbms/programs/performance-test/applySubstitutions.cpp new file mode 100644 index 00000000000..b8c1d4b6059 --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.cpp @@ -0,0 +1,82 @@ +#include "applySubstitutions.h" +#include +#include + +namespace DB +{ + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions) +{ + Strings xml_substitutions; + substitutions_view->keys(xml_substitutions); + + for (size_t i = 0; i != xml_substitutions.size(); ++i) + { + const ConfigurationPtr 
xml_substitution(substitutions_view->createView("substitution[" + std::to_string(i) + "]")); + + /// Property values for substitution will be stored in a vector + /// accessible by property name + Strings xml_values; + xml_substitution->keys("values", xml_values); + + std::string name = xml_substitution->getString("name"); + + for (size_t j = 0; j != xml_values.size(); ++j) + { + out_substitutions[name].push_back(xml_substitution->getString("values.value[" + std::to_string(j) + "]")); + } + } +} + +/// Recursive method which goes through all substitution blocks in xml +/// and replaces property {names} by their values +void runThroughAllOptionsAndPush(StringToVector::iterator substitutions_left, + StringToVector::iterator substitutions_right, + const std::string & template_query, + Strings & out_queries) +{ + if (substitutions_left == substitutions_right) + { + out_queries.push_back(template_query); /// completely substituted query + return; + } + + std::string substitution_mask = "{" + substitutions_left->first + "}"; + + if (template_query.find(substitution_mask) == std::string::npos) /// nothing to substitute here + { + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, template_query, out_queries); + return; + } + + for (const std::string & value : substitutions_left->second) + { + /// Copy query string for each unique permutation + std::string query = template_query; + size_t substr_pos = 0; + + while (substr_pos != std::string::npos) + { + substr_pos = query.find(substitution_mask); + + if (substr_pos != std::string::npos) + query.replace(substr_pos, substitution_mask.length(), value); + } + + runThroughAllOptionsAndPush(std::next(substitutions_left), substitutions_right, query, out_queries); + } +} + +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate) +{ + Strings queries_res; + runThroughAllOptionsAndPush( + substitutions_to_generate.begin(), + substitutions_to_generate.end(), + query, + 
queries_res); + return queries_res; +} + + +} diff --git a/dbms/programs/performance-test/applySubstitutions.h b/dbms/programs/performance-test/applySubstitutions.h new file mode 100644 index 00000000000..3412167d6be --- /dev/null +++ b/dbms/programs/performance-test/applySubstitutions.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +using StringToVector = std::map; +using ConfigurationPtr = Poco::AutoPtr; + +void constructSubstitutions(ConfigurationPtr & substitutions_view, StringToVector & out_substitutions); + +Strings formatQueries(const std::string & query, StringToVector substitutions_to_generate); + +} diff --git a/dbms/programs/performance-test/executeQuery.cpp b/dbms/programs/performance-test/executeQuery.cpp new file mode 100644 index 00000000000..98a1c7a9ef7 --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.cpp @@ -0,0 +1,73 @@ +#include "executeQuery.h" +#include +#include +#include + +namespace DB +{ +namespace +{ + +void checkFulfilledConditionsAndUpdate( + const Progress & progress, RemoteBlockInputStream & stream, + TestStats & statistics, TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener) +{ + statistics.add(progress.rows, progress.bytes); + + stop_conditions.reportRowsRead(statistics.total_rows_read); + stop_conditions.reportBytesReadUncompressed(statistics.total_bytes_read); + stop_conditions.reportTotalTime(statistics.watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMinTimeNotChangingFor(statistics.min_time_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportMaxSpeedNotChangingFor(statistics.max_rows_speed_watch.elapsed() / (1000 * 1000)); + stop_conditions.reportAverageSpeedNotChangingFor(statistics.avg_rows_speed_watch.elapsed() / (1000 * 1000)); + + if (stop_conditions.areFulfilled()) + { + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } + + if (interrupt_listener.check()) + { + statistics.got_SIGINT = 
true; + statistics.last_query_was_cancelled = true; + stream.cancel(false); + } +} + +} + +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener, + Context & context) +{ + statistics.watch_per_query.restart(); + statistics.last_query_was_cancelled = false; + statistics.last_query_rows_read = 0; + statistics.last_query_bytes_read = 0; + + Settings settings; + RemoteBlockInputStream stream(connection, query, {}, context, &settings); + + stream.setProgressCallback( + [&](const Progress & value) + { + checkFulfilledConditionsAndUpdate( + value, stream, statistics, + stop_conditions, interrupt_listener); + }); + stream.readPrefix(); + while (Block block = stream.read()); + stream.readSuffix(); + + if (!statistics.last_query_was_cancelled) + statistics.updateQueryInfo(); + + statistics.setTotalTime(); +} +} diff --git a/dbms/programs/performance-test/executeQuery.h b/dbms/programs/performance-test/executeQuery.h new file mode 100644 index 00000000000..b1942437e0a --- /dev/null +++ b/dbms/programs/performance-test/executeQuery.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include "TestStats.h" +#include "TestStopConditions.h" +#include +#include +#include + +namespace DB +{ +void executeQuery( + Connection & connection, + const std::string & query, + TestStats & statistics, + TestStopConditions & stop_conditions, + InterruptListener & interrupt_listener, + Context & context); +} diff --git a/dbms/programs/server/CMakeLists.txt b/dbms/programs/server/CMakeLists.txt index 9de696e417f..f8e805fbc74 100644 --- a/dbms/programs/server/CMakeLists.txt +++ b/dbms/programs/server/CMakeLists.txt @@ -23,7 +23,7 @@ if (CLICKHOUSE_SPLIT_BINARY) install (TARGETS clickhouse-server ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () -if (OS_LINUX AND MAKE_STATIC_LIBRARIES) +if (GLIBC_COMPATIBILITY) set 
(GLIBC_MAX_REQUIRED 2.4 CACHE INTERNAL "") # temporary disabled. to enable - change 'exit 0' to 'exit $a' add_test(NAME GLIBC_required_version COMMAND bash -c "readelf -s ${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-server | perl -nE 'END {exit 0 if $a} ++$a, print if /\\x40GLIBC_(\\S+)/ and pack(q{C*}, split /\\./, \$1) gt pack q{C*}, split /\\./, q{${GLIBC_MAX_REQUIRED}}'") diff --git a/dbms/programs/server/HTTPHandler.cpp b/dbms/programs/server/HTTPHandler.cpp index d86c526784b..a645019875a 100644 --- a/dbms/programs/server/HTTPHandler.cpp +++ b/dbms/programs/server/HTTPHandler.cpp @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include @@ -647,6 +647,7 @@ void HTTPHandler::trySendExceptionToClient(const std::string & s, int exception_ void HTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) { setThreadName("HTTPHandler"); + ThreadStatus thread_status; Output used_output; diff --git a/dbms/programs/server/MetricsTransmitter.h b/dbms/programs/server/MetricsTransmitter.h index e85113ad141..fd3853a7a9e 100644 --- a/dbms/programs/server/MetricsTransmitter.h +++ b/dbms/programs/server/MetricsTransmitter.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -46,7 +47,7 @@ private: bool quit = false; std::mutex mutex; std::condition_variable cond; - std::thread thread{&MetricsTransmitter::run, this}; + ThreadFromGlobalPool thread{&MetricsTransmitter::run, this}; static constexpr auto profile_events_path_prefix = "ClickHouse.ProfileEvents."; static constexpr auto current_metrics_path_prefix = "ClickHouse.Metrics."; diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index c64b8358612..c8965cec0da 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -129,9 +130,10 @@ std::string Server::getDefaultCorePath() const int Server::main(const 
std::vector & /*args*/) { Logger * log = &logger(); - UseSSL use_ssl; + ThreadStatus thread_status; + registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); @@ -418,7 +420,7 @@ int Server::main(const std::vector & /*args*/) /// Set path for format schema files auto format_schema_path = Poco::File(config().getString("format_schema_path", path + "format_schemas/")); - global_context->setFormatSchemaPath(format_schema_path.path() + "/"); + global_context->setFormatSchemaPath(format_schema_path.path()); format_schema_path.createDirectories(); LOG_INFO(log, "Loading metadata."); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index c3dff11146e..62e32c5df6d 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -55,6 +55,7 @@ namespace ErrorCodes void TCPHandler::runImpl() { setThreadName("TCPHandler"); + ThreadStatus thread_status; connection_context = server.context(); connection_context.setSessionContext(connection_context); @@ -485,53 +486,44 @@ void TCPHandler::processTablesStatusRequest() void TCPHandler::sendProfileInfo() { - if (const IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) - { - writeVarUInt(Protocol::Server::ProfileInfo, *out); - input->getProfileInfo().write(*out); - out->next(); - } + writeVarUInt(Protocol::Server::ProfileInfo, *out); + state.io.in->getProfileInfo().write(*out); + out->next(); } void TCPHandler::sendTotals() { - if (IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) + const Block & totals = state.io.in->getTotals(); + + if (totals) { - const Block & totals = input->getTotals(); + initBlockOutput(totals); - if (totals) - { - initBlockOutput(totals); + writeVarUInt(Protocol::Server::Totals, *out); + writeStringBinary("", *out); - writeVarUInt(Protocol::Server::Totals, *out); - writeStringBinary("", *out); - - state.block_out->write(totals); - state.maybe_compressed_out->next(); - out->next(); - } + 
state.block_out->write(totals); + state.maybe_compressed_out->next(); + out->next(); } } void TCPHandler::sendExtremes() { - if (IProfilingBlockInputStream * input = dynamic_cast(state.io.in.get())) + Block extremes = state.io.in->getExtremes(); + + if (extremes) { - Block extremes = input->getExtremes(); + initBlockOutput(extremes); - if (extremes) - { - initBlockOutput(extremes); + writeVarUInt(Protocol::Server::Extremes, *out); + writeStringBinary("", *out); - writeVarUInt(Protocol::Server::Extremes, *out); - writeStringBinary("", *out); - - state.block_out->write(extremes); - state.maybe_compressed_out->next(); - out->next(); - } + state.block_out->write(extremes); + state.maybe_compressed_out->next(); + out->next(); } } diff --git a/dbms/programs/server/config.d/listen.xml b/dbms/programs/server/config.d/listen.xml deleted file mode 100644 index 24c64bbb60a..00000000000 --- a/dbms/programs/server/config.d/listen.xml +++ /dev/null @@ -1 +0,0 @@ -0.0.0.0 \ No newline at end of file diff --git a/dbms/programs/server/config.d/zookeeper.xml b/dbms/programs/server/config.d/zookeeper.xml index 095f4be78c1..140e34c42ac 100644 --- a/dbms/programs/server/config.d/zookeeper.xml +++ b/dbms/programs/server/config.d/zookeeper.xml @@ -1,16 +1,8 @@ - + diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h index ad25ff95af3..cee2b6fe0c0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -100,7 +100,7 @@ public: return res; } - void NO_SANITIZE_UNDEFINED add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { /// Out of range conversion may occur. This is Ok. 
@@ -177,8 +177,11 @@ public: static void assertSecondArg(const DataTypes & argument_types) { if constexpr (has_second_arg) - /// TODO: check that second argument is of numerical type. + { assertBinary(Name::name, argument_types); + if (!isUnsignedInteger(argument_types[1])) + throw Exception("Second argument (weight) for function " + std::string(Name::name) + " must be unsigned integer, but it has type " + argument_types[1]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } else assertUnary(Name::name, argument_types); } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp index 571d6f5c0a1..75cd62c00f1 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionSumMap.cpp @@ -12,10 +12,41 @@ namespace DB namespace { -AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, const DataTypes & arguments, const Array & params) +struct WithOverflowPolicy { - assertNoParameters(name, params); + /// Overflow, meaning that the returned type is the same as the input type. + static DataTypePtr promoteType(const DataTypePtr & data_type) { return data_type; } +}; +struct WithoutOverflowPolicy +{ + /// No overflow, meaning we promote the types if necessary. 
+ static DataTypePtr promoteType(const DataTypePtr & data_type) + { + if (!data_type->canBePromoted()) + throw new Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return data_type->promoteNumericType(); + } +}; + +template +using SumMapWithOverflow = AggregateFunctionSumMap; + +template +using SumMapWithoutOverflow = AggregateFunctionSumMap; + +template +using SumMapFilteredWithOverflow = AggregateFunctionSumMapFiltered; + +template +using SumMapFilteredWithoutOverflow = AggregateFunctionSumMapFiltered; + +using SumMapArgs = std::pair; + +SumMapArgs parseArguments(const std::string & name, const DataTypes & arguments) +{ if (arguments.size() < 2) throw Exception("Aggregate function " + name + " requires at least two arguments of Array type.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -25,9 +56,11 @@ AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, con throw Exception("First argument for function " + name + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const DataTypePtr & keys_type = array_type->getNestedType(); + + DataTypePtr keys_type = array_type->getNestedType(); DataTypes values_types; + values_types.reserve(arguments.size() - 1); for (size_t i = 1; i < arguments.size(); ++i) { array_type = checkAndGetDataType(arguments[i].get()); @@ -37,20 +70,55 @@ AggregateFunctionPtr createAggregateFunctionSumMap(const std::string & name, con values_types.push_back(array_type->getNestedType()); } - AggregateFunctionPtr res(createWithNumericBasedType(*keys_type, keys_type, values_types)); + return {std::move(keys_type), std::move(values_types)}; +} + +template