Merge branch 'master' into fix-osx-setsockopt-errors

This commit is contained in:
alekar 2023-05-13 23:03:17 -07:00 committed by GitHub
commit 2631d3db20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
859 changed files with 11693 additions and 10954 deletions


@ -23,7 +23,7 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-reserved-identifier,
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-bugprone-unchecked-optional-access,
-cert-dcl16-c,
@ -111,6 +111,7 @@ Checks: '*,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
@ -136,7 +137,7 @@ Checks: '*,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-identifier-length,
-readability-identifier-naming,
-readability-identifier-naming, # useful but too slow
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
@ -148,7 +149,7 @@ Checks: '*,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-zirkon-*,
-zircon-*,
'
WarningsAsErrors: '*'
@ -168,11 +169,10 @@ CheckOptions:
readability-identifier-naming.ParameterPackCase: lower_case
readability-identifier-naming.StructCase: CamelCase
readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
readability-identifier-naming.TemplateUsingCase: lower_case
readability-identifier-naming.TemplateParameterCase: lower_case
readability-identifier-naming.TypeTemplateParameterCase: CamelCase
readability-identifier-naming.TypedefCase: CamelCase
readability-identifier-naming.UnionCase: CamelCase
readability-identifier-naming.UsingCase: CamelCase
modernize-loop-convert.UseCxx20ReverseRanges: false
performance-move-const-arg.CheckTriviallyCopyableMove: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097

.clangd Normal file

@ -0,0 +1,16 @@
Diagnostics:
# clangd does parse .clang-tidy, but some checks are too slow to run in
# the clang-tidy build, so let's enable them explicitly for clangd at least.
ClangTidy:
# The following checks were disabled due to slowness with C++23,
# for more details see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/61418
#
# But the code base was written in a style that these checks enforce,
# so let's at least enable them for clangd.
Add: [
# configured in .clang-tidy
readability-identifier-naming,
bugprone-reserved-identifier,
]


@ -1341,6 +1341,40 @@ jobs:
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestReleaseAnalyzer:
needs: [BuilderDebRelease]
runs-on: [self-hosted, func-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/stateless_analyzer
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Stateless tests (release, analyzer)
REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse
KILL_TIMEOUT=10800
EOF
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.REPORTS_PATH }}
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
- name: Functional test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
- name: Cleanup
if: always()
run: |
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
sudo rm -fr "$TEMP_PATH"
FunctionalStatelessTestAarch64:
needs: [BuilderDebAarch64]
runs-on: [self-hosted, func-tester-aarch64]


@ -72,6 +72,9 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
Codebrowser:
needs: [DockerHubPush]
uses: ./.github/workflows/woboq.yml
BuilderCoverity:
needs: DockerHubPush
runs-on: [self-hosted, builder]
@ -125,8 +128,8 @@ jobs:
SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-15
CXX: clang++-15
CC: clang-16
CXX: clang++-16
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1


@ -6,9 +6,8 @@ env:
concurrency:
group: woboq
on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */18 * * *'
workflow_dispatch:
workflow_call:
jobs:
# don't use dockerhub push because this image updates so rarely
WoboqCodebrowser:
@ -26,6 +25,10 @@ jobs:
with:
clear-repository: true
submodules: 'true'
- name: Download json reports
uses: actions/download-artifact@v3
with:
path: ${{ env.IMAGES_PATH }}
- name: Codebrowser
run: |
sudo rm -fr "$TEMP_PATH"

.gitignore vendored

@ -129,7 +129,6 @@ website/package-lock.json
/.ccls-cache
# clangd cache
/.clangd
/.cache
/compile_commands.json

.gitmodules vendored

@ -253,6 +253,9 @@
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
[submodule "contrib/idxd-config"]
path = contrib/idxd-config
url = https://github.com/intel/idxd-config
[submodule "contrib/wyhash"]
path = contrib/wyhash
url = https://github.com/wangyi-fudan/wyhash
@ -264,7 +267,7 @@
url = https://github.com/ClickHouse/nats.c
[submodule "contrib/vectorscan"]
path = contrib/vectorscan
url = https://github.com/VectorCamp/vectorscan
url = https://github.com/ClickHouse/vectorscan.git
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/ClickHouse/c-ares
@ -335,6 +338,9 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
[submodule "contrib/libfiu"]
path = contrib/libfiu
url = https://github.com/ClickHouse/libfiu.git
[submodule "contrib/isa-l"]
path = contrib/isa-l
url = https://github.com/ClickHouse/isa-l.git


@ -57,7 +57,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
# gcc10/gcc10/clang -fsanitize=memory is too heavy
# -fsanitize=memory is too heavy
if (SANITIZE STREQUAL "memory")
set (RLIMIT_DATA 10000000000) # 10G
endif()
@ -280,7 +280,7 @@ set (CMAKE_C_STANDARD 11)
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
set (CMAKE_C_STANDARD_REQUIRED ON)
# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
# Compiler-specific coverage flags e.g. -fcoverage-mapping
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
if (COMPILER_CLANG)
@ -342,13 +342,6 @@ if (COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fstrict-vtable-pointers")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
# Set new experimental pass manager, it's a performance, build time and binary size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
endif ()
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
@ -395,6 +388,8 @@ if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
set(ENABLE_GWP_ASAN OFF)
endif ()
option (ENABLE_FIU "Enable Fiu" ON)
option(WERROR "Enable -Werror compiler option" ON)
if (WERROR)
@ -522,6 +517,26 @@ include (cmake/print_flags.cmake)
if (ENABLE_RUST)
add_subdirectory (rust)
# With LTO, Rust adds a few symbols with global visibility, the most common being
# rust_eh_personality. This leads to linking errors because multiple
# Rust libraries contain the same symbol.
#
# If it were a shared library, we could use a linker version script to
# hide these symbols, but the libraries are static.
#
# We could in theory compile everything into one library, but that would be a
# mess.
#
# This should be OK, though, since CI has plenty of other builds that are done
# without LTO, and they will catch multiple definitions if there are any.
#
# More information about this behaviour in Rust can be found here
# - https://github.com/rust-lang/rust/issues/44322
# - https://alanwu.space/post/symbol-hygiene/
if (ENABLE_THINLTO)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition")
endif()
endif()
add_subdirectory (base)


@ -21,11 +21,26 @@ curl https://clickhouse.com/ | sh
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming Events
* [**ClickHouse Spring Meetup in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/292517734) - April 26 - It's spring, and it's time to meet again in the city! Talks include: "Building a domain specific query language on top of Clickhouse", "A Galaxy of Information", "Our Journey to ClickHouse Cloud from Redshift", and a ClickHouse update!
* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 - Save the date! ClickHouse is coming back to Berlin. We're excited to announce an upcoming ClickHouse Meetup that you won't want to miss. Join us as we gather together to discuss the latest in the world of ClickHouse and share user stories.
* [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16
* [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25
* [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25
* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7
* [**ClickHouse Meetup in Stockholm**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - Jun 13
Also, keep an eye out for upcoming meetups in Amsterdam, Boston, NYC, Beijing, and Toronto. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
* **Recording available**: [**v23.3 Release Webinar**](https://www.youtube.com/watch?v=ISaGUjvBNao) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDateTime, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now!
* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchronous Connections to Replicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
## Interested in joining ClickHouse and making it your full time job?
We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we'll definitely click!
Check out our **current openings** here: https://clickhouse.com/company/careers
Can't find what you are looking for, but want to let us know you are interested in joining ClickHouse? Email careers@clickhouse.com!


@ -73,18 +73,6 @@
# endif
#endif
#if defined(ADDRESS_SANITIZER)
# define BOOST_USE_ASAN 1
# define BOOST_USE_UCONTEXT 1
#endif
#if defined(THREAD_SANITIZER)
# define BOOST_USE_TSAN 1
# define BOOST_USE_UCONTEXT 1
#endif
/// TODO: Strange enough, there is no way to detect UB sanitizer.
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can.
/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input.


@ -314,7 +314,14 @@ struct integer<Bits, Signed>::_impl
const T alpha = t / static_cast<T>(max_int);
if (alpha <= static_cast<T>(max_int))
/** Here we have to use strict comparison.
* The max_int is 2^64 - 1.
* When cast to a floating-point type, it is rounded to the closest representable number,
* which is 2^64.
* But 2^64 is not representable in uint64_t,
* so the maximum representable number will be strictly less.
*/
if (alpha < static_cast<T>(max_int))
self = static_cast<uint64_t>(alpha);
else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
set_multiplier<double>(self, static_cast<double>(alpha));
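
To see why the strict comparison matters: the nearest `double` to 2^64 − 1 is exactly 2^64, so a converted value can compare equal to `max_int` while already being too large to fit back into `uint64_t`. A minimal standalone illustration (not part of the diff):

```cpp
#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
    constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max(); // 2^64 - 1
    const double rounded = static_cast<double>(max_int);               // rounds up to exactly 2^64
    const double two_pow_64 = 18446744073709551616.0;                  // 2^64, exactly representable as a double

    std::printf("rounded == 2^64       : %d\n", rounded == two_pow_64);                       // 1: the cast rounded up
    std::printf("rounded <= (double)max: %d\n", rounded <= static_cast<double>(max_int));     // 1: '<=' would accept it
    std::printf("rounded <  (double)max: %d\n", rounded <  static_cast<double>(max_int));     // 0: '<' correctly rejects it
    // Converting 'rounded' back to uint64_t would be out of range, hence the strict '<' in the patch.
}
```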


@ -53,7 +53,7 @@ float logf(float x)
tmp = ix - OFF;
i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
k = (int32_t)tmp >> 23; /* arithmetic shift */
iz = ix - (tmp & 0x1ff << 23);
iz = ix - (tmp & 0xff800000);
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
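
For reference, `0x1ff << 23` and `0xff800000` select the same 32-bit mask (the sign and exponent bits of a binary32 value); writing the constant out avoids shifting a signed `int` literal into the sign bit, which is undefined behaviour in C with a 32-bit `int`. A small check of the equivalence (an illustration, not part of the diff):

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    // Same mask, computed with an unsigned shift, which is well defined.
    constexpr uint32_t shifted = UINT32_C(0x1ff) << 23;
    constexpr uint32_t literal = UINT32_C(0xff800000);
    static_assert(shifted == literal, "both spellings select the same bits");

    const uint32_t ix = 0x3fc00000; // bit pattern of 1.5f
    assert((ix & shifted) == (ix & literal)); // identical masking behaviour
    return 0;
}
```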


@ -53,7 +53,7 @@
// Define if no <locale> header is available (such as on WinCE)
// #define POCO_NO_LOCALE
#define POCO_NO_LOCALE
// Define to desired default thread stack size


@ -30,9 +30,6 @@
#include <cctype>
#include <cmath>
#include <limits>
#if !defined(POCO_NO_LOCALE)
# include <locale>
#endif
// binary numbers are supported, thus 64 (bits) + 1 (string terminating zero)
@ -53,11 +50,7 @@ inline char decimalSeparator()
/// Returns decimal separator from global locale or
/// default '.' for platforms where locale is unavailable.
{
#if !defined(POCO_NO_LOCALE)
return std::use_facet<std::numpunct<char>>(std::locale()).decimal_point();
#else
return '.';
#endif
}
@ -65,11 +58,7 @@ inline char thousandSeparator()
/// Returns thousand separator from global locale or
/// default ',' for platforms where locale is unavailable.
{
#if !defined(POCO_NO_LOCALE)
return std::use_facet<std::numpunct<char>>(std::locale()).thousands_sep();
#else
return ',';
#endif
}


@ -16,9 +16,6 @@
#include "Poco/Exception.h"
#include "Poco/Ascii.h"
#include <sstream>
#if !defined(POCO_NO_LOCALE)
#include <locale>
#endif
#include <cstddef>
@ -147,9 +144,6 @@ namespace
void formatOne(std::string& result, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt, std::vector<Any>::const_iterator& itVal)
{
std::ostringstream str;
#if !defined(POCO_NO_LOCALE)
str.imbue(std::locale::classic());
#endif
try
{
parseFlags(str, itFmt, endFmt);


@ -15,9 +15,6 @@
#include "Poco/NumberFormatter.h"
#include "Poco/MemoryStream.h"
#include <iomanip>
#if !defined(POCO_NO_LOCALE)
#include <locale>
#endif
#include <cstdio>


@ -19,9 +19,6 @@
#include <cstdio>
#include <cctype>
#include <stdlib.h>
#if !defined(POCO_NO_LOCALE)
#include <locale>
#endif
#if defined(POCO_LONG_IS_64_BIT)


@ -9,27 +9,19 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA
return()
endif()
set(ENABLE_CCACHE "default" CACHE STRING "Deprecated, use COMPILER_CACHE=(auto|ccache|sccache|disabled)")
if (NOT ENABLE_CCACHE STREQUAL "default")
message(WARNING "The -DENABLE_CCACHE is deprecated in favor of -DCOMPILER_CACHE")
endif()
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (ccache, then sccache), 'ccache', 'sccache', or 'disabled'")
# The logic is pretty convoluted because ENABLE_CCACHE is deprecated but should still
# control COMPILER_CACHE.
# Once it is removed completely, the following block will become much simpler.
if (COMPILER_CACHE STREQUAL "ccache" OR (ENABLE_CCACHE AND NOT ENABLE_CCACHE STREQUAL "default"))
find_program (CCACHE_EXECUTABLE ccache)
elseif(COMPILER_CACHE STREQUAL "disabled" OR NOT ENABLE_CCACHE STREQUAL "default")
message(STATUS "Using *ccache: no (disabled via configuration)")
return()
elseif(COMPILER_CACHE STREQUAL "auto")
if(COMPILER_CACHE STREQUAL "auto")
find_program (CCACHE_EXECUTABLE ccache sccache)
elseif (COMPILER_CACHE STREQUAL "ccache")
find_program (CCACHE_EXECUTABLE ccache)
elseif(COMPILER_CACHE STREQUAL "sccache")
find_program (CCACHE_EXECUTABLE sccache)
elseif(COMPILER_CACHE STREQUAL "disabled")
message(STATUS "Using *ccache: no (disabled via configuration)")
return()
else()
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), given '${COMPILER_CACHE}'")
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), value: '${COMPILER_CACHE}'")
endif()


@ -21,7 +21,7 @@ set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=bfd")
# Currently, lld fails with the following error:
# ld.lld: error: section size decrease is too large


@ -10,9 +10,16 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
if (SANITIZE)
if (SANITIZE STREQUAL "address")
# LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope',
# see https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "-fsanitize=address -fno-sanitize-address-use-after-scope")
set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
if (COMPILER_CLANG)
if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
# LLVM-15 has a bug in Address Sanitizer, preventing the usage
# of 'sanitize-address-use-after-scope', see [1].
#
# [1]: https://github.com/llvm/llvm-project/issues/58633
set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
endif()
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")


@ -70,13 +70,15 @@ if (LINKER_NAME)
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
if (COMPILER_CLANG)
# This a temporary quirk to emit .debug_aranges with ThinLTO, can be removed after upgrade to clang-16
# This is a temporary quirk to emit .debug_aranges with ThinLTO; it is only needed for clang/llvm < 16
if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
endif ()
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
endif()
endif ()


@ -105,6 +105,7 @@ add_contrib (libfarmhash)
add_contrib (icu-cmake icu)
add_contrib (h3-cmake h3)
add_contrib (mariadb-connector-c-cmake mariadb-connector-c)
add_contrib (libfiu-cmake libfiu)
if (ENABLE_TESTS)
add_contrib (googletest-cmake googletest)
@ -177,7 +178,19 @@ endif()
add_contrib (sqlite-cmake sqlite-amalgamation)
add_contrib (s2geometry-cmake s2geometry)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
if (OS_LINUX AND ARCH_AMD64 AND ENABLE_SSE42)
option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
elseif(ENABLE_QPL)
message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with SSE 4.2 or higher")
endif()
if (ENABLE_QPL)
add_contrib (idxd-config-cmake idxd-config)
add_contrib (qpl-cmake qpl) # requires: idxd-config
else()
message(STATUS "Not using QPL")
endif ()
add_contrib (morton-nd-cmake morton-nd)
if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)


@ -92,6 +92,8 @@ add_library (boost::system ALIAS _boost_system)
target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
# context
option (BOOST_USE_UCONTEXT "Use ucontext_t for context switching of boost::fiber within boost::context" OFF)
enable_language(ASM)
SET(ASM_OPTIONS "-x assembler-with-cpp")
@ -100,20 +102,6 @@ set (SRCS_CONTEXT
"${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp"
)
if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread"))
add_compile_definitions(BOOST_USE_UCONTEXT)
if (SANITIZE STREQUAL "address")
add_compile_definitions(BOOST_USE_ASAN)
elseif (SANITIZE STREQUAL "thread")
add_compile_definitions(BOOST_USE_TSAN)
endif()
set (SRCS_CONTEXT ${SRCS_CONTEXT}
"${LIBRARY_DIR}/libs/context/src/fiber.cpp"
"${LIBRARY_DIR}/libs/context/src/continuation.cpp"
)
endif()
if (ARCH_AARCH64)
set (SRCS_CONTEXT ${SRCS_CONTEXT}
"${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S"
@ -152,10 +140,27 @@ else()
)
endif()
if (SANITIZE OR BOOST_USE_UCONTEXT)
list (APPEND SRCS_CONTEXT
"${LIBRARY_DIR}/libs/context/src/fiber.cpp"
"${LIBRARY_DIR}/libs/context/src/continuation.cpp"
)
endif()
add_library (_boost_context ${SRCS_CONTEXT})
add_library (boost::context ALIAS _boost_context)
target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR})
if (SANITIZE OR BOOST_USE_UCONTEXT)
target_compile_definitions(_boost_context PUBLIC BOOST_USE_UCONTEXT)
endif()
if (SANITIZE STREQUAL "address")
target_compile_definitions(_boost_context PUBLIC BOOST_USE_ASAN)
elseif (SANITIZE STREQUAL "thread")
target_compile_definitions(_boost_context PUBLIC BOOST_USE_TSAN)
endif()
# coroutine
set (SRCS_COROUTINE


@ -111,6 +111,8 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips")
set(ARCH "generic")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc64le")
set(ARCH "ppc64le")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "riscv64")
set(ARCH "riscv64")
else()
message(FATAL_ERROR "Unknown processor:" ${CMAKE_SYSTEM_PROCESSOR})
endif()


@ -8,7 +8,7 @@
/*
* (all numbers are written in big-endian manner: the least significant digit on the right)
* (only bit representations are used - no hex or octal, leading zeroes are ommited)
* (only bit representations are used - no hex or octal, leading zeroes are omitted)
*
* Consistent hashing scheme:
*

contrib/idxd-config vendored Submodule

@ -0,0 +1 @@
Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99


@ -0,0 +1,23 @@
## accel_config is the utility library required by the QPL-Deflate codec for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA).
set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config-cmake/include")
set (SRCS
"${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c"
"${LIBACCEL_SOURCE_DIR}/util/log.c"
"${LIBACCEL_SOURCE_DIR}/util/sysfs.c"
)
add_library(_accel-config ${SRCS})
target_compile_options(_accel-config PRIVATE "-D_GNU_SOURCE")
target_include_directories(_accel-config BEFORE
PRIVATE ${UUID_DIR}
PRIVATE ${LIBACCEL_HEADER_DIR}
PRIVATE ${LIBACCEL_SOURCE_DIR})
target_include_directories(_accel-config SYSTEM BEFORE
PUBLIC ${LIBACCEL_SOURCE_DIR}/accfg)
add_library(ch_contrib::accel-config ALIAS _accel-config)

contrib/libfiu vendored Submodule

@ -0,0 +1 @@
Subproject commit b85edbde4cf974b1b40d27828a56f0505f4e2ee5


@ -0,0 +1,20 @@
if (NOT ENABLE_FIU)
message (STATUS "Not using fiu")
return ()
endif ()
set(FIU_DIR "${ClickHouse_SOURCE_DIR}/contrib/libfiu/")
set(FIU_SOURCES
${FIU_DIR}/libfiu/fiu.c
${FIU_DIR}/libfiu/fiu-rc.c
${FIU_DIR}/libfiu/backtrace.c
${FIU_DIR}/libfiu/wtable.c
)
set(FIU_HEADERS "${FIU_DIR}/libfiu")
add_library(_fiu ${FIU_SOURCES})
target_compile_definitions(_fiu PUBLIC DUMMY_BACKTRACE)
target_include_directories(_fiu PUBLIC ${FIU_HEADERS})
add_library(ch_contrib::fiu ALIAS _fiu)


@ -1,10 +0,0 @@
#include <string.h>
int main()
{
// We can't test "char *p = strerror_r()" because that only causes a
// compiler warning when strerror_r returns an integer.
char *buf = 0;
int i = strerror_r(0, buf, 100);
return i;
}


@ -1,46 +0,0 @@
FUNCTION(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS)
IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
SET(PATH ".")
IF (${ARGC} EQUAL 4)
LIST(GET ARGV 3 PATH)
ENDIF ()
ENDIF()
IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
UNSET(${RETURN_VALUE})
FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
FILE(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*)
FOREACH(DIR ${SUBDIRS})
IF (IS_DIRECTORY ${PATH}/${DIR})
IF (NOT "${DIR}" STREQUAL "CMAKEFILES")
FILE(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}")
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
ENDIF()
ENDIF()
ENDFOREACH()
ELSE ()
FILE(GLOB ${RETURN_VALUE} "${PATTERN}")
FOREACH (PATH ${SOURCE_SUBDIRS})
FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
ENDFOREACH(PATH ${SOURCE_SUBDIRS})
ENDIF ()
IF (${FILTER_OUT})
LIST(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT})
ENDIF()
SET(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE)
ENDFUNCTION(AUTO_SOURCES)
FUNCTION(CONTAINS_STRING FILE SEARCH RETURN_VALUE)
FILE(STRINGS ${FILE} FILE_CONTENTS REGEX ".*${SEARCH}.*")
IF (FILE_CONTENTS)
SET(${RETURN_VALUE} TRUE PARENT_SCOPE)
ENDIF()
ENDFUNCTION(CONTAINS_STRING)


@ -1,44 +0,0 @@
OPTION(ENABLE_SSE "enable SSE4.2 builtin function" ON)
INCLUDE (CheckFunctionExists)
CHECK_FUNCTION_EXISTS(dladdr HAVE_DLADDR)
CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
IF(ENABLE_SSE STREQUAL ON AND ARCH_AMD64)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
ENDIF()
IF(NOT TEST_HDFS_PREFIX)
SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." FORCE)
ENDIF(NOT TEST_HDFS_PREFIX)
ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}")
ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS)
ADD_DEFINITIONS(-D_GNU_SOURCE)
ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP)
TRY_COMPILE(STRERROR_R_RETURN_INT
${CMAKE_CURRENT_BINARY_DIR}
"${CMAKE_CURRENT_SOURCE_DIR}/CMake/CMakeTestCompileStrerror.c"
CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
OUTPUT_VARIABLE OUTPUT)
MESSAGE(STATUS "Checking whether strerror_r returns an int")
IF(STRERROR_R_RETURN_INT)
MESSAGE(STATUS "Checking whether strerror_r returns an int -- yes")
ELSE(STRERROR_R_RETURN_INT)
MESSAGE(STATUS "Checking whether strerror_r returns an int -- no")
ENDIF(STRERROR_R_RETURN_INT)
set(HAVE_STEADY_CLOCK 1)
set(HAVE_NESTED_EXCEPTION 1)
SET(HAVE_BOOST_CHRONO 0)
SET(HAVE_BOOST_ATOMIC 0)
SET(HAVE_STD_CHRONO 1)
SET(HAVE_STD_ATOMIC 1)


@ -1,42 +0,0 @@
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
SET(OS_LINUX true CACHE INTERNAL "Linux operating system")
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
SET(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system")
ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux")
MESSAGE(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"")
ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
IF(CMAKE_COMPILER_IS_GNUCXX)
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpfullversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
IF (NOT GCC_COMPILER_VERSION)
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
IF (NOT GCC_COMPILER_VERSION)
MESSAGE(FATAL_ERROR "Cannot get gcc version")
ENDIF (NOT GCC_COMPILER_VERSION)
ENDIF (NOT GCC_COMPILER_VERSION)
STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})
LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH)
LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR)
if (GCC_COMPILER_VERSION_LENGTH GREATER 1)
LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR)
else ()
set (GCC_COMPILER_VERSION_MINOR 0)
endif ()
SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")
MESSAGE(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})")
ELSE(CMAKE_COMPILER_IS_GNUCXX)
EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_OUTPUT)
IF(COMPILER_OUTPUT MATCHES "clang")
SET(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL "using clang as compiler")
MESSAGE(STATUS "checking compiler: CLANG")
ELSE(COMPILER_OUTPUT MATCHES "clang")
MESSAGE(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"")
ENDIF(COMPILER_OUTPUT MATCHES "clang")
ENDIF(CMAKE_COMPILER_IS_GNUCXX)


@ -21,10 +21,17 @@ set(HDFS3_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3")
set(HDFS3_SOURCE_DIR "${HDFS3_ROOT_DIR}/src")
set(HDFS3_COMMON_DIR "${HDFS3_SOURCE_DIR}/common")
# module
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
include(Platform)
include(Options)
ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}")
ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS)
ADD_DEFINITIONS(-D_GNU_SOURCE)
ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP)
ADD_DEFINITIONS(-DHAVE_NANOSLEEP)
set(HAVE_STEADY_CLOCK 1)
set(HAVE_NESTED_EXCEPTION 1)
SET(HAVE_BOOST_CHRONO 0)
SET(HAVE_BOOST_ATOMIC 0)
SET(HAVE_STD_CHRONO 1)
SET(HAVE_STD_ATOMIC 1)
# source
set(PROTO_FILES

@ -1 +1 @@
Subproject commit 2aedf7598a4040b23881dbe05b6afaca25a337ef
Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9

contrib/qpl vendored

@ -1 +1 @@
Subproject commit 0bce2b03423f6fbeb8bce66cc8be0bf558058848
Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679


@ -1,36 +1,5 @@
## The Intel® QPL provides high performance implementations of data processing functions for existing hardware accelerator, and/or software path in case if hardware accelerator is not available.
if (OS_LINUX AND ARCH_AMD64 AND (ENABLE_AVX2 OR ENABLE_AVX512))
option (ENABLE_QPL "Enable Intel® Query Processing Library" ${ENABLE_LIBRARIES})
elseif(ENABLE_QPL)
message (${RECONFIGURE_MESSAGE_LEVEL} "QPL library is only supported on x86_64 arch with avx2/avx512 support")
endif()
if (NOT ENABLE_QPL)
message(STATUS "Not using QPL")
return()
endif()
## QPL has build dependency on libaccel-config. Here is to build libaccel-config which is required by QPL.
## libaccel-config is the utility library for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA).
set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake/idxd-header")
set (SRCS
"${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c"
"${LIBACCEL_SOURCE_DIR}/util/log.c"
"${LIBACCEL_SOURCE_DIR}/util/sysfs.c"
)
add_library(accel-config ${SRCS})
target_compile_options(accel-config PRIVATE "-D_GNU_SOURCE")
target_include_directories(accel-config BEFORE
PRIVATE ${UUID_DIR}
PRIVATE ${LIBACCEL_HEADER_DIR}
PRIVATE ${LIBACCEL_SOURCE_DIR})
## QPL build start here.
set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
@ -53,8 +22,8 @@ GetLibraryVersion("${HEADER_CONTENT}" QPL_VERSION)
message(STATUS "Intel QPL version: ${QPL_VERSION}")
# There are 5 source subdirectories under $QPL_SRC_DIR: isal, c_api, core-sw, middle-layer, c_api.
# Generate 7 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking with 7 library targets.
# Generate 8 library targets: middle_layer_lib, isal, isal_asm, qplcore_px, qplcore_avx512, qplcore_sw_dispatcher, core_iaa, middle_layer_lib.
# Output ch_contrib::qpl by linking with 8 library targets.
include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
@ -119,31 +88,36 @@ set(ISAL_ASM_SRC ${QPL_SRC_DIR}/isal/igzip/igzip_body.asm
add_library(isal OBJECT ${ISAL_C_SRC})
add_library(isal_asm OBJECT ${ISAL_ASM_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:isal>)
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:isal_asm>)
# Setting external and internal interfaces for ISA-L library
target_include_directories(isal
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
PRIVATE ${QPL_SRC_DIR}/isal/include
PUBLIC ${QPL_SRC_DIR}/isal/igzip)
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/isal/include>
PRIVATE ${QPL_SRC_DIR}/isal/include
PUBLIC ${QPL_SRC_DIR}/isal/igzip)
set_target_properties(isal PROPERTIES
CXX_STANDARD 11
C_STANDARD 99)
target_compile_options(isal PRIVATE
"$<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}>"
"$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/"
PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/"
PRIVATE "-I${QPL_SRC_DIR}/isal/crc/"
PRIVATE "-DHAVE_AS_KNOWS_AVX512"
PRIVATE "-DAS_FEATURE_LEVEL=10"
PRIVATE "-DQPL_LIB")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
if (ENABLE_AVX512)
target_compile_options(isal_asm PRIVATE "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
else()
target_compile_options(isal_asm PRIVATE "-DAS_FEATURE_LEVEL=5")
endif()
# "-fno-sanitize=undefined" must be removed from COMPILE_OPTIONS here.
# Otherwise the nasm compiler fails because it does not recognize "-fno-sanitize=undefined"
if (SANITIZE STREQUAL "undefined")
@ -157,74 +131,97 @@ target_compile_definitions(isal PUBLIC
NDEBUG)
# [SUBDIR]core-sw
# Two libraries, qplcore_avx512/qplcore_px, for SW fallback will be created, implemented with AVX512 and non-AVX512 instructions respectively.
# Create a set of libraries corresponding to the supported platforms for SW fallback, implemented with AVX512 and non-AVX512 instructions respectively.
# The upper-level QPL API checks the SIMD capabilities of the target system at runtime and decides whether to call the AVX512 or the non-AVX512 functions.
# Hence, we don't need to put qplcore_avx512 under an ENABLE_AVX512 CMake switch here.
# Actually, if we did that, undefined-symbol errors would occur because both the AVX512 and non-AVX512 functions are referenced by the QPL API.
# PLATFORM=2 means AVX512 implementation; PLATFORM=0 means non-AVX512 implementation.
# Hence, we don't need an ENABLE_AVX512 CMake switch here.
# Find Core Sources
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
get_list_of_supported_optimizations(PLATFORMS_LIST)
file(GLOB DATA_SOURCES
${QPL_SRC_DIR}/core-sw/src/data/*.c)
foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST)
# Find Core Sources
file(GLOB SOURCES
${QPL_SRC_DIR}/core-sw/src/checksums/*.c
${QPL_SRC_DIR}/core-sw/src/filtering/*.c
${QPL_SRC_DIR}/core-sw/src/other/*.c
${QPL_SRC_DIR}/core-sw/src/compression/*.c)
# Create avx512 library
add_library(qplcore_avx512 OBJECT ${SOURCES})
file(GLOB DATA_SOURCES
${QPL_SRC_DIR}/core-sw/src/data/*.c)
target_compile_definitions(qplcore_avx512 PRIVATE PLATFORM=2)
# Create library
add_library(qplcore_${PLATFORM_ID} OBJECT ${SOURCES})
target_include_directories(qplcore_avx512
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:qplcore_${PLATFORM_ID}>)
set_target_properties(qplcore_avx512 PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_include_directories(qplcore_${PLATFORM_ID}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal)
set_target_properties(qplcore_${PLATFORM_ID} PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
target_compile_options(qplcore_avx512
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE -march=skylake-avx512
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_compile_options(qplcore_${PLATFORM_ID}
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
# Set specific compiler options and/or definitions based on a platform
if (${PLATFORM_ID} MATCHES "avx512")
target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=2)
target_compile_options(qplcore_${PLATFORM_ID} PRIVATE -march=skylake-avx512)
else() # Create default px library
target_compile_definitions(qplcore_${PLATFORM_ID} PRIVATE PLATFORM=0)
endif()
target_compile_definitions(qplcore_avx512 PUBLIC QPL_BADARG_CHECK)
target_link_libraries(qplcore_${PLATFORM_ID} isal)
endforeach()
#
# Create px library
# Create dispatcher between platforms and auto-generated wrappers
#
#set(CMAKE_INCLUDE_CURRENT_DIR ON)
file(GLOB SW_DISPATCHER_SOURCES ${QPL_SRC_DIR}/core-sw/dispatcher/*.cpp)
# Create library
add_library(qplcore_px OBJECT ${SOURCES} ${DATA_SOURCES})
add_library(qplcore_sw_dispatcher OBJECT ${SW_DISPATCHER_SOURCES})
target_compile_definitions(qplcore_px PRIVATE PLATFORM=0)
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:qplcore_sw_dispatcher>)
target_include_directories(qplcore_px
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/src/compression/include>
PRIVATE $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>)
target_include_directories(qplcore_sw_dispatcher
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-sw/dispatcher>)
set_target_properties(qplcore_px PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
# Generate kernel wrappers
generate_unpack_kernel_arrays(${QPL_BINARY_DIR} "${PLATFORMS_LIST}")
target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS})
foreach(PLATFORM_ID IN LISTS PLATFORMS_LIST)
file(GLOB GENERATED_${PLATFORM_ID}_TABLES_SRC ${QPL_BINARY_DIR}/generated/${PLATFORM_ID}_*.cpp)
target_compile_options(qplcore_px
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
PRIVATE "$<$<CONFIG:Debug>:>"
PRIVATE "$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>")
target_sources(qplcore_sw_dispatcher PRIVATE ${GENERATED_${PLATFORM_ID}_TABLES_SRC})
target_compile_definitions(qplcore_px PUBLIC QPL_BADARG_CHECK)
# Set specific compiler options and/or definitions based on a platform
if (${PLATFORM_ID} MATCHES "avx512")
set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
else()
set_source_files_properties(${GENERATED_${PLATFORM_ID}_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
endif()
target_include_directories(qplcore_sw_dispatcher
PUBLIC $<TARGET_PROPERTY:qplcore_${PLATFORM_ID},INTERFACE_INCLUDE_DIRECTORIES>)
endforeach()
set_target_properties(qplcore_sw_dispatcher PROPERTIES CXX_STANDARD 17)
# w/a for build compatibility with ISAL codebase
target_compile_definitions(qplcore_sw_dispatcher PUBLIC -DQPL_LIB)
target_compile_options(qplcore_sw_dispatcher
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
$<$<CONFIG:Release>:-O3;-D_FORTIFY_SOURCE=2>>
PRIVATE $<$<COMPILE_LANG_AND_ID:CXX,GNU>:${QPL_LINUX_TOOLCHAIN_CPP_EMBEDDED_FLAGS}>)
# [SUBDIR]core-iaa
file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
@ -238,13 +235,20 @@ file(GLOB HW_PATH_SRC ${QPL_SRC_DIR}/core-iaa/sources/aecs/*.c
# Create library
add_library(core_iaa OBJECT ${HW_PATH_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:core_iaa>)
target_include_directories(core_iaa
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/include>
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/core-iaa/sources/include>
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include> # status.h in own_checkers.h
PRIVATE $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/sources/c_api> # own_checkers.h
PRIVATE $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>)
PRIVATE $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>)
set_target_properties(core_iaa PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(core_iaa
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
@ -254,11 +258,10 @@ target_compile_features(core_iaa PRIVATE c_std_11)
target_compile_definitions(core_iaa PRIVATE QPL_BADARG_CHECK
PRIVATE $<$<BOOL:${BLOCK_ON_FAULT}>: BLOCK_ON_FAULT_ENABLED>
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>)
PRIVATE $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)
# [SUBDIR]middle-layer
generate_unpack_kernel_arrays(${QPL_BINARY_DIR})
file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/analytics/*.cpp
${QPL_SRC_DIR}/middle-layer/c_wrapper/*.cpp
@ -273,14 +276,12 @@ file(GLOB MIDDLE_LAYER_SRC
${QPL_SRC_DIR}/middle-layer/inflate/*.cpp
${QPL_SRC_DIR}/core-iaa/sources/accelerator/*.cpp) # todo
file(GLOB GENERATED_PX_TABLES_SRC ${QPL_BINARY_DIR}/generated/px_*.cpp)
file(GLOB GENERATED_AVX512_TABLES_SRC ${QPL_BINARY_DIR}/generated/avx512_*.cpp)
add_library(middle_layer_lib OBJECT
${GENERATED_PX_TABLES_SRC}
${GENERATED_AVX512_TABLES_SRC}
${MIDDLE_LAYER_SRC})
set_property(GLOBAL APPEND PROPERTY QPL_LIB_DEPS
$<TARGET_OBJECTS:middle_layer_lib>)
target_compile_options(middle_layer_lib
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@ -291,17 +292,16 @@ target_compile_definitions(middle_layer_lib
PUBLIC QPL_VERSION="${QPL_VERSION}"
PUBLIC $<$<BOOL:${LOG_HW_INIT}>:LOG_HW_INIT>
PUBLIC $<$<BOOL:${EFFICIENT_WAIT}>:QPL_EFFICIENT_WAIT>
PUBLIC QPL_BADARG_CHECK)
PUBLIC QPL_BADARG_CHECK
PUBLIC $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>)
set_source_files_properties(${GENERATED_PX_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=0)
set_source_files_properties(${GENERATED_AVX512_TABLES_SRC} PROPERTIES COMPILE_DEFINITIONS PLATFORM=2)
set_target_properties(middle_layer_lib PROPERTIES CXX_STANDARD 17)
target_include_directories(middle_layer_lib
PRIVATE ${UUID_DIR}
PUBLIC $<BUILD_INTERFACE:${QPL_SRC_DIR}/middle-layer>
PUBLIC $<TARGET_PROPERTY:_qpl,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_px,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_avx512,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:qplcore_sw_dispatcher,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:isal,INTERFACE_INCLUDE_DIRECTORIES>
PUBLIC $<TARGET_PROPERTY:core_iaa,INTERFACE_INCLUDE_DIRECTORIES>)
@ -312,20 +312,19 @@ file(GLOB_RECURSE QPL_C_API_SRC
${QPL_SRC_DIR}/c_api/*.c
${QPL_SRC_DIR}/c_api/*.cpp)
add_library(_qpl STATIC ${QPL_C_API_SRC}
$<TARGET_OBJECTS:middle_layer_lib>
$<TARGET_OBJECTS:isal>
$<TARGET_OBJECTS:isal_asm>
$<TARGET_OBJECTS:qplcore_px>
$<TARGET_OBJECTS:qplcore_avx512>
$<TARGET_OBJECTS:core_iaa>
$<TARGET_OBJECTS:middle_layer_lib>)
get_property(LIB_DEPS GLOBAL PROPERTY QPL_LIB_DEPS)
add_library(_qpl STATIC ${QPL_C_API_SRC} ${LIB_DEPS})
target_include_directories(_qpl
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/>
PUBLIC $<BUILD_INTERFACE:${QPL_PROJECT_DIR}/include/> $<INSTALL_INTERFACE:include>
PRIVATE $<TARGET_PROPERTY:middle_layer_lib,INTERFACE_INCLUDE_DIRECTORIES>
PRIVATE $<BUILD_INTERFACE:${QPL_SRC_DIR}/c_api>)
set_target_properties(_qpl PROPERTIES
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>
CXX_STANDARD 17)
target_compile_options(_qpl
PRIVATE $<$<C_COMPILER_ID:GNU>:${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS};
${QPL_LINUX_TOOLCHAIN_DYNAMIC_LIBRARY_FLAGS};
@ -335,14 +334,15 @@ target_compile_options(_qpl
target_compile_definitions(_qpl
PRIVATE -DQPL_LIB
PRIVATE -DQPL_BADARG_CHECK
PRIVATE $<$<BOOL:${DYNAMIC_LOADING_LIBACCEL_CONFIG}>:DYNAMIC_LOADING_LIBACCEL_CONFIG>
PUBLIC -DENABLE_QPL_COMPRESSION)
target_link_libraries(_qpl
PRIVATE accel-config
PRIVATE ${CMAKE_DL_LIBS})
PRIVATE ch_contrib::accel-config
PRIVATE ch_contrib::isal)
add_library (ch_contrib::qpl ALIAS _qpl)
target_include_directories(_qpl SYSTEM BEFORE
PUBLIC "${QPL_PROJECT_DIR}/include"
PUBLIC "${LIBACCEL_SOURCE_DIR}/accfg"
PUBLIC ${UUID_DIR})
add_library (ch_contrib::qpl ALIAS _qpl)

contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30
Subproject commit 1f4d448314e581473103187765e4c949d01b4259


@ -1,6 +1,6 @@
## The bare minimum ClickHouse Docker image.
It is intented as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel.
It is intended as a showcase to check the number of implicit dependencies ClickHouse has on the OS, in addition to the OS kernel.
Example usage:


@ -362,17 +362,16 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--compiler",
choices=(
"clang-15",
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64-compat",
"clang-15-freebsd",
"gcc-11",
"clang-16",
"clang-16-darwin",
"clang-16-darwin-aarch64",
"clang-16-aarch64",
"clang-16-aarch64-v80compat",
"clang-16-ppc64le",
"clang-16-amd64-compat",
"clang-16-freebsd",
),
default="clang-15",
default="clang-16",
help="a compiler to use",
)
parser.add_argument(


@ -10,53 +10,21 @@ RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
esac
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone https://github.com/ClickHouse/woboq_codebrowser \
&& cd woboq_codebrowser \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& ninja \
&& cd .. \
&& rm -rf woboq_codebrowser
# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \
&& cd /woboq_codebrowser \
&& cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \
&& ninja
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
ENV STATIC_DATA=/woboq_codebrowser/data
ENV SOURCE_DIRECTORY=/repo_folder
ENV BUILD_DIRECTORY=/build
ENV HTML_RESULT_DIRECTORY=$BUILD_DIRECTORY/html_report
ENV SHA=nosha
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
$CODEINDEX $HTML_RESULT_DIRECTORY -d "$DATA" | ts '%Y-%m-%d %H:%M:%S' && \
mv $HTML_RESULT_DIRECTORY /test_output
COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"]


@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -x -e
STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data}
SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build}
BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build}
OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output}
HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report}
SHA=${SHA:-nosha}
DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data}
nproc=$(($(nproc) + 2)) # increase parallelism
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY"
cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 "${CMAKE_FLAGS[@]}"
mkdir -p "$HTML_RESULT_DIRECTORY"
echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log'
/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \
-o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \
-d "$DATA" \
|& ts '%Y-%m-%d %H:%M:%S' \
| tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \
| grep --line-buffered -v ':[0-9]* Error: '
cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/"
/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \
-d "$DATA" |& ts '%Y-%m-%d %H:%M:%S'


@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
stage=${stage:-}
# Compiler version, normally set by Dockerfile
export LLVM_VERSION=${LLVM_VERSION:-13}
export LLVM_VERSION=${LLVM_VERSION:-16}
# A variable to pass additional flags to CMake.
# Here we explicitly default it to nothing so that bash doesn't complain about
@ -147,6 +147,7 @@ function clone_submodules
contrib/xxHash
contrib/simdjson
contrib/liburing
contrib/libfiu
)
git submodule sync


@ -15,7 +15,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function git_clone_with_retry


@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@ -20,31 +20,27 @@ install_packages package_folder
# Thread Fuzzer makes it possible to check more permutations of possible thread scheduling
# and find more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
fi
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01
export ZOOKEEPER_FAULT_INJECTION=1
# Initial run without S3 to create system.*_log on local file system to make it


@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
# 15.0.2
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16
RUN apt-get update \
&& apt-get install \
@ -52,6 +52,7 @@ RUN apt-get update \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
libclang-${LLVM_VERSION}-dev \
moreutils \
nasm \
ninja-build \

View File

@ -59,12 +59,16 @@ def process_test_log(log_path, broken_tests):
total += 1
if TIMEOUT_SIGN in line:
failed += 1
test_results.append((test_name, "Timeout", test_time, []))
if test_name in broken_tests:
success += 1
test_results.append((test_name, "BROKEN", test_time, []))
else:
failed += 1
test_results.append((test_name, "Timeout", test_time, []))
elif FAIL_SIGN in line:
if test_name in broken_tests:
success += 1
test_results.append((test_name, "OK", test_time, []))
test_results.append((test_name, "BROKEN", test_time, []))
else:
failed += 1
test_results.append((test_name, "FAIL", test_time, []))
@ -76,11 +80,11 @@ def process_test_log(log_path, broken_tests):
test_results.append((test_name, "SKIPPED", test_time, []))
else:
if OK_SIGN in line and test_name in broken_tests:
failed += 1
skipped += 1
test_results.append(
(
test_name,
"SKIPPED",
"NOT_FAILED",
test_time,
["This test passed. Update broken_tests.txt.\n"],
)

View File

@ -98,7 +98,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele
## Interpreters {#interpreters}
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.

View File

@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If
The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-15
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
``` bash
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-15 main" >> /etc/apt/sources.list
sudo apt-get install clang-15
sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list
sudo apt-get install clang-16
```
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1
cd ClickHouse
mkdir build-darwin
cd build-darwin
CC=clang-15 CXX=clang++-15 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake ..
ninja
```

View File

@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first.
## Install Clang-13
## Install Clang-16
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do
```
@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
``` bash
cd ClickHouse
mkdir build-riscv64
CC=clang-14 CXX=clang++-14 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
ninja -C build-riscv64
```

View File

@ -47,8 +47,8 @@ GCC as a compiler is not supported
To build with a specific Clang version:
``` bash
export CC=clang-15
export CXX=clang++-15
export CC=clang-16
export CXX=clang++-16
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}

View File

@ -4,20 +4,22 @@ sidebar_position: 73
sidebar_label: Building and Benchmarking DEFLATE_QPL
description: How to build ClickHouse and run benchmark with DEFLATE_QPL Codec
---
# Build ClickHouse with DEFLATE_QPL
- Make sure your target machine meet the QPL required [Prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse, depending on the capabilities of your target machine:
- Make sure your target machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse:
``` bash
cmake -DENABLE_AVX2=1 -DENABLE_QPL=1 ..
```
or
``` bash
cmake -DENABLE_AVX512=1 -DENABLE_QPL=1 ..
cmake -DENABLE_QPL=1 ..
```
- For generic requirements, please refer to the generic ClickHouse [build instructions](/docs/en/development/build.md)
# Run Benchmark with DEFLATE_QPL
## Files list
The folder `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/ClickHouse/tree/master/contrib/qpl-cmake) gives an example of running the benchmark with Python scripts:
`client_scripts` contains Python scripts for running typical benchmarks, for example:
@ -28,48 +30,60 @@ The folders `benchmark_sample` under [qpl-cmake](https://github.com/ClickHouse/C
`database_files` means it will store database files according to lz4/deflate/zstd codec.
## Run benchmark automatically for Star Schema:
``` bash
$ cd ./benchmark_sample/client_scripts
$ sh run_ssb.sh
```
After it completes, please check all the results in this folder: `./output/`
If you run into a failure, please run the benchmark manually as described in the sections below.
## Definition
[CLICKHOUSE_EXE] means the path of clickhouse executable program.
## Environment
- CPU: Sapphire Rapids
- OS Requirements refer to [System Requirements for QPL](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#system-requirements)
- IAA Setup refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration)
- Install python modules:
``` bash
pip3 install clickhouse_driver numpy
```
[Self-check for IAA]
``` bash
$ accel-config list | grep -P 'iax|state'
```
Expected output like this:
``` bash
"dev":"iax1",
"state":"enabled",
"state":"enabled",
```
If you see no output, it means IAA is not ready to work. Please check the IAA setup again.
## Generate raw data
``` bash
$ cd ./benchmark_sample
$ mkdir rawdata_dir && cd rawdata_dir
```
Use [`dbgen`](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema) to generate 100 million rows of data with the parameter:
-s 20
Files like `*.tbl` are expected to be output under `./benchmark_sample/rawdata_dir/ssb-dbgen`:
## Database setup
Set up database with LZ4 codec
``` bash
@ -77,6 +91,7 @@ $ cd ./database_dir/lz4
$ [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
$ [CLICKHOUSE_EXE] client
```
Here you should see the message `Connected to ClickHouse server` in the console, which means the client successfully set up a connection to the server.
Complete the three steps below, as mentioned in [Star Schema Benchmark](https://clickhouse.com/docs/en/getting-started/example-datasets/star-schema)
@ -114,6 +129,7 @@ You are expected to see below output:
└───────────┘
```
[Self-check for IAA Deflate codec]
The first time you execute an insertion or query from the client, the ClickHouse server console is expected to print this log:
```text
Hardware-assisted DeflateQpl codec is ready!
@ -125,17 +141,21 @@ Initialization of hardware-assisted DeflateQpl codec failed
That means the IAA devices are not ready; you need to check the IAA setup again.
## Benchmark with single instance
- Before starting the benchmark, please disable C6 and set the CPU frequency governor to `performance`
``` bash
$ cpupower idle-set -d 3
$ cpupower frequency-set -g performance
```
- To eliminate the impact of memory bound across sockets, we use `numactl` to bind the server to one socket and the client to another socket.
- Single instance means a single server connected to a single client
Now run benchmark for LZ4/Deflate/ZSTD respectively:
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
@ -144,13 +164,16 @@ $ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > lz4.log
```
IAA deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 1 > deflate.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -m 0 -N 0 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
@ -170,6 +193,7 @@ How to check performance metrics:
We focus on QPS; please search for the keyword `QPS_Final` and collect statistics.
## Benchmark with multi-instances
- To reduce the impact of memory bound with too many threads, we recommend running the benchmark with multiple instances.
- Multi-instance means multiple (2 or 4) servers connected to their respective clients.
- The cores of one socket need to be divided equally and assigned to the servers respectively.
@ -182,35 +206,46 @@ There are 2 differences:
Here we assume there are 60 cores per socket and take 2 instances for example.
Launch server for first instance
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
```
[Launch server for second instance]
LZ4:
``` bash
$ cd ./database_dir && mkdir lz4_s2 && cd lz4_s2
$ cp ../../server_config/config_lz4_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/null&
```
ZSTD:
``` bash
$ cd ./database_dir && mkdir zstd_s2 && cd zstd_s2
$ cp ../../server_config/config_zstd_s2.xml ./
$ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/null&
```
IAA Deflate:
``` bash
$ cd ./database_dir && mkdir deflate_s2 && cd deflate_s2
$ cp ../../server_config/config_deflate_s2.xml ./
@ -220,19 +255,24 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d
Creating tables && Inserting data for second instance
Creating tables:
``` bash
$ [CLICKHOUSE_EXE] client -m --port=9001
```
Inserting data:
``` bash
$ [CLICKHOUSE_EXE] client --query "INSERT INTO [TBL_FILE_NAME] FORMAT CSV" < [TBL_FILE_NAME].tbl --port=9001
```
- `[TBL_FILE_NAME]` represents the name of a file matching the pattern `*.tbl` under `./benchmark_sample/rawdata_dir/ssb-dbgen`.
- `--port=9001` stands for the assigned port of the server instance, which is also defined in config_lz4_s2.xml/config_zstd_s2.xml/config_deflate_s2.xml. For even more instances, replace it with 9002/9003, which stand for the s3/s4 instances respectively. If you don't assign it, the port defaults to 9000, which is already used by the first instance.
Benchmarking with 2 instances
LZ4:
``` bash
$ cd ./database_dir/lz4
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_lz4.xml >&/dev/null&
@ -241,7 +281,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_lz4_s2.xml >&/dev/n
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > lz4_2insts.log
```
ZSTD:
``` bash
$ cd ./database_dir/zstd
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_zstd.xml >&/dev/null&
@ -250,7 +292,9 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_zstd_s2.xml >&/dev/
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > zstd_2insts.log
```
IAA deflate
``` bash
$ cd ./database_dir/deflate
$ numactl -C 0-29,120-149 [CLICKHOUSE_EXE] server -C config_deflate.xml >&/dev/null&
@ -259,9 +303,11 @@ $ numactl -C 30-59,150-179 [CLICKHOUSE_EXE] server -C config_deflate_s2.xml >&/d
$ cd ./client_scripts
$ numactl -m 1 -N 1 python3 client_stressing_test.py queries_ssb.sql 2 > deflate_2insts.log
```
Here the last argument `2` of client_stressing_test.py stands for the number of instances. For more instances, replace it with 3 or 4. This script supports up to 4 instances.
Now three logs should be output as expected:
``` text
lz4_2insts.log
deflate_2insts.log
@ -275,7 +321,9 @@ Benchmark setup for 4 instances is similar with 2 instances above.
We recommend using the 2-instance benchmark data as the final report for review.
## Tips
Each time before launching a new ClickHouse server, please make sure no background ClickHouse process is running; check and kill the old one:
``` bash
$ ps -aux| grep clickhouse
$ kill -9 [PID]

View File

@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
### Report Details
- **Compiler**: `clang-15`, optionally with the name of a target platform
- **Compiler**: `clang-16`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

View File

@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor
export CC=clang CXX=clang++
cmake ..
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-15 CXX=clang++-15`. The clang version will be in the script output.
If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building.

View File

@ -148,7 +148,7 @@ Valid values:
- `all` (default) - a universal rule, used when `rule_type` is omitted.
- `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression.
- `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression.
- `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
- `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
- `regexp` A pattern for the metric name (a regular or DSL).
- `age` The minimum age of the data in seconds.
- `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).

View File

@ -727,7 +727,7 @@ TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPR
SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
```
Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value accross the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
``` sql
CREATE TABLE table_for_aggregation

View File

@ -90,15 +90,17 @@ SELECT * FROM mySecondReplacingMT FINAL;
### is_deleted
`is_deleted` — Name of the column with the type of row: `1` is a “deleted“ row, `0` is a “state“ row.
`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row.
Column data type — `Int8`.
Column data type — `UInt8`.
Can only be enabled when `ver` is used.
The row is deleted when use the `OPTIMIZE ... FINAL CLEANUP`, or `OPTIMIZE ... FINAL` if the engine settings `clean_deleted_rows` has been set to `Always`.
No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted one is the one kept.
:::note
`is_deleted` can only be enabled when `ver` is used.
The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`.
No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept.
:::
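As a rough illustration (not part of the original reference, and assuming a version where the two-parameter form of `ReplacingMergeTree` is available; the table and column names are made up):

```sql
-- Sketch: the second engine parameter names the is_deleted column.
CREATE TABLE user_state
(
    key UInt64,
    value String,
    event_time DateTime,  -- used as the version column (ver)
    is_deleted UInt8      -- 1 marks a "deleted" row, 0 a "state" row
)
ENGINE = ReplacingMergeTree(event_time, is_deleted)
ORDER BY key;

-- A state row, then a deletion marker with a higher version.
INSERT INTO user_state VALUES (1, 'active', '2023-04-11 12:00:00', 0);
INSERT INTO user_state VALUES (1, 'active', '2023-04-11 12:05:00', 1);

-- Merging with CLEANUP removes the row marked as deleted.
OPTIMIZE TABLE user_state FINAL CLEANUP;
```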
## Query clauses

View File

@ -242,7 +242,7 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
To learn more about how distibuted `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
## Virtual Columns {#virtual-columns}

View File

@ -120,7 +120,7 @@ Some comments about the `sentiment` table:
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
Here is the defintion of `sentiment.py`:
Here is the definition of `sentiment.py`:
```python
#!/usr/local/bin/python3.9

View File

@ -14,7 +14,7 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`
- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
If this argument is not specified, ClickHouse detectes the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
- `CompressionMethod` indicates whether the HTTP body should be compressed. If the compression is enabled, the HTTP packets sent by the URL engine contain the 'Content-Encoding' header to indicate which compression method is used.

View File

@ -308,7 +308,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)
:::note
If **ClickHouse Connect** is not one of your options, then you will need to install it. The comand is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
:::
#### Add your connection details:

View File

@ -261,5 +261,5 @@ The results look like
```
:::note
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the datset is no longer updated as of September 15, 2022.
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
:::

View File

@ -143,8 +143,9 @@ You can also download and install packages manually from [here](https://packages
#### Install standalone ClickHouse Keeper
:::tip
If you are going to run ClickHouse Keeper on the same server as ClickHouse server you
do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers.
In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes.
In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server.
This command is only needed on standalone ClickHouse Keeper servers.
:::
```bash
@ -184,6 +185,15 @@ sudo yum install -y yum-utils
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
```
For systems with `zypper` package manager (openSUSE, SLES):
``` bash
sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
```
Later any `yum install` can be replaced by `zypper install`. To specify a particular version, add `-$VERSION` to the end of the package name, e.g. `clickhouse-client-22.2.2.22`.
#### Install ClickHouse server and client
```bash
@ -202,8 +212,9 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
#### Install standalone ClickHouse Keeper
:::tip
If you are going to run ClickHouse Keeper on the same server as ClickHouse server you
do not need to install ClickHouse Keeper as it is included with ClickHouse server. This command is only needed on standalone ClickHouse Keeper servers.
In production environment we [strongly recommend](/docs/en/operations/tips.md#L143-L144) running ClickHouse Keeper on dedicated nodes.
In test environments, if you decide to run ClickHouse Server and ClickHouse Keeper on the same server, you do not need to install ClickHouse Keeper as it is included with ClickHouse server.
This command is only needed on standalone ClickHouse Keeper servers.
:::
```bash

View File

@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should
```
:::note ALL
`ALL` is only applicable to the `RESTORE` command.
`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of ClickHouse.
:::
## Background

View File

@ -23,6 +23,6 @@ Additional cache types:
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
- Schema inference cache.
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
- [(Experimental) Query cache](query-cache.md).
- [Query cache](query-cache.md).
To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.
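For illustration only, two of the statements referenced above look like this:

```sql
-- Drop the mark cache and the uncompressed cache on the current server.
SYSTEM DROP MARK CACHE;
SYSTEM DROP UNCOMPRESSED CACHE;
```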

View File

@ -1,10 +1,10 @@
---
slug: /en/operations/query-cache
sidebar_position: 65
sidebar_label: Query Cache [experimental]
sidebar_label: Query Cache
---
# Query Cache [experimental]
# Query Cache
The query cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the cache.
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
@ -29,21 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::note
The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
processing) where wrong results are returned.
:::
## Configuration Settings and Usage
As long as the result cache is experimental it must be activated using the following configuration setting:
```sql
SET allow_experimental_query_cache = true;
```
Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
of the current session should utilize the query cache. For example, the first execution of query
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
current session should utilize the query cache. For example, the first execution of query
```sql
SELECT some_expensive_calculation(column_1, column_2)

View File

@ -208,7 +208,7 @@ Default value: `3600` (1 hour).
## database_catalog_unused_dir_rm_timeout_sec {#database_catalog_unused_dir_rm_timeout_sec}
Parameter of a task that cleans up garbage from `store/` directory.
If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
If some subdirectory is not used by clickhouse-server and it was previously "hidden"
(see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
and this directory was not modified for last
`database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
@ -1045,7 +1045,7 @@ Default value: `0`.
## background_pool_size {#background_pool_size}
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting is also could be applied at server startup from the `default` profile configuration for backward compatibility at the ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
Sets the number of threads performing background merges and mutations for tables with MergeTree engines. This setting can also be applied at server startup from the `default` profile configuration for backward compatibility at ClickHouse server start. You can only increase the number of threads at runtime. To lower the number of threads you have to restart the server. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
Before changing it, please also take a look at related MergeTree settings, such as [number_of_free_entries_in_pool_to_lower_max_size_of_merge](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-lower-max-size-of-merge) and [number_of_free_entries_in_pool_to_execute_mutation](../../operations/settings/merge-tree-settings.md#number-of-free-entries-in-pool-to-execute-mutation).
@ -1063,8 +1063,8 @@ Default value: 16.
## background_merges_mutations_concurrency_ratio {#background_merges_mutations_concurrency_ratio}
Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example if the ratio equals to 2 and
`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operation could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server.
Sets a ratio between the number of threads and the number of background merges and mutations that can be executed concurrently. For example, if the ratio equals 2 and
`background_pool_size` is set to 16 then ClickHouse can execute 32 background merges concurrently. This is possible, because background operations could be suspended and postponed. This is needed to give small merges more execution priority. You can only increase this ratio at runtime. To lower it you have to restart the server.
The same as for `background_pool_size` setting `background_merges_mutations_concurrency_ratio` could be applied from the `default` profile for backward compatibility.
Possible values:
@ -1079,6 +1079,33 @@ Default value: 2.
<background_merges_mutations_concurrency_ratio>3</background_merges_mutations_concurrency_ratio>
```
## merges_mutations_memory_usage_soft_limit {#merges_mutations_memory_usage_soft_limit}
Sets the limit on how much RAM is allowed to use for performing merge and mutation operations.
Zero means unlimited.
If ClickHouse reaches this limit, it won't schedule any new background merge or mutation operations but will continue to execute already scheduled tasks.
Possible values:
- Any positive integer.
**Example**
```xml
<merges_mutations_memory_usage_soft_limit>0</merges_mutations_memory_usage_soft_limit>
```
## merges_mutations_memory_usage_to_ram_ratio {#merges_mutations_memory_usage_to_ram_ratio}
The default `merges_mutations_memory_usage_soft_limit` value is calculated as `memory_amount * merges_mutations_memory_usage_to_ram_ratio`.
Default value: `0.5`.
**See also**
- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [merges_mutations_memory_usage_soft_limit](#merges_mutations_memory_usage_soft_limit)
## background_merges_mutations_scheduling_policy {#background_merges_mutations_scheduling_policy}
Algorithm used to select next merge or mutation to be executed by background thread pool. Policy may be changed at runtime without server restart.

View File

@ -38,6 +38,10 @@ Structure of the `users` section:
</table_name>
</database_name>
</databases>
<grants>
<query>GRANT SELECT ON system.*</query>
</grants>
</user_name>
<!-- Other users settings -->
</users>
@ -86,6 +90,28 @@ Possible values:
Default value: 0.
### grants {#grants-user-setting}
This setting allows granting any rights to the selected user.
Each element of the list should be a `GRANT` query without any grantees specified.
Example:
```xml
<user1>
<grants>
<query>GRANT SHOW ON *.*</query>
<query>GRANT CREATE ON *.* WITH GRANT OPTION</query>
<query>GRANT SELECT ON system.*</query>
</grants>
</user1>
```
This setting can't be specified at the same time with
`dictionaries`, `access_management`, `named_collection_control`, `show_named_collections_secrets`
and `allow_databases` settings.
### user_name/networks {#user-namenetworks}
List of networks from which the user can connect to the ClickHouse server.

View File

@ -608,6 +608,17 @@ See also:
- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)
## max_rows_in_set_to_optimize_join
Maximal size of the set to filter joined tables by each other's row sets before joining.
Possible values:
- 0 — Disable.
- Any positive integer.
Default value: 100000.
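A minimal usage sketch (the joined tables are hypothetical):

```sql
-- Disable the pre-join filtering optimization for a single query.
SELECT *
FROM orders AS o
INNER JOIN customers AS c ON o.customer_id = c.id
SETTINGS max_rows_in_set_to_optimize_join = 0;
```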
## temporary_files_codec {#temporary_files_codec}
Sets compression codec for temporary files used in sorting and joining operations on disk.
@ -1027,7 +1038,7 @@ Timeout to close idle TCP connections after specified number of seconds.
Possible values:
- Positive integer (0 - close immediatly, after 0 seconds).
- Positive integer (0 - close immediately, after 0 seconds).
Default value: 3600.
@ -1125,6 +1136,12 @@ If unsuccessful, several attempts are made to connect to various replicas.
Default value: 1000.
## connect_timeout_with_failover_secure_ms
Connection timeout for selecting first healthy replica (for secure connections)
Default value: 1000.
## connection_pool_max_wait_ms {#connection-pool-max-wait-ms}
The wait time in milliseconds for a connection when the connection pool is full.
@ -1410,8 +1427,8 @@ and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in mor
Possible values:
- 0 - Yes
- 1 - No
- 0 - Disabled
- 1 - Enabled
Default value: `0`.
@ -1514,7 +1531,7 @@ Default value: `0`.
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited.
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
Possible values:
@ -1524,7 +1541,7 @@ Default value: 0 (no restriction).
## query_cache_max_entries {#query-cache-max-entries}
The maximum number of query results the current user may store in the query cache. 0 means unlimited.
The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited.
Possible values:
@ -1630,7 +1647,7 @@ For not replicated tables see [non_replicated_deduplication_window](merge-tree-s
### async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts.
Enables or disables asynchronous inserts. Note that deduplication is disabled by default, see [async_insert_deduplicate](#async-insert-deduplicate).
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
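A minimal sketch of enabling it for a session (the target table is hypothetical; `wait_for_async_insert` is a related setting shown here only as an assumption):

```sql
-- Enable asynchronous inserts and wait until the batch is flushed
-- before the INSERT statement returns.
SET async_insert = 1;
SET wait_for_async_insert = 1;

INSERT INTO events (ts, message) VALUES (now(), 'small frequent insert');
```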
@ -1733,7 +1750,7 @@ Possible values:
Default value: 1.
By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_isnert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
By default, async inserts made into replicated tables with the `INSERT` statement and [async_insert](#async-insert) enabled are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)).
We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication.
This function does not work for non-replicated tables.
@ -1939,8 +1956,8 @@ Do not merge aggregation states from different servers for distributed query pro
Possible values:
- `0` — Disabled (final query processing is done on the initiator node).
- `1` - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
- `1` - Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard; the initiator only proxies the data); can be used when it is certain that there are different keys on different shards.
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (which is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
Default value: `0`
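A usage sketch against a hypothetical `Distributed` table:

```sql
-- Each shard returns its own, unmerged aggregation result.
SELECT user_id, count() AS c
FROM distributed_events
GROUP BY user_id
SETTINGS distributed_group_by_no_merge = 1;
```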
@ -3562,7 +3579,7 @@ Default value: `1`.
If the setting is set to `0`, the table function does not make Nullable columns and inserts default values instead of NULL. This is also applicable for NULL values inside arrays.
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
## optimize_use_projections {#optimize_use_projections}
Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries.
@ -3575,7 +3592,7 @@ Default value: `1`.
## force_optimize_projection {#force-optimize-projection}
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [optimize_use_projections](#optimize_use_projections) setting).
Possible values:
@ -4110,7 +4127,7 @@ Enabled by default.
## use_hedged_requests {#use_hedged_requests}
Enables hadged requests logic for remote queries. It allows to establish many connections with different replicas for query.
Enables hedged requests logic for remote queries. It allows establishing many connections with different replicas for a query.
A new connection is opened if the existing connection(s) with the replica(s) were not established within `hedged_connection_timeout`
or no data was received within `receive_data_timeout`. The query uses the first connection that sends a non-empty progress packet (or a data packet, if `allow_changing_replica_until_first_data_packet`);
other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported.
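A minimal sketch of toggling the setting (the `Distributed` table name is hypothetical):

```sql
-- Enable hedged requests for the current session ...
SET use_hedged_requests = 1;

-- ... or for a single query.
SELECT count()
FROM distributed_events
SETTINGS use_hedged_requests = 1;
```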

View File

@ -215,7 +215,7 @@ Cache **system tables**:
Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SYSTEM DROP FILESYSTEM CACHE (<cache_name>) (ON CLUSTER)` -- `ON CLUSTER` is only supported when no `<cache_name>` is provided
- `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)
@ -231,10 +231,10 @@ Result:
└───────────┘
```
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
- `DESCRIBE FILESYSTEM CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW FILESYSTEM CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
```sql
DESCRIBE CACHE 's3_cache'
DESCRIBE FILESYSTEM CACHE 's3_cache'
```
``` text

View File

@ -183,7 +183,7 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option.
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `--format`, `--output-format` — output format, `TSV` by default.

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/first_value
sidebar_position: 7
---
# first_value
Selects the first encountered value, similar to `any`, but could accept NULL.
## examples
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### example1
The NULL value is ignored by default.
```sql
select first_value(b) from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### example2
The NULL value is ignored.
```sql
select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
│ 3 │
└─────────────────────────────┘
```
### example3
The NULL value is accepted.
```sql
select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└──────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/greatest
title: greatest
---
Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)),
greatest(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 3 │
└─────────────────────────────────────────────────────┴────────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT greatest(['hello'], ['there'], ['world'])
```
```response
┌─greatest(['hello'], ['there'], ['world'])─┐
│ ['world'] │
└───────────────────────────────────────────┘
```
```sql
SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└───────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md).

View File

@ -13,11 +13,11 @@ groupBitAnd(expr)
**Arguments**
`expr` An expression that results in `UInt*` type.
`expr` An expression that results in `UInt*` or `Int*` type.
**Return value**
Value of the `UInt*` type.
Value of the `UInt*` or `Int*` type.
**Example**

View File

@ -13,11 +13,11 @@ groupBitOr(expr)
**Arguments**
`expr` An expression that results in `UInt*` type.
`expr` An expression that results in `UInt*` or `Int*` type.
**Returned value**
Value of the `UInt*` type.
Value of the `UInt*` or `Int*` type.
**Example**

View File

@ -13,11 +13,11 @@ groupBitXor(expr)
**Arguments**
`expr` An expression that results in `UInt*` type.
`expr` An expression that results in `UInt*` or `Int*` type.
**Return value**
Value of the `UInt*` type.
Value of the `UInt*` or `Int*` type.
**Example**

View File

@ -26,6 +26,8 @@ ClickHouse-specific aggregate functions:
- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md)
- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md)

View File

@ -0,0 +1,53 @@
---
slug: /en/sql-reference/aggregate-functions/reference/last_value
sidebar_position: 8
---
# last_value
Selects the last encountered value, similar to `anyLast`, but could accept NULL.
## examples
```sql
insert into test_data (a,b) values (1,null), (2,3), (4, 5), (6,null)
```
### example1
The NULL value is ignored by default.
```sql
select last_value(b) from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### example2
The NULL value is ignored.
```sql
select last_value(b) ignore nulls from test_data
```
```text
┌─last_value_ignore_nulls(b)─┐
│ 5 │
└────────────────────────────┘
```
### example3
The NULL value is accepted.
```sql
select last_value(b) respect nulls from test_data
```
```text
┌─last_value_respect_nulls(b)─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────┘
```

View File

@ -0,0 +1,48 @@
---
slug: /en/sql-reference/aggregate-functions/reference/least
title: least
---
Aggregate function that returns the least across a list of values. All of the list members must be of comparable types.
Examples:
```sql
SELECT
toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)),
least(1, 2, toUInt8(3), 3.)
```
```response
┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐
│ Float64 │ 1 │
└──────────────────────────────────────────────────┴─────────────────────────────┘
```
:::note
The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison.
:::
```sql
SELECT least(['hello'], ['there'], ['world'])
```
```response
┌─least(['hello'], ['there'], ['world'])─┐
│ ['hello'] │
└────────────────────────────────────────┘
```
```sql
SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3))
```
```response
┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐
│ 2023-05-12 01:16:59.000 │
└────────────────────────────────────────────────────────────────────────────┘
```
:::note
The type returned is a DateTime64 as the DateTime32 must be promoted to 64 bit for the comparison.
:::
Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md).

View File

@ -23,7 +23,7 @@ Alias: `medianDeterministic`.
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly.
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occurs too often, the function works incorrectly.
**Returned value**

View File

@ -46,8 +46,6 @@ SELECT [1, 2] AS x, toTypeName(x)
## Working with Data Types
The maximum size of an array is limited to one million elements.
When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md).
If ClickHouse couldn't determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`).
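A small sketch of the inference rules described above:

```sql
-- The presence of NULL makes the element type Nullable.
SELECT [1, 2, NULL] AS x, toTypeName(x);  -- Array(Nullable(UInt8))

-- Mixing strings and numbers cannot be resolved to a common type:
-- SELECT array(1, 'a');  -- raises an exception
```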

View File

@ -8,10 +8,6 @@ sidebar_label: Interval
The family of data types representing time and date intervals. The resulting types of the [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator.
:::note
`Interval` data type values cant be stored in tables.
:::
Structure:
- Time interval as an unsigned integer value.
@ -19,6 +15,9 @@ Structure:
Supported interval types:
- `NANOSECOND`
- `MICROSECOND`
- `MILLISECOND`
- `SECOND`
- `MINUTE`
- `HOUR`

View File

@ -949,7 +949,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
...
```
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported.
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source.
@ -2218,8 +2218,6 @@ LAYOUT(regexp_tree)
...
```
We only allow `YAMLRegExpTree` to work with regexp_tree dictionary layout. If you want to use other sources, please set variable `regexp_dict_allow_other_sources` true.
**Source**
We introduce a type of source called `YAMLRegExpTree` representing the structure of Regexp Tree dictionary. An Example of a valid yaml config is like:

View File

@ -78,6 +78,22 @@ GROUP BY
│ 1 │ Bobruisk │ Firefox │
└─────────────┴──────────┴─────────┘
```
### Important note!
Using multiple `arrayJoin` calls with the same expression may not produce the expected results due to optimizations.
For such cases, consider modifying the repeated array expression with extra operations that do not affect the join result, e.g. `arrayJoin(arraySort(arr))`, `arrayJoin(arrayConcat(arr, []))`
Example:
```sql
SELECT
arrayJoin(dice) as first_throw,
/* arrayJoin(dice) as second_throw */ -- is technically correct, but will annihilate result set
arrayJoin(arrayConcat(dice, [])) as second_throw -- intentionally changed expression to force re-evaluation
FROM (
SELECT [1, 2, 3, 4, 5, 6] as dice
);
```
Note the [ARRAY JOIN](../statements/select/array-join.md) syntax in the SELECT query, which provides broader possibilities.
`ARRAY JOIN` allows you to convert multiple arrays with the same number of elements at a time.

View File

@ -314,7 +314,7 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...)
**Returned values**
Returns result of logical disjuction.
Returns result of logical disjunction.
Type: `UInt8`.
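As a small illustration (not from the original page): 43 is `101011` in binary, so bit 0 is set while bits 2 and 4 are not.

```sql
SELECT bitTestAny(43, 0, 2) AS any_set;   -- 1, because bit 0 is set
SELECT bitTestAny(43, 2, 4) AS none_set;  -- 0, neither bit is set
```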

View File

@ -256,7 +256,7 @@ Result:
## bitmapCardinality
Rerturn the cardinality of a bitmap.
Returns the cardinality of a bitmap.
**Syntax**

View File

@ -14,7 +14,7 @@ The following types can be compared:
- dates
- dates with times
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not accross groups (e.g. UInt16 and DateTime).
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not across groups (e.g. UInt16 and DateTime).
Strings are compared byte-by-byte. Note that this may lead to unexpected results if one of the strings contains UTF-8 encoded multi-byte characters.
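A small sketch of the grouping rule, assuming nothing beyond the note above:

```sql
-- UInt16 vs UInt64: same group, the comparison is allowed.
SELECT toUInt16(1) = toUInt64(1) AS same_group;  -- 1

-- UInt16 vs DateTime would be a cross-group comparison and,
-- per the note above, is not supported:
-- SELECT toUInt16(1) = now();
```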

View File

@ -26,19 +26,27 @@ SELECT
## makeDate
Creates a [Date](../../sql-reference/data-types/date.md) from a year, month and day argument.
Creates a [Date](../../sql-reference/data-types/date.md)
- from a year, month and day argument, or
- from a year and day of year argument.
**Syntax**
``` sql
makeDate(year, month, day)
makeDate(year, month, day);
makeDate(year, day_of_year);
```
Alias:
- `MAKEDATE(year, month, day);`
- `MAKEDATE(year, day_of_year);`
**Arguments**
- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -48,6 +56,8 @@ Type: [Date](../../sql-reference/data-types/date.md).
**Example**
Create a Date from a year, month and day:
``` sql
SELECT makeDate(2023, 2, 28) AS Date;
```
@ -60,6 +70,19 @@ Result:
└────────────┘
```
Create a Date from a year and day of year argument:
``` sql
SELECT makeDate(2023, 42) AS Date;
```
Result:
``` text
┌───────date─┐
│ 2023-02-11 │
└────────────┘
```
## makeDate32
Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md).
@ -108,6 +131,12 @@ Result:
Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).
**Syntax**
``` sql
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
```
## timeZone
Returns the timezone of the server.
@ -289,7 +318,7 @@ Aliases: `DAYOFMONTH`, `DAY`.
Converts a date or date with time to the number of the day in the week as UInt8 value.
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is ommited, the default mode is 0. The time zone of the date can be specified as the third argument.
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
| Mode | First day of week | Range |
|------|-------------------|------------------------------------------------|

View File

@ -84,7 +84,7 @@ Result:
## s2GetNeighbors
Returns S2 neighbor indixes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
Returns S2 neighbor indexes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
**Syntax**
@ -206,7 +206,7 @@ s2CapUnion(center1, radius1, center2, radius2)
**Arguments**
- `center1`, `center2` — S2 point indixes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values**

View File

@ -64,7 +64,7 @@ This is a cryptographic hash function. It works at least three times faster than
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
1. The first and second hash values are concatenated into an array, which is hashed.
2. The previously calculated hash value and the hash of the third input paramter are hashed in a similar way.
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
3. This calculation is repeated for all remaining hash values of the original input.
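A minimal usage sketch of the multi-argument form described above, shown with `sipHash64`, which this section appears to document; treat the function name as an assumption:

``` sql
-- Each argument is reinterpreted as a string, hashed, and the hashes
-- are combined pairwise as described in the steps above.
SELECT sipHash64('key', 42, toDate('2023-02-28')) AS combined_hash;
```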
**Arguments**
@ -279,6 +279,8 @@ cityHash64(par1,...)
This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and an implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results.
Note that Google changed the algorithm of CityHash after it was added to ClickHouse. In other words, ClickHouse's cityHash64 and Google's upstream CityHash now produce different results. ClickHouse cityHash64 corresponds to CityHash v1.0.2.
**Arguments**
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types, the calculated hash value may be the same for the same values even if the types of the arguments differ (integers of different sizes, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
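A small usage sketch; the resulting value is not shown since it is specific to the CityHash v1.0.2 variant mentioned above:

``` sql
-- Mixed argument types are allowed; per-argument hashes are merged
-- with the CityHash combinator.
SELECT cityHash64('ClickHouse', 2023, toDate('2023-02-28')) AS h;
```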

View File

@ -59,244 +59,6 @@ A lambda function that accepts multiple arguments can also be passed to a higher
For some functions the first argument (the lambda function) can be omitted. In this case, the identity mapping is assumed.
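For instance, a small sketch of both cases, assuming `arraySort` is one of the functions that allows omitting the lambda:

``` sql
SELECT
    arrayMap((x, y) -> x + y, [1, 2, 3], [10, 20, 30]) AS mapped,  -- multi-argument lambda
    arraySort([3, 1, 2]) AS sorted;                                -- lambda omitted, identity mapping
```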
## SQL User Defined Functions
## User Defined Functions (UDFs)
Custom functions from lambda expressions can be created using the [CREATE FUNCTION](../statements/create/function.md) statement. To delete these functions use the [DROP FUNCTION](../statements/drop.md#drop-function) statement.
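For example, a minimal SQL UDF; the name and formula are purely illustrative:

``` sql
CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b;
SELECT linear_equation(number, 2, 1) FROM numbers(3);
DROP FUNCTION linear_equation;
```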
## Executable User Defined Functions
ClickHouse can call any external executable program or script to process data.
The configuration of executable user defined functions can be located in one or more xml-files. The path to the configuration is specified in the [user_defined_executable_functions_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_defined_executable_functions_config) parameter.
A function configuration contains the following settings:
- `name` - a function name.
- `command` - script name to execute or command if `execute_direct` is false.
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then a single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
- `pool_size` - the size of a command pool. Optional. Default value is `16`.
- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`.
- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is, after processing a chunk of arguments, it must wait for the next chunk.
**Example**
Creating `test_function` using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
<name>value</name>
</argument>
<format>TabSeparated</format>
<command>test_function.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function.py` (`/var/lib/clickhouse/user_scripts/test_function.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == '__main__':
    for line in sys.stdin:
        print("Value " + line, end='')
        sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_python(toUInt64(2));
```
Result:
``` text
┌─test_function_python(2)─┐
│ Value 2 │
└─────────────────────────┘
```
Creating `test_function_sum`, manually setting `execute_direct` to `0`, using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum</name>
<return_type>UInt64</return_type>
<argument>
<type>UInt64</type>
<name>lhs</name>
</argument>
<argument>
<type>UInt64</type>
<name>rhs</name>
</argument>
<format>TabSeparated</format>
<command>cd /; clickhouse-local --input-format TabSeparated --output-format TabSeparated --structure 'x UInt64, y UInt64' --query "SELECT x + y FROM table"</command>
<execute_direct>0</execute_direct>
</function>
</functions>
```
Query:
``` sql
SELECT test_function_sum(2, 2);
```
Result:
``` text
┌─test_function_sum(2, 2)─┐
│ 4 │
└─────────────────────────┘
```
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File `test_function.xml` (`/etc/clickhouse-server/test_function.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py` (`/var/lib/clickhouse/user_scripts/test_function_sum_json.py` with default path settings).
```python
#!/usr/bin/python3
import sys
import json
if __name__ == '__main__':
    for line in sys.stdin:
        value = json.loads(line)
        first_arg = int(value['argument_1'])
        second_arg = int(value['argument_2'])
        result = {'result_name': first_arg + second_arg}
        print(json.dumps(result), end='\n')
        sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_sum_json(2, 2);
```
Result:
``` text
┌─test_function_sum_json(2, 2)─┐
│ 4 │
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in the `command` setting (this works only for user defined functions of the `executable` type).
File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py` (`/var/lib/clickhouse/user_scripts/test_function_parameter_python.py` with default path settings).
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.
## Evaluation of Argument Expressions
In almost all programming languages, one of the arguments might not be evaluated for certain operators. This is usually the operators `&&`, `||`, and `?:`.
But in ClickHouse, arguments of functions (operators) are always evaluated. This is because entire parts of columns are evaluated at once, instead of calculating each row separately.
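A hedged sketch of why this matters in practice, assuming `intDiv` throws on a zero divisor and short-circuit evaluation is not enabled:

``` sql
-- The right operand is evaluated for the whole column even where the
-- left operand is already true, so this query may fail with a
-- division-by-zero error on the row where number = 0.
SELECT number = 0 OR intDiv(10, number) > 1
FROM numbers(5);
```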
## Performing Functions for Distributed Query Processing
For distributed query processing, as many stages of query processing as possible are performed on remote servers, and the rest of the stages (merging intermediate results and everything after that) are performed on the requestor server.
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y)`:
- if a `distributed_table` has at least two shards, the functions g and h are performed on remote servers, and the function f is performed on the requestor server.
- if a `distributed_table` has only one shard, all the f, g, and h functions are performed on this shard's server (a concrete example is sketched below).
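A concrete instantiation of the `f(sum(g(x))) ... GROUP BY h(y)` shape; the table and `URL` column are hypothetical:

``` sql
-- g = length and h = domain run on the remote shards, sum is partially
-- aggregated there, and f = round runs on the requestor server that
-- merges the partial results.
SELECT round(sum(length(URL))) AS total_url_length
FROM distributed_table
GROUP BY domain(URL);
```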
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important.
For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
Another example is the `hostName` function, which returns the name of the server it is running on so that you can `GROUP BY` server in a `SELECT` query.
If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in the `any` aggregate function or add it to a key in `GROUP BY`.
## Related Content
- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs)
ClickHouse supports user-defined functions. See [UDFs](/docs/en/sql-reference/functions/udf.md).

View File

@ -84,7 +84,7 @@ Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-opera
**Returned value**
- `1`, if at least one argument evalutes to `true`,
- `1`, if at least one argument evaluates to `true`,
- `0`, if all arguments evaluate to `false`,
- `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`.
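A short sketch of the three cases listed above, using the `or` function form of the operator:

``` sql
SELECT
    or(0, 1, NULL) AS one_true,   -- 1
    or(0, 0)       AS all_false,  -- 0
    or(0, NULL)    AS with_null;  -- NULL
```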
@ -173,7 +173,7 @@ xor(val1, val2...)
**Returned value**
- `1`, for two values: if one of the values evaluates to `false` and other does not,
- `0`, for two values: if both values evalute to `false` or to both `true`,
- `0`, for two values: if both values evaluate to `false` or to both `true`,
- `NULL`, if at least one of the inputs is `NULL`
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
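And a short sketch of the two-value `xor` cases described above:

``` sql
SELECT
    xor(1, 0) AS values_differ,  -- 1
    xor(1, 1) AS values_equal;   -- 0
```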

View File

@ -187,7 +187,7 @@ detectLanguageMixed('text_to_be_analyzed')
**Returned value**
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a perentage of text found for that language
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
**Examples**

View File

@ -306,7 +306,7 @@ You can use this function in table engine parameters in a CREATE TABLE query whe
## currentUser()
Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query.
Returns the login of the current user. For a distributed query, the login of the user who initiated the query is returned.
``` sql
SELECT currentUser();
@ -317,7 +317,7 @@ Alias: `user()`, `USER()`.
**Returned values**
- Login of current user.
- Login of user that initiated query in case of disributed query.
- Login of the user who initiated the query, in the case of a distributed query.
Type: `String`.

View File

@ -19,13 +19,13 @@ The random numbers are generated by non-cryptographic algorithms.
## rand, rand32
Returns a random UInt32 number, evenly distributed accross the range of all possible UInt32 numbers.
Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers.
Uses a linear congruential generator.
## rand64
Returns a random UInt64 number, evenly distributed accross the range of all possible UInt64 numbers.
Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers.
Uses a linear congruential generator.
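A quick usage sketch; outputs are random by definition, and the rescaling trick is an assumption about how these functions are typically combined:

``` sql
-- Dividing by the maximum UInt32 value rescales rand() to an
-- approximately uniform value in [0, 1].
SELECT rand() AS r32, rand64() AS r64, rand() / 4294967295 AS uniform_0_1;
```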

View File

@ -310,7 +310,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
## repeat
Conatenates a string as many times with itself as specified.
Concatenates a string with itself as many times as specified.
**Syntax**
@ -1215,96 +1215,3 @@ Result:
│ A240 │
└──────────────────┘
```
## extractKeyValuePairs
Extracts key-value pairs from any string. The string does not need to be 100% structured in a key-value pair format; it can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments.
A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key value pairs must be separated by pair delimiters.
**Syntax**
``` sql
extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character])
```
**Arguments**
- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
**Returned values**
- The extracted key-value pairs in a Map(String, String).
**Examples**
Query:
**Simple case**
``` sql
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') AS kv
┌─kv──────────────────────────────────────────────────────────────────────┐
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
└─────────────────────────────────────────────────────────────────────────┘
```
**Single quote as quoting character**
``` sql
SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') AS kv
┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
**Escape sequences without escape sequence support**
``` sql
SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
┌─kv─────────────────────┐
│ {'age':'a\\x0A\\n\\0'} │
└────────────────────────┘
```
## extractKeyValuePairsWithEscaping
Same as `extractKeyValuePairs` but with escaping support.
Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
Non-standard escape sequences are returned as is (including the backslash) unless they are one of the following:
`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbbb`.
- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
Leading escape sequences will be skipped in keys and will be considered invalid for values.
**Escape sequences with escape sequence support turned on**
``` sql
SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
┌─kv────────────────┐
│ {'age':'a\n\n\0'} │
└───────────────────┘
```

View File

@ -133,7 +133,7 @@ The tuples should have elements of the same type.
- The Hamming distance.
Type: The result type is calculed the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
``` sql
SELECT
@ -223,7 +223,7 @@ Result:
└───────────────────────────────────────┘
```
It is possible to transform colums to rows using this function:
It is possible to transform columns to rows using this function:
``` sql
CREATE TABLE tupletest (col Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;

Some files were not shown because too many files have changed in this diff.