mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-12 17:32:32 +00:00
Merge branch 'master' into dot_product
This commit is contained in:
commit
4a10f4b3d0
34
.github/workflows/master.yml
vendored
34
.github/workflows/master.yml
vendored
@ -1341,6 +1341,40 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FunctionalStatelessTestReleaseAnalyzer:
|
||||
needs: [BuilderDebRelease]
|
||||
runs-on: [self-hosted, func-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/stateless_analyzer
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Stateless tests (release, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse
|
||||
KILL_TIMEOUT=10800
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Functional test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
FunctionalStatelessTestAarch64:
|
||||
needs: [BuilderDebAarch64]
|
||||
runs-on: [self-hosted, func-tester-aarch64]
|
||||
|
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -253,6 +253,9 @@
|
||||
[submodule "contrib/qpl"]
|
||||
path = contrib/qpl
|
||||
url = https://github.com/intel/qpl
|
||||
[submodule "contrib/idxd-config"]
|
||||
path = contrib/idxd-config
|
||||
url = https://github.com/intel/idxd-config
|
||||
[submodule "contrib/wyhash"]
|
||||
path = contrib/wyhash
|
||||
url = https://github.com/wangyi-fudan/wyhash
|
||||
|
@ -57,7 +57,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
|
||||
# set CPU time limit to 1000 seconds
|
||||
set (RLIMIT_CPU 1000)
|
||||
|
||||
# gcc10/gcc10/clang -fsanitize=memory is too heavy
|
||||
# -fsanitize=memory is too heavy
|
||||
if (SANITIZE STREQUAL "memory")
|
||||
set (RLIMIT_DATA 10000000000) # 10G
|
||||
endif()
|
||||
@ -280,7 +280,7 @@ set (CMAKE_C_STANDARD 11)
|
||||
set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
|
||||
set (CMAKE_C_STANDARD_REQUIRED ON)
|
||||
|
||||
# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
|
||||
# Compiler-specific coverage flags e.g. -fcoverage-mapping
|
||||
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
|
||||
|
||||
if (COMPILER_CLANG)
|
||||
@ -522,6 +522,26 @@ include (cmake/print_flags.cmake)
|
||||
|
||||
if (ENABLE_RUST)
|
||||
add_subdirectory (rust)
|
||||
|
||||
# With LTO Rust adds a few symbols with global visibility, the most common is
|
||||
# rust_eh_personality. And this leads to linking errors because multiple
|
||||
# Rust libraries contain the same symbol.
|
||||
#
|
||||
# If it were a shared library, then we could use a version script for the linker to
|
||||
# hide these symbols, but the libraries are static.
|
||||
#
|
||||
# we could in theory compile everything to one library but this will be a
|
||||
# mess
|
||||
#
|
||||
# But this should be OK since CI has lots of other builds that are done
|
||||
# without LTO and it will find multiple definitions if there will be any.
|
||||
#
|
||||
# More information about this behaviour in Rust can be found here
|
||||
# - https://github.com/rust-lang/rust/issues/44322
|
||||
# - https://alanwu.space/post/symbol-hygiene/
|
||||
if (ENABLE_THINLTO)
|
||||
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_subdirectory (base)
|
||||
|
@ -73,18 +73,6 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ADDRESS_SANITIZER)
|
||||
# define BOOST_USE_ASAN 1
|
||||
# define BOOST_USE_UCONTEXT 1
|
||||
#endif
|
||||
|
||||
#if defined(THREAD_SANITIZER)
|
||||
# define BOOST_USE_TSAN 1
|
||||
# define BOOST_USE_UCONTEXT 1
|
||||
#endif
|
||||
|
||||
/// TODO: Strange enough, there is no way to detect UB sanitizer.
|
||||
|
||||
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
|
||||
/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can.
|
||||
/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input.
|
||||
|
@ -314,7 +314,14 @@ struct integer<Bits, Signed>::_impl
|
||||
|
||||
const T alpha = t / static_cast<T>(max_int);
|
||||
|
||||
if (alpha <= static_cast<T>(max_int))
|
||||
/** Here we have to use strict comparison.
|
||||
* The max_int is 2^64 - 1.
|
||||
* When casted to floating point type, it will be rounded to the closest representable number,
|
||||
* which is 2^64.
|
||||
* But 2^64 is not representable in uint64_t,
|
||||
* so the maximum representable number will be strictly less.
|
||||
*/
|
||||
if (alpha < static_cast<T>(max_int))
|
||||
self = static_cast<uint64_t>(alpha);
|
||||
else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations.
|
||||
set_multiplier<double>(self, static_cast<double>(alpha));
|
||||
|
@ -53,7 +53,7 @@
|
||||
|
||||
|
||||
// Define if no <locale> header is available (such as on WinCE)
|
||||
// #define POCO_NO_LOCALE
|
||||
#define POCO_NO_LOCALE
|
||||
|
||||
|
||||
// Define to desired default thread stack size
|
||||
|
@ -30,9 +30,6 @@
|
||||
#include <cctype>
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
# include <locale>
|
||||
#endif
|
||||
|
||||
|
||||
// binary numbers are supported, thus 64 (bits) + 1 (string terminating zero)
|
||||
@ -53,11 +50,7 @@ inline char decimalSeparator()
|
||||
/// Returns decimal separator from global locale or
|
||||
/// default '.' for platforms where locale is unavailable.
|
||||
{
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
return std::use_facet<std::numpunct<char>>(std::locale()).decimal_point();
|
||||
#else
|
||||
return '.';
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -65,11 +58,7 @@ inline char thousandSeparator()
|
||||
/// Returns thousand separator from global locale or
|
||||
/// default ',' for platforms where locale is unavailable.
|
||||
{
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
return std::use_facet<std::numpunct<char>>(std::locale()).thousands_sep();
|
||||
#else
|
||||
return ',';
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -16,9 +16,6 @@
|
||||
#include "Poco/Exception.h"
|
||||
#include "Poco/Ascii.h"
|
||||
#include <sstream>
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
#include <locale>
|
||||
#endif
|
||||
#include <cstddef>
|
||||
|
||||
|
||||
@ -147,9 +144,6 @@ namespace
|
||||
void formatOne(std::string& result, std::string::const_iterator& itFmt, const std::string::const_iterator& endFmt, std::vector<Any>::const_iterator& itVal)
|
||||
{
|
||||
std::ostringstream str;
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
str.imbue(std::locale::classic());
|
||||
#endif
|
||||
try
|
||||
{
|
||||
parseFlags(str, itFmt, endFmt);
|
||||
|
@ -15,9 +15,6 @@
|
||||
#include "Poco/NumberFormatter.h"
|
||||
#include "Poco/MemoryStream.h"
|
||||
#include <iomanip>
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
#include <locale>
|
||||
#endif
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
|
@ -19,9 +19,6 @@
|
||||
#include <cstdio>
|
||||
#include <cctype>
|
||||
#include <stdlib.h>
|
||||
#if !defined(POCO_NO_LOCALE)
|
||||
#include <locale>
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(POCO_LONG_IS_64_BIT)
|
||||
|
@ -9,27 +9,19 @@ if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MA
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(ENABLE_CCACHE "default" CACHE STRING "Deprecated, use COMPILER_CACHE=(auto|ccache|sccache|disabled)")
|
||||
if (NOT ENABLE_CCACHE STREQUAL "default")
|
||||
message(WARNING "The -DENABLE_CCACHE is deprecated in favor of -DCOMPILER_CACHE")
|
||||
endif()
|
||||
|
||||
set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (ccache, then sccache), 'ccache', 'sccache', or 'disabled'")
|
||||
|
||||
# It has pretty complex logic, because the ENABLE_CCACHE is deprecated, but still should
|
||||
# control the COMPILER_CACHE
|
||||
# After it will be completely removed, the following block will be much simpler
|
||||
if (COMPILER_CACHE STREQUAL "ccache" OR (ENABLE_CCACHE AND NOT ENABLE_CCACHE STREQUAL "default"))
|
||||
find_program (CCACHE_EXECUTABLE ccache)
|
||||
elseif(COMPILER_CACHE STREQUAL "disabled" OR NOT ENABLE_CCACHE STREQUAL "default")
|
||||
message(STATUS "Using *ccache: no (disabled via configuration)")
|
||||
return()
|
||||
elseif(COMPILER_CACHE STREQUAL "auto")
|
||||
if(COMPILER_CACHE STREQUAL "auto")
|
||||
find_program (CCACHE_EXECUTABLE ccache sccache)
|
||||
elseif (COMPILER_CACHE STREQUAL "ccache")
|
||||
find_program (CCACHE_EXECUTABLE ccache)
|
||||
elseif(COMPILER_CACHE STREQUAL "sccache")
|
||||
find_program (CCACHE_EXECUTABLE sccache)
|
||||
elseif(COMPILER_CACHE STREQUAL "disabled")
|
||||
message(STATUS "Using *ccache: no (disabled via configuration)")
|
||||
return()
|
||||
else()
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), given '${COMPILER_CACHE}'")
|
||||
message(${RECONFIGURE_MESSAGE_LEVEL} "The COMPILER_CACHE must be one of (auto|ccache|sccache|disabled), value: '${COMPILER_CACHE}'")
|
||||
endif()
|
||||
|
||||
|
||||
|
@ -92,6 +92,8 @@ add_library (boost::system ALIAS _boost_system)
|
||||
target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
|
||||
|
||||
# context
|
||||
option (BOOST_USE_UCONTEXT "Use ucontext_t for context switching of boost::fiber within boost::context" OFF)
|
||||
|
||||
enable_language(ASM)
|
||||
SET(ASM_OPTIONS "-x assembler-with-cpp")
|
||||
|
||||
@ -100,20 +102,6 @@ set (SRCS_CONTEXT
|
||||
"${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp"
|
||||
)
|
||||
|
||||
if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread"))
|
||||
add_compile_definitions(BOOST_USE_UCONTEXT)
|
||||
|
||||
if (SANITIZE STREQUAL "address")
|
||||
add_compile_definitions(BOOST_USE_ASAN)
|
||||
elseif (SANITIZE STREQUAL "thread")
|
||||
add_compile_definitions(BOOST_USE_TSAN)
|
||||
endif()
|
||||
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/fiber.cpp"
|
||||
"${LIBRARY_DIR}/libs/context/src/continuation.cpp"
|
||||
)
|
||||
endif()
|
||||
if (ARCH_AARCH64)
|
||||
set (SRCS_CONTEXT ${SRCS_CONTEXT}
|
||||
"${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S"
|
||||
@ -152,10 +140,27 @@ else()
|
||||
)
|
||||
endif()
|
||||
|
||||
if (SANITIZE OR BOOST_USE_UCONTEXT)
|
||||
list (APPEND SRCS_CONTEXT
|
||||
"${LIBRARY_DIR}/libs/context/src/fiber.cpp"
|
||||
"${LIBRARY_DIR}/libs/context/src/continuation.cpp"
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library (_boost_context ${SRCS_CONTEXT})
|
||||
add_library (boost::context ALIAS _boost_context)
|
||||
target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR})
|
||||
|
||||
if (SANITIZE OR BOOST_USE_UCONTEXT)
|
||||
target_compile_definitions(_boost_context PUBLIC BOOST_USE_UCONTEXT)
|
||||
endif()
|
||||
|
||||
if (SANITIZE STREQUAL "address")
|
||||
target_compile_definitions(_boost_context PUBLIC BOOST_USE_ASAN)
|
||||
elseif (SANITIZE STREQUAL "thread")
|
||||
target_compile_definitions(_boost_context PUBLIC BOOST_USE_TSAN)
|
||||
endif()
|
||||
|
||||
# coroutine
|
||||
|
||||
set (SRCS_COROUTINE
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
/*
|
||||
* (all numbers are written in big-endian manner: the least significant digit on the right)
|
||||
* (only bit representations are used - no hex or octal, leading zeroes are ommited)
|
||||
* (only bit representations are used - no hex or octal, leading zeroes are omitted)
|
||||
*
|
||||
* Consistent hashing scheme:
|
||||
*
|
||||
|
1
contrib/idxd-config
vendored
Submodule
1
contrib/idxd-config
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
|
@ -1,10 +0,0 @@
|
||||
#include <string.h>
|
||||
|
||||
int main()
|
||||
{
|
||||
// We can't test "char *p = strerror_r()" because that only causes a
|
||||
// compiler warning when strerror_r returns an integer.
|
||||
char *buf = 0;
|
||||
int i = strerror_r(0, buf, 100);
|
||||
return i;
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
FUNCTION(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS)
|
||||
|
||||
IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
|
||||
SET(PATH ".")
|
||||
IF (${ARGC} EQUAL 4)
|
||||
LIST(GET ARGV 3 PATH)
|
||||
ENDIF ()
|
||||
ENDIF()
|
||||
|
||||
IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
|
||||
UNSET(${RETURN_VALUE})
|
||||
FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
|
||||
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
|
||||
|
||||
FILE(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*)
|
||||
|
||||
FOREACH(DIR ${SUBDIRS})
|
||||
IF (IS_DIRECTORY ${PATH}/${DIR})
|
||||
IF (NOT "${DIR}" STREQUAL "CMAKEFILES")
|
||||
FILE(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}")
|
||||
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
ENDFOREACH()
|
||||
ELSE ()
|
||||
FILE(GLOB ${RETURN_VALUE} "${PATTERN}")
|
||||
|
||||
FOREACH (PATH ${SOURCE_SUBDIRS})
|
||||
FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
|
||||
LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
|
||||
ENDFOREACH(PATH ${SOURCE_SUBDIRS})
|
||||
ENDIF ()
|
||||
|
||||
IF (${FILTER_OUT})
|
||||
LIST(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT})
|
||||
ENDIF()
|
||||
|
||||
SET(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE)
|
||||
ENDFUNCTION(AUTO_SOURCES)
|
||||
|
||||
FUNCTION(CONTAINS_STRING FILE SEARCH RETURN_VALUE)
|
||||
FILE(STRINGS ${FILE} FILE_CONTENTS REGEX ".*${SEARCH}.*")
|
||||
IF (FILE_CONTENTS)
|
||||
SET(${RETURN_VALUE} TRUE PARENT_SCOPE)
|
||||
ENDIF()
|
||||
ENDFUNCTION(CONTAINS_STRING)
|
@ -1,44 +0,0 @@
|
||||
OPTION(ENABLE_SSE "enable SSE4.2 builtin function" ON)
|
||||
|
||||
INCLUDE (CheckFunctionExists)
|
||||
CHECK_FUNCTION_EXISTS(dladdr HAVE_DLADDR)
|
||||
CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP)
|
||||
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
|
||||
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")
|
||||
|
||||
IF(ENABLE_SSE STREQUAL ON AND ARCH_AMD64)
|
||||
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
|
||||
ENDIF()
|
||||
|
||||
IF(NOT TEST_HDFS_PREFIX)
|
||||
SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." FORCE)
|
||||
ENDIF(NOT TEST_HDFS_PREFIX)
|
||||
|
||||
ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}")
|
||||
ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS)
|
||||
ADD_DEFINITIONS(-D_GNU_SOURCE)
|
||||
ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP)
|
||||
|
||||
TRY_COMPILE(STRERROR_R_RETURN_INT
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/CMake/CMakeTestCompileStrerror.c"
|
||||
CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
|
||||
OUTPUT_VARIABLE OUTPUT)
|
||||
|
||||
MESSAGE(STATUS "Checking whether strerror_r returns an int")
|
||||
|
||||
IF(STRERROR_R_RETURN_INT)
|
||||
MESSAGE(STATUS "Checking whether strerror_r returns an int -- yes")
|
||||
ELSE(STRERROR_R_RETURN_INT)
|
||||
MESSAGE(STATUS "Checking whether strerror_r returns an int -- no")
|
||||
ENDIF(STRERROR_R_RETURN_INT)
|
||||
|
||||
set(HAVE_STEADY_CLOCK 1)
|
||||
set(HAVE_NESTED_EXCEPTION 1)
|
||||
|
||||
SET(HAVE_BOOST_CHRONO 0)
|
||||
SET(HAVE_BOOST_ATOMIC 0)
|
||||
|
||||
SET(HAVE_STD_CHRONO 1)
|
||||
SET(HAVE_STD_ATOMIC 1)
|
@ -1,42 +0,0 @@
|
||||
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
SET(OS_LINUX true CACHE INTERNAL "Linux operating system")
|
||||
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
|
||||
SET(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system")
|
||||
ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
MESSAGE(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"")
|
||||
ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
|
||||
IF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpfullversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
|
||||
|
||||
IF (NOT GCC_COMPILER_VERSION)
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)
|
||||
|
||||
IF (NOT GCC_COMPILER_VERSION)
|
||||
MESSAGE(FATAL_ERROR "Cannot get gcc version")
|
||||
ENDIF (NOT GCC_COMPILER_VERSION)
|
||||
ENDIF (NOT GCC_COMPILER_VERSION)
|
||||
|
||||
STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})
|
||||
|
||||
LIST(LENGTH GCC_COMPILER_VERSION GCC_COMPILER_VERSION_LENGTH)
|
||||
LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR)
|
||||
if (GCC_COMPILER_VERSION_LENGTH GREATER 1)
|
||||
LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR)
|
||||
else ()
|
||||
set (GCC_COMPILER_VERSION_MINOR 0)
|
||||
endif ()
|
||||
|
||||
SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
|
||||
SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")
|
||||
|
||||
MESSAGE(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})")
|
||||
ELSE(CMAKE_COMPILER_IS_GNUCXX)
|
||||
EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_OUTPUT)
|
||||
IF(COMPILER_OUTPUT MATCHES "clang")
|
||||
SET(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL "using clang as compiler")
|
||||
MESSAGE(STATUS "checking compiler: CLANG")
|
||||
ELSE(COMPILER_OUTPUT MATCHES "clang")
|
||||
MESSAGE(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"")
|
||||
ENDIF(COMPILER_OUTPUT MATCHES "clang")
|
||||
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
|
@ -21,10 +21,17 @@ set(HDFS3_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3")
|
||||
set(HDFS3_SOURCE_DIR "${HDFS3_ROOT_DIR}/src")
|
||||
set(HDFS3_COMMON_DIR "${HDFS3_SOURCE_DIR}/common")
|
||||
|
||||
# module
|
||||
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
|
||||
include(Platform)
|
||||
include(Options)
|
||||
ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}")
|
||||
ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS)
|
||||
ADD_DEFINITIONS(-D_GNU_SOURCE)
|
||||
ADD_DEFINITIONS(-D_GLIBCXX_USE_NANOSLEEP)
|
||||
ADD_DEFINITIONS(-DHAVE_NANOSLEEP)
|
||||
set(HAVE_STEADY_CLOCK 1)
|
||||
set(HAVE_NESTED_EXCEPTION 1)
|
||||
SET(HAVE_BOOST_CHRONO 0)
|
||||
SET(HAVE_BOOST_ATOMIC 0)
|
||||
SET(HAVE_STD_CHRONO 1)
|
||||
SET(HAVE_STD_ATOMIC 1)
|
||||
|
||||
# source
|
||||
set(PROTO_FILES
|
||||
|
2
contrib/llvm-project
vendored
2
contrib/llvm-project
vendored
@ -1 +1 @@
|
||||
Subproject commit 2aedf7598a4040b23881dbe05b6afaca25a337ef
|
||||
Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9
|
@ -187,7 +187,9 @@ target_include_directories(qplcore_avx512
|
||||
set_target_properties(qplcore_avx512 PROPERTIES
|
||||
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
|
||||
|
||||
target_link_libraries(qplcore_avx512 ${CMAKE_DL_LIBS} isal)
|
||||
target_link_libraries(qplcore_avx512
|
||||
PRIVATE isal
|
||||
PRIVATE ${CMAKE_DL_LIBS})
|
||||
|
||||
target_compile_options(qplcore_avx512
|
||||
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
|
||||
@ -217,7 +219,9 @@ target_include_directories(qplcore_px
|
||||
set_target_properties(qplcore_px PROPERTIES
|
||||
$<$<C_COMPILER_ID:GNU>:C_STANDARD 17>)
|
||||
|
||||
target_link_libraries(qplcore_px isal ${CMAKE_DL_LIBS})
|
||||
target_link_libraries(qplcore_px
|
||||
PRIVATE isal
|
||||
PRIVATE ${CMAKE_DL_LIBS})
|
||||
|
||||
target_compile_options(qplcore_px
|
||||
PRIVATE ${QPL_LINUX_TOOLCHAIN_REQUIRED_FLAGS}
|
||||
@ -339,6 +343,7 @@ target_compile_definitions(_qpl
|
||||
|
||||
target_link_libraries(_qpl
|
||||
PRIVATE accel-config
|
||||
PRIVATE ch_contrib::isal
|
||||
PRIVATE ${CMAKE_DL_LIBS})
|
||||
|
||||
add_library (ch_contrib::qpl ALIAS _qpl)
|
||||
|
@ -1,6 +1,6 @@
|
||||
## The bare minimum ClickHouse Docker image.
|
||||
|
||||
It is intented as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel.
|
||||
It is intended as a showcase to check the amount of implicit dependencies of ClickHouse from the OS in addition to the OS kernel.
|
||||
|
||||
Example usage:
|
||||
|
||||
|
@ -59,12 +59,16 @@ def process_test_log(log_path, broken_tests):
|
||||
|
||||
total += 1
|
||||
if TIMEOUT_SIGN in line:
|
||||
failed += 1
|
||||
test_results.append((test_name, "Timeout", test_time, []))
|
||||
if test_name in broken_tests:
|
||||
success += 1
|
||||
test_results.append((test_name, "BROKEN", test_time, []))
|
||||
else:
|
||||
failed += 1
|
||||
test_results.append((test_name, "Timeout", test_time, []))
|
||||
elif FAIL_SIGN in line:
|
||||
if test_name in broken_tests:
|
||||
success += 1
|
||||
test_results.append((test_name, "OK", test_time, []))
|
||||
test_results.append((test_name, "BROKEN", test_time, []))
|
||||
else:
|
||||
failed += 1
|
||||
test_results.append((test_name, "FAIL", test_time, []))
|
||||
@ -76,11 +80,11 @@ def process_test_log(log_path, broken_tests):
|
||||
test_results.append((test_name, "SKIPPED", test_time, []))
|
||||
else:
|
||||
if OK_SIGN in line and test_name in broken_tests:
|
||||
failed += 1
|
||||
skipped += 1
|
||||
test_results.append(
|
||||
(
|
||||
test_name,
|
||||
"SKIPPED",
|
||||
"NOT_FAILED",
|
||||
test_time,
|
||||
["This test passed. Update broken_tests.txt.\n"],
|
||||
)
|
||||
|
@ -98,7 +98,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele
|
||||
|
||||
## Interpreters {#interpreters}
|
||||
|
||||
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
|
||||
Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the `INSERT` query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.
|
||||
|
||||
`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.
|
||||
|
||||
|
@ -148,7 +148,7 @@ Valid values:
|
||||
- `all` (default) - a universal rule, used when `rule_type` is omitted.
|
||||
- `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression.
|
||||
- `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression.
|
||||
- `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
|
||||
- `tag_list` - a rule for tagged metrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary; it will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`.
|
||||
- `regexp` – A pattern for the metric name (a regular or DSL).
|
||||
- `age` – The minimum age of the data in seconds.
|
||||
- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
|
||||
|
@ -727,7 +727,7 @@ TTL d + INTERVAL 1 MONTH RECOMPRESS CODEC(ZSTD(17)), d + INTERVAL 1 YEAR RECOMPR
|
||||
SETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0;
|
||||
```
|
||||
|
||||
Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value accross the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
|
||||
Creating a table, where expired rows are aggregated. In result rows `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any occasional value from grouped rows.
|
||||
|
||||
``` sql
|
||||
CREATE TABLE table_for_aggregation
|
||||
|
@ -242,7 +242,7 @@ When querying a `Distributed` table, `SELECT` queries are sent to all shards and
|
||||
|
||||
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max_parallel_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
|
||||
|
||||
To learn more about how distibuted `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
|
||||
To learn more about how distributed `in` and `global in` queries are processed, refer to [this](../../../sql-reference/operators/in.md#select-distributed-subqueries) documentation.
|
||||
|
||||
## Virtual Columns {#virtual-columns}
|
||||
|
||||
|
@ -120,7 +120,7 @@ Some comments about the `sentiment` table:
|
||||
- The `TabSeparated` format means our Python script needs to generate rows of raw data that contain tab-separated values
|
||||
- The query selects two columns from `hackernews`. The Python script will need to parse out those column values from the incoming rows
|
||||
|
||||
Here is the defintion of `sentiment.py`:
|
||||
Here is the definition of `sentiment.py`:
|
||||
|
||||
```python
|
||||
#!/usr/local/bin/python3.9
|
||||
|
@ -14,7 +14,7 @@ Syntax: `URL(URL [,Format] [,CompressionMethod])`
|
||||
|
||||
- The `Format` must be one that ClickHouse can use in `SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see [Formats](../../../interfaces/formats.md#formats).
|
||||
|
||||
If this argument is not specified, ClickHouse detectes the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
|
||||
If this argument is not specified, ClickHouse detects the format automatically from the suffix of the `URL` parameter. If the suffix of `URL` parameter does not match any supported formats, it fails to create table. For example, for engine expression `URL('http://localhost/test.json')`, `JSON` format is applied.
|
||||
|
||||
- `CompressionMethod` indicates whether the HTTP body should be compressed. If compression is enabled, the HTTP packets sent by the URL engine contain a 'Content-Encoding' header to indicate which compression method is used.
|
||||
|
||||
|
@ -308,7 +308,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should:
|
||||
![Choose clickhouse connect as database type](@site/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png)
|
||||
|
||||
:::note
|
||||
If **ClickHouse Connect** is not one of your options, then you will need to install it. The comand is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
|
||||
If **ClickHouse Connect** is not one of your options, then you will need to install it. The command is `pip install clickhouse-connect`, and more info is [available here](https://pypi.org/project/clickhouse-connect/).
|
||||
:::
|
||||
|
||||
#### Add your connection details:
|
||||
|
@ -261,5 +261,5 @@ The results look like
|
||||
```
|
||||
|
||||
:::note
|
||||
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the datset is no longer updated as of September 15, 2022.
|
||||
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
|
||||
:::
|
@ -184,6 +184,15 @@ sudo yum install -y yum-utils
|
||||
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
|
||||
```
|
||||
|
||||
For systems with `zypper` package manager (openSUSE, SLES):
|
||||
|
||||
``` bash
|
||||
sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
|
||||
sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
|
||||
```
|
||||
|
||||
Later any `yum install` can be replaced by `zypper install`. To specify a particular version, add `-$VERSION` to the end of the package name, e.g. `clickhouse-client-22.2.2.22`.
|
||||
|
||||
#### Install ClickHouse server and client
|
||||
|
||||
```bash
|
||||
|
@ -23,6 +23,6 @@ Additional cache types:
|
||||
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
|
||||
- Schema inference cache.
|
||||
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
|
||||
- [(Experimental) Query cache](query-cache.md).
|
||||
- [Query cache](query-cache.md).
|
||||
|
||||
To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.
|
||||
|
@ -1,10 +1,10 @@
|
||||
---
|
||||
slug: /en/operations/query-cache
|
||||
sidebar_position: 65
|
||||
sidebar_label: Query Cache [experimental]
|
||||
sidebar_label: Query Cache
|
||||
---
|
||||
|
||||
# Query Cache [experimental]
|
||||
# Query Cache
|
||||
|
||||
The query cache allows computing `SELECT` queries just once and serving further executions of the same query directly from the cache.
|
||||
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
@ -29,21 +29,10 @@ Transactionally inconsistent caching is traditionally provided by client tools o
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
|
||||
This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::note
|
||||
The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
|
||||
processing) where wrong results are returned.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
As long as the result cache is experimental it must be activated using the following configuration setting:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_query_cache = true;
|
||||
```
|
||||
|
||||
Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
|
||||
of the current session should utilize the query cache. For example, the first execution of query
|
||||
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the
|
||||
current session should utilize the query cache. For example, the first execution of query
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
|
@ -208,7 +208,7 @@ Default value: `3600` (1 hour).
|
||||
## database_catalog_unused_dir_rm_timeout_sec {#database_catalog_unused_dir_rm_timeout_sec}
|
||||
|
||||
Parameter of a task that cleans up garbage from `store/` directory.
|
||||
If some subdirectory is not used by clickhouse-server and it was previousely "hidden"
|
||||
If some subdirectory is not used by clickhouse-server and it was previously "hidden"
|
||||
(see [database_catalog_unused_dir_hide_timeout_sec](../../operations/server-configuration-parameters/settings.md#database_catalog_unused_dir_hide_timeout_sec))
|
||||
and this directory was not modified for last
|
||||
`database_catalog_unused_dir_rm_timeout_sec` seconds, the task will remove this directory.
|
||||
|
@ -1027,7 +1027,7 @@ Timeout to close idle TCP connections after specified number of seconds.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer (0 - close immediatly, after 0 seconds).
|
||||
- Positive integer (0 - close immediately, after 0 seconds).
|
||||
|
||||
Default value: 3600.
|
||||
|
||||
@ -1514,7 +1514,7 @@ Default value: `0`.
|
||||
|
||||
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
|
||||
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited.
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1524,7 +1524,7 @@ Default value: 0 (no restriction).
|
||||
|
||||
## query_cache_max_entries {#query-cache-max-entries}
|
||||
|
||||
The maximum number of query results the current user may store in the query cache. 0 means unlimited.
|
||||
The maximum number of query results the current user may store in the [query cache](../query-cache.md). 0 means unlimited.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1733,7 +1733,7 @@ Possible values:
|
||||
|
||||
Default value: 1.
|
||||
|
||||
By default, async inserts are inserted into replicated tables by the `INSERT` statement enabling [async_isnert](#async-insert) are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
By default, async inserts into replicated tables performed by `INSERT` statements with [async_insert](#async-insert) enabled are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
|
||||
For the replicated tables, by default, only 10000 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-async-inserts), [replicated_deduplication_window_seconds_for_async_inserts](merge-tree-settings.md/#replicated-deduplication-window-seconds-async-inserts)).
|
||||
We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication.
|
||||
This function does not work for non-replicated tables.
|
||||
@ -1939,8 +1939,8 @@ Do not merge aggregation states from different servers for distributed query pro
|
||||
Possible values:
|
||||
|
||||
- `0` — Disabled (final query processing is done on the initiator node).
|
||||
- `1` - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
|
||||
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
|
||||
- `1` - Do not merge aggregation states from different servers for distributed query processing (query completely processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
|
||||
- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completely on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
|
||||
|
||||
Default value: `0`
|
||||
|
||||
@ -4110,7 +4110,7 @@ Enabled by default.
|
||||
|
||||
## use_hedged_requests {#use_hedged_requests}
|
||||
|
||||
Enables hadged requests logic for remote queries. It allows to establish many connections with different replicas for query.
|
||||
Enables hedged requests logic for remote queries. It allows to establish many connections with different replicas for query.
|
||||
New connection is enabled in case existent connection(s) with replica(s) were not established within `hedged_connection_timeout`
|
||||
or no data was received within `receive_data_timeout`. Query uses the first connection which send non empty progress packet (or data packet, if `allow_changing_replica_until_first_data_packet`);
|
||||
other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported.
|
||||
|
@ -183,7 +183,7 @@ Arguments:
|
||||
- `-S`, `--structure` — table structure for input data.
|
||||
- `--input-format` — input format, `TSV` by default.
|
||||
- `-f`, `--file` — path to data, `stdin` by default.
|
||||
- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option.
|
||||
- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option.
|
||||
- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
|
||||
- `-N`, `--table` — table name where to put output data, `table` by default.
|
||||
- `--format`, `--output-format` — output format, `TSV` by default.
|
||||
|
@ -13,11 +13,11 @@ groupBitAnd(expr)
|
||||
|
||||
**Arguments**
|
||||
|
||||
`expr` – An expression that results in `UInt*` type.
|
||||
`expr` – An expression that results in `UInt*` or `Int*` type.
|
||||
|
||||
**Return value**
|
||||
|
||||
Value of the `UInt*` type.
|
||||
Value of the `UInt*` or `Int*` type.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -13,11 +13,11 @@ groupBitOr(expr)
|
||||
|
||||
**Arguments**
|
||||
|
||||
`expr` – An expression that results in `UInt*` type.
|
||||
`expr` – An expression that results in `UInt*` or `Int*` type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
Value of the `UInt*` type.
|
||||
Value of the `UInt*` or `Int*` type.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -13,11 +13,11 @@ groupBitXor(expr)
|
||||
|
||||
**Arguments**
|
||||
|
||||
`expr` – An expression that results in `UInt*` type.
|
||||
`expr` – An expression that results in `UInt*` or `Int*` type.
|
||||
|
||||
**Return value**
|
||||
|
||||
Value of the `UInt*` type.
|
||||
Value of the `UInt*` or `Int*` type.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -23,7 +23,7 @@ Alias: `medianDeterministic`.
|
||||
|
||||
- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median).
|
||||
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md).
|
||||
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly.
|
||||
- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occurs too often, the function works incorrectly.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -949,7 +949,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher
|
||||
...
|
||||
```
|
||||
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronious updates are supported.
|
||||
For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported.
|
||||
|
||||
It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depends on source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC) different logic will be applied to `update_field` before request data from an external source.
|
||||
|
||||
|
@ -78,6 +78,22 @@ GROUP BY
|
||||
│ 1 │ Bobruisk │ Firefox │
|
||||
└─────────────┴──────────┴─────────┘
|
||||
```
|
||||
### Important note!
|
||||
Using multiple `arrayJoin` with the same expression may not produce expected results due to optimizations.
|
||||
For such cases, consider modifying the repeated array expression with extra operations that do not affect the join result - e.g. `arrayJoin(arraySort(arr))`, `arrayJoin(arrayConcat(arr, []))`
|
||||
|
||||
Example:
|
||||
```sql
|
||||
SELECT
|
||||
arrayJoin(dice) as first_throw,
|
||||
/* arrayJoin(dice) as second_throw */ -- is technically correct, but will annihilate result set
|
||||
arrayJoin(arrayConcat(dice, [])) as second_throw -- intentionally changed expression to force re-evaluation
|
||||
FROM (
|
||||
SELECT [1, 2, 3, 4, 5, 6] as dice
|
||||
);
|
||||
```
|
||||
|
||||
|
||||
|
||||
Note the [ARRAY JOIN](../statements/select/array-join.md) syntax in the SELECT query, which provides broader possibilities.
|
||||
`ARRAY JOIN` allows you to convert multiple arrays with the same number of elements at a time.
|
||||
|
@ -314,7 +314,7 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...)
|
||||
|
||||
**Returned values**
|
||||
|
||||
Returns result of logical disjuction.
|
||||
Returns result of logical disjunction.
|
||||
|
||||
Type: `UInt8`.
|
||||
|
||||
|
@ -256,7 +256,7 @@ Result:
|
||||
|
||||
## bitmapCardinality
|
||||
|
||||
Rerturn the cardinality of a bitmap.
|
||||
Returns the cardinality of a bitmap.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -14,7 +14,7 @@ The following types can be compared:
|
||||
- dates
|
||||
- dates with times
|
||||
|
||||
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not accross groups (e.g. UInt16 and DateTime).
|
||||
Only values within the same group can be compared (e.g. UInt16 and UInt64) but not across groups (e.g. UInt16 and DateTime).
|
||||
|
||||
Strings are compared byte-by-byte. Note that this may lead to unexpected results if one of the strings contains UTF-8 encoded multi-byte characters.
|
||||
|
||||
|
@ -26,19 +26,27 @@ SELECT
|
||||
|
||||
## makeDate
|
||||
|
||||
Creates a [Date](../../sql-reference/data-types/date.md) from a year, month and day argument.
|
||||
Creates a [Date](../../sql-reference/data-types/date.md)
|
||||
- from a year, month and day argument, or
|
||||
- from a year and day of year argument.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
makeDate(year, month, day)
|
||||
makeDate(year, month, day);
|
||||
makeDate(year, day_of_year);
|
||||
```
|
||||
|
||||
Alias:
|
||||
- `MAKEDATE(year, month, day);`
|
||||
- `MAKEDATE(year, day_of_year);`
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -48,6 +56,8 @@ Type: [Date](../../sql-reference/data-types/date.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Create a Date from a year, month and day:
|
||||
|
||||
``` sql
|
||||
SELECT makeDate(2023, 2, 28) AS Date;
|
||||
```
|
||||
@ -60,6 +70,19 @@ Result:
|
||||
└────────────┘
|
||||
```
|
||||
|
||||
Create a Date from a year and day of year argument:
|
||||
|
||||
``` sql
|
||||
SELECT makeDate(2023, 42) AS Date;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────date─┐
|
||||
│ 2023-02-11 │
|
||||
└────────────┘
|
||||
```
|
||||
## makeDate32
|
||||
|
||||
Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md).
|
||||
@ -108,6 +131,12 @@ Result:
|
||||
|
||||
Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
|
||||
```
|
||||
|
||||
## timeZone
|
||||
|
||||
Returns the timezone of the server.
|
||||
@ -289,7 +318,7 @@ Aliases: `DAYOFMONTH`, `DAY`.
|
||||
|
||||
Converts a date or date with time to the number of the day in the week as UInt8 value.
|
||||
|
||||
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is ommited, the default mode is 0. The time zone of the date can be specified as the third argument.
|
||||
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
|
||||
|
||||
| Mode | First day of week | Range |
|
||||
|------|-------------------|------------------------------------------------|
|
||||
|
@ -84,7 +84,7 @@ Result:
|
||||
|
||||
## s2GetNeighbors
|
||||
|
||||
Returns S2 neighbor indixes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
|
||||
Returns S2 neighbor indexes corresponding to the provided [S2](#s2index). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -206,7 +206,7 @@ s2CapUnion(center1, radius1, center2, radius2)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `center1`, `center2` — S2 point indixes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
|
||||
- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
**Returned values**
|
||||
|
@ -64,7 +64,7 @@ This is a cryptographic hash function. It works at least three times faster than
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input paramter are hashed in a similar way.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
@ -84,7 +84,7 @@ Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-opera
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `1`, if at least one argument evalutes to `true`,
|
||||
- `1`, if at least one argument evaluates to `true`,
|
||||
- `0`, if all arguments evaluate to `false`,
|
||||
- `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`.
|
||||
|
||||
@ -173,7 +173,7 @@ xor(val1, val2...)
|
||||
**Returned value**
|
||||
|
||||
- `1`, for two values: if one of the values evaluates to `false` and other does not,
|
||||
- `0`, for two values: if both values evalute to `false` or to both `true`,
|
||||
- `0`, for two values: if both values evaluate to `false` or to both `true`,
|
||||
- `NULL`, if at least one of the inputs is `NULL`
|
||||
|
||||
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
|
||||
|
@ -187,7 +187,7 @@ detectLanguageMixed('text_to_be_analyzed')
|
||||
|
||||
**Returned value**
|
||||
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a perentage of text found for that language
|
||||
- `Map(String, Float32)`: The keys are 2-letter ISO codes and the values are a percentage of text found for that language
|
||||
|
||||
|
||||
**Examples**
|
||||
|
@ -306,7 +306,7 @@ You can use this function in table engine parameters in a CREATE TABLE query whe
|
||||
|
||||
## currentUser()
|
||||
|
||||
Returns the login of current user. Login of user, that initiated query, will be returned in case distibuted query.
|
||||
Returns the login of the current user. In case of a distributed query, the login of the user that initiated the query is returned.
|
||||
|
||||
``` sql
|
||||
SELECT currentUser();
|
||||
@ -317,7 +317,7 @@ Alias: `user()`, `USER()`.
|
||||
**Returned values**
|
||||
|
||||
- Login of current user.
|
||||
- Login of user that initiated query in case of disributed query.
|
||||
- Login of user that initiated query in case of distributed query.
|
||||
|
||||
Type: `String`.
|
||||
|
||||
|
@ -19,13 +19,13 @@ The random numbers are generated by non-cryptographic algorithms.
|
||||
|
||||
## rand, rand32
|
||||
|
||||
Returns a random UInt32 number, evenly distributed accross the range of all possible UInt32 numbers.
|
||||
Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
|
||||
## rand64
|
||||
|
||||
Returns a random UInt64 number, evenly distributed accross the range of all possible UInt64 numbers.
|
||||
Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers.
|
||||
|
||||
Uses a linear congruential generator.
|
||||
|
||||
|
@ -310,7 +310,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
|
||||
|
||||
## repeat
|
||||
|
||||
Conatenates a string as many times with itself as specified.
|
||||
Concatenates a string as many times with itself as specified.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1215,96 +1215,3 @@ Result:
|
||||
│ A240 │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairs
|
||||
|
||||
Extracts key-value pairs from any string. The string does not need to be 100% structured in a key value pair format;
|
||||
|
||||
It can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments.
|
||||
|
||||
A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key value pairs must be separated by pair delimiters.
|
||||
|
||||
**Syntax**
|
||||
``` sql
|
||||
extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `pair_delimiters` - Set of character to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Returned values**
|
||||
- The extracted key-value pairs in a Map(String, String).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
**Simple case**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
|
||||
Query id: f9e0ca6f-3178-4ee2-aa2c-a5517abb9cee
|
||||
|
||||
┌─kv──────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Single quote as quoting character**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
|
||||
Query id: 0e22bf6b-9844-414a-99dc-32bf647abd5e
|
||||
|
||||
┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
**Escape sequences without escape sequences support**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairs('age:a\\x0A\\n\\0') as kv
|
||||
|
||||
SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
|
||||
|
||||
Query id: e9fd26ee-b41f-4a11-b17f-25af6fd5d356
|
||||
|
||||
┌─kv─────────────────────┐
|
||||
│ {'age':'a\\x0A\\n\\0'} │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairsWithEscaping
|
||||
|
||||
Same as `extractKeyValuePairs` but with escaping support.
|
||||
|
||||
Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
|
||||
Non standard escape sequences are returned as it is (including the backslash) unless they are one of the following:
|
||||
`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
|
||||
|
||||
This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
|
||||
input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbbb`.
|
||||
- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
|
||||
- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
|
||||
|
||||
Leading escape sequences will be skipped in keys and will be considered invalid for values.
|
||||
|
||||
**Escape sequences with escape sequence support turned on**
|
||||
``` sql
|
||||
arthur :) select extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') as kv
|
||||
|
||||
SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
|
||||
|
||||
Query id: 44c114f0-5658-4c75-ab87-4574de3a1645
|
||||
|
||||
┌─kv────────────────┐
|
||||
│ {'age':'a\n\n\0'} │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
@ -133,7 +133,7 @@ Tuples should have the same type of the elements.
|
||||
|
||||
- The Hamming distance.
|
||||
|
||||
Type: The result type is calculed the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
|
||||
Type: The result type is calculated the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples.
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
@ -223,7 +223,7 @@ Result:
|
||||
└───────────────────────────────────────┘
|
||||
```
|
||||
|
||||
It is possible to transform colums to rows using this function:
|
||||
It is possible to transform columns to rows using this function:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tupletest (col Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
|
||||
|
@ -109,6 +109,108 @@ SELECT mapFromArrays([1, 2, 3], map('a', 1, 'b', 2, 'c', 3))
|
||||
└───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairs
|
||||
|
||||
Extracts key-value pairs, i.e. a [Map(String, String)](../../sql-reference/data-types/map.md), from a string. Parsing is robust towards noise (e.g. log files).
|
||||
|
||||
A key-value pair consists of a key, followed by a `key_value_delimiter` and a value. Key value pairs must be separated by `pair_delimiter`. Quoted keys and values are also supported.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
extractKeyValuePairs(data[, key_value_delimiter[, pair_delimiter[, quoting_character]]])
|
||||
```
|
||||
|
||||
Alias:
|
||||
- `str_to_map`
|
||||
- `mapFromString`
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to ` `, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Returned values**
|
||||
|
||||
- A [Map(String, String)](../../sql-reference/data-types/map.md) of key-value pairs.
|
||||
|
||||
**Examples**
|
||||
|
||||
Simple case:
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') as kv
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─kv──────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Single quote as quoting character:
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') as kv
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Escape sequences without escape sequences support:
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─kv─────────────────────┐
|
||||
│ {'age':'a\\x0A\\n\\0'} │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
## extractKeyValuePairsWithEscaping
|
||||
|
||||
Same as `extractKeyValuePairs` but with escaping support.
|
||||
|
||||
Supported escape sequences: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`.
|
||||
Non-standard escape sequences are returned as is (including the backslash) unless they are one of the following:
|
||||
`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31).
|
||||
|
||||
This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following
|
||||
input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbb`.
|
||||
- Pre-escaping: Pre-escaping it will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa`
|
||||
- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as it is.
|
||||
|
||||
Leading escape sequences will be skipped in keys and will be considered invalid for values.
|
||||
|
||||
**Examples**
|
||||
|
||||
Escape sequences with escape sequence support turned on:
|
||||
|
||||
``` sql
|
||||
SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─kv────────────────┐
|
||||
│ {'age':'a\n\n\0'} │
|
||||
└───────────────────┘
|
||||
```
|
||||
|
||||
## mapAdd
|
||||
|
||||
Collect all the keys and sum corresponding values.
|
||||
@ -449,7 +551,7 @@ mapExtractKeyLike(map, pattern)
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map contained elements the key of which matchs the specified pattern. If there are no elements matched the pattern, it will return an empty map.
|
||||
- A map contained elements the key of which matches the specified pattern. If there are no elements matched the pattern, it will return an empty map.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -116,7 +116,7 @@ The column description can specify a default value expression in the form of `DE
|
||||
|
||||
The expression `expr` is optional. If it is omitted, the column type must be specified explicitly and the default value will be `0` for numeric columns, `''` (the empty string) for string columns, `[]` (the empty array) for array columns, `1970-01-01` for date columns, or `NULL` for nullable columns.
|
||||
|
||||
The column type of a default value column can be omitted in which case it is infered from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date.
|
||||
The column type of a default value column can be omitted in which case it is inferred from `expr`'s type. For example the type of column `EventDate DEFAULT toDate(EventTime)` will be date.
|
||||
|
||||
If both a data type and a default value expression are specified, an implicit type casting function inserted which converts the expression to the specified type. Example: `Hits UInt32 DEFAULT 0` is internally represented as `Hits UInt32 DEFAULT toUInt32(0)`.
|
||||
|
||||
|
@ -34,7 +34,7 @@ If the `alter_sync` is set to `2` and some replicas are not active for more than
|
||||
|
||||
## BY expression
|
||||
|
||||
If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explictly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).
|
||||
If you want to perform deduplication on custom set of columns rather than on all, you can specify list of columns explicitly or use any combination of [`*`](../../sql-reference/statements/select/index.md#asterisk), [`COLUMNS`](../../sql-reference/statements/select/index.md#columns-expression) or [`EXCEPT`](../../sql-reference/statements/select/index.md#except-modifier) expressions. The explicitly written or implicitly expanded list of columns must include all columns specified in row ordering expression (both primary and sorting keys) and partitioning expression (partitioning key).
|
||||
|
||||
:::note
|
||||
Notice that `*` behaves just like in `SELECT`: [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) and [ALIAS](../../sql-reference/statements/create/table.md#alias) columns are not used for expansion.
|
||||
|
@ -105,22 +105,6 @@ Result:
|
||||
|
||||
- [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database)
|
||||
|
||||
## SHOW PROCESSLIST
|
||||
|
||||
``` sql
|
||||
SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, that contains a list of queries that is being processed at the moment, excepting `SHOW PROCESSLIST` queries.
|
||||
|
||||
The `SELECT * FROM system.processes` query returns data about all the current queries.
|
||||
|
||||
Tip (execute in the console):
|
||||
|
||||
``` bash
|
||||
$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
|
||||
```
|
||||
|
||||
## SHOW TABLES
|
||||
|
||||
Displays a list of tables.
|
||||
@ -284,6 +268,77 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## SHOW INDEX
|
||||
|
||||
Displays a list of primary and data skipping indexes of a table.
|
||||
|
||||
```sql
|
||||
SHOW [EXTENDED] {INDEX | INDEXES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>]
|
||||
```
|
||||
|
||||
The database and table name can be specified in abbreviated form as `<db>.<table>`, i.e. `FROM tab FROM db` and `FROM db.tab` are
|
||||
equivalent. If no database is specified, the query uses the current database.
|
||||
|
||||
The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility.
|
||||
|
||||
`SHOW INDEX` produces a result table with the following structure:
|
||||
- table - The name of the table (String)
|
||||
- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8)
|
||||
- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String)
|
||||
- seq_in_index - Currently unused
|
||||
- column_name - Currently unused
|
||||
- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String))
|
||||
- cardinality - Currently unused
|
||||
- sub_part - Currently unused
|
||||
- packed - Currently unused
|
||||
- null - Currently unused
|
||||
- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String)
|
||||
- comment - Currently unused
|
||||
- index_comment - Currently unused
|
||||
- visible - If the index is visible to the optimizer, always `YES` (String)
|
||||
- expression - The index expression (String)
|
||||
|
||||
**Examples**
|
||||
|
||||
Getting information about all indexes in table 'tbl'
|
||||
|
||||
```sql
|
||||
SHOW INDEX FROM 'tbl'
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐
|
||||
│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │
|
||||
│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │
|
||||
│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │
|
||||
│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │
|
||||
│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │
|
||||
└───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘
|
||||
```
|
||||
|
||||
**See also**
|
||||
|
||||
- [system.tables](../../operations/system-tables/tables.md)
|
||||
- [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md)
|
||||
|
||||
## SHOW PROCESSLIST
|
||||
|
||||
``` sql
|
||||
SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Outputs the content of the [system.processes](../../operations/system-tables/processes.md#system_tables-processes) table, which contains a list of queries that are being processed at the moment, except for `SHOW PROCESSLIST` queries.
|
||||
|
||||
The `SELECT * FROM system.processes` query returns data about all the current queries.
|
||||
|
||||
Tip (execute in the console):
|
||||
|
||||
``` bash
|
||||
$ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
|
||||
```
|
||||
|
||||
## SHOW GRANTS
|
||||
|
||||
Shows privileges for a user.
|
||||
|
184
docs/en/sql-reference/table-functions/gcs.md
Normal file
184
docs/en/sql-reference/table-functions/gcs.md
Normal file
@ -0,0 +1,184 @@
|
||||
---
|
||||
slug: /en/sql-reference/table-functions/gcs
|
||||
sidebar_position: 45
|
||||
sidebar_label: gcs
|
||||
keywords: [gcs, bucket]
|
||||
---
|
||||
|
||||
# gcs Table Function
|
||||
|
||||
Provides a table-like interface to select/insert files in [Google Cloud Storage](https://cloud.google.com/storage/).
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
gcs(path [,hmac_key, hmac_secret] [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
:::tip GCS
|
||||
The GCS Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs](https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
|
||||
|
||||
:::
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
|
||||
|
||||
:::note GCS
|
||||
The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API:
|
||||
```
|
||||
https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
|
||||
```
|
||||
and not ~~https://storage.cloud.google.com~~.
|
||||
:::
|
||||
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A table with the specified structure for reading or writing data in the specified file.
|
||||
|
||||
**Examples**
|
||||
|
||||
Selecting the first two rows from the table from GCS file `https://storage.googleapis.com/my-test-bucket-768/data.csv`:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
|
||||
LIMIT 2;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─column1─┬─column2─┬─column3─┐
|
||||
│ 1 │ 2 │ 3 │
|
||||
│ 3 │ 2 │ 1 │
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
The same, but from a file with `gzip` compression:
|
||||
|
||||
``` sql
|
||||
SELECT *
|
||||
FROM gcs('https://storage.googleapis.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip')
|
||||
LIMIT 2;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─column1─┬─column2─┬─column3─┐
|
||||
│ 1 │ 2 │ 3 │
|
||||
│ 3 │ 2 │ 1 │
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Suppose that we have several files with following URIs on GCS:
|
||||
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_1.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_2.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_3.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/some_prefix/some_file_4.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_1.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_2.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_3.csv'
|
||||
- 'https://storage.googleapis.com/my-test-bucket-768/another_prefix/some_file_4.csv'
|
||||
|
||||
Count the amount of rows in files ending with numbers from 1 to 3:
|
||||
|
||||
``` sql
|
||||
SELECT count(*)
|
||||
FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32')
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─count()─┐
|
||||
│ 18 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Count the total amount of rows in all files in these two directories:
|
||||
|
||||
``` sql
|
||||
SELECT count(*)
|
||||
FROM gcs('https://storage.googleapis.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32')
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─count()─┐
|
||||
│ 24 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
:::warning
|
||||
If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
|
||||
:::
|
||||
|
||||
Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`:
|
||||
|
||||
``` sql
|
||||
SELECT count(*)
|
||||
FROM gcs('https://storage.googleapis.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─count()─┐
|
||||
│ 12 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
Insert data into file `test-data.csv.gz`:
|
||||
|
||||
``` sql
|
||||
INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
|
||||
VALUES ('test-data', 1), ('test-data-2', 2);
|
||||
```
|
||||
|
||||
Insert data into file `test-data.csv.gz` from existing table:
|
||||
|
||||
``` sql
|
||||
INSERT INTO FUNCTION gcs('https://storage.googleapis.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip')
|
||||
SELECT name, value FROM existing_table;
|
||||
```
|
||||
|
||||
Glob `**` can be used for recursive directory traversal. The example below fetches all files from the `my-test-bucket-768` directory recursively:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
|
||||
The example below gets data from all `test-data.csv.gz` files in any folder inside the `my-test-bucket-768` directory, recursively:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM gcs('https://storage.googleapis.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
|
||||
```
|
||||
|
||||
## Partitioned Write
|
||||
|
||||
If you specify `PARTITION BY` expression when inserting data into `GCS` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency.
|
||||
|
||||
**Examples**
|
||||
|
||||
1. Using partition ID in a key creates separate files:
|
||||
|
||||
```sql
|
||||
INSERT INTO TABLE FUNCTION
|
||||
gcs('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32')
|
||||
PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24);
|
||||
```
|
||||
As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`.
|
||||
|
||||
2. Using partition ID in a bucket name creates files in different buckets:
|
||||
|
||||
```sql
|
||||
INSERT INTO TABLE FUNCTION
|
||||
gcs('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32')
|
||||
PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24);
|
||||
```
|
||||
As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [S3 table function](s3.md)
|
||||
- [S3 engine](../../engines/table-engines/integrations/s3.md)
|
@ -6,7 +6,7 @@ sidebar_label: hdfsCluster
|
||||
|
||||
# hdfsCluster Table Function
|
||||
|
||||
Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
Allows processing files from HDFS in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in HDFS file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -53,7 +53,7 @@ The `remote` table function can be useful in the following cases:
|
||||
- Infrequent distributed requests that are made manually.
|
||||
- Distributed requests where the set of servers is re-defined each time.
|
||||
|
||||
### Adresses
|
||||
### Addresses
|
||||
|
||||
``` text
|
||||
example01-01-1
|
||||
|
@ -5,7 +5,7 @@ sidebar_label: s3Cluster
|
||||
title: "s3Cluster Table Function"
|
||||
---
|
||||
|
||||
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisks in S3 file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -80,7 +80,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
|
||||
- `PARTITION BY` - defines how to break a resultset into groups.
|
||||
- `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function.
|
||||
- `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame.
|
||||
- `WINDOW` - allows to reuse a window definition with multiple exressions.
|
||||
- `WINDOW` - allows to reuse a window definition with multiple expressions.
|
||||
|
||||
### Functions
|
||||
|
||||
|
@ -77,15 +77,37 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
|
||||
Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm` пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm.
|
||||
|
||||
#### Установка официального репозитория
|
||||
|
||||
Сначала нужно подключить официальный репозиторий:
|
||||
|
||||
``` bash
|
||||
sudo yum install -y yum-utils
|
||||
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
|
||||
sudo yum install -y clickhouse-server clickhouse-client
|
||||
```
|
||||
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
Для систем с пакетным менеджером `zypper` (openSUSE, SLES):
|
||||
|
||||
``` bash
|
||||
sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
|
||||
sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
|
||||
```
|
||||
|
||||
Далее любая команда `yum install` может быть заменена на `zypper install`. Чтобы указать желаемую версию, необходимо добавить `-$VERSION` в имени пакета, например `clickhouse-client-22.2.2.22`.
|
||||
|
||||
#### Установка сервера и клиента
|
||||
|
||||
``` bash
|
||||
sudo yum install -y clickhouse-server clickhouse-client
|
||||
```
|
||||
|
||||
#### Запуск сервера
|
||||
|
||||
``` bash
|
||||
sudo systemctl enable clickhouse-server
|
||||
sudo systemctl start clickhouse-server
|
||||
sudo systemctl status clickhouse-server
|
||||
clickhouse-client # или "clickhouse-client --password" если установлен пароль
|
||||
```
|
||||
|
||||
<details markdown="1">
|
||||
|
@ -107,7 +107,7 @@ SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
|
||||
|
||||
## aes_encrypt_mysql {#aes_encrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
Совместима с шифрованием mysql, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
|
||||
|
||||
При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
|
||||
|
||||
@ -298,7 +298,7 @@ SELECT comment, decrypt('aes-256-ofb', secret, '12345678910121314151617181920212
|
||||
|
||||
## aes_decrypt_mysql {#aes_decrypt_mysql}
|
||||
|
||||
Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
Совместима с шифрованием mysql и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
|
||||
|
||||
При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
|
||||
|
||||
|
@ -84,6 +84,17 @@ sudo /etc/init.d/clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
```
|
||||
|
||||
For systems with `zypper` package manager (openSUSE, SLES):
|
||||
|
||||
``` bash
|
||||
sudo zypper addrepo -r https://packages.clickhouse.com/rpm/clickhouse.repo -g
|
||||
sudo zypper --gpg-auto-import-keys refresh clickhouse-stable
|
||||
sudo zypper install -y clickhouse-server clickhouse-client
|
||||
|
||||
sudo /etc/init.d/clickhouse-server start
|
||||
clickhouse-client # or "clickhouse-client --password" if you set up a password.
|
||||
```
|
||||
|
||||
<details markdown="1">
|
||||
|
||||
<summary>Deprecated Method for installing rpm-packages</summary>
|
||||
|
@ -778,7 +778,7 @@ TCP端口,用于与客户端进行安全通信。 使用它与 [OpenSSL](#serv
|
||||
|
||||
## zookeeper {#server-settings_zookeeper}
|
||||
|
||||
包含允许ClickHouse与 [zookpeer](http://zookeeper.apache.org/) 集群。
|
||||
包含允许ClickHouse与 [zookeeper](http://zookeeper.apache.org/) 集群。
|
||||
|
||||
ClickHouse使用ZooKeeper存储复制表副本的元数据。 如果未使用复制的表,则可以省略此部分参数。
|
||||
|
||||
|
@ -114,7 +114,7 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
clickhouse_add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES})
|
||||
|
||||
# Remove some redundant dependencies
|
||||
target_compile_definitions (clickhouse-keeper PRIVATE -DKEEPER_STANDALONE_BUILD)
|
||||
target_compile_definitions (clickhouse-keeper PRIVATE -DCLICKHOUSE_PROGRAM_STANDALONE_BUILD)
|
||||
target_compile_definitions (clickhouse-keeper PUBLIC -DWITHOUT_TEXT_LOG)
|
||||
|
||||
target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../src") # uses includes from src directory
|
||||
|
@ -57,7 +57,7 @@ int mainEntryClickHouseKeeper(int argc, char ** argv)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KEEPER_STANDALONE_BUILD
|
||||
#ifdef CLICKHOUSE_PROGRAM_STANDALONE_BUILD
|
||||
|
||||
// Weak symbols don't work correctly on Darwin
|
||||
// so we have a stub implementation to avoid linker errors
|
||||
|
@ -27,7 +27,6 @@
|
||||
#include <Common/ConcurrencyControl.h>
|
||||
#include <Common/Macros.h>
|
||||
#include <Common/ShellCommand.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
|
||||
#include <Common/getMultipleKeysFromConfig.h>
|
||||
@ -98,9 +97,7 @@
|
||||
#include "config_version.h"
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
# include <cstddef>
|
||||
# include <cstdlib>
|
||||
# include <sys/socket.h>
|
||||
# include <sys/un.h>
|
||||
# include <sys/mman.h>
|
||||
# include <sys/ptrace.h>
|
||||
@ -108,7 +105,6 @@
|
||||
#endif
|
||||
|
||||
#if USE_SSL
|
||||
# include <Poco/Net/Context.h>
|
||||
# include <Poco/Net/SecureServerSocket.h>
|
||||
#endif
|
||||
|
||||
@ -1242,8 +1238,13 @@ try
|
||||
global_context->setMacros(std::make_unique<Macros>(*config, "macros", log));
|
||||
global_context->setExternalAuthenticatorsConfig(*config);
|
||||
|
||||
global_context->loadOrReloadDictionaries(*config);
|
||||
global_context->loadOrReloadUserDefinedExecutableFunctions(*config);
|
||||
if (global_context->isServerCompletelyStarted())
|
||||
{
|
||||
/// It does not make sense to reload anything before server has started.
|
||||
/// Moreover, it may break initialization order.
|
||||
global_context->loadOrReloadDictionaries(*config);
|
||||
global_context->loadOrReloadUserDefinedExecutableFunctions(*config);
|
||||
}
|
||||
|
||||
global_context->setRemoteHostFilter(*config);
|
||||
|
||||
|
@ -517,11 +517,12 @@
|
||||
let previous_query = '';
|
||||
|
||||
const current_url = new URL(window.location);
|
||||
const opened_locally = location.protocol == 'file:';
|
||||
|
||||
const server_address = current_url.searchParams.get('url');
|
||||
if (server_address) {
|
||||
document.getElementById('url').value = server_address;
|
||||
} else if (location.protocol != 'file:') {
|
||||
} else if (!opened_locally) {
|
||||
/// Substitute the address of the server where the page is served.
|
||||
document.getElementById('url').value = location.origin;
|
||||
}
|
||||
@ -532,6 +533,19 @@
|
||||
document.getElementById('user').value = user_from_url;
|
||||
}
|
||||
|
||||
const pass_from_url = current_url.searchParams.get('password');
|
||||
if (pass_from_url) {
|
||||
document.getElementById('password').value = pass_from_url;
|
||||
/// Browsers don't allow manipulating history for the 'file:' protocol.
|
||||
if (!opened_locally) {
|
||||
let replaced_pass = current_url.searchParams;
|
||||
replaced_pass.delete('password');
|
||||
window.history.replaceState(null, '',
|
||||
window.location.origin + window.location.pathname + '?'
|
||||
+ replaced_pass.toString() + window.location.hash);
|
||||
}
|
||||
}
|
||||
|
||||
function postImpl(posted_request_num, query)
|
||||
{
|
||||
const user = document.getElementById('user').value;
|
||||
@ -548,7 +562,7 @@
|
||||
'&max_result_rows=1000&max_result_bytes=10000000&result_overflow_mode=break';
|
||||
|
||||
// If play.html is opened locally, append username and password to the URL parameter to avoid CORS issue.
|
||||
if (document.location.href.startsWith("file://")) {
|
||||
if (opened_locally) {
|
||||
url += '&user=' + encodeURIComponent(user) +
|
||||
'&password=' + encodeURIComponent(password)
|
||||
}
|
||||
@ -557,7 +571,7 @@
|
||||
|
||||
xhr.open('POST', url, true);
|
||||
// If play.html is open normally, use Basic auth to prevent username and password being exposed in URL parameters
|
||||
if (!document.location.href.startsWith("file://")) {
|
||||
if (!opened_locally) {
|
||||
xhr.setRequestHeader("Authorization", "Basic " + btoa(user+":"+password));
|
||||
}
|
||||
xhr.onreadystatechange = function()
|
||||
|
@ -11,3 +11,10 @@ libc = "0.2.132"
|
||||
[lib]
|
||||
crate-type = ["staticlib"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[profile.release-thinlto]
|
||||
inherits = "release"
|
||||
# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit
|
||||
lto = true
|
||||
|
@ -34,9 +34,18 @@ function(clickhouse_import_crate)
|
||||
else()
|
||||
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug")
|
||||
endif()
|
||||
# NOTE: we may use LTO for rust too
|
||||
|
||||
corrosion_import_crate(NO_STD ${ARGN})
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
|
||||
set(profile "")
|
||||
else()
|
||||
if (ENABLE_THINLTO)
|
||||
set(profile "release-thinlto")
|
||||
else()
|
||||
set(profile "release")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
|
||||
endfunction()
|
||||
|
||||
# Add crate from the build directory.
|
||||
|
@ -18,3 +18,8 @@ crate-type = ["staticlib"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
||||
[profile.release-thinlto]
|
||||
inherits = "release"
|
||||
# We use LTO here as well to slightly decrease binary size
|
||||
lto = true
|
||||
|
@ -27,7 +27,7 @@ AggregateFunctionPtr createAggregateFunctionBitwise(const std::string & name, co
|
||||
"is illegal, because it cannot be used in bitwise operations",
|
||||
argument_types[0]->getName(), name);
|
||||
|
||||
AggregateFunctionPtr res(createWithUnsignedIntegerType<AggregateFunctionBitwise, Data>(*argument_types[0], argument_types[0]));
|
||||
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionBitwise, Data>(*argument_types[0], argument_types[0]));
|
||||
|
||||
if (!res)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||
|
||||
// TODO include this last because of a broken roaring header. See the comment inside.
|
||||
|
@ -13,6 +13,11 @@ namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
|
||||
/** Aggregate function that takes arbitrary number of arbitrary arguments and does nothing.
|
||||
*/
|
||||
@ -69,7 +74,8 @@ public:
|
||||
{
|
||||
[[maybe_unused]] char symbol;
|
||||
readChar(symbol, buf);
|
||||
assert(symbol == '\0');
|
||||
if (symbol != '\0')
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect state of aggregate function 'nothing', it should contain exactly one zero byte, while it is {}.", static_cast<UInt32>(symbol));
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict, IColumn & to, Arena *) const override
|
||||
|
@ -100,6 +100,28 @@ static IAggregateFunction * createWithUnsignedIntegerType(const IDataType & argu
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||
static IAggregateFunction * createWithSignedIntegerType(const IDataType & argument_type, TArgs && ... args)
|
||||
{
|
||||
WhichDataType which(argument_type);
|
||||
if (which.idx == TypeIndex::Int8) return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int16) return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int32) return new AggregateFunctionTemplate<Int32, Data<Int32>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int64) return new AggregateFunctionTemplate<Int64, Data<Int64>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int128) return new AggregateFunctionTemplate<Int128, Data<Int128>>(std::forward<TArgs>(args)...);
|
||||
if (which.idx == TypeIndex::Int256) return new AggregateFunctionTemplate<Int256, Data<Int256>>(std::forward<TArgs>(args)...);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||
static IAggregateFunction * createWithIntegerType(const IDataType & argument_type, TArgs && ... args)
|
||||
{
|
||||
IAggregateFunction * f = createWithUnsignedIntegerType<AggregateFunctionTemplate, Data>(argument_type, std::forward<TArgs>(args)...);
|
||||
if (f)
|
||||
return f;
|
||||
return createWithSignedIntegerType<AggregateFunctionTemplate, Data>(argument_type, std::forward<TArgs>(args)...);
|
||||
}
|
||||
|
||||
template <template <typename, typename> class AggregateFunctionTemplate, template <typename> class Data, typename... TArgs>
|
||||
static IAggregateFunction * createWithBasicNumberOrDateOrDateTime(const IDataType & argument_type, TArgs &&... args)
|
||||
{
|
||||
|
@ -335,7 +335,7 @@ public:
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
hash_value = hash(x);
|
||||
else
|
||||
hash_value = __builtin_bswap32(hash(x));
|
||||
hash_value = std::byteswap(hash(x));
|
||||
if (!good(hash_value))
|
||||
return;
|
||||
|
||||
|
@ -544,7 +544,8 @@ if (TARGET ch_contrib::qpl)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::qpl)
|
||||
endif ()
|
||||
|
||||
dbms_target_link_libraries(PRIVATE _boost_context)
|
||||
target_link_libraries(clickhouse_common_io PUBLIC boost::context)
|
||||
dbms_target_link_libraries(PUBLIC boost::context)
|
||||
|
||||
if (ENABLE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
|
||||
|
@ -60,13 +60,7 @@ bool ColumnFixedString::isDefaultAt(size_t index) const
|
||||
void ColumnFixedString::insert(const Field & x)
|
||||
{
|
||||
const String & s = x.get<const String &>();
|
||||
|
||||
if (s.size() > n)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string '{}' for FixedString column", s);
|
||||
|
||||
size_t old_size = chars.size();
|
||||
chars.resize_fill(old_size + n);
|
||||
memcpy(chars.data() + old_size, s.data(), s.size());
|
||||
insertData(s.data(), s.size());
|
||||
}
|
||||
|
||||
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
|
||||
@ -87,8 +81,9 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
|
||||
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string for FixedString column");
|
||||
|
||||
size_t old_size = chars.size();
|
||||
chars.resize_fill(old_size + n);
|
||||
chars.resize(old_size + n);
|
||||
memcpy(chars.data() + old_size, pos, length);
|
||||
memset(chars.data() + old_size + length, 0, n - length);
|
||||
}
|
||||
|
||||
StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
|
||||
@ -278,7 +273,7 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted)
|
||||
|
||||
ssize_t index = mask.size() - 1;
|
||||
ssize_t from = size() - 1;
|
||||
chars.resize_fill(mask.size() * n, 0);
|
||||
chars.resize_fill(mask.size() * n);
|
||||
while (index >= 0)
|
||||
{
|
||||
if (!!mask[index] ^ inverted)
|
||||
|
@ -485,13 +485,8 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
|
||||
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
|
||||
{
|
||||
auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length));
|
||||
/// Create column with new indexes and old dictionary.
|
||||
/// Dictionary is shared, but will be recreated after compactInplace call.
|
||||
auto column = ColumnLowCardinality::create(getDictionary().assumeMutable(), std::move(sub_positions));
|
||||
/// Will create new dictionary.
|
||||
column->compactInplace();
|
||||
|
||||
return column;
|
||||
auto new_column_unique = Dictionary::compact(dictionary.getColumnUnique(), sub_positions);
|
||||
return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions));
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::compactInplace()
|
||||
@ -589,7 +584,7 @@ size_t ColumnLowCardinality::Index::getSizeOfIndexType(const IColumn & column, s
|
||||
column.getName());
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Index::attachPositions(ColumnPtr positions_)
|
||||
void ColumnLowCardinality::Index::attachPositions(MutableColumnPtr positions_)
|
||||
{
|
||||
positions = std::move(positions_);
|
||||
updateSizeOfType();
|
||||
@ -820,21 +815,23 @@ void ColumnLowCardinality::Dictionary::setShared(const ColumnPtr & column_unique
|
||||
shared = true;
|
||||
}
|
||||
|
||||
void ColumnLowCardinality::Dictionary::compact(ColumnPtr & positions)
|
||||
void ColumnLowCardinality::Dictionary::compact(MutableColumnPtr & positions)
|
||||
{
|
||||
auto new_column_unique = column_unique->cloneEmpty();
|
||||
column_unique = compact(getColumnUnique(), positions);
|
||||
shared = false;
|
||||
}
|
||||
|
||||
auto & unique = getColumnUnique();
|
||||
MutableColumnPtr ColumnLowCardinality::Dictionary::compact(const IColumnUnique & unique, MutableColumnPtr & positions)
|
||||
{
|
||||
auto new_column_unique = unique.cloneEmpty();
|
||||
auto & new_unique = static_cast<IColumnUnique &>(*new_column_unique);
|
||||
|
||||
auto indexes = mapUniqueIndex(positions->assumeMutableRef());
|
||||
auto indexes = mapUniqueIndex(*positions);
|
||||
auto sub_keys = unique.getNestedColumn()->index(*indexes, 0);
|
||||
auto new_indexes = new_unique.uniqueInsertRangeFrom(*sub_keys, 0, sub_keys->size());
|
||||
|
||||
positions = IColumn::mutate(new_indexes->index(*positions, 0));
|
||||
column_unique = std::move(new_column_unique);
|
||||
|
||||
shared = false;
|
||||
return new_column_unique;
|
||||
}
|
||||
|
||||
ColumnPtr ColumnLowCardinality::cloneWithDefaultOnNull() const
|
||||
|
@ -160,7 +160,9 @@ public:
|
||||
|
||||
void reserve(size_t n) override { idx.reserve(n); }
|
||||
|
||||
size_t byteSize() const override { return idx.getPositions()->byteSize() + getDictionary().byteSize(); }
|
||||
/// Don't count the dictionary size as it can be shared between different blocks.
|
||||
size_t byteSize() const override { return idx.getPositions()->byteSize(); }
|
||||
|
||||
size_t byteSizeAt(size_t n) const override { return getDictionary().byteSizeAt(getIndexes().getUInt(n)); }
|
||||
size_t allocatedBytes() const override { return idx.getPositions()->allocatedBytes() + getDictionary().allocatedBytes(); }
|
||||
|
||||
@ -301,8 +303,8 @@ public:
|
||||
|
||||
void checkSizeOfType();
|
||||
|
||||
ColumnPtr detachPositions() { return std::move(positions); }
|
||||
void attachPositions(ColumnPtr positions_);
|
||||
MutableColumnPtr detachPositions() { return IColumn::mutate(std::move(positions)); }
|
||||
void attachPositions(MutableColumnPtr positions_);
|
||||
|
||||
void countKeys(ColumnUInt64::Container & counts) const;
|
||||
|
||||
@ -350,7 +352,9 @@ private:
|
||||
bool isShared() const { return shared; }
|
||||
|
||||
/// Create new dictionary with only keys that are mentioned in positions.
|
||||
void compact(ColumnPtr & positions);
|
||||
void compact(MutableColumnPtr & positions);
|
||||
|
||||
static MutableColumnPtr compact(const IColumnUnique & column_unique, MutableColumnPtr & positions);
|
||||
|
||||
private:
|
||||
WrappedPtr column_unique;
|
||||
|
@ -214,7 +214,7 @@ void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n)
|
||||
void ColumnNullable::insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length)
|
||||
{
|
||||
getNestedColumn().insertRangeFrom(src, start, length);
|
||||
getNullMapData().resize_fill(getNullMapData().size() + length, 0);
|
||||
getNullMapData().resize_fill(getNullMapData().size() + length);
|
||||
}
|
||||
|
||||
void ColumnNullable::insertManyFromNotNullable(const IColumn & src, size_t position, size_t length)
|
||||
|
@ -176,7 +176,7 @@ void ColumnString::expand(const IColumn::Filter & mask, bool inverted)
|
||||
/// (if not, one of exceptions below will throw) and we can calculate the resulting chars size.
|
||||
UInt64 last_offset = offsets_data[from] + (mask.size() - offsets_data.size());
|
||||
offsets_data.resize(mask.size());
|
||||
chars_data.resize_fill(last_offset, 0);
|
||||
chars_data.resize_fill(last_offset);
|
||||
while (index >= 0)
|
||||
{
|
||||
offsets_data[index] = last_offset;
|
||||
|
@ -26,9 +26,7 @@ using namespace DB;
|
||||
template <typename T>
|
||||
void checkColumn(
|
||||
const WeakHash32::Container & hash,
|
||||
const PaddedPODArray<T> & eq_class,
|
||||
size_t allowed_collisions = 0,
|
||||
size_t max_collisions_to_print = 10)
|
||||
const PaddedPODArray<T> & eq_class)
|
||||
{
|
||||
ASSERT_EQ(hash.size(), eq_class.size());
|
||||
|
||||
@ -52,41 +50,6 @@ void checkColumn(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Check have not many collisions.
|
||||
{
|
||||
std::unordered_map<UInt32, T> map;
|
||||
size_t num_collisions = 0;
|
||||
|
||||
std::stringstream collisions_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
collisions_str.exceptions(std::ios::failbit);
|
||||
|
||||
for (size_t i = 0; i < eq_class.size(); ++i)
|
||||
{
|
||||
auto & val = eq_class[i];
|
||||
auto it = map.find(hash[i]);
|
||||
|
||||
if (it == map.end())
|
||||
map[hash[i]] = val;
|
||||
else if (it->second != val)
|
||||
{
|
||||
++num_collisions;
|
||||
|
||||
if (num_collisions <= max_collisions_to_print)
|
||||
{
|
||||
collisions_str << "Collision:\n";
|
||||
}
|
||||
|
||||
if (num_collisions > allowed_collisions)
|
||||
{
|
||||
std::cerr << collisions_str.rdbuf();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_LE(num_collisions, allowed_collisions);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(WeakHash32, ColumnVectorU8)
|
||||
@ -374,10 +337,7 @@ TEST(WeakHash32, ColumnString2)
|
||||
WeakHash32 hash(col->size());
|
||||
col->updateWeakHash32(hash);
|
||||
|
||||
/// Now there is single collision between 'k' * 544 and 'q' * 2512 (which is calculated twice)
|
||||
size_t allowed_collisions = 4;
|
||||
|
||||
checkColumn(hash.getData(), data, allowed_collisions);
|
||||
checkColumn(hash.getData(), data);
|
||||
}
|
||||
|
||||
TEST(WeakHash32, ColumnString3)
|
||||
@ -717,8 +677,7 @@ TEST(WeakHash32, ColumnTupleUInt64String)
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
|
||||
size_t allowed_collisions = 8;
|
||||
checkColumn(hash.getData(), eq, allowed_collisions);
|
||||
checkColumn(hash.getData(), eq);
|
||||
}
|
||||
|
||||
TEST(WeakHash32, ColumnTupleUInt64FixedString)
|
||||
@ -803,10 +762,5 @@ TEST(WeakHash32, ColumnTupleUInt64Array)
|
||||
WeakHash32 hash(col_tuple->size());
|
||||
col_tuple->updateWeakHash32(hash);
|
||||
|
||||
/// There are 2 collisions right now (repeated 2 times each):
|
||||
/// (0, [array of size 1212 with values 7]) vs (0, [array of size 2265 with values 17])
|
||||
/// (0, [array of size 558 with values 5]) vs (1, [array of size 879 with values 21])
|
||||
|
||||
size_t allowed_collisions = 8;
|
||||
checkColumn(hash.getData(), eq_data, allowed_collisions);
|
||||
checkColumn(hash.getData(), eq_data);
|
||||
}
|
||||
|
173
src/Common/ConcurrencyControl.cpp
Normal file
173
src/Common/ConcurrencyControl.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
#include <Common/ConcurrencyControl.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
// Destroying the guard hands the slot back through the owning allocation.
ConcurrencyControl::Slot::~Slot()
{
    allocation->release();
}
|
||||
|
||||
ConcurrencyControl::Slot::Slot(AllocationPtr && allocation_)
|
||||
: allocation(std::move(allocation_))
|
||||
{
|
||||
}
|
||||
|
||||
// Unregisters this allocation from its parent ConcurrencyControl.
ConcurrencyControl::Allocation::~Allocation()
{
    // The parent's mutex has to be taken (inside free()) so that we cannot race with grant().
    // NOTE: a shortcut is possible, but even checking whether it applies needs the Allocation::mutex lock.
    parent.free(this);
}
|
||||
|
||||
// Takes one already granted slot, if there is any.
// Returns an empty pointer (without touching the mutex) when no granted slot is left.
[[nodiscard]] ConcurrencyControl::SlotPtr ConcurrencyControl::Allocation::tryAcquire()
{
    // A failed compare_exchange_strong refreshes `remaining`, so each retry works with the current counter.
    for (SlotCount remaining = granted.load(); remaining != 0;)
    {
        if (!granted.compare_exchange_strong(remaining, remaining - 1))
            continue;

        std::unique_lock lock{mutex};
        return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
    }

    return {}; // avoid unnecessary locking
}
|
||||
|
||||
// Current value of the `granted` counter (slots granted but not yet taken by tryAcquire()).
ConcurrencyControl::SlotCount ConcurrencyControl::Allocation::grantedCount() const
{
    return granted.load();
}
|
||||
|
||||
// Builds an allocation that already owns `granted_` slots out of the `limit_` it asked for.
// `waiter_` is the entry reserved for this allocation in the parent's waiter list.
ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_)
    : parent(parent_)
    , limit(limit_)
    , allocated(granted_)
    , granted(granted_)
    , waiter(waiter_)
{
    // Still short of the limit: point the waiter entry at this object.
    if (allocated < limit)
    {
        *waiter = this;
    }
}
|
||||
|
||||
// Grant single slot to allocation, returns true iff more slot(s) are required
|
||||
bool ConcurrencyControl::Allocation::grant()
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
granted++;
|
||||
allocated++;
|
||||
return allocated < limit;
|
||||
}
|
||||
|
||||
// Release one slot and grant it to other allocation if required
|
||||
void ConcurrencyControl::Allocation::release()
|
||||
{
|
||||
parent.release(1);
|
||||
std::unique_lock lock{mutex};
|
||||
released++;
|
||||
if (released > allocated)
|
||||
abort();
|
||||
}
|
||||
|
||||
ConcurrencyControl::ConcurrencyControl()
|
||||
: cur_waiter(waiters.end())
|
||||
{
|
||||
}
|
||||
|
||||
// Aborts the process if any allocation is still registered as a waiter at destruction time.
ConcurrencyControl::~ConcurrencyControl()
{
    const bool has_waiters = !waiters.empty();
    if (has_waiters)
        abort();
}
|
||||
|
||||
// Allocates at least `min` and at most `max` slots.
// Throws LOGICAL_ERROR if `min > max`. When fewer than `max` slots could be granted right away,
// the returned allocation is also inserted into the waiter list to receive more slots later.
[[nodiscard]] ConcurrencyControl::AllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max)
{
    if (min > max)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements");

    std::unique_lock lock{mutex};

    // Take everything that is currently free (capped by `max`), but never less than `min`
    const SlotCount free_slots = available(lock);
    const SlotCount granted = std::max(min, std::min(max, free_slots));
    cur_concurrency += granted;

    // Fully satisfied -- no need to wait for more slots
    if (granted >= max)
        return AllocationPtr(new Allocation(*this, max, granted));

    // Otherwise create the allocation together with its waiter entry
    return AllocationPtr(new Allocation(*this, max, granted,
        waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
}
|
||||
|
||||
// Changes the global slot limit and immediately re-runs scheduling with the new limit.
void ConcurrencyControl::setMaxConcurrency(ConcurrencyControl::SlotCount value)
{
    std::unique_lock lock{mutex};

    // never allow max_concurrency to be zero
    max_concurrency = std::max<SlotCount>(value, 1);

    schedule(lock);
}
|
||||
|
||||
// Returns the function-local static ConcurrencyControl object.
ConcurrencyControl & ConcurrencyControl::instance()
{
    static ConcurrencyControl singleton;
    return singleton;
}
|
||||
|
||||
// Cancels `allocation`: returns its slots to the pool, drops its waiter entry (if any)
// and reschedules the freed slots among the remaining waiters.
void ConcurrencyControl::free(Allocation * allocation)
{
    // An allocation may be canceled while it still has:
    // - granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
    // - an active waiter entry, i.e. it is waiting for more slots to be allocated
    // Hence destruction of an Allocation may need this lock to avoid race conditions
    std::unique_lock lock{mutex};
    auto [slots_to_return, waiter_entry] = allocation->cancel();

    cur_concurrency -= slots_to_return;

    if (waiter_entry)
    {
        // Check the cursor before erasing: the erased iterator must not be used afterwards
        const bool erasing_cursor = (cur_waiter == *waiter_entry);
        auto following = waiters.erase(*waiter_entry);
        if (erasing_cursor)
            cur_waiter = following;
    }

    schedule(lock);
}
|
||||
|
||||
// Returns `amount` slots to the pool and offers them to waiting allocations.
void ConcurrencyControl::release(SlotCount amount)
{
    std::unique_lock lock{mutex};

    cur_concurrency -= amount;

    schedule(lock);
}
|
||||
|
||||
// Round-robin scheduling of available slots among waiting allocations
|
||||
void ConcurrencyControl::schedule(std::unique_lock<std::mutex> &)
|
||||
{
|
||||
while (cur_concurrency < max_concurrency && !waiters.empty())
|
||||
{
|
||||
cur_concurrency++;
|
||||
if (cur_waiter == waiters.end())
|
||||
cur_waiter = waiters.begin();
|
||||
Allocation * allocation = *cur_waiter;
|
||||
if (allocation->grant())
|
||||
++cur_waiter;
|
||||
else
|
||||
cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
|
||||
}
|
||||
}
|
||||
|
||||
// Number of slots that can be granted right now; zero when the limit is reached or exceeded.
// The unnamed lock parameter is not used in the body.
ConcurrencyControl::SlotCount ConcurrencyControl::available(std::unique_lock<std::mutex> &) const
{
    if (cur_concurrency >= max_concurrency)
        return 0;

    return max_concurrency - cur_concurrency;
}
|
||||
|
||||
}
|
@ -5,17 +5,10 @@
|
||||
#include <mutex>
|
||||
#include <memory>
|
||||
#include <list>
|
||||
#include <condition_variable>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Controls how many threads can be allocated for a query (or another activity).
|
||||
@ -53,17 +46,12 @@ public:
|
||||
// Scoped guard for acquired slot, see Allocation::tryAcquire()
|
||||
struct Slot : boost::noncopyable
|
||||
{
|
||||
~Slot()
|
||||
{
|
||||
allocation->release();
|
||||
}
|
||||
~Slot();
|
||||
|
||||
private:
|
||||
friend struct Allocation; // for ctor
|
||||
|
||||
explicit Slot(AllocationPtr && allocation_)
|
||||
: allocation(std::move(allocation_))
|
||||
{}
|
||||
explicit Slot(AllocationPtr && allocation_);
|
||||
|
||||
AllocationPtr allocation;
|
||||
};
|
||||
@ -74,47 +62,18 @@ public:
|
||||
// Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max)
|
||||
struct Allocation : std::enable_shared_from_this<Allocation>, boost::noncopyable
|
||||
{
|
||||
~Allocation()
|
||||
{
|
||||
// We have to lock parent's mutex to avoid race with grant()
|
||||
// NOTE: shortcut can be added, but it requires Allocation::mutex lock even to check if shortcut is possible
|
||||
parent.free(this);
|
||||
}
|
||||
~Allocation();
|
||||
|
||||
// Take one already granted slot if available. Lock-free iff there is no granted slot.
|
||||
[[nodiscard]] SlotPtr tryAcquire()
|
||||
{
|
||||
SlotCount value = granted.load();
|
||||
while (value)
|
||||
{
|
||||
if (granted.compare_exchange_strong(value, value - 1))
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor
|
||||
}
|
||||
}
|
||||
return {}; // avoid unnecessary locking
|
||||
}
|
||||
[[nodiscard]] SlotPtr tryAcquire();
|
||||
|
||||
SlotCount grantedCount() const
|
||||
{
|
||||
return granted;
|
||||
}
|
||||
SlotCount grantedCount() const;
|
||||
|
||||
private:
|
||||
friend struct Slot; // for release()
|
||||
friend class ConcurrencyControl; // for grant(), free() and ctor
|
||||
|
||||
Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {})
|
||||
: parent(parent_)
|
||||
, limit(limit_)
|
||||
, allocated(granted_)
|
||||
, granted(granted_)
|
||||
, waiter(waiter_)
|
||||
{
|
||||
if (allocated < limit)
|
||||
*waiter = this;
|
||||
}
|
||||
Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_ = {});
|
||||
|
||||
auto cancel()
|
||||
{
|
||||
@ -126,23 +85,10 @@ public:
|
||||
}
|
||||
|
||||
// Grant single slot to allocation, returns true iff more slot(s) are required
|
||||
bool grant()
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
granted++;
|
||||
allocated++;
|
||||
return allocated < limit;
|
||||
}
|
||||
bool grant();
|
||||
|
||||
// Release one slot and grant it to other allocation if required
|
||||
void release()
|
||||
{
|
||||
parent.release(1);
|
||||
std::unique_lock lock{mutex};
|
||||
released++;
|
||||
if (released > allocated)
|
||||
abort();
|
||||
}
|
||||
void release();
|
||||
|
||||
ConcurrencyControl & parent;
|
||||
const SlotCount limit;
|
||||
@ -157,106 +103,32 @@ public:
|
||||
};
|
||||
|
||||
public:
|
||||
ConcurrencyControl()
|
||||
: cur_waiter(waiters.end())
|
||||
{}
|
||||
ConcurrencyControl();
|
||||
|
||||
// WARNING: all Allocation objects MUST be destructed before ConcurrencyControl
|
||||
// NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries
|
||||
~ConcurrencyControl()
|
||||
{
|
||||
if (!waiters.empty())
|
||||
abort();
|
||||
}
|
||||
~ConcurrencyControl();
|
||||
|
||||
// Allocate at least `min` and at most `max` slots.
|
||||
// If not all `max` slots were successfully allocated, a subscription for later allocation is created
|
||||
// Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread.
|
||||
[[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max)
|
||||
{
|
||||
if (min > max)
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements");
|
||||
[[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max);
|
||||
|
||||
std::unique_lock lock{mutex};
|
||||
void setMaxConcurrency(SlotCount value);
|
||||
|
||||
// Acquire as much slots as we can, but not lower than `min`
|
||||
SlotCount granted = std::max(min, std::min(max, available(lock)));
|
||||
cur_concurrency += granted;
|
||||
|
||||
// Create allocation and start waiting if more slots are required
|
||||
if (granted < max)
|
||||
return AllocationPtr(new Allocation(*this, max, granted,
|
||||
waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */)));
|
||||
else
|
||||
return AllocationPtr(new Allocation(*this, max, granted));
|
||||
}
|
||||
|
||||
void setMaxConcurrency(SlotCount value)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
max_concurrency = std::max<SlotCount>(1, value); // never allow max_concurrency to be zero
|
||||
schedule(lock);
|
||||
}
|
||||
|
||||
static ConcurrencyControl & instance()
|
||||
{
|
||||
static ConcurrencyControl result;
|
||||
return result;
|
||||
}
|
||||
static ConcurrencyControl & instance();
|
||||
|
||||
private:
|
||||
friend struct Allocation; // for free() and release()
|
||||
|
||||
void free(Allocation * allocation)
|
||||
{
|
||||
// Allocation is allowed to be canceled even if there are:
|
||||
// - `amount`: granted slots (acquired slots are not possible, because Slot holds AllocationPtr)
|
||||
// - `waiter`: active waiting for more slots to be allocated
|
||||
// Thus Allocation destruction may require the following lock, to avoid race conditions
|
||||
std::unique_lock lock{mutex};
|
||||
auto [amount, waiter] = allocation->cancel();
|
||||
void free(Allocation * allocation);
|
||||
|
||||
cur_concurrency -= amount;
|
||||
if (waiter)
|
||||
{
|
||||
if (cur_waiter == *waiter)
|
||||
cur_waiter = waiters.erase(*waiter);
|
||||
else
|
||||
waiters.erase(*waiter);
|
||||
}
|
||||
schedule(lock);
|
||||
}
|
||||
|
||||
void release(SlotCount amount)
|
||||
{
|
||||
std::unique_lock lock{mutex};
|
||||
cur_concurrency -= amount;
|
||||
schedule(lock);
|
||||
}
|
||||
void release(SlotCount amount);
|
||||
|
||||
// Round-robin scheduling of available slots among waiting allocations
|
||||
void schedule(std::unique_lock<std::mutex> &)
|
||||
{
|
||||
while (cur_concurrency < max_concurrency && !waiters.empty())
|
||||
{
|
||||
cur_concurrency++;
|
||||
if (cur_waiter == waiters.end())
|
||||
cur_waiter = waiters.begin();
|
||||
Allocation * allocation = *cur_waiter;
|
||||
if (allocation->grant())
|
||||
++cur_waiter;
|
||||
else
|
||||
cur_waiter = waiters.erase(cur_waiter); // last required slot has just been granted -- stop waiting
|
||||
}
|
||||
}
|
||||
void schedule(std::unique_lock<std::mutex> &);
|
||||
|
||||
SlotCount available(std::unique_lock<std::mutex> &) const
|
||||
{
|
||||
if (cur_concurrency < max_concurrency)
|
||||
return max_concurrency - cur_concurrency;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
SlotCount available(std::unique_lock<std::mutex> &) const;
|
||||
|
||||
std::mutex mutex;
|
||||
Waiters waiters;
|
||||
@ -264,3 +136,5 @@ private:
|
||||
SlotCount max_concurrency = Unlimited;
|
||||
SlotCount cur_concurrency = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -649,6 +649,8 @@
|
||||
M(679, IO_URING_SUBMIT_ERROR) \
|
||||
M(690, MIXED_ACCESS_PARAMETER_TYPES) \
|
||||
M(691, UNKNOWN_ELEMENT_OF_ENUM) \
|
||||
M(692, TOO_MANY_MUTATIONS) \
|
||||
M(693, AWS_ERROR) \
|
||||
\
|
||||
M(999, KEEPER_EXCEPTION) \
|
||||
M(1000, POCO_EXCEPTION) \
|
||||
|
@ -80,6 +80,8 @@ template <
|
||||
class ClearableHashSet
|
||||
: public HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>
|
||||
{
|
||||
using Cell = ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>;
|
||||
|
||||
public:
|
||||
using Base = HashTable<Key, ClearableHashTableCell<Key, HashTableCell<Key, Hash, ClearableHashSetState>>, Hash, Grower, Allocator>;
|
||||
using typename Base::LookupResult;
|
||||
@ -88,6 +90,13 @@ public:
|
||||
{
|
||||
++this->version;
|
||||
this->m_size = 0;
|
||||
|
||||
if constexpr (Cell::need_zero_value_storage)
|
||||
{
|
||||
/// clear ZeroValueStorage
|
||||
if (this->hasZero())
|
||||
this->clearHasZero();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -103,11 +112,20 @@ class ClearableHashSetWithSavedHash : public HashTable<
|
||||
Grower,
|
||||
Allocator>
|
||||
{
|
||||
using Cell = ClearableHashTableCell<Key, HashSetCellWithSavedHash<Key, Hash, ClearableHashSetState>>;
|
||||
|
||||
public:
|
||||
void clear()
|
||||
{
|
||||
++this->version;
|
||||
this->m_size = 0;
|
||||
|
||||
if constexpr (Cell::need_zero_value_storage)
|
||||
{
|
||||
/// clear ZeroValueStorage
|
||||
if (this->hasZero())
|
||||
this->clearHasZero();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -103,6 +103,9 @@
|
||||
M(DelayedInserts, "Number of times the INSERT of a block to a MergeTree table was throttled due to high number of active data parts for partition.") \
|
||||
M(RejectedInserts, "Number of times the INSERT of a block to a MergeTree table was rejected with 'Too many parts' exception due to high number of active data parts for partition.") \
|
||||
M(DelayedInsertsMilliseconds, "Total number of milliseconds spent while the INSERT of a block to a MergeTree table was throttled due to high number of active data parts for partition.") \
|
||||
M(DelayedMutations, "Number of times the mutation of a MergeTree table was throttled due to high number of unfinished mutations for table.") \
|
||||
M(RejectedMutations, "Number of times the mutation of a MergeTree table was rejected with 'Too many mutations' exception due to high number of unfinished mutations for table.") \
|
||||
M(DelayedMutationsMilliseconds, "Total number of milliseconds spent while the mutation of a MergeTree table was throttled due to high number of unfinished mutations for table.") \
|
||||
M(DistributedDelayedInserts, "Number of times the INSERT of a block to a Distributed table was throttled due to high number of pending bytes.") \
|
||||
M(DistributedRejectedInserts, "Number of times the INSERT of a block to a Distributed table was rejected with 'Too many bytes' exception due to high number of pending bytes.") \
|
||||
M(DistributedDelayedInsertsMilliseconds, "Total number of milliseconds spent while the INSERT of a block to a Distributed table was throttled due to high number of pending bytes.") \
|
||||
@ -188,10 +191,8 @@
|
||||
\
|
||||
M(InsertedWideParts, "Number of parts inserted in Wide format.") \
|
||||
M(InsertedCompactParts, "Number of parts inserted in Compact format.") \
|
||||
M(InsertedInMemoryParts, "Number of parts inserted in InMemory format.") \
|
||||
M(MergedIntoWideParts, "Number of parts merged into Wide format.") \
|
||||
M(MergedIntoCompactParts, "Number of parts merged into Compact format.") \
|
||||
M(MergedIntoInMemoryParts, "Number of parts in merged into InMemory format.") \
|
||||
\
|
||||
M(MergeTreeDataProjectionWriterRows, "Number of rows INSERTed to MergeTree tables projection.") \
|
||||
M(MergeTreeDataProjectionWriterUncompressedBytes, "Uncompressed bytes (for columns as they stored in memory) INSERTed to MergeTree tables projection.") \
|
||||
@ -252,8 +253,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
M(DNSError, "Total count of errors in DNS resolution") \
|
||||
\
|
||||
M(RealTimeMicroseconds, "Total (wall clock) time spent in processing (queries and other tasks) threads (note that this is a sum).") \
|
||||
M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
|
||||
M(SystemTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in OS kernel space. This include time CPU pipeline was stalled due to cache misses, branch mispredictions, hyper-threading, etc.") \
|
||||
M(UserTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in user mode. This include time CPU pipeline was stalled due to main memory access, cache misses, branch mispredictions, hyper-threading, etc.") \
|
||||
M(SystemTimeMicroseconds, "Total time spent in processing (queries and other tasks) threads executing CPU instructions in OS kernel mode. This is time spent in syscalls, excluding waiting time during blocking syscalls.") \
|
||||
M(MemoryOvercommitWaitTimeMicroseconds, "Total time spent in waiting for memory to be freed in OvercommitTracker.") \
|
||||
M(MemoryAllocatorPurge, "Total number of times memory allocator purge was requested") \
|
||||
M(MemoryAllocatorPurgeTimeMicroseconds, "Total time spent for memory allocator purge") \
|
||||
|
@ -37,7 +37,7 @@ using RWLock = std::shared_ptr<RWLockImpl>;
|
||||
///
|
||||
/// NOTE: it is dangerous to acquire lock with NO_QUERY, because FastPath doesn't
|
||||
/// exist for this case and deadlock, described in previous note,
|
||||
/// may accur in case of recursive locking.
|
||||
/// may occur in case of recursive locking.
|
||||
class RWLockImpl : public std::enable_shared_from_this<RWLockImpl>
|
||||
{
|
||||
public:
|
||||
|
@ -9,6 +9,10 @@
|
||||
#include <Common/MemorySanitizer.h>
|
||||
#include <Common/SymbolIndex.h>
|
||||
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <filesystem>
|
||||
#include <map>
|
||||
@ -340,8 +344,6 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
|
||||
return callback("<Empty trace>");
|
||||
|
||||
#if defined(__ELF__) && !defined(OS_FREEBSD)
|
||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
out.exceptions(std::ios::failbit);
|
||||
|
||||
using enum DB::Dwarf::LocationInfoMode;
|
||||
const auto mode = fatal ? FULL_WITH_INLINE : FAST;
|
||||
@ -358,6 +360,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
|
||||
uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
|
||||
const void * physical_addr = reinterpret_cast<const void *>(uintptr_t(virtual_addr) - virtual_offset);
|
||||
|
||||
DB::WriteBufferFromOwnString out;
|
||||
out << i << ". ";
|
||||
|
||||
if (std::error_code ec; object && std::filesystem::exists(object->name, ec) && !ec)
|
||||
@ -376,7 +379,10 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
|
||||
out << "?";
|
||||
|
||||
if (shouldShowAddress(physical_addr))
|
||||
out << " @ " << physical_addr;
|
||||
{
|
||||
out << " @ ";
|
||||
DB::writePointerHex(physical_addr, out);
|
||||
}
|
||||
|
||||
out << " in " << (object ? object->name : "?");
|
||||
|
||||
@ -393,7 +399,6 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
|
||||
}
|
||||
|
||||
callback(out.str());
|
||||
out.str({});
|
||||
}
|
||||
#else
|
||||
for (size_t i = stack_trace.offset; i < stack_trace.size; ++i)
|
||||
@ -431,8 +436,7 @@ String toStringCached(const StackTrace::FramePointers & pointers, size_t offset,
|
||||
return it->second;
|
||||
else
|
||||
{
|
||||
std::ostringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
out.exceptions(std::ios::failbit);
|
||||
DB::WriteBufferFromOwnString out;
|
||||
toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; });
|
||||
|
||||
return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second;
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
@ -153,19 +156,17 @@ std::pair<bool, std::string> StudentTTest::compareAndReport(size_t confidence_le
|
||||
|
||||
double mean_confidence_interval = table_value * t_statistic;
|
||||
|
||||
std::stringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
ss.exceptions(std::ios::failbit);
|
||||
DB::WriteBufferFromOwnString out;
|
||||
|
||||
if (mean_difference > mean_confidence_interval && (mean_difference - mean_confidence_interval > 0.0001)) /// difference must be more than 0.0001, to take into account connection latency.
|
||||
{
|
||||
ss << "Difference at " << confidence_level[confidence_level_index] << "% confidence: ";
|
||||
ss << std::fixed << std::setprecision(8) << "mean difference is " << mean_difference << ", but confidence interval is " << mean_confidence_interval;
|
||||
return {false, ss.str()};
|
||||
out << "Difference at " << confidence_level[confidence_level_index] << "% confidence: ";
|
||||
out << "mean difference is " << mean_difference << ", but confidence interval is " << mean_confidence_interval;
|
||||
return {false, out.str()};
|
||||
}
|
||||
else
|
||||
{
|
||||
ss << "No difference proven at " << confidence_level[confidence_level_index] << "% confidence";
|
||||
return {true, ss.str()};
|
||||
out << "No difference proven at " << confidence_level[confidence_level_index] << "% confidence";
|
||||
return {true, out.str()};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -397,7 +397,10 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
|
||||
|
||||
/// We don't run jobs after `shutdown` is set, but we have to properly dequeue all jobs and finish them.
|
||||
if (shutdown)
|
||||
{
|
||||
job_is_done = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ALLOW_ALLOCATIONS_IN_SCOPE;
|
||||
|
@ -1,7 +1,6 @@
|
||||
#if defined(OS_LINUX)
|
||||
#include <Common/TimerDescriptor.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <sys/timerfd.h>
|
||||
#include <fcntl.h>
|
||||
|
@ -44,7 +44,7 @@ struct NetworkInterfaces
|
||||
std::optional<Poco::Net::IPAddress> interface_address;
|
||||
switch (family)
|
||||
{
|
||||
/// We interested only in IP-adresses
|
||||
/// We interested only in IP-addresses
|
||||
case AF_INET:
|
||||
{
|
||||
interface_address.emplace(*(iface->ifa_addr));
|
||||
|
@ -17,10 +17,10 @@ namespace Poco { class Logger; }
|
||||
|
||||
namespace
|
||||
{
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; };
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); };
|
||||
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; };
|
||||
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLogger(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; };
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; }
|
||||
[[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); }
|
||||
[[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
|
||||
[[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLogger(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
|
||||
}
|
||||
|
||||
#define LOG_IMPL_FIRST_ARG(X, ...) X
|
||||
|
@ -43,7 +43,8 @@ void setThreadName(const char * name)
|
||||
#else
|
||||
if (0 != prctl(PR_SET_NAME, name, 0, 0, 0))
|
||||
#endif
|
||||
DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR);
|
||||
if (errno != ENOSYS) /// It's ok if the syscall is unsupported in some environments.
|
||||
DB::throwFromErrno("Cannot set thread name with prctl(PR_SET_NAME, ...)", DB::ErrorCodes::PTHREAD_ERROR);
|
||||
|
||||
memcpy(thread_name, name, std::min<size_t>(1 + strlen(name), THREAD_NAME_SIZE - 1));
|
||||
}
|
||||
@ -62,7 +63,8 @@ const char * getThreadName()
|
||||
// throw DB::Exception(DB::ErrorCodes::PTHREAD_ERROR, "Cannot get thread name with pthread_get_name_np()");
|
||||
#else
|
||||
if (0 != prctl(PR_GET_NAME, thread_name, 0, 0, 0))
|
||||
DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR);
|
||||
if (errno != ENOSYS) /// It's ok if the syscall is unsupported in some environments.
|
||||
DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)", DB::ErrorCodes::PTHREAD_ERROR);
|
||||
#endif
|
||||
|
||||
return thread_name;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user