Merge branch 'master' into correctly_send_close_request

This commit is contained in:
alesapin 2020-11-13 14:00:05 +03:00
commit 7ea147203b
103 changed files with 2925 additions and 916 deletions

.gitmodules vendored

@ -193,6 +193,10 @@
[submodule "contrib/miniselect"]
path = contrib/miniselect
url = https://github.com/danlark1/miniselect
[submodule "contrib/rocksdb"]
path = contrib/rocksdb
url = https://github.com/facebook/rocksdb
branch = v6.11.4
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz


@ -456,6 +456,8 @@ include (cmake/find/simdjson.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/rocksdb.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")


@ -0,0 +1,21 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <errno.h>
#include "syscall.h"
// the 64-bit argument-splitting macros below are the identity on both x86_64 and aarch64
#define __SYSCALL_LL_E(x) (x)
#define __SYSCALL_LL_O(x) (x)
int sync_file_range(int fd, off_t pos, off_t len, unsigned flags)
{
#if defined(SYS_sync_file_range2)
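// Some architectures (e.g. aarch64 and 32-bit ARM) provide only
// sync_file_range2, which takes the flags argument before the offsets;
// the effect is the same as sync_file_range.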
return syscall(SYS_sync_file_range2, fd, flags,
__SYSCALL_LL_E(pos), __SYSCALL_LL_E(len));
#elif defined(SYS_sync_file_range)
return __syscall(SYS_sync_file_range, fd,
__SYSCALL_LL_O(pos), __SYSCALL_LL_E(len), flags);
#else
return __syscall_ret(-ENOSYS);
#endif
}

cmake/find/rocksdb.cmake Normal file

@ -0,0 +1,67 @@
option(ENABLE_ROCKSDB "Enable ROCKSDB" ${ENABLE_LIBRARIES})
if (NOT ENABLE_ROCKSDB)
if (USE_INTERNAL_ROCKSDB_LIBRARY)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal rocksdb library with ENABLE_ROCKSDB=OFF")
endif()
return()
endif()
option(USE_INTERNAL_ROCKSDB_LIBRARY "Set to FALSE to use system ROCKSDB library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/CMakeLists.txt")
if (USE_INTERNAL_ROCKSDB_LIBRARY)
message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init --recursive")
message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal rocksdb")
endif()
set (MISSING_INTERNAL_ROCKSDB 1)
endif ()
if (NOT USE_INTERNAL_ROCKSDB_LIBRARY)
find_library (ROCKSDB_LIBRARY rocksdb)
find_path (ROCKSDB_INCLUDE_DIR NAMES rocksdb/db.h PATHS ${ROCKSDB_INCLUDE_PATHS})
if (NOT ROCKSDB_LIBRARY OR NOT ROCKSDB_INCLUDE_DIR)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system rocksdb library")
endif()
if (NOT SNAPPY_LIBRARY)
include(cmake/find/snappy.cmake)
endif()
if (NOT ZLIB_LIBRARY)
include(cmake/find/zlib.cmake)
endif()
find_package(BZip2)
find_library(ZSTD_LIBRARY zstd)
find_library(LZ4_LIBRARY lz4)
find_library(GFLAGS_LIBRARY gflags)
if(SNAPPY_LIBRARY AND ZLIB_LIBRARY AND LZ4_LIBRARY AND BZIP2_FOUND AND ZSTD_LIBRARY AND GFLAGS_LIBRARY)
list (APPEND ROCKSDB_LIBRARY ${SNAPPY_LIBRARY})
list (APPEND ROCKSDB_LIBRARY ${ZLIB_LIBRARY})
list (APPEND ROCKSDB_LIBRARY ${LZ4_LIBRARY})
list (APPEND ROCKSDB_LIBRARY ${BZIP2_LIBRARY})
list (APPEND ROCKSDB_LIBRARY ${ZSTD_LIBRARY})
list (APPEND ROCKSDB_LIBRARY ${GFLAGS_LIBRARY})
else()
message (${RECONFIGURE_MESSAGE_LEVEL}
"Can't find system rocksdb: snappy=${SNAPPY_LIBRARY} ;"
" zlib=${ZLIB_LIBRARY} ;"
" lz4=${LZ4_LIBRARY} ;"
" bz2=${BZIP2_LIBRARY} ;"
" zstd=${ZSTD_LIBRARY} ;"
" gflags=${GFLAGS_LIBRARY} ;")
endif()
endif ()
if(ROCKSDB_LIBRARY AND ROCKSDB_INCLUDE_DIR)
set(USE_ROCKSDB 1)
elseif (NOT MISSING_INTERNAL_ROCKSDB)
set (USE_INTERNAL_ROCKSDB_LIBRARY 1)
set (ROCKSDB_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/include")
set (ROCKSDB_LIBRARY "rocksdb")
set (USE_ROCKSDB 1)
endif ()
message (STATUS "Using ROCKSDB=${USE_ROCKSDB}: ${ROCKSDB_INCLUDE_DIR} : ${ROCKSDB_LIBRARY}")


@ -321,3 +321,7 @@ if (USE_KRB5)
add_subdirectory (cyrus-sasl-cmake)
endif()
endif()
if (USE_INTERNAL_ROCKSDB_LIBRARY)
add_subdirectory(rocksdb-cmake)
endif()

contrib/rocksdb vendored Submodule

@ -0,0 +1 @@
Subproject commit 963314ffd681596ef2738a95249fe4c1163ef87a


@ -0,0 +1,668 @@
## this file is extracted from `contrib/rocksdb/CMakeLists.txt`
set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb")
list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
find_program(CCACHE_FOUND ccache)
if(CCACHE_FOUND)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
endif(CCACHE_FOUND)
if (SANITIZE STREQUAL "undefined")
set(WITH_UBSAN ON)
elseif (SANITIZE STREQUAL "address")
set(WITH_ASAN ON)
elseif (SANITIZE STREQUAL "thread")
set(WITH_TSAN ON)
endif()
set(PORTABLE ON)
## always disable jemalloc for rocksdb by default
## because it introduces non-standard jemalloc APIs
option(WITH_JEMALLOC "build with JeMalloc" OFF)
option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY})
## lz4, zlib, zstd is enabled in ClickHouse by default
option(WITH_LZ4 "build with lz4" ON)
option(WITH_ZLIB "build with zlib" ON)
option(WITH_ZSTD "build with zstd" ON)
# third-party/folly is only validated to work on Linux and Windows for now.
# So only turn it on there by default.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows")
if(MSVC AND MSVC_VERSION LESS 1910)
# Folly does not compile with MSVC older than VS2017
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
endif()
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
endif()
if( NOT DEFINED CMAKE_CXX_STANDARD )
set(CMAKE_CXX_STANDARD 11)
endif()
if(MSVC)
option(WITH_XPRESS "build with windows built in compression" OFF)
include(${ROCKSDB_SOURCE_DIR}/thirdparty.inc)
else()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD")
# FreeBSD has jemalloc as default malloc
# but it does not have all the jemalloc files in include/...
set(WITH_JEMALLOC ON)
else()
if(WITH_JEMALLOC)
add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
list(APPEND THIRDPARTY_LIBS jemalloc)
endif()
endif()
if(WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS snappy)
endif()
if(WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS zlib)
endif()
if(WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS lz4)
endif()
if(WITH_ZSTD)
add_definitions(-DZSTD)
include_directories(${ZSTD_INCLUDE_DIR})
include_directories(${ZSTD_INCLUDE_DIR}/common)
include_directories(${ZSTD_INCLUDE_DIR}/dictBuilder)
include_directories(${ZSTD_INCLUDE_DIR}/deprecated)
list(APPEND THIRDPARTY_LIBS zstd)
endif()
endif()
string(TIMESTAMP TS "%Y/%m/%d %H:%M:%S" UTC)
set(GIT_DATE_TIME "${TS}" CACHE STRING "the time we first built rocksdb")
find_package(Git)
if(GIT_FOUND AND EXISTS "${ROCKSDB_SOURCE_DIR}/.git")
if(WIN32)
execute_process(COMMAND $ENV{COMSPEC} /C ${GIT_EXECUTABLE} -C ${ROCKSDB_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA)
else()
execute_process(COMMAND ${GIT_EXECUTABLE} -C ${ROCKSDB_SOURCE_DIR} rev-parse HEAD OUTPUT_VARIABLE GIT_SHA)
endif()
else()
set(GIT_SHA 0)
endif()
string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA "${GIT_SHA}")
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/rocksdb_build_version.cc)
configure_file(${ROCKSDB_SOURCE_DIR}/util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY)
add_library(rocksdb_build_version OBJECT ${BUILD_VERSION_CC})
target_include_directories(rocksdb_build_version PRIVATE
${ROCKSDB_SOURCE_DIR}/util)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W4 /wd4127 /wd4800 /wd4996 /wd4351 /wd4100 /wd4204 /wd4324")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -W -Wextra -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wsign-compare -Wshadow -Wno-unused-parameter -Wno-unused-variable -Woverloaded-virtual -Wnon-virtual-dtor -Wno-missing-field-initializers -Wno-strict-aliasing")
if(MINGW)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables")
add_definitions(-D_POSIX_C_SOURCE=1)
endif()
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG("-momit-leaf-frame-pointer" HAVE_OMIT_LEAF_FRAME_POINTER)
if(HAVE_OMIT_LEAF_FRAME_POINTER)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -momit-leaf-frame-pointer")
endif()
endif()
endif()
include(CheckCCompilerFlag)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
CHECK_C_COMPILER_FLAG("-mcpu=power9" HAS_POWER9)
if(HAS_POWER9)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power9 -mtune=power9")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power9 -mtune=power9")
else()
CHECK_C_COMPILER_FLAG("-mcpu=power8" HAS_POWER8)
if(HAS_POWER8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8 -mtune=power8")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8 -mtune=power8")
endif(HAS_POWER8)
endif(HAS_POWER9)
CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC)
if(HAS_ALTIVEC)
message(STATUS " HAS_ALTIVEC yes")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec")
endif(HAS_ALTIVEC)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
CHECK_C_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC)
if(HAS_ARMV8_CRC)
message(STATUS " HAS_ARMV8_CRC yes")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
endif(HAS_ARMV8_CRC)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64")
include(CheckCXXSourceCompiles)
if(NOT MSVC)
set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul")
endif()
CHECK_CXX_SOURCE_COMPILES("
#include <cstdint>
#include <nmmintrin.h>
#include <wmmintrin.h>
int main() {
volatile uint32_t x = _mm_crc32_u32(0, 0);
const auto a = _mm_set_epi64x(0, 0);
const auto b = _mm_set_epi64x(0, 0);
const auto c = _mm_clmulepi64_si128(a, b, 0x00);
auto d = _mm_cvtsi128_si64(c);
}
" HAVE_SSE42)
unset(CMAKE_REQUIRED_FLAGS)
if(HAVE_SSE42)
add_definitions(-DHAVE_SSE42)
add_definitions(-DHAVE_PCLMUL)
elseif(FORCE_SSE42)
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
endif()
CHECK_CXX_SOURCE_COMPILES("
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
}
" HAVE_THREAD_LOCAL)
if(HAVE_THREAD_LOCAL)
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
endif()
option(FAIL_ON_WARNINGS "Treat compile warnings as errors" ON)
if(FAIL_ON_WARNINGS)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX")
else() # assume GCC
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()
endif()
option(WITH_ASAN "build with ASAN" OFF)
if(WITH_ASAN)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
if(WITH_JEMALLOC)
message(FATAL_ERROR "ASAN does not work well with JeMalloc")
endif()
endif()
option(WITH_TSAN "build with TSAN" OFF)
if(WITH_TSAN)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=thread -pie")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread -fPIC")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread -fPIC")
if(WITH_JEMALLOC)
message(FATAL_ERROR "TSAN does not work well with JeMalloc")
endif()
endif()
option(WITH_UBSAN "build with UBSAN" OFF)
if(WITH_UBSAN)
add_definitions(-DROCKSDB_UBSAN_RUN)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
if(WITH_JEMALLOC)
message(FATAL_ERROR "UBSAN does not work well with JeMalloc")
endif()
endif()
if(CMAKE_SYSTEM_NAME MATCHES "Cygwin")
add_definitions(-fno-builtin-memcmp -DCYGWIN)
elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
if(CMAKE_SYSTEM_PROCESSOR MATCHES arm)
add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE)
# no debug info for iOS, as that would make our library big
add_definitions(-DNDEBUG)
endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
add_definitions(-DOS_SOLARIS)
elseif(CMAKE_SYSTEM_NAME MATCHES "kFreeBSD")
add_definitions(-DOS_GNU_KFREEBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
add_definitions(-DOS_FREEBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
add_definitions(-DOS_NETBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
add_definitions(-DOS_OPENBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly")
add_definitions(-DOS_DRAGONFLYBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
add_definitions(-DOS_ANDROID)
elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64 -DNOMINMAX)
if(MINGW)
add_definitions(-D_WIN32_WINNT=_WIN32_WINNT_VISTA)
endif()
endif()
if(NOT WIN32)
add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX)
endif()
option(WITH_FALLOCATE "build with fallocate" ON)
if(WITH_FALLOCATE)
CHECK_CXX_SOURCE_COMPILES("
#include <fcntl.h>
#include <linux/falloc.h>
int main() {
int fd = open(\"/dev/null\", 0);
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 1024);
}
" HAVE_FALLOCATE)
if(HAVE_FALLOCATE)
add_definitions(-DROCKSDB_FALLOCATE_PRESENT)
endif()
endif()
CHECK_CXX_SOURCE_COMPILES("
#include <fcntl.h>
int main() {
int fd = open(\"/dev/null\", 0);
sync_file_range(fd, 0, 1024, SYNC_FILE_RANGE_WRITE);
}
" HAVE_SYNC_FILE_RANGE_WRITE)
if(HAVE_SYNC_FILE_RANGE_WRITE)
add_definitions(-DROCKSDB_RANGESYNC_PRESENT)
endif()
CHECK_CXX_SOURCE_COMPILES("
#include <pthread.h>
int main() {
(void) PTHREAD_MUTEX_ADAPTIVE_NP;
}
" HAVE_PTHREAD_MUTEX_ADAPTIVE_NP)
if(HAVE_PTHREAD_MUTEX_ADAPTIVE_NP)
add_definitions(-DROCKSDB_PTHREAD_ADAPTIVE_MUTEX)
endif()
include(CheckCXXSymbolExists)
if(CMAKE_SYSTEM_NAME MATCHES "^FreeBSD")
check_cxx_symbol_exists(malloc_usable_size ${ROCKSDB_SOURCE_DIR}/malloc_np.h HAVE_MALLOC_USABLE_SIZE)
else()
check_cxx_symbol_exists(malloc_usable_size ${ROCKSDB_SOURCE_DIR}/malloc.h HAVE_MALLOC_USABLE_SIZE)
endif()
if(HAVE_MALLOC_USABLE_SIZE)
add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE)
endif()
check_cxx_symbol_exists(sched_getcpu sched.h HAVE_SCHED_GETCPU)
if(HAVE_SCHED_GETCPU)
add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT)
endif()
check_cxx_symbol_exists(getauxval auxv.h HAVE_AUXV_GETAUXVAL)
if(HAVE_AUXV_GETAUXVAL)
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
endif()
include_directories(${ROCKSDB_SOURCE_DIR})
include_directories(${ROCKSDB_SOURCE_DIR}/include)
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
include_directories(${ROCKSDB_SOURCE_DIR}/third-party/folly)
endif()
find_package(Threads REQUIRED)
# Main library source code
set(SOURCES
${ROCKSDB_SOURCE_DIR}/cache/cache.cc
${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_builder.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_garbage.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_meta.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
${ROCKSDB_SOURCE_DIR}/db/builder.cc
${ROCKSDB_SOURCE_DIR}/db/c.cc
${ROCKSDB_SOURCE_DIR}/db/column_family.cc
${ROCKSDB_SOURCE_DIR}/db/compacted_db_impl.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_job.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_fifo.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_level.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_picker_universal.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/sst_partitioner.cc
${ROCKSDB_SOURCE_DIR}/db/convenience.cc
${ROCKSDB_SOURCE_DIR}/db/db_filesnapshot.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_readonly.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_secondary.cc
${ROCKSDB_SOURCE_DIR}/db/db_info_dumper.cc
${ROCKSDB_SOURCE_DIR}/db/db_iter.cc
${ROCKSDB_SOURCE_DIR}/db/dbformat.cc
${ROCKSDB_SOURCE_DIR}/db/error_handler.cc
${ROCKSDB_SOURCE_DIR}/db/event_helpers.cc
${ROCKSDB_SOURCE_DIR}/db/experimental.cc
${ROCKSDB_SOURCE_DIR}/db/external_sst_file_ingestion_job.cc
${ROCKSDB_SOURCE_DIR}/db/file_indexer.cc
${ROCKSDB_SOURCE_DIR}/db/flush_job.cc
${ROCKSDB_SOURCE_DIR}/db/flush_scheduler.cc
${ROCKSDB_SOURCE_DIR}/db/forward_iterator.cc
${ROCKSDB_SOURCE_DIR}/db/import_column_family_job.cc
${ROCKSDB_SOURCE_DIR}/db/internal_stats.cc
${ROCKSDB_SOURCE_DIR}/db/logs_with_prep_tracker.cc
${ROCKSDB_SOURCE_DIR}/db/log_reader.cc
${ROCKSDB_SOURCE_DIR}/db/log_writer.cc
${ROCKSDB_SOURCE_DIR}/db/malloc_stats.cc
${ROCKSDB_SOURCE_DIR}/db/memtable.cc
${ROCKSDB_SOURCE_DIR}/db/memtable_list.cc
${ROCKSDB_SOURCE_DIR}/db/merge_helper.cc
${ROCKSDB_SOURCE_DIR}/db/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/db/range_del_aggregator.cc
${ROCKSDB_SOURCE_DIR}/db/range_tombstone_fragmenter.cc
${ROCKSDB_SOURCE_DIR}/db/repair.cc
${ROCKSDB_SOURCE_DIR}/db/snapshot_impl.cc
${ROCKSDB_SOURCE_DIR}/db/table_cache.cc
${ROCKSDB_SOURCE_DIR}/db/table_properties_collector.cc
${ROCKSDB_SOURCE_DIR}/db/transaction_log_impl.cc
${ROCKSDB_SOURCE_DIR}/db/trim_history_scheduler.cc
${ROCKSDB_SOURCE_DIR}/db/version_builder.cc
${ROCKSDB_SOURCE_DIR}/db/version_edit.cc
${ROCKSDB_SOURCE_DIR}/db/version_edit_handler.cc
${ROCKSDB_SOURCE_DIR}/db/version_set.cc
${ROCKSDB_SOURCE_DIR}/db/wal_edit.cc
${ROCKSDB_SOURCE_DIR}/db/wal_manager.cc
${ROCKSDB_SOURCE_DIR}/db/write_batch.cc
${ROCKSDB_SOURCE_DIR}/db/write_batch_base.cc
${ROCKSDB_SOURCE_DIR}/db/write_controller.cc
${ROCKSDB_SOURCE_DIR}/db/write_thread.cc
${ROCKSDB_SOURCE_DIR}/env/env.cc
${ROCKSDB_SOURCE_DIR}/env/env_chroot.cc
${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc
${ROCKSDB_SOURCE_DIR}/env/env_hdfs.cc
${ROCKSDB_SOURCE_DIR}/env/file_system.cc
${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc
${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc
${ROCKSDB_SOURCE_DIR}/file/file_util.cc
${ROCKSDB_SOURCE_DIR}/file/filename.cc
${ROCKSDB_SOURCE_DIR}/file/random_access_file_reader.cc
${ROCKSDB_SOURCE_DIR}/file/read_write_util.cc
${ROCKSDB_SOURCE_DIR}/file/readahead_raf.cc
${ROCKSDB_SOURCE_DIR}/file/sequence_file_reader.cc
${ROCKSDB_SOURCE_DIR}/file/sst_file_manager_impl.cc
${ROCKSDB_SOURCE_DIR}/file/writable_file_writer.cc
${ROCKSDB_SOURCE_DIR}/logging/auto_roll_logger.cc
${ROCKSDB_SOURCE_DIR}/logging/event_logger.cc
${ROCKSDB_SOURCE_DIR}/logging/log_buffer.cc
${ROCKSDB_SOURCE_DIR}/memory/arena.cc
${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc
${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc
${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc
${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc
${ROCKSDB_SOURCE_DIR}/memtable/skiplistrep.cc
${ROCKSDB_SOURCE_DIR}/memtable/vectorrep.cc
${ROCKSDB_SOURCE_DIR}/memtable/write_buffer_manager.cc
${ROCKSDB_SOURCE_DIR}/monitoring/histogram.cc
${ROCKSDB_SOURCE_DIR}/monitoring/histogram_windowing.cc
${ROCKSDB_SOURCE_DIR}/monitoring/in_memory_stats_history.cc
${ROCKSDB_SOURCE_DIR}/monitoring/instrumented_mutex.cc
${ROCKSDB_SOURCE_DIR}/monitoring/iostats_context.cc
${ROCKSDB_SOURCE_DIR}/monitoring/perf_context.cc
${ROCKSDB_SOURCE_DIR}/monitoring/perf_level.cc
${ROCKSDB_SOURCE_DIR}/monitoring/persistent_stats_history.cc
${ROCKSDB_SOURCE_DIR}/monitoring/statistics.cc
${ROCKSDB_SOURCE_DIR}/monitoring/stats_dump_scheduler.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_impl.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_updater.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util.cc
${ROCKSDB_SOURCE_DIR}/monitoring/thread_status_util_debug.cc
${ROCKSDB_SOURCE_DIR}/options/cf_options.cc
${ROCKSDB_SOURCE_DIR}/options/db_options.cc
${ROCKSDB_SOURCE_DIR}/options/options.cc
${ROCKSDB_SOURCE_DIR}/options/options_helper.cc
${ROCKSDB_SOURCE_DIR}/options/options_parser.cc
${ROCKSDB_SOURCE_DIR}/port/stack_trace.cc
${ROCKSDB_SOURCE_DIR}/table/adaptive/adaptive_table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/binary_search_index_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_filter_block.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_builder.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_based_table_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_builder.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefetcher.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/block_prefix_index.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_hash_index.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/data_block_footer.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/filter_block_reader_common.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/filter_policy.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/flush_block_policy.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/full_filter_block.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/hash_index_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/index_builder.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/index_reader_common.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/parsed_full_filter_block.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_filter_block.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/partitioned_index_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/reader_common.cc
${ROCKSDB_SOURCE_DIR}/table/block_based/uncompression_dict_reader.cc
${ROCKSDB_SOURCE_DIR}/table/block_fetcher.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_builder.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/cuckoo/cuckoo_table_reader.cc
${ROCKSDB_SOURCE_DIR}/table/format.cc
${ROCKSDB_SOURCE_DIR}/table/get_context.cc
${ROCKSDB_SOURCE_DIR}/table/iterator.cc
${ROCKSDB_SOURCE_DIR}/table/merging_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/meta_blocks.cc
${ROCKSDB_SOURCE_DIR}/table/persistent_cache_helper.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_bloom.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_builder.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_index.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_key_coding.cc
${ROCKSDB_SOURCE_DIR}/table/plain/plain_table_reader.cc
${ROCKSDB_SOURCE_DIR}/table/sst_file_dumper.cc
${ROCKSDB_SOURCE_DIR}/table/sst_file_reader.cc
${ROCKSDB_SOURCE_DIR}/table/sst_file_writer.cc
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc
${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc
${ROCKSDB_SOURCE_DIR}/test_util/transaction_test_util.cc
${ROCKSDB_SOURCE_DIR}/tools/block_cache_analyzer/block_cache_trace_analyzer.cc
${ROCKSDB_SOURCE_DIR}/tools/dump/db_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_cmd.cc
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc
${ROCKSDB_SOURCE_DIR}/util/coding.cc
${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc
${ROCKSDB_SOURCE_DIR}/util/comparator.cc
${ROCKSDB_SOURCE_DIR}/util/compression_context_cache.cc
${ROCKSDB_SOURCE_DIR}/util/concurrent_task_limiter_impl.cc
${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
${ROCKSDB_SOURCE_DIR}/util/dynamic_bloom.cc
${ROCKSDB_SOURCE_DIR}/util/hash.cc
${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc
${ROCKSDB_SOURCE_DIR}/util/random.cc
${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc
${ROCKSDB_SOURCE_DIR}/util/slice.cc
${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc
${ROCKSDB_SOURCE_DIR}/util/status.cc
${ROCKSDB_SOURCE_DIR}/util/string_util.cc
${ROCKSDB_SOURCE_DIR}/util/thread_local.cc
${ROCKSDB_SOURCE_DIR}/util/threadpool_imp.cc
${ROCKSDB_SOURCE_DIR}/util/xxhash.cc
${ROCKSDB_SOURCE_DIR}/utilities/backupable/backupable_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
${ROCKSDB_SOURCE_DIR}/utilities/debug.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc
${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc
${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/sortlist.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/string_append/stringappend2.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/uint64add.cc
${ROCKSDB_SOURCE_DIR}/utilities/object_registry.cc
${ROCKSDB_SOURCE_DIR}/utilities/option_change_migration/option_change_migration.cc
${ROCKSDB_SOURCE_DIR}/utilities/options/options_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/block_cache_tier_metadata.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/persistent_cache_tier.cc
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_tracker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point_lock_tracker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction_db_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/optimistic_transaction.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/pessimistic_transaction_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/snapshot_checker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_base.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_db_mutex_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_lock_mgr.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/transaction_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
$<TARGET_OBJECTS:rocksdb_build_version>)
if(HAVE_SSE42 AND NOT MSVC)
set_source_files_properties(
${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc.c
${ROCKSDB_SOURCE_DIR}/util/crc32c_ppc_asm.S)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(HAS_ARMV8_CRC)
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/util/crc32c_arm64.cc)
endif(HAS_ARMV8_CRC)
if(WIN32)
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/port/win/io_win.cc
${ROCKSDB_SOURCE_DIR}/port/win/env_win.cc
${ROCKSDB_SOURCE_DIR}/port/win/env_default.cc
${ROCKSDB_SOURCE_DIR}/port/win/port_win.cc
${ROCKSDB_SOURCE_DIR}/port/win/win_logger.cc)
if(NOT MINGW)
# Mingw only supports std::thread when using
# posix threads.
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/port/win/win_thread.cc)
endif()
if(WITH_XPRESS)
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/port/win/xpress_win.cc)
endif()
if(WITH_JEMALLOC)
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/port/win/win_jemalloc.cc)
endif()
else()
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/port/port_posix.cc
${ROCKSDB_SOURCE_DIR}/env/env_posix.cc
${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc
${ROCKSDB_SOURCE_DIR}/env/io_posix.cc)
endif()
if(WITH_FOLLY_DISTRIBUTED_MUTEX)
list(APPEND SOURCES
${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/detail/Futex.cpp
${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/AtomicNotification.cpp
${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/DistributedMutex.cpp
${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/ParkingLot.cpp
${ROCKSDB_SOURCE_DIR}/third-party/folly/folly/synchronization/WaitOptions.cpp)
endif()
set(ROCKSDB_STATIC_LIB rocksdb)
if(WIN32)
set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
else()
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
endif()
add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES})
target_link_libraries(${ROCKSDB_STATIC_LIB} PRIVATE
${THIRDPARTY_LIBS} ${SYSTEM_LIBS})


@ -64,6 +64,8 @@ RUN apt-get update \
libbz2-dev \
libavro-dev \
libfarmhash-dev \
librocksdb-dev \
libgflags-dev \
libmysqlclient-dev \
--yes --no-install-recommends


@ -275,6 +275,9 @@ TESTS_TO_SKIP=(
00646_url_engine
00974_query_profiler
# In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default
01504_rocksdb
# Looks at DistributedFilesToInsert, so it cannot run in parallel.
01460_DistributedFilesToInsert


@ -43,6 +43,8 @@ RUN apt-get --allow-unauthenticated update -y \
libreadline-dev \
libsasl2-dev \
libzstd-dev \
librocksdb-dev \
libgflags-dev \
lsof \
moreutils \
ncdu \


@ -0,0 +1,45 @@
---
toc_priority: 6
toc_title: EmbeddedRocksDB
---
# EmbeddedRocksDB Engine {#EmbeddedRocksDB-engine}
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/), an embeddable persistent key-value store.
`EmbeddedRocksDB` lets you store and retrieve data by key in an embedded rocksdb instance.
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name)
```
Required parameters:
- `primary_key_name` — any column name from the column list.
Example:
``` sql
CREATE TABLE test
(
`key` String,
`v1` UInt32,
`v2` String,
`v3` Float32
)
ENGINE = EmbeddedRocksDB
PRIMARY KEY key
```
## Description {#description}
- A `primary key` must be specified; only one column is supported in the primary key. The primary key is serialized in binary as the rocksdb key.
- Columns other than the primary key are serialized in binary as the rocksdb value, in the order they are declared.
- Queries that filter on the key with `equals` or `in` are optimized into a multi-key lookup in rocksdb.
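As an illustration (a hypothetical session, not from the original page), filtering on the key column of the `test` table defined above is served as a direct multi-key lookup instead of a full scan:

``` sql
INSERT INTO test VALUES ('one', 1, 'a', 1.0), ('two', 2, 'b', 2.0), ('three', 3, 'c', 3.0);
SELECT * FROM test WHERE key IN ('one', 'three');
```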


@ -30,4 +30,4 @@ Instead of inserting data manually, you might consider to use one of [client lib
- `input_format_import_nested_json` allows inserting nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
!!! note "Note"
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the `CLI` interface.


@ -307,7 +307,51 @@ Disabled by default.
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
For the TSV input format, switches to parsing enum values as enum ids.
Enables or disables parsing enum values as enum ids for the TSV input format.
Possible values:
- 0 — Enum values are parsed as values.
- 1 — Enum values are parsed as enum IDs.
Default value: 0.
**Example**
Consider the table:
```sql
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_tsv_enum_as_number` setting is enabled:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
┌──Id─┬─Value──┐
│ 103 │ first │
└─────┴────────┘
```
When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
```
throws an exception.
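With the setting disabled, the textual enum value is expected instead, so a hypothetical insert along these lines (fields tab-separated) succeeds:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 104	first;
```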
## input_format_null_as_default {#settings-input-format-null-as-default}
@ -1182,7 +1226,47 @@ For CSV input format enables or disables parsing of unquoted `NULL` as literal (
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
For the CSV input format, switches to parsing enum values as enum ids.
Enables or disables parsing enum values as enum ids for the CSV input format.
Possible values:
- 0 — Enum values are parsed as values.
- 1 — Enum values are parsed as enum IDs.
Default value: 0.
**Examples**
Consider the table:
```sql
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_csv_enum_as_number` setting is enabled:
```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
SELECT * FROM table_with_enum_column_for_csv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```
When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
```
throws an exception.
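With the setting disabled, the textual enum value is parsed instead, so a hypothetical insert such as the following succeeds:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,"first";
```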
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}


@ -50,8 +50,6 @@ ClickHouse-specific aggregate functions:
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md)
- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)


@ -1,16 +0,0 @@
---
toc_priority: 171
---
# timeSeriesGroupRateSum {#agg-function-timeseriesgroupratesum}
Syntax: `timeSeriesGroupRateSum(uid, ts, val)`
Similarly to [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md), `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamps should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
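Reading each rate as the slope between a series' neighbouring samples makes these numbers checkable: at timestamp 7 the first series climbs from 0.2 at t=2 to 0.7 at t=7 (rate 0.1 per time unit) and the second from 0.6 at t=3 to 1.6 at t=8 (rate 0.2), so the summed rate in the tuple (7,0.3) is 0.1 + 0.2 = 0.3.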


@ -1,57 +0,0 @@
---
toc_priority: 170
---
# timeSeriesGroupSum {#agg-function-timeseriesgroupsum}
Syntax: `timeSeriesGroupSum(uid, timestamp, value)`
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the time series unique id, `UInt64`.
- `timestamp` is of Int64 type in order to support millisecond or microsecond resolution.
- `value` is the metric.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
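To see where an interpolated point comes from: at timestamp 3 the first series has no sample, so it is interpolated between (2,0.2) and (7,0.7) as 0.2 + (3-2)/(7-2)*(0.7-0.2) = 0.3; adding the second series' measured 0.6 gives the aggregated tuple (3,0.9) in the result above.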


@ -337,26 +337,124 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┘
```
## date_trunc(datepart, time_or_data\[, time_zone\]), dateTrunc(datepart, time_or_data\[, time_zone\]) {#date_trunc}
## date_trunc {#date_trunc}
Truncates a date or date with time based on the specified datepart, such as
- `second`
- `minute`
- `hour`
- `day`
- `week`
- `month`
- `quarter`
- `year`
Truncates date and time data to the specified part of the date.
```sql
SELECT date_trunc('hour', now())
```
**Syntax**
``` sql
date_trunc(unit, value[, timezone])
```
Alias: `dateTrunc`.
## now {#now}
Accepts zero or one argument (the timezone) and returns the current time at one of the moments of query execution, or the current time in the specified timezone if the `timezone` argument is provided.
This function returns a constant, even if the query took a long time to complete.
**Parameters**
- `unit` — Part of date. [String](../syntax.md#syntax-string-literal).
Possible values:
- `second`
- `minute`
- `hour`
- `day`
- `week`
- `month`
- `quarter`
- `year`
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Value, truncated to the specified part of date.
Type: [Datetime](../../sql-reference/data-types/datetime.md).
**Example**
Query without timezone:
``` sql
SELECT now(), date_trunc('hour', now());
```
Result:
``` text
┌───────────────now()─┬─date_trunc('hour', now())─┐
│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │
└─────────────────────┴───────────────────────────┘
```
Query with the specified timezone:
```sql
SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
```
Result:
```text
┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │
└─────────────────────┴────────────────────────────────────────────┘
```
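For a coarser unit the same zeroing applies to every smaller component; for instance, a hypothetical query truncating to the month:

``` sql
SELECT date_trunc('month', toDateTime('2020-09-28 10:40:45'));
```

returns `2020-09-01 00:00:00`.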
**See also**
- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
## now {#now}
Returns the current date and time.
**Syntax**
``` sql
now([timezone])
```
**Parameters**
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md).
**Returned value**
- Current date and time.
Type: [Datetime](../../sql-reference/data-types/datetime.md).
**Example**
Query without timezone:
``` sql
SELECT now();
```
Result:
``` text
┌───────────────now()─┐
│ 2020-10-17 07:42:09 │
└─────────────────────┘
```
Query with the specified timezone:
``` sql
SELECT now('Europe/Moscow');
```
Result:
``` text
┌─now('Europe/Moscow')─┐
│ 2020-10-17 10:42:23 │
└──────────────────────┘
```
## today {#today}


@ -325,7 +325,59 @@ This function accepts a number or date or date with time, and returns a FixedStr
## reinterpretAsUUID {#reinterpretasuuid}
This function accepts a FixedString and returns a UUID. It takes a 16-byte string. If the string isn't long enough, the function works as if the string were padded with the necessary number of null bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string were padded with the necessary number of null bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
``` sql
reinterpretAsUUID(fixed_string)
```
**Parameters**
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
**Returned value**
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
**Examples**
String to UUID.
Query:
``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
```
Result:
``` text
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
│ 08090a0b-0c0d-0e0f-0001-020304050607 │
└───────────────────────────────────────────────────────────────────────┘
```
Going back and forth from String to UUID.
Query:
``` sql
WITH
generateUUIDv4() AS uuid,
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
SELECT uuid = uuid2;
```
Result:
``` text
┌─equals(uuid, uuid2)─┐
│ 1 │
└─────────────────────┘
```
## CAST(x, T) {#type_conversion_function-cast}


@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` is of Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamp should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.


@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` is of Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamp should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.


@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` is of Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamp should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.


@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` is of Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of each time series and then sums the rates together.
Also, the timestamp should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.


@ -289,6 +289,54 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (
Disabled by default.
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
Enables or disables parsing enum values as enum ids for the TSV input format.
Possible values:
- 0 — Enum values are parsed as values.
- 1 — Enum values are parsed as enum IDs.
Default value: 0.
**Example**
Consider the table:
```sql
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_tsv_enum_as_number` setting is enabled:
```sql
SET input_format_tsv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 1;
SELECT * FROM table_with_enum_column_for_tsv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
┌──Id─┬─Value──┐
│ 103 │ first │
└─────┴────────┘
```
When the `input_format_tsv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_tsv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2;
```
throws an exception.
## input_format_null_as_default {#settings-input-format-null-as-default}
Enables or disables the use of default values in cases when the input data contains `NULL` but the type of the corresponding column is not `Nullable(T)` (for text input formats).
@ -1127,6 +1175,50 @@ SELECT area/period FROM account_orders FORMAT JSON;
For the CSV format, enables or disables parsing of the unescaped string `NULL` as a literal (synonym for `\N`).
## input_format_csv_enum_as_number {#settings-input_format_csv_enum_as_number}
Enables or disables parsing enum values as enum ids for the CSV input format.
Possible values:
- 0 — Enum values are parsed as values.
- 1 — Enum values are parsed as enum IDs.
Default value: 0.
**Example**
Consider the table:
```sql
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_csv_enum_as_number` setting is enabled:
```sql
SET input_format_csv_enum_as_number = 1;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
SELECT * FROM table_with_enum_column_for_csv_insert;
```
Result:
```text
┌──Id─┬─Value──┐
│ 102 │ second │
└─────┴────────┘
```
When the `input_format_csv_enum_as_number` setting is disabled, the `INSERT` query:
```sql
SET input_format_csv_enum_as_number = 0;
INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2;
```
throws an exception.
## output_format_csv_crlf_end_of_line {#settings-output-format-csv-crlf-end-of-line}
Uses CRLF (DOS/Windows style) instead of LF (Unix style) as the line separator for the CSV format.


@ -45,8 +45,6 @@ toc_hidden: true
- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md)
- [timeSeriesGroupSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupsum.md)
- [timeSeriesGroupRateSum](../../../sql-reference/aggregate-functions/reference/timeseriesgroupratesum.md)
- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md)


@ -1,18 +0,0 @@
---
toc_priority: 171
---
# timeSeriesGroupRateSum {#agg-function-timeseriesgroupratesum}
Syntax: `timeSeriesGroupRateSum(uid, ts, val)`
Similarly to timeSeriesGroupSum, timeSeriesGroupRateSum computes the per-series derivative with respect to timestamp and then sums the resulting derivatives across all series for each timestamp value.
The series must also be sorted in ascending order of timestamp.
For the example from the timeSeriesGroupSum description, the result is the following:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupratesum/) <!--hide-->


@ -1,59 +0,0 @@
---
toc_priority: 170
---
# timeSeriesGroupSum {#agg-function-timeseriesgroupsum}
Syntax: `timeSeriesGroupSum(uid, timestamp, value)`
`timeSeriesGroupSum` aggregates time series whose sample moments do not coincide.
The function uses linear interpolation between two time values and then sums the values for the same moment (measured as well as interpolated) across all series.
- `uid` is the unique identifier of the time series, `UInt64`.
- `timestamp` has type `Int64` so that milliseconds and microseconds can be represented.
- `value` is the metric value.
The function returns an array of tuples with `(timestamp, aggregated_value)` pairs.
The time series must be sorted in ascending order of `timestamp`.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/timeseriesgroupsum/) <!--hide-->


@ -234,10 +234,124 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
Converts a date-with-time to a second number, counted from some fixed moment in the past.
## date_trunc {#date_trunc}
Truncates a date-with-time value, zeroing out all parts smaller than the specified one.
**Syntax**
``` sql
date_trunc(unit, value[, timezone])
```
Alias: `dateTrunc`.
**Parameters**
- `unit` — The name of the date or time part. [String](../syntax.md#syntax-string-literal).
Possible values:
- `second`
- `minute`
- `hour`
- `day`
- `week`
- `month`
- `quarter`
- `year`
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md)
**Returned value**
- Date and time, truncated to the specified part.
Type: [Datetime](../../sql-reference/data-types/datetime.md).
**Examples**
Query without a specified timezone:
``` sql
SELECT now(), date_trunc('hour', now());
```
Result:
``` text
┌───────────────now()─┬─date_trunc('hour', now())─┐
│ 2020-09-28 10:40:45 │ 2020-09-28 10:00:00 │
└─────────────────────┴───────────────────────────┘
```
Query with a specified timezone:
```sql
SELECT now(), date_trunc('hour', now(), 'Europe/Moscow');
```
Result:
```text
┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐
│ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │
└─────────────────────┴────────────────────────────────────────────┘
```
**See also**
- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone)
## now {#now}
Returns the current date and time at one of the moments of query execution. The function returns a constant even if the query took a long time to execute.
**Syntax**
``` sql
now([timezone])
```
**Parameters**
- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md)
**Returned value**
- Current date and time.
Type: [Datetime](../../sql-reference/data-types/datetime.md).
**Example**
Query without a specified timezone:
``` sql
SELECT now();
```
Result:
``` text
┌───────────────now()─┐
│ 2020-10-17 07:42:09 │
└─────────────────────┘
```
Query with a specified timezone:
``` sql
SELECT now('Europe/Moscow');
```
Result:
``` text
┌─now('Europe/Moscow')─┐
│ 2020-10-17 10:42:23 │
└──────────────────────┘
```
## today {#today}

View File

@ -319,6 +319,62 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
The function takes a number, a date, or a date-with-time and returns a string containing the bytes that represent the corresponding value in host order (little endian). Trailing zero bytes are dropped. For example, the value 255 of type UInt32 becomes a string one byte long.
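A short illustration, assuming this paragraph describes `reinterpretAsString` (the heading itself falls outside this fragment):
``` sql
-- 255 as UInt32 is 0xFF 0x00 0x00 0x00 in little endian; trailing zero bytes are dropped.
SELECT length(reinterpretAsString(toUInt32(255))) AS len;
-- len = 1
```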
## reinterpretAsUUID {#reinterpretasuuid}
The function takes a 16-byte string and interprets its bytes in network order (big-endian). If the string is shorter, it is treated as if it were padded with the necessary number of zero bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
``` sql
reinterpretAsUUID(fixed_string)
```
**Parameters**
- `fixed_string` — a string in big-endian byte order. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
**Returned value**
- A value of the [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type) type.
**Examples**
Interpreting a string as a UUID.
Query:
``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
```
Result:
``` text
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
│ 08090a0b-0c0d-0e0f-0001-020304050607 │
└───────────────────────────────────────────────────────────────────────┘
```
Converting to UUID and back.
Query:
``` sql
WITH
generateUUIDv4() AS uuid,
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
SELECT uuid = uuid2;
```
Result:
``` text
┌─equals(uuid, uuid2)─┐
│ 1 │
└─────────────────────┘
```
## CAST(x, T) {#type_conversion_function-cast}
Converts x to the data type T.
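For example:
``` sql
SELECT CAST(42 AS String) AS s, toTypeName(s) AS t;
-- s = '42', t = 'String'
```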

View File

@ -464,69 +464,6 @@ The kurtosis of the given distribution. Type — [Float64](../../sql-reference/d
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid, timestamp, value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` has the Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of `(timestamp, aggregated_value)` tuples.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
And the result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid, ts, val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of the time series and then sums the rates together.
Also, the timestamps should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.

View File

@ -2,17 +2,17 @@
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_priority: 3
toc_title: "\u788C\u83BD\u7984Support:"
toc_title: "\u5546\u4e1a\u652f\u6301"
---
# ClickHouse Commercial Support Service Providers {#clickhouse-commercial-support-service-providers}
!!! info "Info"
    If you have launched a ClickHouse commercial support service, feel free to [open a pull-request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) adding it to the following list.
## Altinity {#altinity}
Altinity has offered enterprise ClickHouse support and services since 2017. Altinity's customers range from Fortune 100 enterprises to startups. Visit [www.altinity.com](https://www.altinity.com/) for more information.
## Mafiree {#mafiree}

View File

@ -43,7 +43,7 @@ $ cd ..
To do this, create the following file:
/Library/LaunchDaemons/limit.maxfiles.plist:
``` xml
<?xml version="1.0" encoding="UTF-8"?>

View File

@ -7,35 +7,37 @@ toc_title: "\u6570\u636E\u5907\u4EFD"
# Data Backup {#data-backup}
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or of a table on the wrong cluster, and software bugs that lead to incorrect data processing or data corruption. In many cases mistakes like these affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes; for example, by default [you can't just drop a table with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards do not cover all possible cases and can be circumvented.
To effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
Different companies have different available resources and business requirements, so there is no universal ClickHouse backup and restore solution that fits every situation. What works for one gigabyte of data likely will not work for tens of petabytes. There are a variety of possible approaches, each with its own pros and cons, which are discussed below. It is a good idea to combine several approaches instead of relying on just one, to compensate for their various shortcomings.
!!! note "Note"
    Keep in mind that if you backed something up and never tried to restore it, chances are that the restore will not work properly when you actually need it (or at least it will take longer than the business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
## Duplicating Source Data Somewhere Else {#duplicating-source-data-somewhere-else}
Often data that is ingested into ClickHouse is delivered through some sort of persistent queue, such as [Apache Kafka](https://kafka.apache.org). In this case it is possible to configure an additional set of subscribers that will read the same data stream while it is being written to ClickHouse and store it in cold storage somewhere. Most companies already have some default recommended cold storage, which could be an object store or a distributed filesystem like [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).
## Filesystem Snapshots {#filesystem-snapshots}
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. Because it can reliably copy data between ClickHouse tables and clusters, it can also be used for backup and restore purposes.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to a remote table can work as well.
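As a hedged sketch of such a copy (the host, credentials, and table names below are placeholders):
``` sql
-- Copy a table to a backup server through the remote() table function.
INSERT INTO FUNCTION remote('backup-host:9000', backup_db.events, 'user', 'password')
SELECT * FROM default.events;
```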
## Manipulations with Parts {#manipulations-with-parts}
ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks into the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by the ClickHouse server, so you can just leave them there: you will have a simple backup that does not require any additional external system, but it will still be prone to hardware issues. For this reason, it is better to copy them remotely to another location and then remove the local copies. Distributed filesystems and object stores are still good options for this, but normal attached file servers with a large enough capacity can work as well (in this case the transfer occurs via the network filesystem or perhaps [rsync](https://en.wikipedia.org/wiki/Rsync)).
Data can be restored from such a backup using `ALTER TABLE ... ATTACH PARTITION ...`, as sketched below.
For more information about the queries related to partition manipulation, see the [ALTER documentation](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
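A minimal sketch of this freeze-and-attach flow (the table name and partition expression are hypothetical):
``` sql
-- Create a hardlink copy of one partition under /var/lib/clickhouse/shadow/.
ALTER TABLE default.events FREEZE PARTITION '2020-11';

-- Later, after copying the frozen files into the table's detached/ directory:
ALTER TABLE default.events ATTACH PARTITION '2020-11';
```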
A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).
[Original article](https://clickhouse.tech/docs/en/operations/backup/) <!--hide-->

View File

@ -462,69 +462,6 @@ kurtSamp(expr)
SELECT kurtSamp(value) FROM series_with_value_column
```
## timeSeriesGroupSum(uid,timestamp,value) {#agg-function-timeseriesgroupsum}
`timeSeriesGroupSum` can aggregate different time series whose sample timestamps are not aligned.
It uses linear interpolation between two sample timestamps and then sums the time series together.
- `uid` is the unique id of the time series, `UInt64`.
- `timestamp` has the Int64 type in order to support milliseconds or microseconds.
- `value` is the metric.
The function returns an array of `(timestamp, aggregated_value)` tuples.
Before using this function, make sure `timestamp` is in ascending order.
Example:
``` text
┌─uid─┬─timestamp─┬─value─┐
│ 1 │ 2 │ 0.2 │
│ 1 │ 7 │ 0.7 │
│ 1 │ 12 │ 1.2 │
│ 1 │ 17 │ 1.7 │
│ 1 │ 25 │ 2.5 │
│ 2 │ 3 │ 0.6 │
│ 2 │ 8 │ 1.6 │
│ 2 │ 12 │ 2.4 │
│ 2 │ 18 │ 3.6 │
│ 2 │ 24 │ 4.8 │
└─────┴───────────┴───────┘
```
``` sql
CREATE TABLE time_series(
uid UInt64,
timestamp Int64,
value Float64
) ENGINE = Memory;
INSERT INTO time_series VALUES
(1,2,0.2),(1,7,0.7),(1,12,1.2),(1,17,1.7),(1,25,2.5),
(2,3,0.6),(2,8,1.6),(2,12,2.4),(2,18,3.6),(2,24,4.8);
SELECT timeSeriesGroupSum(uid, timestamp, value)
FROM (
SELECT * FROM time_series order by timestamp ASC
);
```
The result will be:
``` text
[(2,0.2),(3,0.9),(7,2.1),(8,2.4),(12,3.6),(17,5.1),(18,5.4),(24,7.2),(25,2.5)]
```
## timeSeriesGroupRateSum(uid,ts,val) {#agg-function-timeseriesgroupratesum}
Similarly to `timeSeriesGroupSum`, `timeSeriesGroupRateSum` calculates the rate of the time series and then sums the rates together.
Also, the timestamps should be in ascending order before using this function.
Applying this function to the data from the `timeSeriesGroupSum` example, you get the following result:
``` text
[(2,0),(3,0.1),(7,0.3),(8,0.3),(12,0.3),(17,0.3),(18,0.3),(24,0.3),(25,0.1)]
```
## avg(x) {#agg_function-avg}
Calculates the average.

View File

@ -49,7 +49,6 @@ using Ports = std::vector<UInt16>;
namespace ErrorCodes
{
extern const int CANNOT_BLOCK_SIGNAL;
extern const int BAD_ARGUMENTS;
extern const int EMPTY_DATA_PASSED;
}
@ -103,17 +102,7 @@ public:
/// (example: when using stage = 'with_mergeable_state')
registerAggregateFunctions();
if (stage == "complete")
query_processing_stage = QueryProcessingStage::Complete;
else if (stage == "fetch_columns")
query_processing_stage = QueryProcessingStage::FetchColumns;
else if (stage == "with_mergeable_state")
query_processing_stage = QueryProcessingStage::WithMergeableState;
else if (stage == "with_mergeable_state_after_aggregation")
query_processing_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
else
throw Exception("Unknown query processing stage: " + stage, ErrorCodes::BAD_ARGUMENTS);
query_processing_stage = QueryProcessingStage::fromString(stage);
}
void initialize(Poco::Util::Application & self [[maybe_unused]]) override

View File

@ -224,6 +224,7 @@ private:
/// We will format query_id in interactive mode in various ways, the default is just to print Query id: ...
std::vector<std::pair<String, String>> query_id_formats;
QueryProcessingStage::Enum query_processing_stage;
void initialize(Poco::Util::Application & self) override
{
@ -1444,7 +1445,7 @@ private:
connection_parameters.timeouts,
query_to_send,
context.getCurrentQueryId(),
QueryProcessingStage::Complete,
query_processing_stage,
&context.getSettingsRef(),
&context.getClientInfo(),
true);
@ -1485,7 +1486,7 @@ private:
connection_parameters.timeouts,
query_to_send,
context.getCurrentQueryId(),
QueryProcessingStage::Complete,
query_processing_stage,
&context.getSettingsRef(),
&context.getClientInfo(),
true);
@ -2309,6 +2310,7 @@ public:
("password", po::value<std::string>()->implicit_value("\n", ""), "password")
("ask-password", "ask-password")
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
("query_id", po::value<std::string>(), "query_id")
("query,q", po::value<std::string>(), "query")
("database,d", po::value<std::string>(), "database")
@ -2433,6 +2435,8 @@ public:
if (options.count("config-file") && options.count("config"))
throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS);
query_processing_stage = QueryProcessingStage::fromString(options["stage"].as<std::string>());
/// Save received data into the internal config.
if (options.count("config-file"))
config().setString("config-file", options["config-file"].as<std::string>());

View File

@ -680,7 +680,7 @@ void updateSnapshot(Snapshot & snapshot, const Commit & commit, CommitDiff & fil
for (auto & elem : file_changes)
{
auto & file = elem.second.file_change;
if (file.path != file.old_path)
if (!file.old_path.empty() && file.path != file.old_path)
snapshot[file.path] = snapshot[file.old_path];
}

View File

@ -56,11 +56,7 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
}
AggregateFunctionPtr AggregateFunctionFactory::get(
const String & name,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
int recursion_level) const
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
{
auto type_without_low_cardinality = convertLowCardinalityTypesToNested(argument_types);
@ -81,11 +77,11 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
[](const auto & type) { return type->onlyNull(); });
AggregateFunctionPtr nested_function = getImpl(
name, nested_types, nested_parameters, out_properties, has_null_arguments, recursion_level);
name, nested_types, nested_parameters, out_properties, has_null_arguments);
return combinator->transformAggregateFunction(nested_function, out_properties, type_without_low_cardinality, parameters);
}
auto res = getImpl(name, type_without_low_cardinality, parameters, out_properties, false, recursion_level);
auto res = getImpl(name, type_without_low_cardinality, parameters, out_properties, false);
if (!res)
throw Exception("Logical error: AggregateFunctionFactory returned nullptr", ErrorCodes::LOGICAL_ERROR);
return res;
@ -97,8 +93,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments,
int recursion_level) const
bool has_null_arguments) const
{
String name = getAliasToOrName(name_param);
Value found;
@ -108,13 +103,9 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
{
found = it->second;
}
/// Find by case-insensitive name.
/// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names.
else if (recursion_level == 0)
{
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
if (found.creator)
{
@ -140,7 +131,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters);
AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties, recursion_level + 1);
AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties);
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
}
@ -162,7 +153,7 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param, int recursion_level) const
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPropertiesImpl(const String & name_param) const
{
String name = getAliasToOrName(name_param);
Value found;
@ -172,13 +163,9 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
{
found = it->second;
}
/// Find by case-insensitive name.
/// Combinators cannot apply for case insensitive (SQL-style) aggregate function names. Only for native names.
else if (recursion_level == 0)
{
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
if (found.creator)
return found.properties;
@ -195,7 +182,7 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
String nested_name = name.substr(0, name.size() - combinator->getName().size());
/// NOTE: It's reasonable to also allow to transform properties by combinator.
return tryGetPropertiesImpl(nested_name, recursion_level + 1);
return tryGetPropertiesImpl(nested_name);
}
return {};
@ -204,21 +191,21 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(const String & name) const
{
return tryGetPropertiesImpl(name, 0);
return tryGetPropertiesImpl(name);
}
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int recursion_level) const
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name) const
{
if (aggregate_functions.count(name) || isAlias(name))
return true;
String name_lowercase = Poco::toLower(name);
if (recursion_level == 0 && (case_insensitive_aggregate_functions.count(name_lowercase) || isAlias(name_lowercase)))
if (case_insensitive_aggregate_functions.count(name_lowercase) || isAlias(name_lowercase))
return true;
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
return isAggregateFunctionName(name.substr(0, name.size() - combinator->getName().size()), recursion_level + 1);
return isAggregateFunctionName(name.substr(0, name.size() - combinator->getName().size()));
return false;
}

View File

@ -59,12 +59,11 @@ public:
CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Throws an exception if not found.
AggregateFunctionPtr get(
const String & name,
AggregateFunctionPtr
get(const String & name,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
int recursion_level = 0) const;
AggregateFunctionProperties & out_properties) const;
/// Returns nullptr if not found.
AggregateFunctionPtr tryGet(
@ -76,7 +75,7 @@ public:
/// Get properties if the aggregate function exists.
std::optional<AggregateFunctionProperties> tryGetProperties(const String & name) const;
bool isAggregateFunctionName(const String & name, int recursion_level = 0) const;
bool isAggregateFunctionName(const String & name) const;
private:
AggregateFunctionPtr getImpl(
@ -84,10 +83,9 @@ private:
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments,
int recursion_level) const;
bool has_null_arguments) const;
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name, int recursion_level) const;
std::optional<AggregateFunctionProperties> tryGetPropertiesImpl(const String & name) const;
private:
using AggregateFunctions = std::unordered_map<String, Value>;

View File

@ -187,7 +187,10 @@ struct AggregateFunctionTimeSeriesGroupSumData
{
size_t size = result.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(result.data()), sizeof(result[0]));
if (size > 0)
{
buf.write(reinterpret_cast<const char *>(result.data()), size * sizeof(result[0]));
}
}
void deserialize(ReadBuffer & buf)
@ -195,7 +198,10 @@ struct AggregateFunctionTimeSeriesGroupSumData
size_t size = 0;
readVarUInt(size, buf);
result.resize(size);
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
if (size > 0)
{
buf.read(reinterpret_cast<char *>(result.data()), size * sizeof(result[0]));
}
}
};
template <bool rate>

View File

@ -78,6 +78,10 @@ if (USE_AMQPCPP)
add_headers_and_sources(dbms Storages/RabbitMQ)
endif()
if (USE_ROCKSDB)
add_headers_and_sources(dbms Storages/RocksDB)
endif()
if (USE_AWS_S3)
add_headers_and_sources(dbms Common/S3)
add_headers_and_sources(dbms Disks/S3)
@ -294,6 +298,7 @@ if (USE_KRB5)
dbms_target_link_libraries(PRIVATE ${KRB5_LIBRARY})
endif()
if(RE2_INCLUDE_DIR)
target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
endif()
@ -409,6 +414,11 @@ if (USE_ORC)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/orc/c++/include)
endif ()
if (USE_ROCKSDB)
dbms_target_link_libraries(PUBLIC ${ROCKSDB_LIBRARY})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ROCKSDB_INCLUDE_DIR})
endif()
if (ENABLE_TESTS AND USE_GTEST)
macro (grep_gtest_sources BASE_DIR DST_VAR)
# Could match files that are not in tests/ directories

View File

@ -14,6 +14,8 @@
* In debug build, use small mmap threshold to reproduce more memory
* stomping bugs. Along with ASLR it will hopefully detect more issues than
* ASan. The program may fail due to the limit on number of memory mappings.
*
* Not too small, to avoid exhausting memory mappings too quickly.
*/
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 4096;
__attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384;
#endif

View File

@ -9,7 +9,8 @@
M(ReplicatedFetch, "Number of data parts being fetched from replica") \
M(ReplicatedSend, "Number of data parts being sent to replicas") \
M(ReplicatedChecks, "Number of data parts checking for consistency") \
M(BackgroundPoolTask, "Number of active tasks in BackgroundProcessingPool (merges, mutations, fetches, or replication queue bookkeeping)") \
M(BackgroundPoolTask, "Number of active tasks in BackgroundProcessingPool (merges, mutations, or replication queue bookkeeping)") \
M(BackgroundFetchesPoolTask, "Number of active tasks in BackgroundFetchesPool") \
M(BackgroundMovePoolTask, "Number of active tasks in BackgroundProcessingPool for moves") \
M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \

View File

@ -519,9 +519,9 @@
M(550, CONDITIONAL_TREE_PARENT_NOT_FOUND) \
M(551, ILLEGAL_PROJECTION_MANIPULATOR) \
M(552, UNRECOGNIZED_ARGUMENTS) \
M(553, ROCKSDB_ERROR) \
M(553, LZMA_STREAM_ENCODER_FAILED) \
M(554, LZMA_STREAM_DECODER_FAILED) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \
M(1001, STD_EXCEPTION) \

View File

@ -11,6 +11,7 @@
#include <Common/Exception.h>
#include <Common/PipeFDs.h>
#include <Common/StackTrace.h>
#include <Common/setThreadName.h>
#include <common/logger_useful.h>
@ -115,6 +116,8 @@ void TraceCollector::stop()
void TraceCollector::run()
{
setThreadName("TraceCollector");
ReadBufferFromFileDescriptor in(pipe.fds_rw[0]);
while (true)

View File

@ -0,0 +1,35 @@
#include <Core/QueryProcessingStage.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace QueryProcessingStage
{
Enum fromString(const std::string & stage_string)
{
Enum stage;
if (stage_string == "complete")
stage = Complete;
else if (stage_string == "fetch_columns")
stage = FetchColumns;
else if (stage_string == "with_mergeable_state")
stage = WithMergeableState;
else if (stage_string == "with_mergeable_state_after_aggregation")
stage = WithMergeableStateAfterAggregation;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown query processing stage: {}", stage_string);
return stage;
}
}
}

View File

@ -43,6 +43,14 @@ namespace QueryProcessingStage
? data[stage]
: "Unknown stage";
}
/// This method is used for the program options,
/// hence it accepts under_score notation for the stage:
/// - complete
/// - fetch_columns
/// - with_mergeable_state
/// - with_mergeable_state_after_aggregation
Enum fromString(const std::string & stage_string);
}
}

View File

@ -71,6 +71,7 @@ class IColumn;
M(UInt64, background_buffer_flush_schedule_pool_size, 16, "Number of threads performing background flush for tables with Buffer engine. Only has meaning at server startup.", 0) \
M(UInt64, background_pool_size, 16, "Number of threads performing background work for tables (for example, merging in merge tree). Only has meaning at server startup.", 0) \
M(UInt64, background_move_pool_size, 8, "Number of threads performing background moves for tables. Only has meaning at server startup.", 0) \
M(UInt64, background_fetches_pool_size, 3, "Number of threads performing background fetches for replicated tables. Only has meaning at server startup.", 0) \
M(UInt64, background_schedule_pool_size, 16, "Number of threads performing background tasks for replicated tables, dns cache updates. Only has meaning at server startup.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "Number of threads performing background tasks for message streaming. Only has meaning at server startup.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "Number of threads performing background tasks for distributed sends. Only has meaning at server startup.", 0) \
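These pool sizes take effect only at server startup; a hedged way to inspect the configured values, assuming they are exposed through `system.settings` like the other entries of this macro list:
``` sql
SELECT name, value
FROM system.settings
WHERE name LIKE 'background%pool_size';
```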

View File

@ -11,3 +11,5 @@
#cmakedefine01 USE_SSL
#cmakedefine01 USE_OPENCL
#cmakedefine01 USE_LDAP
#cmakedefine01 USE_ROCKSDB

View File

@ -30,6 +30,7 @@ SRCS(
MySQL/PacketsReplication.cpp
NamesAndTypes.cpp
PostgreSQLProtocol.cpp
QueryProcessingStage.cpp
Settings.cpp
SettingsEnums.cpp
SettingsFields.cpp

View File

@ -155,7 +155,7 @@ void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & i
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
@ -182,7 +182,7 @@ void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & ist
{
std::string field_name;
readString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
@ -226,7 +226,7 @@ void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}

View File

@ -80,12 +80,25 @@ public:
return findByValue(value)->second;
}
FieldType getValue(StringRef field_name) const
FieldType getValue(StringRef field_name, bool try_treat_as_id = false) const
{
const auto it = name_to_value_map.find(field_name);
if (!it)
{
/// It is used in CSV and TSV input formats. If we fail to find given string in
/// enum names, we will try to treat it as enum id.
if (try_treat_as_id)
{
FieldType x;
ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
readText(x, tmp_buf);
/// Check if we reached end of the tmp_buf (otherwise field_name is not a number)
/// and try to find it in enum ids
if (tmp_buf.eof() && value_to_name_map.find(x) != value_to_name_map.end())
return x;
}
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
}
return it->getMapped();
}
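A hedged sketch of the behavior this enables for CSV input (the table name is hypothetical): a field that is not a known enum name is retried as an enum id.
``` sql
CREATE TABLE enum_csv (e Enum8('first' = 1, 'second' = 2)) ENGINE = Memory;

INSERT INTO enum_csv FORMAT CSV 2

SELECT * FROM enum_csv;
-- returns 'second'
```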

View File

@ -1,5 +1,6 @@
#include <IO/LZMADeflatingWriteBuffer.h>
#if !defined(ARCADIA_BUILD)
namespace DB
{
@ -123,3 +124,5 @@ void LZMADeflatingWriteBuffer::finish()
} while (lstr.avail_out == 0);
}
}
#endif

View File

@ -3,10 +3,16 @@
#include <IO/BufferWithOwnMemory.h>
#include <IO/WriteBuffer.h>
#include <lzma.h>
#if !defined(ARCADIA_BUILD)
#include <lzma.h> // Y_IGNORE
#endif
namespace DB
{
#if !defined(ARCADIA_BUILD)
/// Performs compression using lzma library and writes compressed data to out_ WriteBuffer.
class LZMADeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
@ -29,4 +35,27 @@ private:
lzma_stream lstr;
bool finished = false;
};
#else
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
class LZMADeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
LZMADeflatingWriteBuffer(
std::unique_ptr<WriteBuffer> out_ [[maybe_unused]],
int compression_level [[maybe_unused]],
size_t buf_size [[maybe_unused]] = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory [[maybe_unused]] = nullptr,
size_t alignment [[maybe_unused]] = 0)
{
throw Exception("LZMADeflatingWriteBuffer is not implemented for arcadia build", ErrorCodes::NOT_IMPLEMENTED);
}
};
#endif
}

View File

@ -1,5 +1,6 @@
#include <IO/LZMAInflatingReadBuffer.h>
#if !defined(ARCADIA_BUILD)
namespace DB
{
namespace ErrorCodes
@ -87,3 +88,4 @@ bool LZMAInflatingReadBuffer::nextImpl()
return true;
}
}
#endif

View File

@ -3,14 +3,14 @@
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <lzma.h>
#if !defined(ARCADIA_BUILD)
#include <lzma.h> // Y_IGNORE
#endif
namespace DB
{
namespace ErrorCodes
{
}
#if !defined(ARCADIA_BUILD)
class LZMAInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
@ -30,4 +30,26 @@ private:
bool eof;
};
#else
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
class LZMAInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
LZMAInflatingReadBuffer(
std::unique_ptr<ReadBuffer> in_ [[maybe_unused]],
size_t buf_size [[maybe_unused]] = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory [[maybe_unused]] = nullptr,
size_t alignment [[maybe_unused]] = 0)
{
throw Exception("LZMADeflatingWriteBuffer is not implemented for arcadia build", ErrorCodes::NOT_IMPLEMENTED);
}
};
#endif
}

View File

@ -451,6 +451,8 @@ struct ContextShared
void initializeTraceCollector(std::shared_ptr<TraceLog> trace_log)
{
if (!trace_log)
return;
if (hasTraceCollector())
return;

View File

@ -680,6 +680,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
// Table SQL definition is available even if the table is detached
auto query = database->getCreateTableQuery(create.table, context);
create = query->as<ASTCreateQuery &>(); // Copy the saved create query, but use ATTACH instead of CREATE
if (create.is_dictionary)
throw Exception(
"Cannot ATTACH TABLE " + backQuoteIfNeed(database_name) + "." + backQuoteIfNeed(create.table) + ", it is a Dictionary",
ErrorCodes::INCORRECT_QUERY);
create.attach = true;
create.attach_short_syntax = true;
create.if_not_exists = if_not_exists;

View File

@ -119,6 +119,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ASTDropQuery & query, Dat
if (query.kind == ASTDropQuery::Kind::Detach)
{
context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id);
table->checkTableCanBeDetached();
table->shutdown();
TableExclusiveLockHolder table_lock;
if (database->getEngineName() != "Atomic")

View File

@ -439,6 +439,8 @@ public:
/// Otherwise - throws an exception with detailed information.
/// We do not use mutex because it is not very important that the size could change during the operation.
virtual void checkTableCanBeDropped() const {}
/// Similar to above but checks for DETACH. It's only used for DICTIONARIES.
virtual void checkTableCanBeDetached() const {}
/// Checks that Partition could be dropped right now
/// Otherwise - throws an exception with detailed information.

View File

@ -9,6 +9,7 @@ namespace CurrentMetrics
{
extern const Metric BackgroundPoolTask;
extern const Metric BackgroundMovePoolTask;
extern const Metric BackgroundFetchesPoolTask;
}
namespace DB
@ -183,7 +184,8 @@ BackgroundJobsExecutor::BackgroundJobsExecutor(
: IBackgroundJobExecutor(
global_context_,
global_context_.getBackgroundProcessingTaskSchedulingSettings(),
{PoolConfig{PoolType::MERGE_MUTATE, global_context_.getSettingsRef().background_pool_size, CurrentMetrics::BackgroundPoolTask}})
{PoolConfig{PoolType::MERGE_MUTATE, global_context_.getSettingsRef().background_pool_size, CurrentMetrics::BackgroundPoolTask},
PoolConfig{PoolType::FETCH, global_context_.getSettingsRef().background_fetches_pool_size, CurrentMetrics::BackgroundFetchesPoolTask}})
, data(data_)
{
}

View File

@ -33,6 +33,7 @@ enum class PoolType
{
MERGE_MUTATE,
MOVE,
FETCH,
};
/// Result from background job providers. Function which will be executed in pool and pool type.

View File

@ -1215,6 +1215,20 @@ void MergeTreeData::clearOldWriteAheadLogs()
}
}
void MergeTreeData::clearEmptyParts()
{
auto parts = getDataPartsVector();
for (const auto & part : parts)
{
if (part->rows_count == 0)
{
ASTPtr literal = std::make_shared<ASTLiteral>(part->name);
/// If another replica has already started drop, it's ok, no need to throw.
dropPartition(literal, /* detach = */ false, /*drop_part = */ true, global_context, /* throw_if_noop = */ false);
}
}
}
void MergeTreeData::rename(const String & new_table_path, const StorageID & new_table_id)
{
auto disks = getStoragePolicy()->getDisks();
@ -2753,6 +2767,96 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED);
}
void MergeTreeData::fetchPartition(const ASTPtr & /*partition*/, const StorageMetadataPtr & /*metadata_snapshot*/, const String & /*from*/, const Context & /*query_context*/)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FETCH PARTITION is not supported by storage {}", getName());
}
Pipe MergeTreeData::alterPartition(
const StorageMetadataPtr & metadata_snapshot,
const PartitionCommands & commands,
const Context & query_context)
{
PartitionCommandsResultInfo result;
for (const PartitionCommand & command : commands)
{
PartitionCommandsResultInfo current_command_results;
switch (command.type)
{
case PartitionCommand::DROP_PARTITION:
if (command.part)
checkPartCanBeDropped(command.partition);
else
checkPartitionCanBeDropped(command.partition);
dropPartition(command.partition, command.detach, command.part, query_context);
break;
case PartitionCommand::DROP_DETACHED_PARTITION:
dropDetached(command.partition, command.part, query_context);
break;
case PartitionCommand::ATTACH_PARTITION:
current_command_results = attachPartition(command.partition, metadata_snapshot, command.part, query_context);
break;
case PartitionCommand::MOVE_PARTITION:
{
switch (*command.move_destination_type)
{
case PartitionCommand::MoveDestinationType::DISK:
movePartitionToDisk(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::VOLUME:
movePartitionToVolume(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::TABLE:
checkPartitionCanBeDropped(command.partition);
String dest_database = query_context.resolveDatabase(command.to_database);
auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context);
movePartitionToTable(dest_storage, command.partition, query_context);
break;
}
}
break;
case PartitionCommand::REPLACE_PARTITION:
{
checkPartitionCanBeDropped(command.partition);
String from_database = query_context.resolveDatabase(command.from_database);
auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context);
replacePartitionFrom(from_storage, command.partition, command.replace, query_context);
}
break;
case PartitionCommand::FETCH_PARTITION:
fetchPartition(command.partition, metadata_snapshot, command.from_zookeeper_path, query_context);
break;
case PartitionCommand::FREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, lock);
}
break;
case PartitionCommand::FREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
result.insert(result.end(), current_command_results.begin(), current_command_results.end());
}
if (query_context.getSettingsRef().alter_partition_verbose_result)
return convertCommandsResultToSource(result);
return {};
}
String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context & context) const
{

View File

@ -498,6 +498,8 @@ public:
/// Must be called with locked lockForShare() because use relative_data_path.
void clearOldTemporaryDirectories(ssize_t custom_directories_lifetime_seconds = -1);
void clearEmptyParts();
/// After the call to dropAllData() no method can be called.
/// Deletes the data directory and flushes the uncompressed blocks cache and the marks cache.
void dropAllData();
@ -563,6 +565,11 @@ public:
void checkPartCanBeDropped(const ASTPtr & part);
Pipe alterPartition(
const StorageMetadataPtr & metadata_snapshot,
const PartitionCommands & commands,
const Context & query_context) override;
size_t getColumnCompressedSize(const std::string & name) const
{
auto lock = lockParts();
@ -868,8 +875,17 @@ protected:
using MatcherFn = std::function<bool(const DataPartPtr &)>;
PartitionCommandsResultInfo freezePartitionsByMatcher(MatcherFn matcher, const StorageMetadataPtr & metadata_snapshot, const String & with_name, const Context & context);
// Partition helpers
bool canReplacePartition(const DataPartPtr & src_part) const;
virtual void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context, bool throw_if_noop = true) = 0;
virtual PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, const Context & context) = 0;
virtual void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) = 0;
virtual void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) = 0;
/// Makes sense only for replicated tables
virtual void fetchPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, const String & from, const Context & query_context);
void writePartLog(
PartLogElement::Type type,
const ExecutionStatus & execution_status,

View File

@ -72,6 +72,7 @@ void ReplicatedMergeTreeCleanupThread::iterate()
clearOldLogs();
clearOldBlocks();
clearOldMutations();
storage.clearEmptyParts();
}
}

View File

@ -54,7 +54,8 @@ void ReplicatedMergeTreeQueue::addVirtualParts(const MergeTreeData::DataParts &
bool ReplicatedMergeTreeQueue::isVirtualPart(const MergeTreeData::DataPartPtr & data_part) const
{
std::lock_guard lock(state_mutex);
return virtual_parts.getContainingPart(data_part->info) != data_part->name;
auto virtual_part_name = virtual_parts.getContainingPart(data_part->info);
return !virtual_part_name.empty() && virtual_part_name != data_part->name;
}
@ -1039,6 +1040,16 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
}
}
/// Check that fetches pool is not overloaded
if (entry.type == LogEntry::GET_PART)
{
if (!storage.canExecuteFetch(entry, out_postpone_reason))
{
LOG_TRACE(log, out_postpone_reason);
return false;
}
}
if (entry.type == LogEntry::MERGE_PARTS || entry.type == LogEntry::MUTATE_PART)
{
/** If any of the required parts are now fetched or in merge process, wait for the end of this operation.

View File

@ -0,0 +1,65 @@
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/Context.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/RocksDB/StorageEmbeddedRocksDB.h>
#include <Storages/RocksDB/EmbeddedRocksDBBlockInputStream.h>
#include <rocksdb/db.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ROCKSDB_ERROR;
}
EmbeddedRocksDBBlockInputStream::EmbeddedRocksDBBlockInputStream(
StorageEmbeddedRocksDB & storage_,
const StorageMetadataPtr & metadata_snapshot_,
size_t max_block_size_)
: storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, max_block_size(max_block_size_)
{
sample_block = metadata_snapshot->getSampleBlock();
primary_key_pos = sample_block.getPositionByName(storage.primary_key);
}
Block EmbeddedRocksDBBlockInputStream::readImpl()
{
if (finished)
return {};
if (!iterator)
{
iterator = std::unique_ptr<rocksdb::Iterator>(storage.rocksdb_ptr->NewIterator(rocksdb::ReadOptions()));
iterator->SeekToFirst();
}
MutableColumns columns = sample_block.cloneEmptyColumns();
for (size_t rows = 0; iterator->Valid() && rows < max_block_size; ++rows, iterator->Next())
{
ReadBufferFromString key_buffer(iterator->key());
ReadBufferFromString value_buffer(iterator->value());
size_t idx = 0;
for (const auto & elem : sample_block)
{
elem.type->deserializeBinary(*columns[idx], idx == primary_key_pos ? key_buffer : value_buffer);
++idx;
}
}
finished = !iterator->Valid();
if (!iterator->status().ok())
{
throw Exception("Engine " + getName() + " got error while seeking key value datas: " + iterator->status().ToString(),
ErrorCodes::ROCKSDB_ERROR);
}
return sample_block.cloneWithColumns(std::move(columns));
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
namespace rocksdb
{
class Iterator;
}
namespace DB
{
class StorageEmbeddedRocksDB;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class EmbeddedRocksDBBlockInputStream : public IBlockInputStream
{
public:
EmbeddedRocksDBBlockInputStream(
StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_, size_t max_block_size_);
String getName() const override { return "EmbeddedRocksDB"; }
Block getHeader() const override { return sample_block; }
Block readImpl() override;
private:
StorageEmbeddedRocksDB & storage;
StorageMetadataPtr metadata_snapshot;
const size_t max_block_size;
Block sample_block;
std::unique_ptr<rocksdb::Iterator> iterator;
size_t primary_key_pos;
bool finished = false;
};
}

View File

@ -0,0 +1,67 @@
#include <Storages/RocksDB/EmbeddedRocksDBBlockOutputStream.h>
#include <Storages/RocksDB/StorageEmbeddedRocksDB.h>
#include <IO/WriteBufferFromString.h>
#include <rocksdb/db.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ROCKSDB_ERROR;
}
EmbeddedRocksDBBlockOutputStream::EmbeddedRocksDBBlockOutputStream(
StorageEmbeddedRocksDB & storage_,
const StorageMetadataPtr & metadata_snapshot_)
: storage(storage_)
, metadata_snapshot(metadata_snapshot_)
{
Block sample_block = metadata_snapshot->getSampleBlock();
for (const auto & elem : sample_block)
{
if (elem.name == storage.primary_key)
break;
++primary_key_pos;
}
}
Block EmbeddedRocksDBBlockOutputStream::getHeader() const
{
return metadata_snapshot->getSampleBlock();
}
void EmbeddedRocksDBBlockOutputStream::write(const Block & block)
{
metadata_snapshot->check(block, true);
auto rows = block.rows();
WriteBufferFromOwnString wb_key;
WriteBufferFromOwnString wb_value;
rocksdb::WriteBatch batch;
rocksdb::Status status;
for (size_t i = 0; i < rows; i++)
{
wb_key.restart();
wb_value.restart();
size_t idx = 0;
for (const auto & elem : block)
{
elem.type->serializeBinary(*elem.column, i, idx == primary_key_pos ? wb_key : wb_value);
++idx;
}
status = batch.Put(wb_key.str(), wb_value.str());
if (!status.ok())
throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR);
}
status = storage.rocksdb_ptr->Write(rocksdb::WriteOptions(), &batch);
if (!status.ok())
throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR);
}
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
namespace DB
{
class StorageEmbeddedRocksDB;
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
class EmbeddedRocksDBBlockOutputStream : public IBlockOutputStream
{
public:
EmbeddedRocksDBBlockOutputStream(
StorageEmbeddedRocksDB & storage_,
const StorageMetadataPtr & metadata_snapshot_);
Block getHeader() const override;
void write(const Block & block) override;
private:
StorageEmbeddedRocksDB & storage;
StorageMetadataPtr metadata_snapshot;
size_t primary_key_pos = 0;
};
}

View File

@ -0,0 +1,374 @@
#include <Storages/RocksDB/StorageEmbeddedRocksDB.h>
#include <Storages/RocksDB/EmbeddedRocksDBBlockOutputStream.h>
#include <Storages/RocksDB/EmbeddedRocksDBBlockInputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/StorageFactory.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTCreateQuery.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Pipe.h>
#include <Interpreters/Context.h>
#include <Interpreters/Set.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/TreeRewriter.h>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/Logger.h>
#include <common/logger_useful.h>
#include <rocksdb/db.h>
#include <rocksdb/table.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ROCKSDB_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
// Extracts into res the primary key values that the condition may select; returns true if the filter is covered by the key.
static bool traverseASTFilter(const String & primary_key, const DataTypePtr & primary_key_type, const ASTPtr & elem, const PreparedSets & sets, FieldVector & res)
{
const auto * function = elem->as<ASTFunction>();
if (!function)
return false;
if (function->name == "and")
{
// for "and", it is enough that one child carries the key filter condition
for (const auto & child : function->arguments->children)
if (traverseASTFilter(primary_key, primary_key_type, child, sets, res))
return true;
return false;
}
else if (function->name == "or")
{
// make sure every child has the key filter condition
FieldVector child_res;
for (const auto & child : function->arguments->children)
{
if (!traverseASTFilter(primary_key, primary_key_type, child, sets, child_res))
return false;
}
res.insert(res.end(), child_res.begin(), child_res.end());
return true;
}
else if (function->name == "equals" || function->name == "in")
{
const auto & args = function->arguments->as<ASTExpressionList &>();
const ASTIdentifier * ident;
const IAST * value;
if (args.children.size() != 2)
return false;
if (function->name == "in")
{
ident = args.children.at(0)->as<ASTIdentifier>();
if (!ident)
return false;
if (ident->name() != primary_key)
return false;
value = args.children.at(1).get();
PreparedSetKey set_key;
if ((value->as<ASTSubquery>() || value->as<ASTIdentifier>()))
set_key = PreparedSetKey::forSubquery(*value);
else
set_key = PreparedSetKey::forLiteral(*value, {primary_key_type});
auto set_it = sets.find(set_key);
if (set_it == sets.end())
return false;
SetPtr prepared_set = set_it->second;
if (!prepared_set->hasExplicitSetElements())
return false;
prepared_set->checkColumnsNumber(1);
const auto & set_column = *prepared_set->getSetElements()[0];
for (size_t row = 0; row < set_column.size(); ++row)
{
res.push_back(set_column[row]);
}
return true;
}
else
{
if ((ident = args.children.at(0)->as<ASTIdentifier>()))
value = args.children.at(1).get();
else if ((ident = args.children.at(1)->as<ASTIdentifier>()))
value = args.children.at(0).get();
else
return false;
if (ident->name() != primary_key)
return false;
//function->name == "equals"
if (const auto * literal = value->as<ASTLiteral>())
{
res.push_back(literal->value);
return true;
}
}
}
return false;
}
/** Retrieve from the query a condition of the form `key = 'value'` or `key IN ('v1', 'v2', ...)` from conjunctions in the WHERE clause.
* TODO support key like search
*/
static std::pair<FieldVector, bool> getFilterKeys(const String & primary_key, const DataTypePtr & primary_key_type, const SelectQueryInfo & query_info)
{
const auto & select = query_info.query->as<ASTSelectQuery &>();
if (!select.where())
{
return std::make_pair(FieldVector{}, true);
}
FieldVector res;
auto matched_keys = traverseASTFilter(primary_key, primary_key_type, select.where(), query_info.sets, res);
return std::make_pair(res, !matched_keys);
}
class EmbeddedRocksDBSource : public SourceWithProgress
{
public:
EmbeddedRocksDBSource(
const StorageEmbeddedRocksDB & storage_,
const StorageMetadataPtr & metadata_snapshot_,
const FieldVector & keys_,
const size_t start_,
const size_t end_,
const size_t max_block_size_)
: SourceWithProgress(metadata_snapshot_->getSampleBlock())
, storage(storage_)
, metadata_snapshot(metadata_snapshot_)
, start(start_)
, end(end_)
, max_block_size(max_block_size_)
{
// slice the keys
if (end > start)
{
keys.resize(end - start);
std::copy(keys_.begin() + start, keys_.begin() + end, keys.begin());
}
}
String getName() const override
{
return storage.getName();
}
Chunk generate() override
{
if (processed_keys >= keys.size() || (start == end))
return {};
std::vector<rocksdb::Slice> slices_keys;
slices_keys.reserve(keys.size());
std::vector<String> values;
std::vector<WriteBufferFromOwnString> wbs(keys.size());
const auto & sample_block = metadata_snapshot->getSampleBlock();
const auto & key_column = sample_block.getByName(storage.primary_key);
auto columns = sample_block.cloneEmptyColumns();
size_t primary_key_pos = sample_block.getPositionByName(storage.primary_key);
for (size_t i = processed_keys; i < std::min(keys.size(), processed_keys + max_block_size); ++i)
{
key_column.type->serializeBinary(keys[i], wbs[i]);
auto str_ref = wbs[i].stringRef();
slices_keys.emplace_back(str_ref.data, str_ref.size);
}
auto statuses = storage.rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values);
for (size_t i = 0; i < statuses.size(); ++i)
{
if (statuses[i].ok())
{
ReadBufferFromString key_buffer(slices_keys[i]);
ReadBufferFromString value_buffer(values[i]);
size_t idx = 0;
for (const auto & elem : sample_block)
{
elem.type->deserializeBinary(*columns[idx], idx == primary_key_pos ? key_buffer : value_buffer);
++idx;
}
}
}
processed_keys += max_block_size;
UInt64 num_rows = columns.at(0)->size();
return Chunk(std::move(columns), num_rows);
}
private:
const StorageEmbeddedRocksDB & storage;
const StorageMetadataPtr metadata_snapshot;
const size_t start;
const size_t end;
const size_t max_block_size;
FieldVector keys;
size_t processed_keys = 0;
};
StorageEmbeddedRocksDB::StorageEmbeddedRocksDB(const StorageID & table_id_,
const String & relative_data_path_,
const StorageInMemoryMetadata & metadata_,
bool attach,
Context & context_,
const String & primary_key_)
: IStorage(table_id_), primary_key{primary_key_}
{
setInMemoryMetadata(metadata_);
rocksdb_dir = context_.getPath() + relative_data_path_;
if (!attach)
{
Poco::File(rocksdb_dir).createDirectories();
}
initDb();
}
void StorageEmbeddedRocksDB::truncate(const ASTPtr &, const StorageMetadataPtr & , const Context &, TableExclusiveLockHolder &)
{
rocksdb_ptr->Close();
Poco::File(rocksdb_dir).remove(true);
Poco::File(rocksdb_dir).createDirectories();
initDb();
}
void StorageEmbeddedRocksDB::initDb()
{
rocksdb::Options options;
rocksdb::DB * db;
options.create_if_missing = true;
options.compression = rocksdb::CompressionType::kZSTD;
rocksdb::Status status = rocksdb::DB::Open(options, rocksdb_dir, &db);
if (status != rocksdb::Status::OK())
throw Exception("Fail to open rocksdb path at: " + rocksdb_dir + ": " + status.ToString(), ErrorCodes::ROCKSDB_ERROR);
rocksdb_ptr = std::unique_ptr<rocksdb::DB>(db);
}
Pipe StorageEmbeddedRocksDB::read(
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
SelectQueryInfo & query_info,
const Context & /*context*/,
QueryProcessingStage::Enum /*processed_stage*/,
size_t max_block_size,
unsigned num_streams)
{
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
FieldVector keys;
bool all_scan = false;
auto primary_key_data_type = metadata_snapshot->getSampleBlock().getByName(primary_key).type;
std::tie(keys, all_scan) = getFilterKeys(primary_key, primary_key_data_type, query_info);
if (all_scan)
{
auto reader = std::make_shared<EmbeddedRocksDBBlockInputStream>(
*this, metadata_snapshot, max_block_size);
return Pipe(std::make_shared<SourceFromInputStream>(reader));
}
else
{
if (keys.empty())
return {};
std::sort(keys.begin(), keys.end());
auto unique_iter = std::unique(keys.begin(), keys.end());
if (unique_iter != keys.end())
keys.erase(unique_iter, keys.end());
Pipes pipes;
size_t start = 0;
size_t end;
const size_t num_threads = std::min(size_t(num_streams), keys.size());
const size_t batch_per_size = ceil(keys.size() * 1.0 / num_threads);
for (size_t t = 0; t < num_threads; ++t)
{
if (start >= keys.size())
start = end = 0;
else
end = start + batch_per_size > keys.size() ? keys.size() : start + batch_per_size;
pipes.emplace_back(
std::make_shared<EmbeddedRocksDBSource>(*this, metadata_snapshot, keys, start, end, max_block_size));
start += batch_per_size;
}
return Pipe::unitePipes(std::move(pipes));
}
}
BlockOutputStreamPtr StorageEmbeddedRocksDB::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/)
{
return std::make_shared<EmbeddedRocksDBBlockOutputStream>(*this, metadata_snapshot);
}
static StoragePtr create(const StorageFactory::Arguments & args)
{
// TODO custom RocksDBSettings, table function
if (!args.engine_args.empty())
throw Exception(
"Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
StorageInMemoryMetadata metadata;
metadata.setColumns(args.columns);
metadata.setConstraints(args.constraints);
if (!args.storage_def->primary_key)
throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS);
metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.context);
auto primary_key_names = metadata.getColumnsRequiredForPrimaryKey();
if (primary_key_names.size() != 1)
{
throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS);
}
return StorageEmbeddedRocksDB::create(args.table_id, args.relative_data_path, metadata, args.attach, args.context, primary_key_names[0]);
}
void registerStorageEmbeddedRocksDB(StorageFactory & factory)
{
StorageFactory::StorageFeatures features{
.supports_sort_order = true,
};
factory.registerStorage("EmbeddedRocksDB", create, features);
}
}

View File

@ -0,0 +1,64 @@
#pragma once
#include <memory>
#include <ext/shared_ptr_helper.h>
#include <Storages/IStorage.h>
namespace rocksdb
{
class DB;
}
namespace DB
{
class Context;
class StorageEmbeddedRocksDB final : public ext::shared_ptr_helper<StorageEmbeddedRocksDB>, public IStorage
{
friend struct ext::shared_ptr_helper<StorageEmbeddedRocksDB>;
friend class EmbeddedRocksDBSource;
friend class EmbeddedRocksDBBlockOutputStream;
friend class EmbeddedRocksDBBlockInputStream;
public:
std::string getName() const override { return "EmbeddedRocksDB"; }
Pipe read(
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,
SelectQueryInfo & query_info,
const Context & context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
unsigned num_streams) override;
BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;
void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override;
bool supportsParallelInsert() const override { return true; }
bool supportsIndexForIn() const override { return true; }
bool mayBenefitFromIndexForIn(
const ASTPtr & node, const Context & /*query_context*/, const StorageMetadataPtr & /*metadata_snapshot*/) const override
{
return node->getColumnName() == primary_key;
}
protected:
StorageEmbeddedRocksDB(const StorageID & table_id_,
const String & relative_data_path_,
const StorageInMemoryMetadata & metadata,
bool attach,
Context & context_,
const String & primary_key_);
private:
const String primary_key;
using RocksDBPtr = std::unique_ptr<rocksdb::DB>;
RocksDBPtr rocksdb_ptr;
String rocksdb_dir;
void initDb();
};
}

View File

@ -126,6 +126,11 @@ void StorageDictionary::checkTableCanBeDropped() const
throw Exception("Cannot detach table " + getStorageID().getFullTableName() + " from a database with DICTIONARY engine", ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE);
}
void StorageDictionary::checkTableCanBeDetached() const
{
checkTableCanBeDropped();
}
Pipe StorageDictionary::read(
const Names & column_names,
const StorageMetadataPtr & /*metadata_snapshot*/,

View File

@ -15,6 +15,7 @@ public:
std::string getName() const override { return "Dictionary"; }
void checkTableCanBeDropped() const override;
void checkTableCanBeDetached() const override;
Pipe read(
const Names & column_names,

View File

@ -99,6 +99,7 @@ void StorageMergeTree::startup()
{
clearOldPartsFromFilesystem();
clearOldWriteAheadLogs();
clearEmptyParts();
/// Temporary directories contain incomplete results of merges (after a forced restart)
/// and cannot be reinitialized, so delete each of them immediately
@ -947,6 +948,7 @@ std::optional<JobAndPool> StorageMergeTree::getDataProcessingJob()
clearOldTemporaryDirectories();
clearOldWriteAheadLogs();
clearOldMutations();
clearEmptyParts();
}, PoolType::MERGE_MUTATE};
}
return {};
@ -1062,95 +1064,6 @@ bool StorageMergeTree::optimize(
return true;
}
Pipe StorageMergeTree::alterPartition(
const StorageMetadataPtr & metadata_snapshot,
const PartitionCommands & commands,
const Context & query_context)
{
PartitionCommandsResultInfo result;
for (const PartitionCommand & command : commands)
{
PartitionCommandsResultInfo current_command_results;
switch (command.type)
{
case PartitionCommand::DROP_PARTITION:
if (command.part)
checkPartCanBeDropped(command.partition);
else
checkPartitionCanBeDropped(command.partition);
dropPartition(command.partition, command.detach, command.part, query_context);
break;
case PartitionCommand::DROP_DETACHED_PARTITION:
dropDetached(command.partition, command.part, query_context);
break;
case PartitionCommand::ATTACH_PARTITION:
current_command_results = attachPartition(command.partition, command.part, query_context);
break;
case PartitionCommand::MOVE_PARTITION:
{
switch (*command.move_destination_type)
{
case PartitionCommand::MoveDestinationType::DISK:
movePartitionToDisk(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::VOLUME:
movePartitionToVolume(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::TABLE:
checkPartitionCanBeDropped(command.partition);
String dest_database = query_context.resolveDatabase(command.to_database);
auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context);
movePartitionToTable(dest_storage, command.partition, query_context);
break;
}
}
break;
case PartitionCommand::REPLACE_PARTITION:
{
checkPartitionCanBeDropped(command.partition);
String from_database = query_context.resolveDatabase(command.from_database);
auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context);
replacePartitionFrom(from_storage, command.partition, command.replace, query_context);
}
break;
case PartitionCommand::FREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, lock);
}
break;
case PartitionCommand::FREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
default:
IStorage::alterPartition(metadata_snapshot, commands, query_context); // should throw an exception.
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
result.insert(result.end(), current_command_results.begin(), current_command_results.end());
}
if (query_context.getSettingsRef().alter_partition_verbose_result)
return convertCommandsResultToSource(result);
return {};
}
ActionLock StorageMergeTree::stopMergesAndWait()
{
/// Asks to complete merges and does not allow them to start.
@ -1176,7 +1089,7 @@ ActionLock StorageMergeTree::stopMergesAndWait()
}
void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context)
void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context, bool throw_if_noop)
{
{
/// Asks to complete merges and does not allow them to start.
@ -1194,8 +1107,10 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool
if (part)
parts_to_remove.push_back(part);
else
else if (throw_if_noop)
throw Exception("Part " + part_name + " not found, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART);
else
return;
}
else
{
@ -1227,7 +1142,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool
PartitionCommandsResultInfo StorageMergeTree::attachPartition(
const ASTPtr & partition, bool attach_part, const Context & context)
const ASTPtr & partition, const StorageMetadataPtr & /* metadata_snapshot */,
bool attach_part, const Context & context)
{
PartitionCommandsResultInfo results;
PartsTemporaryRename renamed_parts(*this, "detached/");

View File

@ -72,11 +72,6 @@ public:
bool deduplicate,
const Context & context) override;
Pipe alterPartition(
const StorageMetadataPtr & /* metadata_snapshot */,
const PartitionCommands & commands,
const Context & context) override;
void mutate(const MutationCommands & commands, const Context & context) override;
/// Return introspection information about currently processing or recently processed mutations.
@ -192,11 +187,11 @@ private:
void clearOldMutations(bool truncate = false);
// Partition helpers
void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context);
PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, bool part, const Context & context);
void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context, bool throw_if_noop) override;
PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, const Context & context) override;
void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context);
void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context);
void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) override;
void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) override;
bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const override;
/// Update mutation entries after part mutation execution. May reset old
/// errors if mutation was successful. Otherwise update last_failed* fields

View File

@ -77,6 +77,10 @@ namespace ProfileEvents
extern const Event NotCreatedLogEntryForMutation;
}
namespace CurrentMetrics
{
extern const Metric BackgroundFetchesPoolTask;
}
namespace DB
{
@ -206,6 +210,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree(
, part_check_thread(*this)
, restarting_thread(*this)
, allow_renaming(allow_renaming_)
, replicated_fetches_pool_size(global_context.getSettingsRef().background_fetches_pool_size)
{
queue_updating_task = global_context.getSchedulePool().createTask(
getStorageID().getFullTableName() + " (StorageReplicatedMergeTree::queueUpdatingTask)", [this]{ queueUpdatingTask(); });
@ -2604,10 +2609,37 @@ std::optional<JobAndPool> StorageReplicatedMergeTree::getDataProcessingJob()
if (!selected_entry)
return {};
PoolType pool_type;
/// Depending on the entry type, execute in the (small) fetches pool or in the big merge_mutate pool
if (selected_entry->log_entry->type == LogEntry::GET_PART)
pool_type = PoolType::FETCH;
else
pool_type = PoolType::MERGE_MUTATE;
return JobAndPool{[this, selected_entry] () mutable
{
processQueueEntry(selected_entry);
}, PoolType::MERGE_MUTATE};
}, pool_type};
}
bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String & disable_reason) const
{
if (fetcher.blocker.isCancelled())
{
disable_reason = fmt::format("Not executing fetch of part {} because replicated fetches are cancelled now.", entry.new_part_name);
return false;
}
size_t busy_threads_in_pool = CurrentMetrics::values[CurrentMetrics::BackgroundFetchesPoolTask].load(std::memory_order_relaxed);
if (busy_threads_in_pool >= replicated_fetches_pool_size)
{
disable_reason = fmt::format("Not executing fetch of part {} because {} fetches already executing, max {}.", entry.new_part_name, busy_threads_in_pool, replicated_fetches_pool_size);
return false;
}
return true;
}
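A minimal sketch of the admission rule above, factored as a pure function with hypothetical names (the real check reads the BackgroundFetchesPoolTask metric with relaxed memory ordering, so it is an advisory throttle rather than a hard cap):

#include <cstddef>
#include <iostream>
#include <string>

/// Returns true if one more fetch may start; otherwise fills disable_reason.
bool canStartFetch(size_t busy_threads_in_pool, size_t pool_size, const std::string & part_name, std::string & disable_reason)
{
    if (busy_threads_in_pool >= pool_size)
    {
        disable_reason = "Not executing fetch of part " + part_name
            + " because " + std::to_string(busy_threads_in_pool)
            + " fetches are already executing, max " + std::to_string(pool_size) + ".";
        return false;
    }
    return true;
}

int main()
{
    std::string reason;
    for (size_t busy = 0; busy < 5; ++busy) /// admits while busy < 3, refuses afterwards
        std::cout << (canStartFetch(busy, 3, "all_1_1_0", reason) ? "can fetch" : reason) << '\n';
}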
bool StorageReplicatedMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & part) const
@ -4171,93 +4203,6 @@ void StorageReplicatedMergeTree::alter(
}
}
Pipe StorageReplicatedMergeTree::alterPartition(
const StorageMetadataPtr & metadata_snapshot,
const PartitionCommands & commands,
const Context & query_context)
{
PartitionCommandsResultInfo result;
for (const PartitionCommand & command : commands)
{
PartitionCommandsResultInfo current_command_results;
switch (command.type)
{
case PartitionCommand::DROP_PARTITION:
if (command.part)
checkPartCanBeDropped(command.partition);
else
checkPartitionCanBeDropped(command.partition);
dropPartition(command.partition, command.detach, command.part, query_context);
break;
case PartitionCommand::DROP_DETACHED_PARTITION:
dropDetached(command.partition, command.part, query_context);
break;
case PartitionCommand::ATTACH_PARTITION:
current_command_results = attachPartition(command.partition, metadata_snapshot, command.part, query_context);
break;
case PartitionCommand::MOVE_PARTITION:
{
switch (*command.move_destination_type)
{
case PartitionCommand::MoveDestinationType::DISK:
movePartitionToDisk(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::VOLUME:
movePartitionToVolume(command.partition, command.move_destination_name, command.part, query_context);
break;
case PartitionCommand::MoveDestinationType::TABLE:
checkPartitionCanBeDropped(command.partition);
String dest_database = query_context.resolveDatabase(command.to_database);
auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context);
movePartitionToTable(dest_storage, command.partition, query_context);
break;
}
}
break;
case PartitionCommand::REPLACE_PARTITION:
{
checkPartitionCanBeDropped(command.partition);
String from_database = query_context.resolveDatabase(command.from_database);
auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context);
replacePartitionFrom(from_storage, command.partition, command.replace, query_context);
}
break;
case PartitionCommand::FETCH_PARTITION:
fetchPartition(command.partition, metadata_snapshot, command.from_zookeeper_path, query_context);
break;
case PartitionCommand::FREEZE_PARTITION:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezePartition(command.partition, metadata_snapshot, command.with_name, query_context, lock);
}
break;
case PartitionCommand::FREEZE_ALL_PARTITIONS:
{
auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout);
current_command_results = freezeAll(command.with_name, metadata_snapshot, query_context, lock);
}
break;
}
for (auto & command_result : current_command_results)
command_result.command_type = command.typeToString();
result.insert(result.end(), current_command_results.begin(), current_command_results.end());
}
if (query_context.getSettingsRef().alter_partition_verbose_result)
return convertCommandsResultToSource(result);
return {};
}
/// If new version returns ordinary name, else returns part name containing the first and last month of the month
/// NOTE: use it in pair with getFakePartCoveringAllPartsInPartition(...)
static String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const MergeTreePartInfo & part_info)
@ -4317,7 +4262,7 @@ bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const St
}
void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & query_context)
void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & query_context, bool throw_if_noop)
{
assertNotReadonly();
if (!is_leader)
@ -4331,7 +4276,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de
if (drop_part)
{
String part_name = partition->as<ASTLiteral &>().value.safeGet<String>();
did_drop = dropPart(zookeeper, part_name, entry, detach);
did_drop = dropPart(zookeeper, part_name, entry, detach, throw_if_noop);
}
else
{
@ -6024,7 +5969,7 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI
}
bool StorageReplicatedMergeTree::dropPart(
zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach)
zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach, bool throw_if_noop)
{
LOG_TRACE(log, "Will try to insert a log entry to DROP_RANGE for part: " + part_name);
@ -6037,32 +5982,52 @@ bool StorageReplicatedMergeTree::dropPart(
auto part = getPartIfExists(part_info, {MergeTreeDataPartState::Committed});
if (!part)
throw Exception("Part " + part_name + " not found locally, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART);
{
if (throw_if_noop)
throw Exception("Part " + part_name + " not found locally, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART);
return false;
}
/// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already
/// finished the merge.
if (partIsAssignedToBackgroundOperation(part))
throw Exception("Part " + part_name
+ " is currently participating in a background operation (mutation/merge)"
+ ", try again later", ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
{
if (throw_if_noop)
throw Exception("Part " + part_name
+ " is currently participating in a background operation (mutation/merge)"
+ ", try again later", ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
return false;
}
if (partIsLastQuorumPart(part->info))
throw Exception("Part " + part_name + " is last inserted part with quorum in partition. Cannot drop",
ErrorCodes::NOT_IMPLEMENTED);
{
if (throw_if_noop)
throw Exception("Part " + part_name + " is last inserted part with quorum in partition. Cannot drop",
ErrorCodes::NOT_IMPLEMENTED);
return false;
}
if (partIsInsertingWithParallelQuorum(part->info))
throw Exception("Part " + part_name + " is inserting with parallel quorum. Cannot drop",
ErrorCodes::NOT_IMPLEMENTED);
{
if (throw_if_noop)
throw Exception("Part " + part_name + " is inserting with parallel quorum. Cannot drop",
ErrorCodes::NOT_IMPLEMENTED);
return false;
}
Coordination::Requests ops;
getClearBlocksInPartitionOps(ops, *zookeeper, part_info.partition_id, part_info.min_block, part_info.max_block);
size_t clean_block_ops_size = ops.size();
/// Set fake level to treat this part as virtual in queue.
auto drop_part_info = part->info;
drop_part_info.level = MergeTreePartInfo::MAX_LEVEL;
/// If `part_name` is result of a recent merge and source parts are still available then
/// DROP_RANGE with detach will move this part together with source parts to `detached/` dir.
entry.type = LogEntry::DROP_RANGE;
entry.source_replica = replica_name;
entry.new_part_name = part_name;
entry.new_part_name = drop_part_info.getPartName();
entry.detach = detach;
entry.create_time = time(nullptr);
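The same "throw if throw_if_noop, otherwise report a no-op" guard appears four times in dropPart() above, presumably so internal callers such as the new clearEmptyParts() can treat a missing or busy part as a no-op. A hypothetical refactoring sketch (not part of the patch) that captures the pattern once:

#include <iostream>
#include <stdexcept>
#include <string>

/// Either throws (user-initiated DROP PART) or returns false so the caller
/// can propagate "nothing was dropped" to a best-effort internal cleanup.
bool failDrop(bool throw_if_noop, const std::string & message)
{
    if (throw_if_noop)
        throw std::runtime_error(message);
    return false;
}

int main()
{
    try
    {
        std::cout << failDrop(false, "Part all_1_1_0 not found locally") << '\n'; /// prints 0
        failDrop(true, "Part all_1_1_0 not found locally");                       /// throws
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}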

View File

@ -122,11 +122,6 @@ public:
void alter(const AlterCommands & commands, const Context & query_context, TableLockHolder & table_lock_holder) override;
Pipe alterPartition(
const StorageMetadataPtr & metadata_snapshot,
const PartitionCommands & commands,
const Context & query_context) override;
void mutate(const MutationCommands & commands, const Context & context) override;
void waitMutation(const String & znode_name, size_t mutations_sync) const;
std::vector<MergeTreeMutationStatus> getMutationsStatus() const override;
@ -207,8 +202,13 @@ public:
*/
static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, Poco::Logger * logger);
/// Get job to execute in background pool (merge, mutate, drop range and so on)
std::optional<JobAndPool> getDataProcessingJob() override;
/// Checks that fetches are not disabled by the action blocker and that the
/// pool for fetches is not overloaded
bool canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String & disable_reason) const;
private:
/// Get a sequential consistent view of current parts.
@ -319,6 +319,8 @@ private:
/// Do not allow RENAME TABLE if zookeeper_path contains {database} or {table} macro
const bool allow_renaming;
const size_t replicated_fetches_pool_size;
template <class Func>
void foreachCommittedParts(const Func & func) const;
@ -553,16 +555,16 @@ private:
/// Info about how other replicas can access this one.
ReplicatedMergeTreeAddress getReplicatedMergeTreeAddress() const;
bool dropPart(zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach);
bool dropPart(zkutil::ZooKeeperPtr & zookeeper, String part_name, LogEntry & entry, bool detach, bool throw_if_noop);
bool dropAllPartsInPartition(
zkutil::ZooKeeper & zookeeper, String & partition_id, LogEntry & entry, bool detach);
// Partition helpers
void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & query_context);
PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, const Context & query_context);
void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context);
void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & query_context);
void fetchPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, const String & from, const Context & query_context);
void dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & query_context, bool throw_if_noop) override;
PartitionCommandsResultInfo attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, const Context & query_context) override;
void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context) override;
void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & query_context) override;
void fetchPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, const String & from, const Context & query_context) override;
/// Check granularity of already existing replicated table in zookeeper if it exists
/// return true if it's fixed

View File

@ -1,10 +1,64 @@
#include <Storages/registerStorages.h>
#include <Storages/StorageFactory.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# include "config_core.h"
#endif
namespace DB
{
void registerStorageLog(StorageFactory & factory);
void registerStorageTinyLog(StorageFactory & factory);
void registerStorageStripeLog(StorageFactory & factory);
void registerStorageMergeTree(StorageFactory & factory);
void registerStorageNull(StorageFactory & factory);
void registerStorageMerge(StorageFactory & factory);
void registerStorageBuffer(StorageFactory & factory);
void registerStorageDistributed(StorageFactory & factory);
void registerStorageMemory(StorageFactory & factory);
void registerStorageFile(StorageFactory & factory);
void registerStorageURL(StorageFactory & factory);
void registerStorageDictionary(StorageFactory & factory);
void registerStorageSet(StorageFactory & factory);
void registerStorageJoin(StorageFactory & factory);
void registerStorageView(StorageFactory & factory);
void registerStorageMaterializedView(StorageFactory & factory);
void registerStorageLiveView(StorageFactory & factory);
void registerStorageGenerateRandom(StorageFactory & factory);
#if USE_AWS_S3
void registerStorageS3(StorageFactory & factory);
void registerStorageCOS(StorageFactory & factory);
#endif
#if USE_HDFS
void registerStorageHDFS(StorageFactory & factory);
#endif
void registerStorageODBC(StorageFactory & factory);
void registerStorageJDBC(StorageFactory & factory);
#if USE_MYSQL
void registerStorageMySQL(StorageFactory & factory);
#endif
void registerStorageMongoDB(StorageFactory & factory);
#if USE_RDKAFKA
void registerStorageKafka(StorageFactory & factory);
#endif
#if USE_AMQPCPP
void registerStorageRabbitMQ(StorageFactory & factory);
#endif
#if USE_ROCKSDB
void registerStorageEmbeddedRocksDB(StorageFactory & factory);
#endif
void registerStorages()
{
auto & factory = StorageFactory::instance();
@ -28,7 +82,7 @@ void registerStorages()
registerStorageLiveView(factory);
registerStorageGenerateRandom(factory);
#if USE_AWS_S3
#if USE_AWS_S3
registerStorageS3(factory);
registerStorageCOS(factory);
#endif
@ -53,6 +107,10 @@ void registerStorages()
#if USE_AMQPCPP
registerStorageRabbitMQ(factory);
#endif
#if USE_ROCKSDB
registerStorageEmbeddedRocksDB(factory);
#endif
}
}

View File

@ -1,59 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# include "config_core.h"
#endif
namespace DB
{
class StorageFactory;
void registerStorageLog(StorageFactory & factory);
void registerStorageTinyLog(StorageFactory & factory);
void registerStorageStripeLog(StorageFactory & factory);
void registerStorageMergeTree(StorageFactory & factory);
void registerStorageNull(StorageFactory & factory);
void registerStorageMerge(StorageFactory & factory);
void registerStorageBuffer(StorageFactory & factory);
void registerStorageDistributed(StorageFactory & factory);
void registerStorageMemory(StorageFactory & factory);
void registerStorageFile(StorageFactory & factory);
void registerStorageURL(StorageFactory & factory);
void registerStorageDictionary(StorageFactory & factory);
void registerStorageSet(StorageFactory & factory);
void registerStorageJoin(StorageFactory & factory);
void registerStorageView(StorageFactory & factory);
void registerStorageMaterializedView(StorageFactory & factory);
void registerStorageLiveView(StorageFactory & factory);
void registerStorageGenerateRandom(StorageFactory & factory);
#if USE_AWS_S3
void registerStorageS3(StorageFactory & factory);
void registerStorageCOS(StorageFactory & factory);
#endif
#if USE_HDFS
void registerStorageHDFS(StorageFactory & factory);
#endif
void registerStorageODBC(StorageFactory & factory);
void registerStorageJDBC(StorageFactory & factory);
#if USE_MYSQL
void registerStorageMySQL(StorageFactory & factory);
#endif
void registerStorageMongoDB(StorageFactory & factory);
#if USE_RDKAFKA
void registerStorageKafka(StorageFactory & factory);
#endif
#if USE_AMQPCPP
void registerStorageRabbitMQ(StorageFactory & factory);
#endif
void registerStorages();
}

View File

@ -8,7 +8,7 @@ PEERDIR(
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | grep -v -P 'Kafka|RabbitMQ|S3|HDFS|Licenses|TimeZones' | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -F tests | grep -v -P 'Kafka|RabbitMQ|S3|HDFS|Licenses|TimeZones|RocksDB' | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import pytest
import time
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
import random
import string
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True)
DEFAULT_MAX_THREADS_FOR_FETCH = 3
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def get_random_string(length):
return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length))
def test_limited_fetches(started_cluster):
"""
Test checks that we utilize all available threads for fetches
"""
node1.query("CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '1') ORDER BY tuple() PARTITION BY key")
node2.query("CREATE TABLE t (key UInt64, data String) ENGINE = ReplicatedMergeTree('/clickhouse/test/t', '2') ORDER BY tuple() PARTITION BY key")
with PartitionManager() as pm:
node2.query("SYSTEM STOP FETCHES t")
node1.query("INSERT INTO t SELECT 1, '{}' FROM numbers(5000)".format(get_random_string(104857)))
node1.query("INSERT INTO t SELECT 2, '{}' FROM numbers(5000)".format(get_random_string(104857)))
node1.query("INSERT INTO t SELECT 3, '{}' FROM numbers(5000)".format(get_random_string(104857)))
node1.query("INSERT INTO t SELECT 4, '{}' FROM numbers(5000)".format(get_random_string(104857)))
node1.query("INSERT INTO t SELECT 5, '{}' FROM numbers(5000)".format(get_random_string(104857)))
node1.query("INSERT INTO t SELECT 6, '{}' FROM numbers(5000)".format(get_random_string(104857)))
pm.add_network_delay(node1, 80)
node2.query("SYSTEM START FETCHES t")
fetches_result = []
background_fetches_metric = []
fetched_parts = set([])
for _ in range(1000):
result = node2.query("SELECT result_part_name FROM system.replicated_fetches").strip().split()
background_fetches_metric.append(int(node2.query("select value from system.metrics where metric = 'BackgroundFetchesPoolTask'").strip()))
if not result:
if len(fetched_parts) == 6:
break
time.sleep(0.1)
else:
for part in result:
fetched_parts.add(part)
fetches_result.append(result)
print(fetches_result[-1])
print(background_fetches_metric[-1])
time.sleep(0.1)
for concurrently_fetching_parts in fetches_result:
if len(concurrently_fetching_parts) > DEFAULT_MAX_THREADS_FOR_FETCH:
assert False, "Found more than {} concurrently fetching parts: {}".format(DEFAULT_MAX_THREADS_FOR_FETCH, ', '.join(concurrently_fetching_parts))
assert max(len(parts) for parts in fetches_result) == DEFAULT_MAX_THREADS_FOR_FETCH, "Strange, but we don't utilize max concurrent threads for fetches"
assert max(background_fetches_metric) == DEFAULT_MAX_THREADS_FOR_FETCH, "Just checking that the metric is consistent with the table"

View File

@ -213,6 +213,14 @@ def test_ttl_double_delete_rule_returns_error(started_cluster):
assert False
def optimize_with_retry(node, table_name, retry=20):
for i in range(retry):
try:
node.query("OPTIMIZE TABLE {name} FINAL SETTINGS optimize_throw_if_noop = 1".format(name=table_name), settings={"optimize_throw_if_noop": "1"})
break
except Exception:
time.sleep(0.5)
@pytest.mark.parametrize("name,engine", [
("test_ttl_alter_delete", "MergeTree()"),
("test_replicated_ttl_alter_delete", "ReplicatedMergeTree('/clickhouse/test_replicated_ttl_alter_delete', '1')"),
@ -238,14 +246,6 @@ limitations under the License."""
"""
drop_table([node1], name)
def optimize_with_retry(retry=20):
for i in range(retry):
try:
node1.query("OPTIMIZE TABLE {name} FINAL".format(name=name), settings={"optimize_throw_if_noop": "1"})
break
except:
time.sleep(0.5)
node1.query(
"""
CREATE TABLE {name} (
@ -267,7 +267,7 @@ limitations under the License."""
time.sleep(1)
optimize_with_retry()
optimize_with_retry(node1, name)
r = node1.query("SELECT s1, b1 FROM {name} ORDER BY b1, s1".format(name=name)).splitlines()
assert r == ["\t1", "hello2\t2"]
@ -277,7 +277,55 @@ limitations under the License."""
time.sleep(1)
optimize_with_retry()
optimize_with_retry(node1, name)
r = node1.query("SELECT s1, b1 FROM {name} ORDER BY b1, s1".format(name=name)).splitlines()
assert r == ["\t0", "\t0", "hello2\t2"]
def test_ttl_empty_parts(started_cluster):
drop_table([node1, node2], "test_ttl_empty_parts")
for node in [node1, node2]:
node.query(
'''
CREATE TABLE test_ttl_empty_parts(date Date, id UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl', '{replica}')
ORDER BY id
SETTINGS max_bytes_to_merge_at_min_space_in_pool = 1, max_bytes_to_merge_at_max_space_in_pool = 1,
cleanup_delay_period = 1, cleanup_delay_period_random_add = 0
'''.format(replica=node.name))
for i in range(1, 7):
node1.query("INSERT INTO test_ttl_empty_parts SELECT '2{}00-01-0{}', number FROM numbers(1000)".format(i % 2, i))
assert node1.query("SELECT count() FROM test_ttl_empty_parts") == "6000\n"
assert node1.query("SELECT name FROM system.parts WHERE table = 'test_ttl_empty_parts' AND active ORDER BY name") == \
"all_0_0_0\nall_1_1_0\nall_2_2_0\nall_3_3_0\nall_4_4_0\nall_5_5_0\n"
node1.query("ALTER TABLE test_ttl_empty_parts MODIFY TTL date")
assert node1.query("SELECT count() FROM test_ttl_empty_parts") == "3000\n"
time.sleep(3) # Wait for cleanup thread
assert node1.query("SELECT name FROM system.parts WHERE table = 'test_ttl_empty_parts' AND active ORDER BY name") == \
"all_0_0_0_6\nall_2_2_0_6\nall_4_4_0_6\n"
for node in [node1, node2]:
node.query("ALTER TABLE test_ttl_empty_parts MODIFY SETTING max_bytes_to_merge_at_min_space_in_pool = 1000000000")
node.query("ALTER TABLE test_ttl_empty_parts MODIFY SETTING max_bytes_to_merge_at_max_space_in_pool = 1000000000")
optimize_with_retry(node1, 'test_ttl_empty_parts')
assert node1.query("SELECT name FROM system.parts WHERE table = 'test_ttl_empty_parts' AND active ORDER BY name") == "all_0_4_1_6\n"
# Check that after removing empty parts mutations and merges works
node1.query("INSERT INTO test_ttl_empty_parts SELECT '2100-01-20', number FROM numbers(1000)")
node1.query("ALTER TABLE test_ttl_empty_parts DELETE WHERE id % 2 = 0 SETTINGS mutations_sync = 2")
assert node1.query("SELECT count() FROM test_ttl_empty_parts") == "2000\n"
optimize_with_retry(node1, 'test_ttl_empty_parts')
assert node1.query("SELECT name FROM system.parts WHERE table = 'test_ttl_empty_parts' AND active ORDER BY name") == "all_0_7_2_8\n"
node2.query('SYSTEM SYNC REPLICA test_ttl_empty_parts', timeout=20)
error_msg = '<Error> default.test_ttl_empty_parts (ReplicatedMergeTreeCleanupThread)'
assert not node1.contains_in_log(error_msg)
assert not node2.contains_in_log(error_msg)

View File

@ -16,7 +16,9 @@ done
function thread {
for x in {0..99}; do
$CLICKHOUSE_CLIENT --query "INSERT INTO r$1 SELECT $x % $NUM_REPLICAS = $1 ? $x - 1 : $x" # Replace some records as duplicates so they will be written by other replicas
# sometimes we may try to commit an obsolete part if fetches are fast enough,
# so suppress warning messages like "Tried to commit obsolete part ... covered by ..."
$CLICKHOUSE_CLIENT --query "INSERT INTO r$1 SELECT $x % $NUM_REPLICAS = $1 ? $x - 1 : $x" 2>/dev/null # Replace some records as duplicates so they will be written by other replicas
done
}

View File

@ -0,0 +1,10 @@
1
1
1
1
1 1 1 1 1
1
1
1
1
1

View File

@ -0,0 +1,47 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (key String, value UInt32) Engine=EmbeddedRocksDB; -- { serverError 36 }
CREATE TABLE test (key String, value UInt32) Engine=EmbeddedRocksDB PRIMARY KEY(key2); -- { serverError 47 }
CREATE TABLE test (key String, value UInt32) Engine=EmbeddedRocksDB PRIMARY KEY(key, value); -- { serverError 36 }
CREATE TABLE test (key Tuple(String, UInt32), value UInt64) Engine=EmbeddedRocksDB PRIMARY KEY(key);
DROP TABLE IF EXISTS test;
CREATE TABLE test (key String, value UInt32) Engine=EmbeddedRocksDB PRIMARY KEY(key);
INSERT INTO test SELECT '1_1', number FROM numbers(10000);
SELECT COUNT(1) == 1 FROM test;
INSERT INTO test SELECT concat(toString(number), '_1'), number FROM numbers(10000);
SELECT COUNT(1) == 10000 FROM test;
SELECT uniqExact(key) == 32 FROM (SELECT * FROM test LIMIT 32 SETTINGS max_block_size = 1);
SELECT SUM(value) == 1 + 99 + 900 FROM test WHERE key IN ('1_1', '99_1', '900_1');
DROP TABLE IF EXISTS test;
DROP TABLE IF EXISTS test_memory;
CREATE TABLE test (k UInt32, value UInt64, dummy Tuple(UInt32, Float64), bm AggregateFunction(groupBitmap, UInt64)) Engine=EmbeddedRocksDB PRIMARY KEY(k);
CREATE TABLE test_memory AS test Engine = Memory;
INSERT INTO test SELECT number % 77 AS k, SUM(number) AS value, (1, 1.2), bitmapBuild(groupArray(number)) FROM numbers(10000000) group by k;
INSERT INTO test_memory SELECT number % 77 AS k, SUM(number) AS value, (1, 1.2), bitmapBuild(groupArray(number)) FROM numbers(10000000) group by k;
SELECT A.a = B.a, A.b = B.b, A.c = B.c, A.d = B.d, A.e = B.e FROM ( SELECT 0 AS a, groupBitmapMerge(bm) AS b , SUM(k) AS c, SUM(value) AS d, SUM(dummy.1) AS e FROM test) A ANY LEFT JOIN (SELECT 0 AS a, groupBitmapMerge(bm) AS b , SUM(k) AS c, SUM(value) AS d, SUM(dummy.1) AS e FROM test_memory) B USING a ORDER BY a;
CREATE TEMPORARY TABLE keys AS SELECT * FROM numbers(1000);
SET max_rows_to_read = 2;
SELECT dummy == (1,1.2) FROM test WHERE k IN (1, 3) OR k IN (1) OR k IN (3, 1) OR k IN [1] OR k IN [1, 3] ;
SELECT k == 4 FROM test WHERE k = 4 OR k IN [4] OR k in (4, 10000001, 10000002) AND value > 0;
SELECT k == 4 FROM test WHERE k IN (SELECT toUInt32(number) FROM keys WHERE number = 4);
SELECT k, value FROM test WHERE k = 0 OR value > 0; -- { serverError 158 }
SELECT k, value FROM test WHERE k = 0 AND k IN (1, 3) OR k > 8; -- { serverError 158 }
TRUNCATE TABLE test;
SELECT 0 == COUNT(1) FROM test;
DROP TABLE IF EXISTS test;
DROP TABLE IF EXISTS test_memory;

View File

@ -0,0 +1,10 @@
1 a
2 3
3 3
4 a
5 b
6 a
7 3
8 3
9 a
10 b

View File

@ -0,0 +1,23 @@
DROP TABLE IF EXISTS enum_as_num;
CREATE TABLE enum_as_num (
Id Int32,
Value Enum('a' = 1, '3' = 2, 'b' = 3)
) ENGINE=Memory();
INSERT INTO enum_as_num FORMAT TSV 1 1
INSERT INTO enum_as_num FORMAT TSV 2 2
INSERT INTO enum_as_num FORMAT TSV 3 3
INSERT INTO enum_as_num FORMAT TSV 4 a
INSERT INTO enum_as_num FORMAT TSV 5 b
INSERT INTO enum_as_num FORMAT CSV 6,1
INSERT INTO enum_as_num FORMAT CSV 7,2
INSERT INTO enum_as_num FORMAT CSV 8,3
INSERT INTO enum_as_num FORMAT CSV 9,a
INSERT INTO enum_as_num FORMAT CSV 10,b
SELECT * FROM enum_as_num ORDER BY Id;
DROP TABLE IF EXISTS enum_as_num;

View File

@ -0,0 +1,8 @@
0
0
0
0
-1
-1
-1
\N

View File

@ -0,0 +1,91 @@
SELECT MAX(aggr)
FROM
(
SELECT MAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT MAX(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT MAx(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT max(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT max(-1) AS aggr
FROM system.one
WHERE not 1
);
SET aggregate_functions_null_for_empty=1;
SELECT MAX(aggr)
FROM
(
SELECT MAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT MAX(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT MAx(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT max(-1) AS aggr
FROM system.one
WHERE 1
);
SELECT MaX(aggr)
FROM
(
SELECT mAX(-1) AS aggr
FROM system.one
WHERE NOT 1
UNION ALL
SELECT max(-1) AS aggr
FROM system.one
WHERE not 1
);

View File

@ -0,0 +1,5 @@
SELECT Sum(1);
SELECT SumOrNull(1);
SELECT SUMOrNull(1);
SELECT SUMOrNullIf(1, 1);
SELECT SUMOrNullIf(1, 0);

View File

@ -0,0 +1,3 @@
[]
1
server is still alive

View File

@ -0,0 +1,23 @@
DROP TABLE IF EXISTS tsgs_local;
DROP TABLE IF EXISTS tsgs;
CREATE TABLE tsgs_local ENGINE = MergeTree ORDER BY tuple() AS
SELECT
toUInt64(13820745146630357293) AS a,
toInt64(1604422500000000000) AS b,
toFloat64(0) AS c
FROM numbers(100);
-- the issue (https://github.com/ClickHouse/ClickHouse/issues/16862) happens during serialization of the state
-- so it happens only when Distributed tables are used or with the -State modifier.
CREATE TABLE tsgs AS tsgs_local ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), tsgs_local);
SELECT timeSeriesGroupSum(a, b, c) FROM tsgs;
SELECT count() FROM ( SELECT timeSeriesGroupSumState(a, b, c) as x FROM tsgs_local) WHERE NOT ignore(*);
SELECT 'server is still alive';
DROP TABLE tsgs_local;
DROP TABLE tsgs;

View File

@ -0,0 +1,4 @@
1000
2
500
1

View File

@ -0,0 +1,20 @@
DROP TABLE IF EXISTS ttl_empty_parts;
CREATE TABLE ttl_empty_parts (id UInt32, d Date) ENGINE = MergeTree ORDER BY tuple() PARTITION BY id;
INSERT INTO ttl_empty_parts SELECT 0, toDate('2005-01-01') + number from numbers(500);
INSERT INTO ttl_empty_parts SELECT 1, toDate('2050-01-01') + number from numbers(500);
SELECT count() FROM ttl_empty_parts;
SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active;
ALTER TABLE ttl_empty_parts MODIFY TTL d;
-- To make sure that the task which clears outdated parts has executed.
DETACH TABLE ttl_empty_parts;
ATTACH TABLE ttl_empty_parts;
SELECT count() FROM ttl_empty_parts;
SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active;
DROP TABLE ttl_empty_parts;

View File

@ -0,0 +1,15 @@
execute: default
"foo"
1
execute: --stage fetch_columns
"dummy"
0
execute: --stage with_mergeable_state
"1"
1
execute: --stage with_mergeable_state_after_aggregation
"1"
1
execute: --stage complete
"foo"
1

View File

@ -0,0 +1,20 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
function execute_query()
{
if [ $# -eq 0 ]; then
echo "execute: default"
else
echo "execute: $*"
fi
${CLICKHOUSE_CLIENT} "$@" --format CSVWithNames -q "SELECT 1 AS foo"
}
execute_query # default -- complete
execute_query --stage fetch_columns
execute_query --stage with_mergeable_state
execute_query --stage with_mergeable_state_after_aggregation
execute_query --stage complete

View File

@ -0,0 +1,26 @@
DROP DATABASE IF EXISTS database_for_dict;
CREATE DATABASE database_for_dict;
CREATE TABLE database_for_dict.table_for_dict (k UInt64, v UInt8) ENGINE = MergeTree ORDER BY k;
DROP DICTIONARY IF EXISTS database_for_dict.dict1;
CREATE DICTIONARY database_for_dict.dict1 (k UInt64 DEFAULT 0, v UInt8 DEFAULT 1) PRIMARY KEY k
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'table_for_dict' PASSWORD '' DB 'database_for_dict'))
LIFETIME(MIN 1 MAX 10)
LAYOUT(FLAT());
DETACH TABLE database_for_dict.dict1; -- { serverError 520 }
DETACH DICTIONARY database_for_dict.dict1;
ATTACH TABLE database_for_dict.dict1; -- { serverError 80 }
ATTACH DICTIONARY database_for_dict.dict1;
DROP DICTIONARY database_for_dict.dict1;
DROP TABLE database_for_dict.table_for_dict;
DROP DATABASE IF EXISTS database_for_dict;

Some files were not shown because too many files have changed in this diff