Merge branch 'master' of github.com:ClickHouse/ClickHouse into docs/CLICKHOUSEDOCS-658-mv-block-settings

This commit is contained in:
Sergei Shtykov 2020-06-18 22:24:17 +03:00
commit 13f8c588de
1239 changed files with 25567 additions and 14987 deletions

8
.gitmodules vendored
View File

@ -157,6 +157,14 @@
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/openldap/openldap.git
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse-Extras/cpp-driver.git
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse-Extras/libuv.git
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git

View File

@ -327,20 +327,16 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE
include (GNUInstallDirs)
include (cmake/contrib_finder.cmake)
include (cmake/lib_name.cmake)
find_contrib_lib(double-conversion) # Must be before parquet
include (cmake/find/ssl.cmake)
include (cmake/find/ldap.cmake) # after ssl
include (cmake/find/icu.cmake)
include (cmake/find/boost.cmake)
include (cmake/find/zlib.cmake)
include (cmake/find/zstd.cmake)
include (cmake/find/ltdl.cmake) # for odbc
include (cmake/find/termcap.cmake)
# openssl, zlib before poco
include (cmake/find/lz4.cmake)
include (cmake/find/xxhash.cmake)
include (cmake/find/sparsehash.cmake)
include (cmake/find/re2.cmake)
include (cmake/find/libgsasl.cmake)
@ -358,17 +354,16 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/s3.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/hyperscan.cmake)
include (cmake/find/simdjson.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/orc.cmake)
include (cmake/find/avro.cmake)
include (cmake/find/msgpack.cmake)
include (cmake/find/cassandra.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)
find_contrib_lib(btrie)
if (ENABLE_TESTS)

View File

@ -10,10 +10,12 @@ ClickHouse is an open-source column-oriented database management system that all
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.

View File

@ -16,6 +16,7 @@ set (SRCS
shift10.cpp
sleep.cpp
terminalColors.cpp
errnoToString.cpp
)
if (ENABLE_REPLXX)
@ -43,10 +44,6 @@ endif()
target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
if (NOT USE_INTERNAL_BOOST_LIBRARY)
target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
endif ()
# Allow explicit fallback to readline
if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
message (STATUS "Attempt to fallback to readline explicitly")
@ -72,7 +69,8 @@ endif ()
target_link_libraries (common
PUBLIC
${CITYHASH_LIBRARIES}
${Boost_SYSTEM_LIBRARY}
boost::headers_only
boost::system
FastMemcpy
Poco::Net
Poco::Net::SSL

View File

@ -1,9 +1,11 @@
#include <common/ReplxxLineReader.h>
#include <common/errnoToString.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <functional>
#include <sys/file.h>
namespace
{
@ -17,14 +19,41 @@ void trim(String & s)
}
ReplxxLineReader::ReplxxLineReader(
const Suggest & suggest, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_))
const Suggest & suggest,
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
Patterns delimiters_,
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
{
using namespace std::placeholders;
using Replxx = replxx::Replxx;
if (!history_file_path.empty())
rx.history_load(history_file_path);
{
history_file_fd = open(history_file_path.c_str(), O_RDWR);
if (history_file_fd < 0)
{
rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str());
}
else
{
if (flock(history_file_fd, LOCK_SH))
{
rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());
}
else
{
rx.history_load(history_file_path);
if (flock(history_file_fd, LOCK_UN))
{
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
}
}
}
}
auto callback = [&suggest] (const String & context, size_t context_size)
{
@ -36,6 +65,9 @@ ReplxxLineReader::ReplxxLineReader(
rx.set_complete_on_empty(false);
rx.set_word_break_characters(word_break_characters);
if (highlighter)
rx.set_highlighter_callback(highlighter);
/// By default C-p/C-n binded to COMPLETE_NEXT/COMPLETE_PREV,
/// bind C-p/C-n to history-previous/history-next like readline.
rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
@ -49,8 +81,8 @@ ReplxxLineReader::ReplxxLineReader(
ReplxxLineReader::~ReplxxLineReader()
{
if (!history_file_path.empty())
rx.history_save(history_file_path);
if (close(history_file_fd))
rx.print("Close of history file failed: %s\n", strerror(errno));
}
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -68,7 +100,20 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
void ReplxxLineReader::addToHistory(const String & line)
{
// locking history file to prevent from inconsistent concurrent changes
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", strerror(errno));
else
locked = true;
rx.history_add(line);
// flush changes to the disk
rx.history_save(history_file_path);
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -4,10 +4,17 @@
#include <replxx.hxx>
class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_);
ReplxxLineReader(
const Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
Patterns delimiters_,
replxx::Replxx::highlighter_callback_t highlighter_);
~ReplxxLineReader() override;
void enableBracketedPaste() override;
@ -17,4 +24,8 @@ private:
void addToHistory(const String & line) override;
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
// used to call flock() to synchronize multiple clients using same history file
int history_file_fd = -1;
};

View File

@ -0,0 +1,29 @@
#include "errnoToString.h"
#include <fmt/format.h>
std::string errnoToString(int code, int the_errno)
{
const size_t buf_size = 128;
char buf[buf_size];
#ifndef _GNU_SOURCE
int rc = strerror_r(the_errno, buf, buf_size);
#ifdef __APPLE__
if (rc != 0 && rc != EINVAL)
#else
if (rc != 0)
#endif
{
std::string tmp = std::to_string(code);
const char * code_str = tmp.c_str();
const char * unknown_message = "Unknown error ";
strcpy(buf, unknown_message);
strcpy(buf + strlen(unknown_message), code_str);
}
return fmt::format("errno: {}, strerror: {}", the_errno, buf);
#else
(void)code;
return fmt::format("errno: {}, strerror: {}", the_errno, strerror_r(the_errno, buf, sizeof(buf)));
#endif
}

View File

@ -0,0 +1,6 @@
#pragma once
#include <cerrno>
#include <string>
std::string errnoToString(int code, int the_errno = errno);

View File

@ -1,6 +1,8 @@
#pragma once
#include <functional>
#include <type_traits>
#include <utility>
template <class T, class Tag>
struct StrongTypedef

View File

@ -47,6 +47,7 @@ SRCS(
shift10.cpp
sleep.cpp
terminalColors.cpp
errnoToString.cpp
)
END()

View File

@ -32,10 +32,18 @@ else ()
endif ()
endif ()
target_link_libraries(mysqlxx PUBLIC common PRIVATE ${MYSQLCLIENT_LIBRARIES} PUBLIC ${Boost_SYSTEM_LIBRARY} PRIVATE ${ZLIB_LIBRARIES})
target_link_libraries (mysqlxx
PUBLIC
common
PRIVATE
${MYSQLCLIENT_LIBRARIES}
${ZLIB_LIBRARIES}
)
if(OPENSSL_LIBRARIES)
target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES})
endif()
target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES})
if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)

View File

@ -1,44 +0,0 @@
# - Try to find metrohash headers and libraries.
#
# Usage of this module as follows:
#
# find_package(metrohash)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# METROHASH_ROOT_DIR Set this variable to the root installation of
# metrohash if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# METROHASH_FOUND System has metrohash libs/headers
# METROHASH_LIBRARIES The metrohash library/libraries
# METROHASH_INCLUDE_DIR The location of metrohash headers
find_path(METROHASH_ROOT_DIR
NAMES include/metrohash.h
)
find_library(METROHASH_LIBRARIES
NAMES metrohash
PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS}
)
find_path(METROHASH_INCLUDE_DIR
NAMES metrohash.h
PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash ${METROHASH_INCLUDE_PATHS}
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(metrohash DEFAULT_MSG
METROHASH_LIBRARIES
METROHASH_INCLUDE_DIR
)
mark_as_advanced(
METROHASH_ROOT_DIR
METROHASH_LIBRARIES
METROHASH_INCLUDE_DIR
)

View File

@ -1,52 +0,0 @@
option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED})
# Test random file existing in all package variants
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp")
if(USE_INTERNAL_BOOST_LIBRARY)
message(WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set (USE_INTERNAL_BOOST_LIBRARY 0)
set (MISSING_INTERNAL_BOOST_LIBRARY 1)
endif ()
if (NOT USE_INTERNAL_BOOST_LIBRARY)
set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES})
set (BOOST_ROOT "/usr/local")
find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex)
# incomplete, no include search, who use it?
if (NOT Boost_FOUND)
# # Try to find manually.
# set (BOOST_PATHS "")
# find_library (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options PATHS ${BOOST_PATHS})
# find_library (Boost_SYSTEM_LIBRARY boost_system PATHS ${BOOST_PATHS})
# find_library (Boost_FILESYSTEM_LIBRARY boost_filesystem PATHS ${BOOST_PATHS})
# maybe found but incorrect version.
set (Boost_INCLUDE_DIRS "")
set (Boost_SYSTEM_LIBRARY "")
endif ()
endif ()
if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
set (USE_INTERNAL_BOOST_LIBRARY 1)
set (Boost_SYSTEM_LIBRARY boost_system_internal)
set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY})
set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal)
set (Boost_REGEX_LIBRARY boost_regex_internal)
set (Boost_INCLUDE_DIRS)
set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost")
# For boost from github:
file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include")
list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
# numeric has additional level
file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/numeric/*/include")
list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
# For packaged version:
list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost")
endif ()
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}")

View File

@ -0,0 +1,26 @@
option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES})
if (ENABLE_CASSANDRA)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv")
message (ERROR "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive")
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive")
else()
set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv")
set (CASSANDRA_INCLUDE_DIR
"${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/")
if (USE_STATIC_LIBRARIES)
set (LIBUV_LIBRARY uv_a)
set (CASSANDRA_LIBRARY cassandra_static)
else()
set (LIBUV_LIBRARY uv)
set (CASSANDRA_LIBRARY cassandra)
endif()
set (USE_CASSANDRA 1)
set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
endif()
endif()
message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}")
message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}")

View File

@ -1,33 +0,0 @@
if (HAVE_SSSE3)
option (ENABLE_HYPERSCAN "Enable hyperscan" ${ENABLE_LIBRARIES})
endif ()
if (ENABLE_HYPERSCAN)
option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
endif ()
set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
endif ()
if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
find_library (HYPERSCAN_LIBRARY hs)
find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
endif ()
if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
set (USE_HYPERSCAN 1)
elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
set (HYPERSCAN_LIBRARY hs)
set (USE_HYPERSCAN 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
endif()
message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")
endif ()

View File

@ -1,23 +0,0 @@
option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h")
if (USE_INTERNAL_LZ4_LIBRARY)
message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_LZ4_LIBRARY 0)
endif ()
set (MISSING_INTERNAL_LZ4_LIBRARY 1)
endif ()
if (NOT USE_INTERNAL_LZ4_LIBRARY)
find_library (LZ4_LIBRARY lz4)
find_path (LZ4_INCLUDE_DIR NAMES lz4.h PATHS ${LZ4_INCLUDE_PATHS})
endif ()
if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY)
set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
set (USE_INTERNAL_LZ4_LIBRARY 1)
set (LZ4_LIBRARY lz4)
endif ()
message (STATUS "Using lz4: ${LZ4_INCLUDE_DIR} : ${LZ4_LIBRARY}")

View File

@ -63,7 +63,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY})
list(APPEND PARQUET_LIBRARY boost::regex)
endif()
set(THRIFT_LIBRARY thrift)
endif()

View File

@ -1,22 +0,0 @@
option (USE_INTERNAL_XXHASH_LIBRARY "Set to FALSE to use system xxHash library instead of bundled" ${NOT_UNBUNDLED})
if (USE_INTERNAL_XXHASH_LIBRARY AND NOT USE_INTERNAL_LZ4_LIBRARY)
message (WARNING "can not use internal xxhash without internal lz4")
set (USE_INTERNAL_XXHASH_LIBRARY 0)
endif ()
if (USE_INTERNAL_XXHASH_LIBRARY)
set (XXHASH_LIBRARY lz4)
set (XXHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
else ()
find_library (XXHASH_LIBRARY xxhash)
find_path (XXHASH_INCLUDE_DIR NAMES xxhash.h PATHS ${XXHASH_INCLUDE_PATHS})
endif ()
if (XXHASH_LIBRARY AND XXHASH_INCLUDE_DIR)
set (USE_XXHASH 1)
else ()
set (USE_XXHASH 0)
endif ()
message (STATUS "Using xxhash=${USE_XXHASH}: ${XXHASH_INCLUDE_DIR} : ${XXHASH_LIBRARY}")

View File

@ -1,4 +0,0 @@
set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src)
set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion)
set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)

View File

@ -21,11 +21,6 @@ if (TARGET double-conversion)
list(APPEND dirs ${dirs1})
endif ()
if (TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY})
get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
list(APPEND dirs ${dirs1})
endif ()
list(REMOVE_DUPLICATES dirs)
file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
foreach (dir ${dirs})

View File

@ -16,13 +16,18 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing-sumbur)
add_subdirectory (consistent-hashing)
add_subdirectory (croaring)
add_subdirectory (FastMemcpy)
add_subdirectory (hyperscan-cmake)
add_subdirectory (jemalloc-cmake)
add_subdirectory (libcpuid-cmake)
add_subdirectory (libdivide)
add_subdirectory (libmetrohash)
add_subdirectory (lz4-cmake)
add_subdirectory (murmurhash)
add_subdirectory (replxx-cmake)
add_subdirectory (ryu-cmake)
@ -33,14 +38,6 @@ add_subdirectory (poco-cmake)
# TODO: refactor the contrib libraries below this comment.
if (USE_INTERNAL_BOOST_LIBRARY)
add_subdirectory (boost-cmake)
endif ()
if (USE_INTERNAL_LZ4_LIBRARY)
add_subdirectory (lz4-cmake)
endif ()
if (USE_INTERNAL_ZSTD_LIBRARY)
add_subdirectory (zstd-cmake)
endif ()
@ -63,10 +60,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
add_subdirectory (libfarmhash)
endif ()
if (USE_INTERNAL_METROHASH_LIBRARY)
add_subdirectory (libmetrohash)
endif ()
if (USE_INTERNAL_BTRIE_LIBRARY)
add_subdirectory (libbtrie)
endif ()
@ -294,18 +287,6 @@ if (USE_BASE64)
add_subdirectory (base64-cmake)
endif()
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
# The library is large - avoid bloat.
if (USE_STATIC_LIBRARIES)
add_subdirectory (hyperscan)
target_compile_options (hs PRIVATE -g0)
else ()
set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
add_subdirectory (hyperscan)
target_compile_options (hs_shared PRIVATE -g0)
endif ()
endif()
if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake)
endif()
@ -314,4 +295,10 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
if (USE_CASSANDRA)
add_subdirectory (libuv)
add_subdirectory (cassandra)
endif()
add_subdirectory (fmtlib-cmake)

View File

@ -47,7 +47,8 @@ set(thriftcpp_threads_SOURCES
)
add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src)
target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only)
# === orc
@ -146,7 +147,7 @@ add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
add_dependencies(metadata_fbs flatc)
# arrow-cmake cmake file calling orc cmake subroutine which detects certain compiler features.
# Apple Clang compiler failed to compile this code without specifying c++11 standard.
# Apple Clang compiler failed to compile this code without specifying c++11 standard.
# As result these compiler features detected as absent. In result it failed to compile orc itself.
# In orc makefile there is code that sets flags, but arrow-cmake ignores these flags.
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
@ -286,10 +287,6 @@ set(ARROW_SRCS ${ARROW_SRCS}
${LIBRARY_DIR}/compute/kernels/util_internal.cc
)
if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
set(ARROW_WITH_LZ4 1)
endif ()
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
set(ARROW_WITH_SNAPPY 1)
endif ()
@ -302,10 +299,8 @@ if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY)
set(ARROW_WITH_ZSTD 1)
endif ()
if (ARROW_WITH_LZ4)
add_definitions(-DARROW_WITH_LZ4)
SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
endif ()
add_definitions(-DARROW_WITH_LZ4)
SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
if (ARROW_WITH_SNAPPY)
add_definitions(-DARROW_WITH_SNAPPY)
@ -328,18 +323,15 @@ add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
# Arrow dependencies
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)
target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
if (USE_INTERNAL_PROTOBUF_LIBRARY)
add_dependencies(${ARROW_LIBRARY} protoc)
endif ()
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})
if (ARROW_WITH_LZ4)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY})
endif ()
target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4)
if (ARROW_WITH_SNAPPY)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY})
endif ()
@ -396,8 +388,7 @@ list(APPEND PARQUET_SRCS
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY})
target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex)
if (SANITIZE STREQUAL "undefined")
target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)

View File

@ -45,13 +45,12 @@ set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_V
target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR})
target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS})
target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY})
target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams)
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (avrocpp ${SNAPPY_LIBRARY})
target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY})
endif ()
if (COMPILER_GCC)
@ -67,4 +66,4 @@ ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory ${AVROCPP_ROOT_DIR}/include
COMMAND ${CMAKE_COMMAND} -E create_symlink ${AVROCPP_ROOT_DIR}/api ${AVROCPP_ROOT_DIR}/include/avro
)
add_dependencies(avrocpp avro_symlink_headers)
add_dependencies(avrocpp avro_symlink_headers)

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit f7d9ce39f41323300044567be007c233338bb94a
Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56

View File

@ -1,45 +1,133 @@
# Supported contrib/boost source variants:
# 1. Default - Minimized vrsion from release archive : https://github.com/ClickHouse-Extras/boost
# 2. Release archive unpacked to contrib/boost
# 3. Full boost https://github.com/boostorg/boost
option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ${NOT_UNBUNDLED})
# if boostorg/boost connected as submodule: Update all boost internal submodules to tag:
# git submodule foreach "git fetch --all && git checkout boost-1.66.0 || true"
if (USE_INTERNAL_BOOST_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
#
# Important boost patch: 094c18b
#
# filesystem
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
set (SRCS_FILESYSTEM
${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp
${LIBRARY_DIR}/libs/filesystem/src/operations.cpp
${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp
${LIBRARY_DIR}/libs/filesystem/src/path.cpp
${LIBRARY_DIR}/libs/filesystem/src/portability.cpp
${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp
${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp
${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp
)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
add_library (_boost_filesystem ${SRCS_FILESYSTEM})
add_library (boost::filesystem ALIAS _boost_filesystem)
target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
if(NOT MSVC)
add_definitions(-Wno-unused-variable -Wno-deprecated-declarations)
endif()
# headers-only
macro(add_boost_lib lib_name)
add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src)
add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources})
target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
endmacro()
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})
add_boost_lib(system)
# iostreams
add_boost_lib(program_options)
set (SRCS_IOSTREAMS
${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp
${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp
${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp
${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp
)
add_boost_lib(filesystem)
target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal)
add_library (_boost_iostreams ${SRCS_IOSTREAMS})
add_library (boost::iostreams ALIAS _boost_iostreams)
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
target_link_libraries (_boost_iostreams PRIVATE zlib)
#add_boost_lib(random)
# program_options
if (USE_INTERNAL_PARQUET_LIBRARY)
add_boost_lib(regex)
endif()
set (SRCS_PROGRAM_OPTIONS
${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp
${LIBRARY_DIR}/libs/program_options/src/config_file.cpp
${LIBRARY_DIR}/libs/program_options/src/convert.cpp
${LIBRARY_DIR}/libs/program_options/src/options_description.cpp
${LIBRARY_DIR}/libs/program_options/src/parsers.cpp
${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp
${LIBRARY_DIR}/libs/program_options/src/split.cpp
${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp
${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp
${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp
${LIBRARY_DIR}/libs/program_options/src/winmain.cpp
)
if (USE_INTERNAL_AVRO_LIBRARY)
add_boost_lib(iostreams)
target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES})
target_include_directories(boost_iostreams_internal SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
endif()
add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS})
add_library (boost::program_options ALIAS _boost_program_options)
target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
# regex
set (SRCS_REGEX
${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/cregex.cpp
${LIBRARY_DIR}/libs/regex/src/fileiter.cpp
${LIBRARY_DIR}/libs/regex/src/icu.cpp
${LIBRARY_DIR}/libs/regex/src/instances.cpp
${LIBRARY_DIR}/libs/regex/src/internals.hpp
${LIBRARY_DIR}/libs/regex/src/posix_api.cpp
${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp
${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp
${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp
${LIBRARY_DIR}/libs/regex/src/regex.cpp
${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp
${LIBRARY_DIR}/libs/regex/src/usinstances.cpp
${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp
${LIBRARY_DIR}/libs/regex/src/winstances.cpp
)
add_library (_boost_regex ${SRCS_REGEX})
add_library (boost::regex ALIAS _boost_regex)
target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR})
# system
set (SRCS_SYSTEM
${LIBRARY_DIR}/libs/system/src/error_code.cpp
)
add_library (_boost_system ${SRCS_SYSTEM})
add_library (boost::system ALIAS _boost_system)
target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
else ()
# 1.70 like in contrib/boost
# 1.67 on CI
set(BOOST_VERSION 1.67)
find_package(Boost ${BOOST_VERSION} COMPONENTS
system
filesystem
iostreams
program_options
regex
REQUIRED)
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR})
add_library (_boost_filesystem INTERFACE)
add_library (_boost_iostreams INTERFACE)
add_library (_boost_program_options INTERFACE)
add_library (_boost_regex INTERFACE)
add_library (_boost_system INTERFACE)
target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY})
target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
add_library (boost::filesystem ALIAS _boost_filesystem)
add_library (boost::iostreams ALIAS _boost_iostreams)
add_library (boost::program_options ALIAS _boost_program_options)
add_library (boost::regex ALIAS _boost_regex)
add_library (boost::system ALIAS _boost_system)
endif ()

1
contrib/cassandra vendored Submodule

@ -0,0 +1 @@
Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509

View File

@ -1,31 +1,33 @@
set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)
set(SRCS
${CPPKAFKA_DIR}/src/configuration.cpp
${CPPKAFKA_DIR}/src/topic_configuration.cpp
${CPPKAFKA_DIR}/src/configuration_option.cpp
${CPPKAFKA_DIR}/src/exceptions.cpp
${CPPKAFKA_DIR}/src/topic.cpp
${CPPKAFKA_DIR}/src/buffer.cpp
${CPPKAFKA_DIR}/src/queue.cpp
${CPPKAFKA_DIR}/src/message.cpp
${CPPKAFKA_DIR}/src/message_timestamp.cpp
${CPPKAFKA_DIR}/src/message_internal.cpp
${CPPKAFKA_DIR}/src/topic_partition.cpp
${CPPKAFKA_DIR}/src/topic_partition_list.cpp
${CPPKAFKA_DIR}/src/metadata.cpp
${CPPKAFKA_DIR}/src/group_information.cpp
${CPPKAFKA_DIR}/src/error.cpp
${CPPKAFKA_DIR}/src/event.cpp
${CPPKAFKA_DIR}/src/kafka_handle_base.cpp
${CPPKAFKA_DIR}/src/producer.cpp
${CPPKAFKA_DIR}/src/consumer.cpp
${LIBRARY_DIR}/src/buffer.cpp
${LIBRARY_DIR}/src/configuration_option.cpp
${LIBRARY_DIR}/src/configuration.cpp
${LIBRARY_DIR}/src/consumer.cpp
${LIBRARY_DIR}/src/error.cpp
${LIBRARY_DIR}/src/event.cpp
${LIBRARY_DIR}/src/exceptions.cpp
${LIBRARY_DIR}/src/group_information.cpp
${LIBRARY_DIR}/src/kafka_handle_base.cpp
${LIBRARY_DIR}/src/message_internal.cpp
${LIBRARY_DIR}/src/message_timestamp.cpp
${LIBRARY_DIR}/src/message.cpp
${LIBRARY_DIR}/src/metadata.cpp
${LIBRARY_DIR}/src/producer.cpp
${LIBRARY_DIR}/src/queue.cpp
${LIBRARY_DIR}/src/topic_configuration.cpp
${LIBRARY_DIR}/src/topic_partition_list.cpp
${LIBRARY_DIR}/src/topic_partition.cpp
${LIBRARY_DIR}/src/topic.cpp
)
add_library(cppkafka ${SRCS})
target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY})
target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka)
target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include)
target_link_libraries(cppkafka
PRIVATE
${RDKAFKA_LIBRARY}
boost::headers_only
)
target_include_directories(cppkafka PRIVATE ${LIBRARY_DIR}/include/cppkafka)
target_include_directories(cppkafka SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)

2
contrib/hyperscan vendored

@ -1 +1 @@
Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70
Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531

View File

@ -0,0 +1,250 @@
option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES})
if (NOT HAVE_SSSE3)
set (ENABLE_HYPERSCAN OFF)
endif ()
if (ENABLE_HYPERSCAN)
option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan)
set (SRCS
${LIBRARY_DIR}/src/alloc.c
${LIBRARY_DIR}/src/compiler/asserts.cpp
${LIBRARY_DIR}/src/compiler/compiler.cpp
${LIBRARY_DIR}/src/compiler/error.cpp
${LIBRARY_DIR}/src/crc32.c
${LIBRARY_DIR}/src/database.c
${LIBRARY_DIR}/src/fdr/engine_description.cpp
${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp
${LIBRARY_DIR}/src/fdr/fdr_compile.cpp
${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp
${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp
${LIBRARY_DIR}/src/fdr/fdr.c
${LIBRARY_DIR}/src/fdr/flood_compile.cpp
${LIBRARY_DIR}/src/fdr/teddy_compile.cpp
${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp
${LIBRARY_DIR}/src/fdr/teddy.c
${LIBRARY_DIR}/src/grey.cpp
${LIBRARY_DIR}/src/hs_valid_platform.c
${LIBRARY_DIR}/src/hs_version.c
${LIBRARY_DIR}/src/hs.cpp
${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp
${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp
${LIBRARY_DIR}/src/hwlm/hwlm.c
${LIBRARY_DIR}/src/hwlm/noodle_build.cpp
${LIBRARY_DIR}/src/hwlm/noodle_engine.c
${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp
${LIBRARY_DIR}/src/nfa/accel.c
${LIBRARY_DIR}/src/nfa/accelcompile.cpp
${LIBRARY_DIR}/src/nfa/castle.c
${LIBRARY_DIR}/src/nfa/castlecompile.cpp
${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp
${LIBRARY_DIR}/src/nfa/dfa_min.cpp
${LIBRARY_DIR}/src/nfa/gough.c
${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp
${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp
${LIBRARY_DIR}/src/nfa/goughcompile.cpp
${LIBRARY_DIR}/src/nfa/lbr.c
${LIBRARY_DIR}/src/nfa/limex_64.c
${LIBRARY_DIR}/src/nfa/limex_accel.c
${LIBRARY_DIR}/src/nfa/limex_compile.cpp
${LIBRARY_DIR}/src/nfa/limex_native.c
${LIBRARY_DIR}/src/nfa/limex_simd128.c
${LIBRARY_DIR}/src/nfa/limex_simd256.c
${LIBRARY_DIR}/src/nfa/limex_simd384.c
${LIBRARY_DIR}/src/nfa/limex_simd512.c
${LIBRARY_DIR}/src/nfa/mcclellan.c
${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp
${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp
${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp
${LIBRARY_DIR}/src/nfa/mcsheng_data.c
${LIBRARY_DIR}/src/nfa/mcsheng.c
${LIBRARY_DIR}/src/nfa/mpv.c
${LIBRARY_DIR}/src/nfa/mpvcompile.cpp
${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c
${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp
${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp
${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp
${LIBRARY_DIR}/src/nfa/rdfa.cpp
${LIBRARY_DIR}/src/nfa/repeat.c
${LIBRARY_DIR}/src/nfa/repeatcompile.cpp
${LIBRARY_DIR}/src/nfa/sheng.c
${LIBRARY_DIR}/src/nfa/shengcompile.cpp
${LIBRARY_DIR}/src/nfa/shufti.c
${LIBRARY_DIR}/src/nfa/shufticompile.cpp
${LIBRARY_DIR}/src/nfa/tamarama.c
${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp
${LIBRARY_DIR}/src/nfa/truffle.c
${LIBRARY_DIR}/src/nfa/trufflecompile.cpp
${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp
${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp
${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp
${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp
${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp
${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp
${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp
${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp
${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp
${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp
${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp
${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp
${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp
${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp
${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp
${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp
${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp
${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp
${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp
${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp
${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp
${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp
${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp
${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp
${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_region.cpp
${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp
${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp
${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp
${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp
${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp
${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som.cpp
${LIBRARY_DIR}/src/nfagraph/ng_split.cpp
${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp
${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp
${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp
${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp
${LIBRARY_DIR}/src/nfagraph/ng_util.cpp
${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp
${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp
${LIBRARY_DIR}/src/nfagraph/ng_width.cpp
${LIBRARY_DIR}/src/nfagraph/ng.cpp
${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp
${LIBRARY_DIR}/src/parser/buildstate.cpp
${LIBRARY_DIR}/src/parser/check_refs.cpp
${LIBRARY_DIR}/src/parser/Component.cpp
${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp
${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp
${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp
${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp
${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp
${LIBRARY_DIR}/src/parser/ComponentByte.cpp
${LIBRARY_DIR}/src/parser/ComponentClass.cpp
${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp
${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp
${LIBRARY_DIR}/src/parser/ComponentEUS.cpp
${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp
${LIBRARY_DIR}/src/parser/ComponentSequence.cpp
${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp
${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp
${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp
${LIBRARY_DIR}/src/parser/control_verbs.cpp
${LIBRARY_DIR}/src/parser/logical_combination.cpp
${LIBRARY_DIR}/src/parser/parse_error.cpp
${LIBRARY_DIR}/src/parser/parser_util.cpp
${LIBRARY_DIR}/src/parser/Parser.cpp
${LIBRARY_DIR}/src/parser/prefilter.cpp
${LIBRARY_DIR}/src/parser/shortcut_literal.cpp
${LIBRARY_DIR}/src/parser/ucp_table.cpp
${LIBRARY_DIR}/src/parser/unsupported.cpp
${LIBRARY_DIR}/src/parser/utf8_validate.cpp
${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp
${LIBRARY_DIR}/src/rose/block.c
${LIBRARY_DIR}/src/rose/catchup.c
${LIBRARY_DIR}/src/rose/init.c
${LIBRARY_DIR}/src/rose/match.c
${LIBRARY_DIR}/src/rose/program_runtime.c
${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp
${LIBRARY_DIR}/src/rose/rose_build_add.cpp
${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp
${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp
${LIBRARY_DIR}/src/rose/rose_build_castle.cpp
${LIBRARY_DIR}/src/rose/rose_build_compile.cpp
${LIBRARY_DIR}/src/rose/rose_build_convert.cpp
${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp
${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp
${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp
${LIBRARY_DIR}/src/rose/rose_build_groups.cpp
${LIBRARY_DIR}/src/rose/rose_build_infix.cpp
${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp
${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp
${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp
${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp
${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp
${LIBRARY_DIR}/src/rose/rose_build_merge.cpp
${LIBRARY_DIR}/src/rose/rose_build_misc.cpp
${LIBRARY_DIR}/src/rose/rose_build_program.cpp
${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp
${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp
${LIBRARY_DIR}/src/rose/rose_build_width.cpp
${LIBRARY_DIR}/src/rose/rose_in_util.cpp
${LIBRARY_DIR}/src/rose/stream.c
${LIBRARY_DIR}/src/runtime.c
${LIBRARY_DIR}/src/scratch.c
${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp
${LIBRARY_DIR}/src/som/slot_manager.cpp
${LIBRARY_DIR}/src/som/som_runtime.c
${LIBRARY_DIR}/src/som/som_stream.c
${LIBRARY_DIR}/src/stream_compress.c
${LIBRARY_DIR}/src/util/alloc.cpp
${LIBRARY_DIR}/src/util/charreach.cpp
${LIBRARY_DIR}/src/util/clique.cpp
${LIBRARY_DIR}/src/util/compile_context.cpp
${LIBRARY_DIR}/src/util/compile_error.cpp
${LIBRARY_DIR}/src/util/cpuid_flags.c
${LIBRARY_DIR}/src/util/depth.cpp
${LIBRARY_DIR}/src/util/fatbit_build.cpp
${LIBRARY_DIR}/src/util/multibit_build.cpp
${LIBRARY_DIR}/src/util/multibit.c
${LIBRARY_DIR}/src/util/report_manager.cpp
${LIBRARY_DIR}/src/util/simd_utils.c
${LIBRARY_DIR}/src/util/state_compress.c
${LIBRARY_DIR}/src/util/target_info.cpp
${LIBRARY_DIR}/src/util/ue2string.cpp
)
add_library (hyperscan ${SRCS})
target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
target_compile_options (hyperscan
PRIVATE -g0 -march=corei7 # library has too much debug information
)
target_include_directories (hyperscan
PRIVATE
common
${LIBRARY_DIR}/include
)
target_include_directories (hyperscan SYSTEM PUBLIC ${LIBRARY_DIR}/src)
if (ARCH_AMD64)
target_include_directories (hyperscan PRIVATE x86_64)
endif ()
target_link_libraries (hyperscan PRIVATE boost::headers_only)
else ()
find_library (LIBRARY_HYPERSCAN hs)
find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder
add_library (hyperscan UNKNOWN IMPORTED GLOBAL)
set_target_properties (hyperscan PROPERTIES IMPORTED_LOCATION ${LIBRARY_HYPERSCAN})
set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN})
set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1)
endif ()
message (STATUS "Using hyperscan")
else ()
add_library (hyperscan INTERFACE)
target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0)
message (STATUS "Not using hyperscan")
endif ()

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_VERSION_H_C6428FAF8E3713
#define HS_VERSION_H_C6428FAF8E3713
/**
* A version string to identify this release of Hyperscan.
*/
#define HS_VERSION_STRING "5.1.1 2000-01-01"
#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0)
#endif /* HS_VERSION_H_C6428FAF8E3713 */

View File

@ -0,0 +1,106 @@
/* used by cmake */
#ifndef CONFIG_H_
#define CONFIG_H_
/* "Define if the build is 32 bit" */
/* #undef ARCH_32_BIT */
/* "Define if the build is 64 bit" */
#define ARCH_64_BIT
/* "Define if building for IA32" */
/* #undef ARCH_IA32 */
/* "Define if building for EM64T" */
#define ARCH_X86_64
/* internal build, switch on dump support. */
/* #undef DUMP_SUPPORT */
/* Define if building "fat" runtime. */
/* #undef FAT_RUNTIME */
/* Define if building AVX-512 in the fat runtime. */
/* #undef BUILD_AVX512 */
/* Define to 1 if `backtrace' works. */
#define HAVE_BACKTRACE
/* C compiler has __builtin_assume_aligned */
#define HAVE_CC_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has __builtin_assume_aligned */
#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has x86intrin.h */
#define HAVE_CXX_X86INTRIN_H
/* C compiler has x86intrin.h */
#define HAVE_C_X86INTRIN_H
/* C++ compiler has intrin.h */
/* #undef HAVE_CXX_INTRIN_H */
/* C compiler has intrin.h */
/* #undef HAVE_C_INTRIN_H */
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */
/* #undef HAVE_PTHREAD_NP_H */
/* Define to 1 if you have the `malloc_info' function. */
/* #undef HAVE_MALLOC_INFO */
/* Define to 1 if you have the `memmem' function. */
/* #undef HAVE_MEMMEM */
/* Define to 1 if you have a working `mmap' system call. */
#define HAVE_MMAP
/* Define to 1 if `posix_memalign' works. */
#define HAVE_POSIX_MEMALIGN
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT
/* Define to 1 if you have the `shmget' function. */
/* #undef HAVE_SHMGET */
/* Define to 1 if you have the `sigaction' function. */
#define HAVE_SIGACTION
/* Define to 1 if you have the `sigaltstack' function. */
#define HAVE_SIGALTSTACK
/* Define if the sqlite3_open_v2 call is available */
/* #undef HAVE_SQLITE3_OPEN_V2 */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the `_aligned_malloc' function. */
/* #undef HAVE__ALIGNED_MALLOC */
/* Define if compiler has __builtin_constant_p */
#define HAVE__BUILTIN_CONSTANT_P
/* Optimize, inline critical functions */
#define HS_OPTIMIZE
#define HS_VERSION
#define HS_MAJOR_VERSION
#define HS_MINOR_VERSION
#define HS_PATCH_VERSION
#define BUILD_DATE
/* define if this is a release build. */
#define RELEASE_BUILD
/* define if reverse_graph requires patch for boost 1.62.0 */
/* #undef BOOST_REVGRAPH_PATCH */
#endif /* CONFIG_H_ */

View File

@ -0,0 +1,2 @@
add_library (libdivide INTERFACE)
target_include_directories (libdivide SYSTEM BEFORE INTERFACE .)

View File

@ -209,9 +209,8 @@ endif()
target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY})
# inherit from parent cmake
target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY})
target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only)
if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES)
target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES})

View File

@ -1,13 +1,10 @@
if (HAVE_SSE42) # Not used. Pretty easy to port.
set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h)
endif ()
add_library(metrohash
src/metrohash.h
src/testvector.h
set (SRCS
src/metrohash64.cpp
src/metrohash128.cpp
${SOURCES_SSE42_ONLY})
)
if (HAVE_SSE42) # Not used. Pretty easy to port.
list (APPEND SRCS src/metrohash128crc.cpp)
endif ()
target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
add_library(metrohash ${SRCS})
target_include_directories(metrohash PUBLIC src)

View File

@ -82,7 +82,7 @@ target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined)
target_include_directories(rdkafka SYSTEM PUBLIC include)
target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used.
target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used.
target_link_libraries(rdkafka PRIVATE ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY})
target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LIBGSASL_LIBRARY})
if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY)
target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
endif()

1
contrib/libuv vendored Submodule

@ -0,0 +1 @@
Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28

View File

@ -1,17 +1,28 @@
SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ${NOT_UNBUNDLED})
add_library (lz4
${LIBRARY_DIR}/lz4.c
${LIBRARY_DIR}/lz4hc.c
${LIBRARY_DIR}/lz4frame.c
${LIBRARY_DIR}/lz4frame.h
${LIBRARY_DIR}/xxhash.c
${LIBRARY_DIR}/xxhash.h
if (USE_INTERNAL_LZ4_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4)
${LIBRARY_DIR}/lz4.h
${LIBRARY_DIR}/lz4hc.h)
set (SRCS
${LIBRARY_DIR}/lib/lz4.c
${LIBRARY_DIR}/lib/lz4hc.c
${LIBRARY_DIR}/lib/lz4frame.c
${LIBRARY_DIR}/lib/xxhash.c
)
target_compile_definitions(lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
target_compile_options(lz4 PRIVATE -fno-sanitize=undefined)
add_library (lz4 ${SRCS})
target_include_directories(lz4 PUBLIC ${LIBRARY_DIR})
target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1)
if (SANITIZE STREQUAL "undefined")
target_compile_options (lz4 PRIVATE -fno-sanitize=undefined)
endif ()
target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}/lib)
else ()
find_library (LIBRARY_LZ4 lz4)
find_path (INCLUDE_LZ4 lz4.h)
add_library (lz4 UNKNOWN IMPORTED)
set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION ${LIBRARY_LZ4})
set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4})
set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0)
endif ()

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit f1332626639d6492eaf170758642da14fbbda7bf
Subproject commit 2d37daaad24be71e76514a36b0a47120be2f9086

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
RUN apt-get update \

View File

@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \
libboost-system-dev \
libboost-filesystem-dev \
libboost-thread-dev \
libboost-iostreams-dev \
libboost-regex-dev \
zlib1g-dev \
liblz4-dev \
libdouble-conversion-dev \
@ -82,8 +84,8 @@ RUN apt-get --allow-unauthenticated update -y \
libcctz-dev \
libldap2-dev \
libsasl2-dev \
heimdal-multidev
heimdal-multidev \
libhyperscan-dev
# This symlink required by gcc to find lld compiler

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
ARG gosu_ver=1.10

View File

@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
RUN apt-get update && \

View File

@ -0,0 +1,7 @@
version: '2.3'
services:
cassandra1:
image: cassandra
restart: always
ports:
- 9043:9042

View File

@ -198,12 +198,14 @@ function get_profiles
clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: &
wait
@ -347,9 +349,11 @@ create table query_metric_stats engine File(TSVWithNamesAndTypes,
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
-- immediately (on the same order of time as noise). We compute average
-- run time between old and new version, and if it is below a threshold,
-- we just skip the query. If there is a significant regression, the
-- average will be above threshold, we'll process it normally and will
-- detect the regression.
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
@ -409,11 +413,11 @@ create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json')
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
@ -421,9 +425,39 @@ create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
group by test having s > 0 order by s desc;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
(right - left) / right average_time_change
from queries
group by test
order by abs(average_time_change) desc
)
;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries
group by test
order by total_unstable + total_changed desc
;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv
full join unstable_tests_tsv
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
order by total_bad desc, average_time_change desc
settings join_use_nulls = 1
;
create table query_time engine Memory as select *
from file('analyze/client-times.tsv', TSV,
@ -464,8 +498,8 @@ create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
stat_threshold, test, query_index, query_display_name
from queries order by test, query_index;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
@ -582,7 +616,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
having d > 0.5 and q[3] > 5
) metrics
left join query_display_names using (test, query_index)
order by test, query_index, d desc

View File

@ -17,7 +17,7 @@ function find_reference_sha
# If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ]
then
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pr/*"
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi
# Go back from the revision to be tested, trying to find the closest published
@ -28,9 +28,9 @@ function find_reference_sha
# and SHA_TO_TEST, but a revision that is merged with recent master, given
# by pull/.../merge ref.
# Master is the first parent of the pull/.../merge.
if git -C ch rev-parse pr/merge
if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
then
start_ref=pr/merge~
start_ref="pull/$PR_TO_TEST/merge~"
fi
while :
@ -73,11 +73,11 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
(
git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
if git -C ch rev-parse pr/merge &> /dev/null
if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
then
echo
echo Real tested commit is:
git -C ch log -1 --decorate pr/merge
git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
fi
) | tee right-commit.txt
@ -87,7 +87,7 @@ then
# tests for use by compare.sh. Compare to merge base, because master might be
# far in the future and have unrelated test changes.
base=$(git -C ch merge-base "$SHA_TO_TEST" master)
git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt
git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
if grep -vq '^tests/performance' changed-tests.txt
then
# Have some other changes besides the tests, so truncate the test list,
@ -131,5 +131,8 @@ done
dmesg -T > dmesg.log
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark
cp compare.log /output

View File

@ -207,7 +207,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
'Test', # 5
'Query', # 6
'#', # 6
'Query', # 7
]
print(tableHeader(columns))
@ -248,7 +249,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', #3
# Failed #4
'Test', #5
'Query' #6
'#', #6
'Query' #7
]
print(tableStart('Unstable queries'))
@ -272,9 +274,9 @@ if args.report == 'main':
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
['Test', 'Unstable', 'Changed perf', 'Total not OK'],
tsvRows('report/bad-tests.tsv'))
printSimpleTable('Test performance changes',
['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
tsvRows('report/test-perf-changes.tsv'))
def print_test_times():
global slow_average_tests
@ -357,7 +359,7 @@ if args.report == 'main':
error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.append(str(error_tests) + ' errors')
message_array.insert(0, str(error_tests) + ' errors')
if message_array:
message = ', '.join(message_array)
@ -391,7 +393,8 @@ elif args.report == 'all-queries':
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
'Test', #7
'Query', #8
'#', #8
'Query', #9
]
print(tableStart('All query times'))

View File

@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb"
ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb"
RUN wget "http://files.viva64.com/$PKG_VERSION"
RUN wget "https://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i "$PKG_VERSION"
CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \

View File

@ -24,6 +24,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \

View File

@ -59,7 +59,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \

View File

@ -62,7 +62,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \

View File

@ -50,7 +50,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \

View File

@ -31,6 +31,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \
echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \

View File

@ -1,4 +1,4 @@
## function-name {#function-name-in-lower-case}
## functionName {#functionname-in-lower-case}
Short description.

View File

@ -1,4 +1,4 @@
## setting-name {#setting-name-in-lower-case}
## setting_name {#setting_name}
Description.

View File

@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first.
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md).
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first.
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}

View File

@ -28,10 +28,9 @@ There are several ways to do this.
### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
### Install from a PPA Package {#install-from-a-ppa-package}

View File

@ -3,11 +3,11 @@ toc_priority: 61
toc_title: For Beginners
---
# The Beginner ClickHouse Developer Instruction
# The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction}
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9.
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`

View File

@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.
@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use simple fuzz test to generate random SQL queries and to check that the server doesnt die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
## Security Audit {#security-audit}
People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.

View File

@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura
You can also use the following database engines:
- [MySQL](mysql.md)
- [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](lazy.md)
- [Lazy](../../engines/database-engines/lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -1,8 +1,8 @@
---
toc_folder_title: Engines
toc_hidden: true
toc_priority: 25
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -19,27 +19,27 @@ The table engine (type of table) determines:
### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines.
Engines in the family:
- [MergeTree](mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree)
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree)
### Log {#log}
Lightweight [engines](log-family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family:
- [TinyLog](log-family/tinylog.md#tinylog)
- [StripeLog](log-family/stripelog.md#stripelog)
- [Log](log-family/log.md#log)
- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog)
- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog)
- [Log](../../engines/table-engines/log-family/log.md#log)
### Integration Engines {#integration-engines}
@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems.
Engines in the family:
- [Kafka](integrations/kafka.md#kafka)
- [MySQL](integrations/mysql.md#mysql)
- [ODBC](integrations/odbc.md#table-engine-odbc)
- [JDBC](integrations/jdbc.md#table-engine-jdbc)
- [HDFS](integrations/hdfs.md#hdfs)
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
- [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
- [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
### Special Engines {#special-engines}
Engines in the family:
- [Distributed](special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge
- [File](special/file.md#file)
- [Null](special/null.md#null)
- [Set](special/set.md#set)
- [Join](special/join.md#join)
- [URL](special/url.md#table_engines-url)
- [View](special/view.md#table_engines-view)
- [Memory](special/memory.md#memory)
- [Buffer](special/buffer.md#buffer)
- [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
- [Merge](../../engines/table-engines/special/merge.md#merge)
- [File](../../engines/table-engines/special/file.md#file)
- [Null](../../engines/table-engines/special/null.md#null)
- [Set](../../engines/table-engines/special/set.md#set)
- [Join](../../engines/table-engines/special/join.md#join)
- [URL](../../engines/table-engines/special/url.md#table_engines-url)
- [View](../../engines/table-engines/special/view.md#table_engines-view)
- [Memory](../../engines/table-engines/special/memory.md#memory)
- [Buffer](../../engines/table-engines/special/buffer.md#buffer)
## Virtual Columns {#table_engines-virtual_columns}

View File

@ -6,7 +6,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage}
@ -116,6 +116,6 @@ CREARE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->

View File

@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s
Engines of the family:
- [StripeLog](stripelog.md)
- [Log](log.md)
- [TinyLog](tinylog.md)
- [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties}

View File

@ -5,9 +5,9 @@ toc_title: Log
# Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article.
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).

View File

@ -5,10 +5,10 @@ toc_title: TinyLog
# TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences.
The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](log.md) engine (fewer files need to be opened).
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree
# Aggregatingmergetree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree
# CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe
**Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key
# Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql
CREATE TABLE visits
@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour;
```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example:
The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun
!!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldnt make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table:
Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table:
``` sql
SELECT

View File

@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit
You can use any ClickHouse table engine to store the Graphite data if you dont need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md).
The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).
## Creating a Table {#creating-table}
@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration.
**Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -15,20 +15,20 @@ Main features:
This allows you to create a small sparse index that helps find data faster.
- Partitions can be used if the [partitioning key](custom-partitioning-key.md) is specified.
- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](replication.md).
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
!!! info "Info"
The [Merge](../special/merge.md#merge) engine does not belong to the `*MergeTree` family.
The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family.
## Creating a Table {#table_engine-mergetree-creating-a-table}
@ -41,8 +41,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
@ -51,30 +51,31 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
!!! note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md).
- `ORDER BY` — The sorting key.
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you dont need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional.
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
- `ORDER BY` — The sorting key.
A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key).
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.
- `SAMPLE BY` — An expression for sampling.
- `SAMPLE BY` — An expression for sampling. Optional.
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes).
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.
Expression must have one `Date` or `DateTime` column as a result. Example:
`TTL date + INTERVAL 1 DAY`
@ -83,7 +84,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional):
- `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
@ -192,18 +193,22 @@ The number of columns in the primary key is not explicitly limited. Depending on
ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md) engines.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines.
In this case it makes sense to specify the *sorting key* that is different from the primary key.
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and
[AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and
[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
@ -249,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes (experimental) {#table_engine-mergetree-data_skipping-indexes}
### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
@ -332,8 +337,8 @@ The `set` index can be used with all functions. Function subsets for other index
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | | ✗ | ✗ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
@ -349,7 +354,8 @@ The `set` index can be used with all functions. Function subsets for other index
Functions with a constant argument that is less than ngram size cant be used by `ngrambf_v1` for query optimization.
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cant be used for optimizing queries where the result of a function is expected to be false, for example:
!!! note "Note"
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cant be used for optimizing queries where the result of a function is expected to be false, for example:
- Can be optimized:
- `s LIKE '%test%'`
@ -478,7 +484,7 @@ When ClickHouse see that data is expired, it performs an off-schedule merge. To
If you perform the `SELECT` query between merges, you may get expired data. To avoid it, use the [OPTIMIZE](../../../sql-reference/statements/misc.md#misc_operations-optimize) query before `SELECT`.
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes}
### Introduction {#introduction}
@ -493,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
@ -623,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
@ -642,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
@ -652,4 +658,3 @@ After the completion of background merges and mutations, old parts are removed o
During this time, they are not moved to other volumes or disks. Therefore, until the parts are finally removed, they are still taken into account for evaluation of the occupied disk space.
[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree
# ReplacingMergeTree {#replacingmergetree}
The engine differs from [MergeTree](mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](mergetree.md) value).
The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same primary key value (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value).
Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you cant plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, dont count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
@ -40,7 +40,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `ReplacingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasnt proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
**See also**
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: SummingMergeTree
# SummingMergeTree {#summingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to large number of rows, this significantly reduces storage volume and speeds up data selection.
We recommend to use the engine together with `MergeTree`. Store complete data in `MergeTree` table, and use `SummingMergeTree` for aggregated data storing, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `SummingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table.
When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
### Common Rules for Summation {#common-rules-for-summation}
@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key.
### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](aggregatingmergetree.md) engine aggregating according to the function.
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function.
### Nested Structures {#nested-structures}
@ -132,7 +132,7 @@ Examples:
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference.md) function for aggregation of `Map`.
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.
For nested data structure, you do not need to specify its columns in the tuple of columns for summation.

View File

@ -12,7 +12,7 @@ This engine:
See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
## Creating a Table {#creating-a-table}
@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version)
**Query Clauses**
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](mergetree.md) are required as when creating a `MergeTree` table.
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
<details markdown="1">

View File

@ -3,7 +3,7 @@ toc_priority: 45
toc_title: Buffer
---
# Buffer {#buffer}
# Buffer Table Engine {#buffer}
Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously.
@ -34,9 +34,9 @@ Example:
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
```
Creating a merge.hits\_buffer table with the same structure as merge.hits and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
When the server is stopped, with DROP TABLE or DETACH TABLE, buffer data is also flushed to the destination table.
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table.
You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory.
@ -52,11 +52,11 @@ If you need to run ALTER for a subordinate table and the Buffer table, we recomm
If the server is restarted abnormally, the data in the buffer is lost.
FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set num\_layers to 1.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable exactly once write to replicated tables.

View File

@ -3,15 +3,17 @@ toc_priority: 35
toc_title: Dictionary
---
# Dictionary {#dictionary}
# Dictionary Table Engine {#dictionary}
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
## Example {#example}
As an example, consider a dictionary of `products` with the following configuration:
``` xml
<dictionaries>
<dictionary>
<dictionary>
<name>products</name>
<source>
<odbc>
@ -36,7 +38,7 @@ As an example, consider a dictionary of `products` with the following configurat
<null_value></null_value>
</attribute>
</structure>
</dictionary>
</dictionary>
</dictionaries>
```

View File

@ -3,9 +3,9 @@ toc_priority: 33
toc_title: Distributed
---
# Distributed {#distributed}
# Distributed Table Engine {#distributed}
**Tables with Distributed engine do not store any data by themself**, but allow distributed query processing on multiple servers.
Tables with Distributed engine do not store any data by their own, but allow distributed query processing on multiple servers.
Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
The Distributed engine accepts parameters:
@ -23,7 +23,7 @@ The Distributed engine accepts parameters:
See also:
- `insert_distributed_sync` setting
- [MergeTree](../mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:
@ -31,7 +31,7 @@ Example:
Distributed(logs, default, hits[, sharding_key[, policy_name]])
```
Data will be read from all servers in the logs cluster, from the default.hits table located on every server in the cluster.
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
@ -75,7 +75,7 @@ Clusters are set like this:
</remote_servers>
```
Here a cluster is defined with the name logs that consists of two shards, each of which contains two replicas.
Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
@ -83,7 +83,7 @@ Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesnt start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (tcp\_port in the config, usually set to 9000). Do not confuse it with http\_port.
- `port` The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
@ -97,11 +97,11 @@ You can specify just one of the shards (in this case, query processing should be
You can specify as many clusters as you wish in the configuration.
To view your clusters, use the system.clusters table.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the clusters servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you dont need to create a Distributed table use the remote table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you dont need to create a Distributed table use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
@ -111,15 +111,15 @@ Second, you can perform INSERT in a Distributed table. In this case, the table w
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the internal\_replication parameter defined in the config file.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to true, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to false (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from prev\_weight to prev\_weights + weight, where prev\_weights is the total weight of the shards with the smallest number, and weight is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression rand() for random distribution of data, or UserID for distribution by the remainder from dividing the users ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the users ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
A simple reminder from the division is a limited solution for sharding and isnt always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
@ -130,11 +130,11 @@ You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as weve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the broken subdirectory and no longer used.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
## Virtual Columns {#virtual-columns}
@ -145,7 +145,7 @@ When the max\_parallel\_replicas option is enabled, query processing is parallel
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->

View File

@ -1,11 +1,11 @@
---
toc_priority: 34
toc_title: External data
toc_priority: 45
toc_title: External Data
---
# External Data for Query Processing {#external-data-for-query-processing}
ClickHouse allows sending a server the data that is needed for processing a query, together with a SELECT query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in IN operators).
ClickHouse allows sending a server the data that is needed for processing a query, together with a `SELECT` query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in `IN` operators).
For example, if you have a text file with important user identifiers, you can upload it to the server along with a query that uses filtration by this list.
@ -46,7 +46,7 @@ $ cat /etc/passwd | sed 's/:/\t/g' | clickhouse-client --query="SELECT shell, co
/bin/sync 1
```
When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The query\_string is passed the parameters name\_format, name\_types, and name\_structure, where name is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.
When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The `query_string` is passed the parameters `name_format`, `name_types`, and `name_structure`, where `name` is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.
Example:

View File

@ -3,12 +3,11 @@ toc_priority: 37
toc_title: File
---
# File {#table_engines-file}
# File Table Engine {#table_engines-file}
The File table engine keeps the data in a file in one of the supported [file
formats](../../../interfaces/formats.md#formats) (TabSeparated, Native, etc.).
The File table engine keeps the data in a file in one of the supported [file formats](../../../interfaces/formats.md#formats) (`TabSeparated`, `Native`, etc.).
Usage examples:
Usage scenarios:
- Data export from ClickHouse to file.
- Convert data from one format to another.
@ -34,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
**Example:**
## Example {#example}
**1.** Set up the `file_engine_table` table:

View File

@ -3,7 +3,7 @@ toc_priority: 46
toc_title: GenerateRandom
---
# Generaterandom {#table_engines-generate}
# GenerateRandom Table Engine {#table_engines-generate}
The GenerateRandom table engine produces random data for given table schema.
@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
**Example:**
## Example {#example}
**1.** Set up the `generate_engine_table` table:

View File

@ -3,9 +3,12 @@ toc_priority: 40
toc_title: Join
---
# Join {#join}
# Join Table Engine {#join}
Prepared data structure for using in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
!!! note "Note"
This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself.
## Creating a Table {#creating-a-table}

View File

@ -3,8 +3,8 @@ toc_priority: 43
toc_title: MaterializedView
---
# Materializedview {#materializedview}
# MaterializedView Table Engine {#materializedview}
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine.
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->

View File

@ -3,15 +3,16 @@ toc_priority: 44
toc_title: Memory
---
# Memory {#memory}
# Memory Table Engine {#memory}
The Memory engine stores data in RAM, in uncompressed form. Data is stored in exactly the same form as it is received when read. In other words, reading from this table is completely free.
Concurrent data access is synchronized. Locks are short: read and write operations dont block each other.
Indexes are not supported. Reading is parallelized.
Maximal productivity (over 10 GB/sec) is reached on simple queries, because there is no reading from the disk, decompressing, or deserializing data. (We should note that in many cases, the productivity of the MergeTree engine is almost as high.)
When restarting a server, data disappears from the table and the table becomes empty.
Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000).
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing GLOBAL IN (see the section “IN operators”).
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/memory/) <!--hide-->

View File

@ -3,13 +3,17 @@ toc_priority: 36
toc_title: Merge
---
# Merge {#merge}
# Merge Table Engine {#merge}
The `Merge` engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously.
Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist.
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
Example:
## Examples {#examples}
Example 1:
``` sql
Merge(hits, '^WatchLog')
@ -63,6 +67,6 @@ FROM WatchLog
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->

View File

@ -3,10 +3,11 @@ toc_priority: 38
toc_title: 'Null'
---
# Null {#null}
# Null Table Engine {#null}
When writing to a Null table, data is ignored. When reading from a Null table, the response is empty.
When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty.
However, you can create a materialized view on a Null table. So the data written to the table will end up in the view.
!!! info "Hint"
However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/null/) <!--hide-->

View File

@ -3,14 +3,14 @@ toc_priority: 39
toc_title: Set
---
# Set {#set}
# Set Table Engine {#set}
A data set that is always in RAM. It is intended for use on the right side of the IN operator (see the section “IN operators”).
A data set that is always in RAM. It is intended for use on the right side of the `IN` operator (see the section “IN operators”).
You can use INSERT to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
But you cant perform SELECT from the table. The only way to retrieve data is by using it in the right half of the IN operator.
You can use `INSERT` to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
But you cant perform `SELECT` from the table. The only way to retrieve data is by using it in the right half of the `IN` operator.
Data is always located in RAM. For INSERT, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.
Data is always located in RAM. For `INSERT`, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.
For a rough server restart, the block of data on the disk might be lost or damaged. In the latter case, you may need to manually delete the file with damaged data.

View File

@ -3,12 +3,13 @@ toc_priority: 41
toc_title: URL
---
# URL(URL, Format) {#table_engines-url}
# URL Table Engine {#table_engines-url}
Manages data on a remote HTTP/HTTPS server. This engine is similar
to the [File](file.md) engine.
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
## Using the Engine in the ClickHouse Server {#using-the-engine-in-the-clickhouse-server}
Syntax: `URL(URL, Format)`
## Usage {#using-the-engine-in-the-clickhouse-server}
The `format` must be one that ClickHouse can use in
`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
@ -24,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
**Example:**
## Example {#example}
**1.** Create a `url_engine_table` table on the server :

View File

@ -3,7 +3,7 @@ toc_priority: 42
toc_title: View
---
# View {#table_engines-view}
# View Table Engine {#table_engines-view}
Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query).

View File

@ -1,9 +1,8 @@
---
toc_folder_title: F.A.Q.
toc_hidden: true
toc_priority: 76
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}

View File

@ -9,12 +9,12 @@ toc_title: Introduction
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
- [Anonymized Yandex.Metrica Dataset](metrica.md)
- [Star Schema Benchmark](star-schema.md)
- [WikiStat](wikistat.md)
- [Terabyte of Click Logs from Criteo](criteo.md)
- [AMPLab Big Data Benchmark](amplab-benchmark.md)
- [New York Taxi Data](nyc-taxi.md)
- [OnTime](ontime.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data
Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz.
## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}

View File

@ -7,9 +7,9 @@ toc_title: hidden
# Getting Started {#getting-started}
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can:
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can:
- [Go through detailed tutorial](tutorial.md)
- [Experiment with example datasets](example-datasets/ontime.md)
- [Go through detailed tutorial](../getting-started/tutorial.md)
- [Experiment with example datasets](../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide-->

View File

@ -64,7 +64,7 @@ You can also download and install packages manually from [here](https://repo.cli
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.yandex.ru/clickhouse/tgz/.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/.
After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:
``` bash

View File

@ -6,13 +6,13 @@ toc_title: Playground
# ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. Theres also a selection of ClickHouse LTS releases to experiment with.
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
## Credentials
## Credentials {#credentials}
| Parameter | Value |
|:--------------------|:----------------------------------------|
@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
* 20.3 LTS: `play-api-v20-3.clickhouse.tech`
* 19.14 LTS: `play-api-v19-14.clickhouse.tech`
- 20.3 LTS: `play-api-v20-3.clickhouse.tech`
- 19.14 LTS: `play-api-v19-14.clickhouse.tech`
!!! note "Note"
All these endpoints require a secure TLS connection.
## Limitations
## Limitations {#limitations}
The queries are executed as a read-only user. It implies some limitations:
@ -37,12 +37,12 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time)
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)
## Examples
## Examples {#examples}
HTTPS endpoint example with `curl`:
@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us
```
TCP endpoint example with [CLI](../interfaces/cli.md):
``` bash
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
```
## Implementation Details
## Implementation Details {#implementation-details}
ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add extra layer of protection and improved global connectivity.

View File

@ -11,7 +11,7 @@ By going through this tutorial, youll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do no support them.
To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
For example, you have chosen `deb` packages and executed:
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset}
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data}

View File

@ -232,6 +232,6 @@ FROM
```
!!! note "Note"
More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->

Some files were not shown because too many files have changed in this diff Show More