Merge branch 'master' into avoid-trailing-whitespaces-in-some-cases

This commit is contained in:
Alexey Milovidov 2020-06-15 06:52:14 +03:00
commit fdc8f7ad14
805 changed files with 15188 additions and 12440 deletions

8
.gitmodules vendored
View File

@ -157,6 +157,14 @@
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/openldap/openldap.git
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse-Extras/cpp-driver.git
branch = clickhouse
[submodule "contrib/libuv"]
path = contrib/libuv
url = https://github.com/ClickHouse-Extras/libuv.git
branch = clickhouse
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git

View File

@ -327,20 +327,16 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE
include (GNUInstallDirs)
include (cmake/contrib_finder.cmake)
include (cmake/lib_name.cmake)
find_contrib_lib(double-conversion) # Must be before parquet
include (cmake/find/ssl.cmake)
include (cmake/find/ldap.cmake) # after ssl
include (cmake/find/icu.cmake)
include (cmake/find/boost.cmake)
include (cmake/find/zlib.cmake)
include (cmake/find/zstd.cmake)
include (cmake/find/ltdl.cmake) # for odbc
include (cmake/find/termcap.cmake)
# openssl, zlib before poco
include (cmake/find/lz4.cmake)
include (cmake/find/xxhash.cmake)
include (cmake/find/sparsehash.cmake)
include (cmake/find/re2.cmake)
include (cmake/find/libgsasl.cmake)
@ -358,17 +354,16 @@ include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/s3.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/hyperscan.cmake)
include (cmake/find/simdjson.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/orc.cmake)
include (cmake/find/avro.cmake)
include (cmake/find/msgpack.cmake)
include (cmake/find/cassandra.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)
find_contrib_lib(btrie)
if (ENABLE_TESTS)

View File

@ -10,10 +10,12 @@ ClickHouse is an open-source column-oriented database management system that all
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time.
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.

View File

@ -16,6 +16,7 @@ set (SRCS
shift10.cpp
sleep.cpp
terminalColors.cpp
errnoToString.cpp
)
if (ENABLE_REPLXX)
@ -43,10 +44,6 @@ endif()
target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
if (NOT USE_INTERNAL_BOOST_LIBRARY)
target_include_directories (common SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
endif ()
# Allow explicit fallback to readline
if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
message (STATUS "Attempt to fallback to readline explicitly")
@ -72,7 +69,8 @@ endif ()
target_link_libraries (common
PUBLIC
${CITYHASH_LIBRARIES}
${Boost_SYSTEM_LIBRARY}
boost::headers_only
boost::system
FastMemcpy
Poco::Net
Poco::Net::SSL

View File

@ -67,8 +67,8 @@ LineReader::Suggest::WordsRange LineReader::Suggest::getCompletions(const String
});
}
LineReader::LineReader(const String & history_file_path_, char extender_, char delimiter_)
: history_file_path(history_file_path_), extender(extender_), delimiter(delimiter_)
LineReader::LineReader(const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
: history_file_path(history_file_path_), multiline(multiline_), extenders(std::move(extenders_)), delimiters(std::move(delimiters_))
{
/// FIXME: check extender != delimiter
}
@ -76,38 +76,60 @@ LineReader::LineReader(const String & history_file_path_, char extender_, char d
String LineReader::readLine(const String & first_prompt, const String & second_prompt)
{
String line;
bool is_multiline = false;
bool need_next_line = false;
while (auto status = readOneLine(is_multiline ? second_prompt : first_prompt))
while (auto status = readOneLine(need_next_line ? second_prompt : first_prompt))
{
if (status == RESET_LINE)
{
line.clear();
is_multiline = false;
need_next_line = false;
continue;
}
if (input.empty())
{
if (!line.empty() && !delimiter && !hasInputData())
if (!line.empty() && !multiline && !hasInputData())
break;
else
continue;
}
is_multiline = (input.back() == extender) || (delimiter && input.back() != delimiter) || hasInputData();
if (input.back() == extender)
#if !defined(ARCADIA_BUILD) /// C++20
const char * has_extender = nullptr;
for (const auto * extender : extenders)
{
input = input.substr(0, input.size() - 1);
if (input.ends_with(extender))
{
has_extender = extender;
break;
}
}
const char * has_delimiter = nullptr;
for (const auto * delimiter : delimiters)
{
if (input.ends_with(delimiter))
{
has_delimiter = delimiter;
break;
}
}
need_next_line = has_extender || (multiline && !has_delimiter) || hasInputData();
if (has_extender)
{
input.resize(input.size() - strlen(has_extender));
trim(input);
if (input.empty())
continue;
}
#endif
line += (line.empty() ? "" : " ") + input;
if (!is_multiline)
if (!need_next_line)
break;
}

View File

@ -21,7 +21,9 @@ public:
WordsRange getCompletions(const String & prefix, size_t prefix_length) const;
};
LineReader(const String & history_file_path, char extender, char delimiter = 0); /// if delimiter != 0, then it's multiline mode
using Patterns = std::vector<const char *>;
LineReader(const String & history_file_path, bool multiline, Patterns extenders, Patterns delimiters);
virtual ~LineReader() {}
/// Reads the whole line until delimiter (in multiline mode) or until the last line without extender.
@ -51,8 +53,10 @@ protected:
String input;
private:
const char extender;
const char delimiter;
bool multiline;
Patterns extenders;
Patterns delimiters;
String prev_line;

View File

@ -56,8 +56,9 @@ static char * generate(const char * text, int state)
return nextMatch();
};
ReadlineLineReader::ReadlineLineReader(const Suggest & suggest_, const String & history_file_path_, char extender_, char delimiter_)
: LineReader(history_file_path_, extender_, delimiter_)
ReadlineLineReader::ReadlineLineReader(
const Suggest & suggest_, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_))
{
suggest = &suggest_;

View File

@ -8,7 +8,7 @@
class ReadlineLineReader : public LineReader
{
public:
ReadlineLineReader(const Suggest & suggest, const String & history_file_path, char extender, char delimiter = 0);
ReadlineLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_);
~ReadlineLineReader() override;
void enableBracketedPaste() override;

View File

@ -1,9 +1,11 @@
#include <common/ReplxxLineReader.h>
#include <common/errnoToString.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <functional>
#include <sys/file.h>
namespace
{
@ -16,15 +18,43 @@ void trim(String & s)
}
ReplxxLineReader::ReplxxLineReader(const Suggest & suggest, const String & history_file_path_, char extender_, char delimiter_)
: LineReader(history_file_path_, extender_, delimiter_)
ReplxxLineReader::ReplxxLineReader(
const Suggest & suggest,
const String & history_file_path_,
bool multiline_,
Patterns extenders_,
Patterns delimiters_,
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
{
using namespace std::placeholders;
using Replxx = replxx::Replxx;
if (!history_file_path.empty())
{
history_file_fd = open(history_file_path.c_str(), O_RDWR);
if (history_file_fd < 0)
{
rx.print("Open of history file failed: %s\n", errnoToString(errno).c_str());
}
else
{
if (flock(history_file_fd, LOCK_SH))
{
rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str());
}
else
{
rx.history_load(history_file_path);
if (flock(history_file_fd, LOCK_UN))
{
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
}
}
}
}
auto callback = [&suggest] (const String & context, size_t context_size)
{
auto range = suggest.getCompletions(context, context_size);
@ -35,6 +65,9 @@ ReplxxLineReader::ReplxxLineReader(const Suggest & suggest, const String & histo
rx.set_complete_on_empty(false);
rx.set_word_break_characters(word_break_characters);
if (highlighter)
rx.set_highlighter_callback(highlighter);
/// By default C-p/C-n binded to COMPLETE_NEXT/COMPLETE_PREV,
/// bind C-p/C-n to history-previous/history-next like readline.
rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
@ -48,8 +81,8 @@ ReplxxLineReader::ReplxxLineReader(const Suggest & suggest, const String & histo
ReplxxLineReader::~ReplxxLineReader()
{
if (!history_file_path.empty())
rx.history_save(history_file_path);
if (close(history_file_fd))
rx.print("Close of history file failed: %s\n", strerror(errno));
}
LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
@ -67,7 +100,20 @@ LineReader::InputStatus ReplxxLineReader::readOneLine(const String & prompt)
void ReplxxLineReader::addToHistory(const String & line)
{
// locking history file to prevent from inconsistent concurrent changes
bool locked = false;
if (flock(history_file_fd, LOCK_EX))
rx.print("Lock of history file failed: %s\n", strerror(errno));
else
locked = true;
rx.history_add(line);
// flush changes to the disk
rx.history_save(history_file_path);
if (locked && 0 != flock(history_file_fd, LOCK_UN))
rx.print("Unlock of history file failed: %s\n", strerror(errno));
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -4,10 +4,17 @@
#include <replxx.hxx>
class ReplxxLineReader : public LineReader
{
public:
ReplxxLineReader(const Suggest & suggest, const String & history_file_path, char extender, char delimiter = 0);
ReplxxLineReader(
const Suggest & suggest,
const String & history_file_path,
bool multiline,
Patterns extenders_,
Patterns delimiters_,
replxx::Replxx::highlighter_callback_t highlighter_);
~ReplxxLineReader() override;
void enableBracketedPaste() override;
@ -17,4 +24,8 @@ private:
void addToHistory(const String & line) override;
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
// used to call flock() to synchronize multiple clients using same history file
int history_file_fd = -1;
};

View File

@ -0,0 +1,29 @@
#include "errnoToString.h"
#include <fmt/format.h>
std::string errnoToString(int code, int the_errno)
{
const size_t buf_size = 128;
char buf[buf_size];
#ifndef _GNU_SOURCE
int rc = strerror_r(the_errno, buf, buf_size);
#ifdef __APPLE__
if (rc != 0 && rc != EINVAL)
#else
if (rc != 0)
#endif
{
std::string tmp = std::to_string(code);
const char * code_str = tmp.c_str();
const char * unknown_message = "Unknown error ";
strcpy(buf, unknown_message);
strcpy(buf + strlen(unknown_message), code_str);
}
return fmt::format("errno: {}, strerror: {}", the_errno, buf);
#else
(void)code;
return fmt::format("errno: {}, strerror: {}", the_errno, strerror_r(the_errno, buf, sizeof(buf)));
#endif
}

View File

@ -0,0 +1,6 @@
#pragma once
#include <cerrno>
#include <string>
std::string errnoToString(int code, int the_errno = errno);

View File

@ -1,6 +1,8 @@
#pragma once
#include <functional>
#include <type_traits>
#include <utility>
template <class T, class Tag>
struct StrongTypedef

View File

@ -47,6 +47,7 @@ SRCS(
shift10.cpp
sleep.cpp
terminalColors.cpp
errnoToString.cpp
)
END()

View File

@ -32,10 +32,18 @@ else ()
endif ()
endif ()
target_link_libraries(mysqlxx PUBLIC common PRIVATE ${MYSQLCLIENT_LIBRARIES} PUBLIC ${Boost_SYSTEM_LIBRARY} PRIVATE ${ZLIB_LIBRARIES})
target_link_libraries (mysqlxx
PUBLIC
common
PRIVATE
${MYSQLCLIENT_LIBRARIES}
${ZLIB_LIBRARIES}
)
if(OPENSSL_LIBRARIES)
target_link_libraries(mysqlxx PRIVATE ${OPENSSL_LIBRARIES})
endif()
target_link_libraries(mysqlxx PRIVATE ${PLATFORM_LIBRARIES})
if (NOT USE_INTERNAL_MYSQL_LIBRARY AND OPENSSL_INCLUDE_DIR)

View File

@ -1,44 +0,0 @@
# - Try to find metrohash headers and libraries.
#
# Usage of this module as follows:
#
# find_package(metrohash)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# METROHASH_ROOT_DIR Set this variable to the root installation of
# metrohash if the module has problems finding
# the proper installation path.
#
# Variables defined by this module:
#
# METROHASH_FOUND System has metrohash libs/headers
# METROHASH_LIBRARIES The metrohash library/libraries
# METROHASH_INCLUDE_DIR The location of metrohash headers
find_path(METROHASH_ROOT_DIR
NAMES include/metrohash.h
)
find_library(METROHASH_LIBRARIES
NAMES metrohash
PATHS ${METROHASH_ROOT_DIR}/lib ${METROHASH_LIBRARIES_PATHS}
)
find_path(METROHASH_INCLUDE_DIR
NAMES metrohash.h
PATHS ${METROHASH_ROOT_DIR}/include PATH_SUFFIXES metrohash ${METROHASH_INCLUDE_PATHS}
)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(metrohash DEFAULT_MSG
METROHASH_LIBRARIES
METROHASH_INCLUDE_DIR
)
mark_as_advanced(
METROHASH_ROOT_DIR
METROHASH_LIBRARIES
METROHASH_INCLUDE_DIR
)

View File

@ -1,52 +0,0 @@
option (USE_INTERNAL_BOOST_LIBRARY "Set to FALSE to use system boost library instead of bundled" ${NOT_UNBUNDLED})
# Test random file existing in all package variants
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/system/src/error_code.cpp")
if(USE_INTERNAL_BOOST_LIBRARY)
message(WARNING "submodules in contrib/boost is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set (USE_INTERNAL_BOOST_LIBRARY 0)
set (MISSING_INTERNAL_BOOST_LIBRARY 1)
endif ()
if (NOT USE_INTERNAL_BOOST_LIBRARY)
set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES})
set (BOOST_ROOT "/usr/local")
find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex)
# incomplete, no include search, who use it?
if (NOT Boost_FOUND)
# # Try to find manually.
# set (BOOST_PATHS "")
# find_library (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options PATHS ${BOOST_PATHS})
# find_library (Boost_SYSTEM_LIBRARY boost_system PATHS ${BOOST_PATHS})
# find_library (Boost_FILESYSTEM_LIBRARY boost_filesystem PATHS ${BOOST_PATHS})
# maybe found but incorrect version.
set (Boost_INCLUDE_DIRS "")
set (Boost_SYSTEM_LIBRARY "")
endif ()
endif ()
if (NOT Boost_SYSTEM_LIBRARY AND NOT MISSING_INTERNAL_BOOST_LIBRARY)
set (USE_INTERNAL_BOOST_LIBRARY 1)
set (Boost_SYSTEM_LIBRARY boost_system_internal)
set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY})
set (Boost_IOSTREAMS_LIBRARY boost_iostreams_internal)
set (Boost_REGEX_LIBRARY boost_regex_internal)
set (Boost_INCLUDE_DIRS)
set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost")
# For boost from github:
file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include")
list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
# numeric has additional level
file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/numeric/*/include")
list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
# For packaged version:
list (APPEND Boost_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/boost")
endif ()
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_IOSTREAMS_LIBRARY},${Boost_REGEX_LIBRARY}")

View File

@ -0,0 +1,26 @@
option(ENABLE_CASSANDRA "Enable Cassandra" ${ENABLE_LIBRARIES})
if (ENABLE_CASSANDRA)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv")
message (ERROR "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive")
elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive")
else()
set (LIBUV_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libuv")
set (CASSANDRA_INCLUDE_DIR
"${ClickHouse_SOURCE_DIR}/contrib/cassandra/include/")
if (USE_STATIC_LIBRARIES)
set (LIBUV_LIBRARY uv_a)
set (CASSANDRA_LIBRARY cassandra_static)
else()
set (LIBUV_LIBRARY uv)
set (CASSANDRA_LIBRARY cassandra)
endif()
set (USE_CASSANDRA 1)
set (CASS_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/cassandra")
endif()
endif()
message (STATUS "Using cassandra=${USE_CASSANDRA}: ${CASSANDRA_INCLUDE_DIR} : ${CASSANDRA_LIBRARY}")
message (STATUS "Using libuv: ${LIBUV_ROOT_DIR} : ${LIBUV_LIBRARY}")

View File

@ -1,33 +0,0 @@
if (HAVE_SSSE3)
option (ENABLE_HYPERSCAN "Enable hyperscan" ${ENABLE_LIBRARIES})
endif ()
if (ENABLE_HYPERSCAN)
option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
endif ()
set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
endif ()
if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
find_library (HYPERSCAN_LIBRARY hs)
find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
endif ()
if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
set (USE_HYPERSCAN 1)
elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
set (HYPERSCAN_LIBRARY hs)
set (USE_HYPERSCAN 1)
set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
endif()
message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")
endif ()

View File

@ -1,23 +0,0 @@
option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED})
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h")
if (USE_INTERNAL_LZ4_LIBRARY)
message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_LZ4_LIBRARY 0)
endif ()
set (MISSING_INTERNAL_LZ4_LIBRARY 1)
endif ()
if (NOT USE_INTERNAL_LZ4_LIBRARY)
find_library (LZ4_LIBRARY lz4)
find_path (LZ4_INCLUDE_DIR NAMES lz4.h PATHS ${LZ4_INCLUDE_PATHS})
endif ()
if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY)
set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
set (USE_INTERNAL_LZ4_LIBRARY 1)
set (LZ4_LIBRARY lz4)
endif ()
message (STATUS "Using lz4: ${LZ4_INCLUDE_DIR} : ${LZ4_LIBRARY}")

View File

@ -63,7 +63,7 @@ elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY})
list(APPEND PARQUET_LIBRARY boost::regex)
endif()
set(THRIFT_LIBRARY thrift)
endif()

View File

@ -1,22 +0,0 @@
option (USE_INTERNAL_XXHASH_LIBRARY "Set to FALSE to use system xxHash library instead of bundled" ${NOT_UNBUNDLED})
if (USE_INTERNAL_XXHASH_LIBRARY AND NOT USE_INTERNAL_LZ4_LIBRARY)
message (WARNING "can not use internal xxhash without internal lz4")
set (USE_INTERNAL_XXHASH_LIBRARY 0)
endif ()
if (USE_INTERNAL_XXHASH_LIBRARY)
set (XXHASH_LIBRARY lz4)
set (XXHASH_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
else ()
find_library (XXHASH_LIBRARY xxhash)
find_path (XXHASH_INCLUDE_DIR NAMES xxhash.h PATHS ${XXHASH_INCLUDE_PATHS})
endif ()
if (XXHASH_LIBRARY AND XXHASH_INCLUDE_DIR)
set (USE_XXHASH 1)
else ()
set (USE_XXHASH 0)
endif ()
message (STATUS "Using xxhash=${USE_XXHASH}: ${XXHASH_INCLUDE_DIR} : ${XXHASH_LIBRARY}")

View File

@ -1,4 +0,0 @@
set(DIVIDE_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
set(DBMS_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/src ${ClickHouse_BINARY_DIR}/src)
set(DOUBLE_CONVERSION_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/double-conversion)
set(METROHASH_CONTRIB_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)

View File

@ -21,11 +21,6 @@ if (TARGET double-conversion)
list(APPEND dirs ${dirs1})
endif ()
if (TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY})
get_property (dirs1 TARGET ${Boost_PROGRAM_OPTIONS_LIBRARY} PROPERTY INCLUDE_DIRECTORIES)
list(APPEND dirs ${dirs1})
endif ()
list(REMOVE_DUPLICATES dirs)
file (WRITE ${CMAKE_CURRENT_BINARY_DIR}/include_directories.txt "")
foreach (dir ${dirs})

View File

@ -16,13 +16,18 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w")
set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1)
add_subdirectory (boost-cmake)
add_subdirectory (cctz-cmake)
add_subdirectory (consistent-hashing-sumbur)
add_subdirectory (consistent-hashing)
add_subdirectory (croaring)
add_subdirectory (FastMemcpy)
add_subdirectory (hyperscan-cmake)
add_subdirectory (jemalloc-cmake)
add_subdirectory (libcpuid-cmake)
add_subdirectory (libdivide)
add_subdirectory (libmetrohash)
add_subdirectory (lz4-cmake)
add_subdirectory (murmurhash)
add_subdirectory (replxx-cmake)
add_subdirectory (ryu-cmake)
@ -33,14 +38,6 @@ add_subdirectory (poco-cmake)
# TODO: refactor the contrib libraries below this comment.
if (USE_INTERNAL_BOOST_LIBRARY)
add_subdirectory (boost-cmake)
endif ()
if (USE_INTERNAL_LZ4_LIBRARY)
add_subdirectory (lz4-cmake)
endif ()
if (USE_INTERNAL_ZSTD_LIBRARY)
add_subdirectory (zstd-cmake)
endif ()
@ -63,10 +60,6 @@ if (USE_INTERNAL_FARMHASH_LIBRARY)
add_subdirectory (libfarmhash)
endif ()
if (USE_INTERNAL_METROHASH_LIBRARY)
add_subdirectory (libmetrohash)
endif ()
if (USE_INTERNAL_BTRIE_LIBRARY)
add_subdirectory (libbtrie)
endif ()
@ -294,18 +287,6 @@ if (USE_BASE64)
add_subdirectory (base64-cmake)
endif()
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
# The library is large - avoid bloat.
if (USE_STATIC_LIBRARIES)
add_subdirectory (hyperscan)
target_compile_options (hs PRIVATE -g0)
else ()
set(BUILD_SHARED_LIBS 1 CACHE INTERNAL "")
add_subdirectory (hyperscan)
target_compile_options (hs_shared PRIVATE -g0)
endif ()
endif()
if (USE_SIMDJSON)
add_subdirectory (simdjson-cmake)
endif()
@ -314,4 +295,10 @@ if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
if (USE_CASSANDRA)
add_subdirectory (libuv)
add_subdirectory (cassandra)
endif()
add_subdirectory (fmtlib-cmake)

View File

@ -47,7 +47,8 @@ set(thriftcpp_threads_SOURCES
)
add_library(${THRIFT_LIBRARY} ${thriftcpp_SOURCES} ${thriftcpp_threads_SOURCES})
set_target_properties(${THRIFT_LIBRARY} PROPERTIES CXX_STANDARD 14) # REMOVE after https://github.com/apache/thrift/pull/1641
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(${THRIFT_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src)
target_link_libraries (${THRIFT_LIBRARY} PRIVATE boost::headers_only)
# === orc
@ -286,10 +287,6 @@ set(ARROW_SRCS ${ARROW_SRCS}
${LIBRARY_DIR}/compute/kernels/util_internal.cc
)
if (LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
set(ARROW_WITH_LZ4 1)
endif ()
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
set(ARROW_WITH_SNAPPY 1)
endif ()
@ -302,10 +299,8 @@ if (ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY)
set(ARROW_WITH_ZSTD 1)
endif ()
if (ARROW_WITH_LZ4)
add_definitions(-DARROW_WITH_LZ4)
SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
endif ()
add_definitions(-DARROW_WITH_LZ4)
SET(ARROW_SRCS ${LIBRARY_DIR}/util/compression_lz4.cc ${ARROW_SRCS})
if (ARROW_WITH_SNAPPY)
add_definitions(-DARROW_WITH_SNAPPY)
@ -328,18 +323,15 @@ add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
# Arrow dependencies
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)
target_link_libraries(${ARROW_LIBRARY} PRIVATE boost_system_internal boost_filesystem_internal boost_regex_internal)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
if (USE_INTERNAL_PROTOBUF_LIBRARY)
add_dependencies(${ARROW_LIBRARY} protoc)
endif ()
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})
if (ARROW_WITH_LZ4)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY})
endif ()
target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4)
if (ARROW_WITH_SNAPPY)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY})
endif ()
@ -396,8 +388,7 @@ list(APPEND PARQUET_SRCS
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} ${Boost_REGEX_LIBRARY})
target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS})
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex)
if (SANITIZE STREQUAL "undefined")
target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)

View File

@ -45,13 +45,12 @@ set_target_properties (avrocpp PROPERTIES VERSION ${AVRO_VERSION_MAJOR}.${AVRO_V
target_include_directories(avrocpp SYSTEM PUBLIC ${AVROCPP_INCLUDE_DIR})
target_include_directories(avrocpp SYSTEM PUBLIC ${Boost_INCLUDE_DIRS})
target_link_libraries (avrocpp ${Boost_IOSTREAMS_LIBRARY})
target_link_libraries (avrocpp PRIVATE boost::headers_only boost::iostreams)
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
target_compile_definitions (avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE)
target_include_directories (avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR})
target_link_libraries (avrocpp ${SNAPPY_LIBRARY})
target_link_libraries (avrocpp PRIVATE ${SNAPPY_LIBRARY})
endif ()
if (COMPILER_GCC)

2
contrib/aws vendored

@ -1 +1 @@
Subproject commit fb5c604525f5151d75a856462653e7e38b559b79
Subproject commit 17e10c0fc77f22afe890fa6d1b283760e5edaa56

View File

@ -1,45 +1,133 @@
# Supported contrib/boost source variants:
# 1. Default - Minimized vrsion from release archive : https://github.com/ClickHouse-Extras/boost
# 2. Release archive unpacked to contrib/boost
# 3. Full boost https://github.com/boostorg/boost
option (USE_INTERNAL_BOOST_LIBRARY "Use internal Boost library" ${NOT_UNBUNDLED})
# if boostorg/boost connected as submodule: Update all boost internal submodules to tag:
# git submodule foreach "git fetch --all && git checkout boost-1.66.0 || true"
if (USE_INTERNAL_BOOST_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
#
# Important boost patch: 094c18b
#
# filesystem
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
set (SRCS_FILESYSTEM
${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp
${LIBRARY_DIR}/libs/filesystem/src/operations.cpp
${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp
${LIBRARY_DIR}/libs/filesystem/src/path.cpp
${LIBRARY_DIR}/libs/filesystem/src/portability.cpp
${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp
${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp
${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp
)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
add_library (_boost_filesystem ${SRCS_FILESYSTEM})
add_library (boost::filesystem ALIAS _boost_filesystem)
target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
if(NOT MSVC)
add_definitions(-Wno-unused-variable -Wno-deprecated-declarations)
endif()
# headers-only
macro(add_boost_lib lib_name)
add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src)
add_library(boost_${lib_name}_internal ${boost_${lib_name}_sources})
target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
endmacro()
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})
add_boost_lib(system)
# iostreams
add_boost_lib(program_options)
set (SRCS_IOSTREAMS
${LIBRARY_DIR}/libs/iostreams/src/file_descriptor.cpp
${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp
${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp
${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp
)
add_boost_lib(filesystem)
target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal)
add_library (_boost_iostreams ${SRCS_IOSTREAMS})
add_library (boost::iostreams ALIAS _boost_iostreams)
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
target_link_libraries (_boost_iostreams PRIVATE zlib)
#add_boost_lib(random)
# program_options
if (USE_INTERNAL_PARQUET_LIBRARY)
add_boost_lib(regex)
endif()
set (SRCS_PROGRAM_OPTIONS
${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp
${LIBRARY_DIR}/libs/program_options/src/config_file.cpp
${LIBRARY_DIR}/libs/program_options/src/convert.cpp
${LIBRARY_DIR}/libs/program_options/src/options_description.cpp
${LIBRARY_DIR}/libs/program_options/src/parsers.cpp
${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp
${LIBRARY_DIR}/libs/program_options/src/split.cpp
${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp
${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp
${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp
${LIBRARY_DIR}/libs/program_options/src/winmain.cpp
)
if (USE_INTERNAL_AVRO_LIBRARY)
add_boost_lib(iostreams)
target_link_libraries(boost_iostreams_internal PUBLIC ${ZLIB_LIBRARIES})
target_include_directories(boost_iostreams_internal SYSTEM BEFORE PRIVATE ${ZLIB_INCLUDE_DIR})
endif()
add_library (_boost_program_options ${SRCS_PROGRAM_OPTIONS})
add_library (boost::program_options ALIAS _boost_program_options)
target_include_directories (_boost_program_options SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
# regex
set (SRCS_REGEX
${LIBRARY_DIR}/libs/regex/src/c_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/cpp_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/cregex.cpp
${LIBRARY_DIR}/libs/regex/src/fileiter.cpp
${LIBRARY_DIR}/libs/regex/src/icu.cpp
${LIBRARY_DIR}/libs/regex/src/instances.cpp
${LIBRARY_DIR}/libs/regex/src/internals.hpp
${LIBRARY_DIR}/libs/regex/src/posix_api.cpp
${LIBRARY_DIR}/libs/regex/src/regex_debug.cpp
${LIBRARY_DIR}/libs/regex/src/regex_raw_buffer.cpp
${LIBRARY_DIR}/libs/regex/src/regex_traits_defaults.cpp
${LIBRARY_DIR}/libs/regex/src/regex.cpp
${LIBRARY_DIR}/libs/regex/src/static_mutex.cpp
${LIBRARY_DIR}/libs/regex/src/usinstances.cpp
${LIBRARY_DIR}/libs/regex/src/w32_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/wc_regex_traits.cpp
${LIBRARY_DIR}/libs/regex/src/wide_posix_api.cpp
${LIBRARY_DIR}/libs/regex/src/winstances.cpp
)
add_library (_boost_regex ${SRCS_REGEX})
add_library (boost::regex ALIAS _boost_regex)
target_include_directories (_boost_regex PRIVATE ${LIBRARY_DIR})
# system
set (SRCS_SYSTEM
${LIBRARY_DIR}/libs/system/src/error_code.cpp
)
add_library (_boost_system ${SRCS_SYSTEM})
add_library (boost::system ALIAS _boost_system)
target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
else ()
# 1.70 like in contrib/boost
# 1.67 on CI
set(BOOST_VERSION 1.67)
find_package(Boost ${BOOST_VERSION} COMPONENTS
system
filesystem
iostreams
program_options
regex
REQUIRED)
add_library (_boost_headers_only INTERFACE)
add_library (boost::headers_only ALIAS _boost_headers_only)
target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR})
add_library (_boost_filesystem INTERFACE)
add_library (_boost_iostreams INTERFACE)
add_library (_boost_program_options INTERFACE)
add_library (_boost_regex INTERFACE)
add_library (_boost_system INTERFACE)
target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY})
target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY})
target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
add_library (boost::filesystem ALIAS _boost_filesystem)
add_library (boost::iostreams ALIAS _boost_iostreams)
add_library (boost::program_options ALIAS _boost_program_options)
add_library (boost::regex ALIAS _boost_regex)
add_library (boost::system ALIAS _boost_system)
endif ()

1
contrib/cassandra vendored Submodule

@ -0,0 +1 @@
Subproject commit a49b4e0e2696a4b8ef286a5b9538d1cbe8490509

View File

@ -1,31 +1,33 @@
set(CPPKAFKA_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cppkafka)
set(SRCS
${CPPKAFKA_DIR}/src/configuration.cpp
${CPPKAFKA_DIR}/src/topic_configuration.cpp
${CPPKAFKA_DIR}/src/configuration_option.cpp
${CPPKAFKA_DIR}/src/exceptions.cpp
${CPPKAFKA_DIR}/src/topic.cpp
${CPPKAFKA_DIR}/src/buffer.cpp
${CPPKAFKA_DIR}/src/queue.cpp
${CPPKAFKA_DIR}/src/message.cpp
${CPPKAFKA_DIR}/src/message_timestamp.cpp
${CPPKAFKA_DIR}/src/message_internal.cpp
${CPPKAFKA_DIR}/src/topic_partition.cpp
${CPPKAFKA_DIR}/src/topic_partition_list.cpp
${CPPKAFKA_DIR}/src/metadata.cpp
${CPPKAFKA_DIR}/src/group_information.cpp
${CPPKAFKA_DIR}/src/error.cpp
${CPPKAFKA_DIR}/src/event.cpp
${CPPKAFKA_DIR}/src/kafka_handle_base.cpp
${CPPKAFKA_DIR}/src/producer.cpp
${CPPKAFKA_DIR}/src/consumer.cpp
${LIBRARY_DIR}/src/buffer.cpp
${LIBRARY_DIR}/src/configuration_option.cpp
${LIBRARY_DIR}/src/configuration.cpp
${LIBRARY_DIR}/src/consumer.cpp
${LIBRARY_DIR}/src/error.cpp
${LIBRARY_DIR}/src/event.cpp
${LIBRARY_DIR}/src/exceptions.cpp
${LIBRARY_DIR}/src/group_information.cpp
${LIBRARY_DIR}/src/kafka_handle_base.cpp
${LIBRARY_DIR}/src/message_internal.cpp
${LIBRARY_DIR}/src/message_timestamp.cpp
${LIBRARY_DIR}/src/message.cpp
${LIBRARY_DIR}/src/metadata.cpp
${LIBRARY_DIR}/src/producer.cpp
${LIBRARY_DIR}/src/queue.cpp
${LIBRARY_DIR}/src/topic_configuration.cpp
${LIBRARY_DIR}/src/topic_partition_list.cpp
${LIBRARY_DIR}/src/topic_partition.cpp
${LIBRARY_DIR}/src/topic.cpp
)
add_library(cppkafka ${SRCS})
target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY})
target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka)
target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include)
target_link_libraries(cppkafka
PRIVATE
${RDKAFKA_LIBRARY}
boost::headers_only
)
target_include_directories(cppkafka PRIVATE ${LIBRARY_DIR}/include/cppkafka)
target_include_directories(cppkafka SYSTEM BEFORE PUBLIC ${LIBRARY_DIR}/include)

View File

@ -0,0 +1,250 @@
option (ENABLE_HYPERSCAN "Enable hyperscan library" ${ENABLE_LIBRARIES})
if (NOT HAVE_SSSE3)
set (ENABLE_HYPERSCAN OFF)
endif ()
if (ENABLE_HYPERSCAN)
option (USE_INTERNAL_HYPERSCAN_LIBRARY "Use internal hyperscan library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_HYPERSCAN_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan)
set (SRCS
${LIBRARY_DIR}/src/alloc.c
${LIBRARY_DIR}/src/compiler/asserts.cpp
${LIBRARY_DIR}/src/compiler/compiler.cpp
${LIBRARY_DIR}/src/compiler/error.cpp
${LIBRARY_DIR}/src/crc32.c
${LIBRARY_DIR}/src/database.c
${LIBRARY_DIR}/src/fdr/engine_description.cpp
${LIBRARY_DIR}/src/fdr/fdr_compile_util.cpp
${LIBRARY_DIR}/src/fdr/fdr_compile.cpp
${LIBRARY_DIR}/src/fdr/fdr_confirm_compile.cpp
${LIBRARY_DIR}/src/fdr/fdr_engine_description.cpp
${LIBRARY_DIR}/src/fdr/fdr.c
${LIBRARY_DIR}/src/fdr/flood_compile.cpp
${LIBRARY_DIR}/src/fdr/teddy_compile.cpp
${LIBRARY_DIR}/src/fdr/teddy_engine_description.cpp
${LIBRARY_DIR}/src/fdr/teddy.c
${LIBRARY_DIR}/src/grey.cpp
${LIBRARY_DIR}/src/hs_valid_platform.c
${LIBRARY_DIR}/src/hs_version.c
${LIBRARY_DIR}/src/hs.cpp
${LIBRARY_DIR}/src/hwlm/hwlm_build.cpp
${LIBRARY_DIR}/src/hwlm/hwlm_literal.cpp
${LIBRARY_DIR}/src/hwlm/hwlm.c
${LIBRARY_DIR}/src/hwlm/noodle_build.cpp
${LIBRARY_DIR}/src/hwlm/noodle_engine.c
${LIBRARY_DIR}/src/nfa/accel_dfa_build_strat.cpp
${LIBRARY_DIR}/src/nfa/accel.c
${LIBRARY_DIR}/src/nfa/accelcompile.cpp
${LIBRARY_DIR}/src/nfa/castle.c
${LIBRARY_DIR}/src/nfa/castlecompile.cpp
${LIBRARY_DIR}/src/nfa/dfa_build_strat.cpp
${LIBRARY_DIR}/src/nfa/dfa_min.cpp
${LIBRARY_DIR}/src/nfa/gough.c
${LIBRARY_DIR}/src/nfa/goughcompile_accel.cpp
${LIBRARY_DIR}/src/nfa/goughcompile_reg.cpp
${LIBRARY_DIR}/src/nfa/goughcompile.cpp
${LIBRARY_DIR}/src/nfa/lbr.c
${LIBRARY_DIR}/src/nfa/limex_64.c
${LIBRARY_DIR}/src/nfa/limex_accel.c
${LIBRARY_DIR}/src/nfa/limex_compile.cpp
${LIBRARY_DIR}/src/nfa/limex_native.c
${LIBRARY_DIR}/src/nfa/limex_simd128.c
${LIBRARY_DIR}/src/nfa/limex_simd256.c
${LIBRARY_DIR}/src/nfa/limex_simd384.c
${LIBRARY_DIR}/src/nfa/limex_simd512.c
${LIBRARY_DIR}/src/nfa/mcclellan.c
${LIBRARY_DIR}/src/nfa/mcclellancompile_util.cpp
${LIBRARY_DIR}/src/nfa/mcclellancompile.cpp
${LIBRARY_DIR}/src/nfa/mcsheng_compile.cpp
${LIBRARY_DIR}/src/nfa/mcsheng_data.c
${LIBRARY_DIR}/src/nfa/mcsheng.c
${LIBRARY_DIR}/src/nfa/mpv.c
${LIBRARY_DIR}/src/nfa/mpvcompile.cpp
${LIBRARY_DIR}/src/nfa/nfa_api_dispatch.c
${LIBRARY_DIR}/src/nfa/nfa_build_util.cpp
${LIBRARY_DIR}/src/nfa/rdfa_graph.cpp
${LIBRARY_DIR}/src/nfa/rdfa_merge.cpp
${LIBRARY_DIR}/src/nfa/rdfa.cpp
${LIBRARY_DIR}/src/nfa/repeat.c
${LIBRARY_DIR}/src/nfa/repeatcompile.cpp
${LIBRARY_DIR}/src/nfa/sheng.c
${LIBRARY_DIR}/src/nfa/shengcompile.cpp
${LIBRARY_DIR}/src/nfa/shufti.c
${LIBRARY_DIR}/src/nfa/shufticompile.cpp
${LIBRARY_DIR}/src/nfa/tamarama.c
${LIBRARY_DIR}/src/nfa/tamaramacompile.cpp
${LIBRARY_DIR}/src/nfa/truffle.c
${LIBRARY_DIR}/src/nfa/trufflecompile.cpp
${LIBRARY_DIR}/src/nfagraph/ng_anchored_acyclic.cpp
${LIBRARY_DIR}/src/nfagraph/ng_anchored_dots.cpp
${LIBRARY_DIR}/src/nfagraph/ng_asserts.cpp
${LIBRARY_DIR}/src/nfagraph/ng_builder.cpp
${LIBRARY_DIR}/src/nfagraph/ng_calc_components.cpp
${LIBRARY_DIR}/src/nfagraph/ng_cyclic_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_depth.cpp
${LIBRARY_DIR}/src/nfagraph/ng_dominators.cpp
${LIBRARY_DIR}/src/nfagraph/ng_edge_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_equivalence.cpp
${LIBRARY_DIR}/src/nfagraph/ng_execute.cpp
${LIBRARY_DIR}/src/nfagraph/ng_expr_info.cpp
${LIBRARY_DIR}/src/nfagraph/ng_extparam.cpp
${LIBRARY_DIR}/src/nfagraph/ng_fixed_width.cpp
${LIBRARY_DIR}/src/nfagraph/ng_fuzzy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_haig.cpp
${LIBRARY_DIR}/src/nfagraph/ng_holder.cpp
${LIBRARY_DIR}/src/nfagraph/ng_is_equal.cpp
${LIBRARY_DIR}/src/nfagraph/ng_lbr.cpp
${LIBRARY_DIR}/src/nfagraph/ng_limex_accel.cpp
${LIBRARY_DIR}/src/nfagraph/ng_limex.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_analysis.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_component.cpp
${LIBRARY_DIR}/src/nfagraph/ng_literal_decorated.cpp
${LIBRARY_DIR}/src/nfagraph/ng_mcclellan.cpp
${LIBRARY_DIR}/src/nfagraph/ng_misc_opt.cpp
${LIBRARY_DIR}/src/nfagraph/ng_netflow.cpp
${LIBRARY_DIR}/src/nfagraph/ng_prefilter.cpp
${LIBRARY_DIR}/src/nfagraph/ng_prune.cpp
${LIBRARY_DIR}/src/nfagraph/ng_puff.cpp
${LIBRARY_DIR}/src/nfagraph/ng_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_region_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_region.cpp
${LIBRARY_DIR}/src/nfagraph/ng_repeat.cpp
${LIBRARY_DIR}/src/nfagraph/ng_reports.cpp
${LIBRARY_DIR}/src/nfagraph/ng_restructuring.cpp
${LIBRARY_DIR}/src/nfagraph/ng_revacc.cpp
${LIBRARY_DIR}/src/nfagraph/ng_sep.cpp
${LIBRARY_DIR}/src/nfagraph/ng_small_literal_set.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som_add_redundancy.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som_util.cpp
${LIBRARY_DIR}/src/nfagraph/ng_som.cpp
${LIBRARY_DIR}/src/nfagraph/ng_split.cpp
${LIBRARY_DIR}/src/nfagraph/ng_squash.cpp
${LIBRARY_DIR}/src/nfagraph/ng_stop.cpp
${LIBRARY_DIR}/src/nfagraph/ng_uncalc_components.cpp
${LIBRARY_DIR}/src/nfagraph/ng_utf8.cpp
${LIBRARY_DIR}/src/nfagraph/ng_util.cpp
${LIBRARY_DIR}/src/nfagraph/ng_vacuous.cpp
${LIBRARY_DIR}/src/nfagraph/ng_violet.cpp
${LIBRARY_DIR}/src/nfagraph/ng_width.cpp
${LIBRARY_DIR}/src/nfagraph/ng.cpp
${LIBRARY_DIR}/src/parser/AsciiComponentClass.cpp
${LIBRARY_DIR}/src/parser/buildstate.cpp
${LIBRARY_DIR}/src/parser/check_refs.cpp
${LIBRARY_DIR}/src/parser/Component.cpp
${LIBRARY_DIR}/src/parser/ComponentAlternation.cpp
${LIBRARY_DIR}/src/parser/ComponentAssertion.cpp
${LIBRARY_DIR}/src/parser/ComponentAtomicGroup.cpp
${LIBRARY_DIR}/src/parser/ComponentBackReference.cpp
${LIBRARY_DIR}/src/parser/ComponentBoundary.cpp
${LIBRARY_DIR}/src/parser/ComponentByte.cpp
${LIBRARY_DIR}/src/parser/ComponentClass.cpp
${LIBRARY_DIR}/src/parser/ComponentCondReference.cpp
${LIBRARY_DIR}/src/parser/ComponentEmpty.cpp
${LIBRARY_DIR}/src/parser/ComponentEUS.cpp
${LIBRARY_DIR}/src/parser/ComponentRepeat.cpp
${LIBRARY_DIR}/src/parser/ComponentSequence.cpp
${LIBRARY_DIR}/src/parser/ComponentVisitor.cpp
${LIBRARY_DIR}/src/parser/ComponentWordBoundary.cpp
${LIBRARY_DIR}/src/parser/ConstComponentVisitor.cpp
${LIBRARY_DIR}/src/parser/control_verbs.cpp
${LIBRARY_DIR}/src/parser/logical_combination.cpp
${LIBRARY_DIR}/src/parser/parse_error.cpp
${LIBRARY_DIR}/src/parser/parser_util.cpp
${LIBRARY_DIR}/src/parser/Parser.cpp
${LIBRARY_DIR}/src/parser/prefilter.cpp
${LIBRARY_DIR}/src/parser/shortcut_literal.cpp
${LIBRARY_DIR}/src/parser/ucp_table.cpp
${LIBRARY_DIR}/src/parser/unsupported.cpp
${LIBRARY_DIR}/src/parser/utf8_validate.cpp
${LIBRARY_DIR}/src/parser/Utf8ComponentClass.cpp
${LIBRARY_DIR}/src/rose/block.c
${LIBRARY_DIR}/src/rose/catchup.c
${LIBRARY_DIR}/src/rose/init.c
${LIBRARY_DIR}/src/rose/match.c
${LIBRARY_DIR}/src/rose/program_runtime.c
${LIBRARY_DIR}/src/rose/rose_build_add_mask.cpp
${LIBRARY_DIR}/src/rose/rose_build_add.cpp
${LIBRARY_DIR}/src/rose/rose_build_anchored.cpp
${LIBRARY_DIR}/src/rose/rose_build_bytecode.cpp
${LIBRARY_DIR}/src/rose/rose_build_castle.cpp
${LIBRARY_DIR}/src/rose/rose_build_compile.cpp
${LIBRARY_DIR}/src/rose/rose_build_convert.cpp
${LIBRARY_DIR}/src/rose/rose_build_dedupe.cpp
${LIBRARY_DIR}/src/rose/rose_build_engine_blob.cpp
${LIBRARY_DIR}/src/rose/rose_build_exclusive.cpp
${LIBRARY_DIR}/src/rose/rose_build_groups.cpp
${LIBRARY_DIR}/src/rose/rose_build_infix.cpp
${LIBRARY_DIR}/src/rose/rose_build_instructions.cpp
${LIBRARY_DIR}/src/rose/rose_build_lit_accel.cpp
${LIBRARY_DIR}/src/rose/rose_build_long_lit.cpp
${LIBRARY_DIR}/src/rose/rose_build_lookaround.cpp
${LIBRARY_DIR}/src/rose/rose_build_matchers.cpp
${LIBRARY_DIR}/src/rose/rose_build_merge.cpp
${LIBRARY_DIR}/src/rose/rose_build_misc.cpp
${LIBRARY_DIR}/src/rose/rose_build_program.cpp
${LIBRARY_DIR}/src/rose/rose_build_role_aliasing.cpp
${LIBRARY_DIR}/src/rose/rose_build_scatter.cpp
${LIBRARY_DIR}/src/rose/rose_build_width.cpp
${LIBRARY_DIR}/src/rose/rose_in_util.cpp
${LIBRARY_DIR}/src/rose/stream.c
${LIBRARY_DIR}/src/runtime.c
${LIBRARY_DIR}/src/scratch.c
${LIBRARY_DIR}/src/smallwrite/smallwrite_build.cpp
${LIBRARY_DIR}/src/som/slot_manager.cpp
${LIBRARY_DIR}/src/som/som_runtime.c
${LIBRARY_DIR}/src/som/som_stream.c
${LIBRARY_DIR}/src/stream_compress.c
${LIBRARY_DIR}/src/util/alloc.cpp
${LIBRARY_DIR}/src/util/charreach.cpp
${LIBRARY_DIR}/src/util/clique.cpp
${LIBRARY_DIR}/src/util/compile_context.cpp
${LIBRARY_DIR}/src/util/compile_error.cpp
${LIBRARY_DIR}/src/util/cpuid_flags.c
${LIBRARY_DIR}/src/util/depth.cpp
${LIBRARY_DIR}/src/util/fatbit_build.cpp
${LIBRARY_DIR}/src/util/multibit_build.cpp
${LIBRARY_DIR}/src/util/multibit.c
${LIBRARY_DIR}/src/util/report_manager.cpp
${LIBRARY_DIR}/src/util/simd_utils.c
${LIBRARY_DIR}/src/util/state_compress.c
${LIBRARY_DIR}/src/util/target_info.cpp
${LIBRARY_DIR}/src/util/ue2string.cpp
)
add_library (hyperscan ${SRCS})
target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
target_compile_options (hyperscan
PRIVATE -g0 -march=corei7 # library has too much debug information
)
target_include_directories (hyperscan
PRIVATE
common
${LIBRARY_DIR}/include
)
target_include_directories (hyperscan SYSTEM PUBLIC ${LIBRARY_DIR}/src)
if (ARCH_AMD64)
target_include_directories (hyperscan PRIVATE x86_64)
endif ()
target_link_libraries (hyperscan PRIVATE boost::headers_only)
else ()
find_library (LIBRARY_HYPERSCAN hs)
find_path (INCLUDE_HYPERSCAN NAMES hs.h HINTS /usr/include/hs) # Ubuntu puts headers in this folder
add_library (hyperscan UNKNOWN IMPORTED GLOBAL)
set_target_properties (hyperscan PROPERTIES IMPORTED_LOCATION ${LIBRARY_HYPERSCAN})
set_target_properties (hyperscan PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_HYPERSCAN})
set_property(TARGET hyperscan APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_HYPERSCAN=1)
endif ()
message (STATUS "Using hyperscan")
else ()
add_library (hyperscan INTERFACE)
target_compile_definitions (hyperscan INTERFACE USE_HYPERSCAN=0)
message (STATUS "Not using hyperscan")
endif ()

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2015, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef HS_VERSION_H_C6428FAF8E3713
#define HS_VERSION_H_C6428FAF8E3713
/**
* A version string to identify this release of Hyperscan.
*/
#define HS_VERSION_STRING "5.1.1 2000-01-01"
#define HS_VERSION_32BIT ((5 << 24) | (1 << 16) | (1 << 8) | 0)
#endif /* HS_VERSION_H_C6428FAF8E3713 */

View File

@ -0,0 +1,106 @@
/* used by cmake */
#ifndef CONFIG_H_
#define CONFIG_H_
/* "Define if the build is 32 bit" */
/* #undef ARCH_32_BIT */
/* "Define if the build is 64 bit" */
#define ARCH_64_BIT
/* "Define if building for IA32" */
/* #undef ARCH_IA32 */
/* "Define if building for EM64T" */
#define ARCH_X86_64
/* internal build, switch on dump support. */
/* #undef DUMP_SUPPORT */
/* Define if building "fat" runtime. */
/* #undef FAT_RUNTIME */
/* Define if building AVX-512 in the fat runtime. */
/* #undef BUILD_AVX512 */
/* Define to 1 if `backtrace' works. */
#define HAVE_BACKTRACE
/* C compiler has __builtin_assume_aligned */
#define HAVE_CC_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has __builtin_assume_aligned */
#define HAVE_CXX_BUILTIN_ASSUME_ALIGNED
/* C++ compiler has x86intrin.h */
#define HAVE_CXX_X86INTRIN_H
/* C compiler has x86intrin.h */
#define HAVE_C_X86INTRIN_H
/* C++ compiler has intrin.h */
/* #undef HAVE_CXX_INTRIN_H */
/* C compiler has intrin.h */
/* #undef HAVE_C_INTRIN_H */
/* Define to 1 if you have the declaration of `pthread_setaffinity_np', and to
0 if you don't. */
/* #undef HAVE_DECL_PTHREAD_SETAFFINITY_NP */
/* #undef HAVE_PTHREAD_NP_H */
/* Define to 1 if you have the `malloc_info' function. */
/* #undef HAVE_MALLOC_INFO */
/* Define to 1 if you have the `memmem' function. */
/* #undef HAVE_MEMMEM */
/* Define to 1 if you have a working `mmap' system call. */
#define HAVE_MMAP
/* Define to 1 if `posix_memalign' works. */
#define HAVE_POSIX_MEMALIGN
/* Define to 1 if you have the `setrlimit' function. */
#define HAVE_SETRLIMIT
/* Define to 1 if you have the `shmget' function. */
/* #undef HAVE_SHMGET */
/* Define to 1 if you have the `sigaction' function. */
#define HAVE_SIGACTION
/* Define to 1 if you have the `sigaltstack' function. */
#define HAVE_SIGALTSTACK
/* Define if the sqlite3_open_v2 call is available */
/* #undef HAVE_SQLITE3_OPEN_V2 */
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H
/* Define to 1 if you have the `_aligned_malloc' function. */
/* #undef HAVE__ALIGNED_MALLOC */
/* Define if compiler has __builtin_constant_p */
#define HAVE__BUILTIN_CONSTANT_P
/* Optimize, inline critical functions */
#define HS_OPTIMIZE
#define HS_VERSION
#define HS_MAJOR_VERSION
#define HS_MINOR_VERSION
#define HS_PATCH_VERSION
#define BUILD_DATE
/* define if this is a release build. */
#define RELEASE_BUILD
/* define if reverse_graph requires patch for boost 1.62.0 */
/* #undef BOOST_REVGRAPH_PATCH */
#endif /* CONFIG_H_ */

View File

@ -0,0 +1,2 @@
add_library (libdivide INTERFACE)
target_include_directories (libdivide SYSTEM BEFORE INTERFACE .)

View File

@ -209,9 +209,8 @@ endif()
target_link_libraries(hdfs3 PRIVATE ${LIBXML2_LIBRARY})
# inherit from parent cmake
target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY})
target_link_libraries(hdfs3 PRIVATE ${Protobuf_LIBRARY} boost::headers_only)
if(OPENSSL_INCLUDE_DIR AND OPENSSL_LIBRARIES)
target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR})
target_link_libraries(hdfs3 PRIVATE ${OPENSSL_LIBRARIES})

View File

@ -1,13 +1,10 @@
if (HAVE_SSE42) # Not used. Pretty easy to port.
set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h)
endif ()
add_library(metrohash
src/metrohash.h
src/testvector.h
set (SRCS
src/metrohash64.cpp
src/metrohash128.cpp
${SOURCES_SSE42_ONLY})
)
if (HAVE_SSE42) # Not used. Pretty easy to port.
list (APPEND SRCS src/metrohash128crc.cpp)
endif ()
target_include_directories(metrohash PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src)
add_library(metrohash ${SRCS})
target_include_directories(metrohash PUBLIC src)

View File

@ -82,7 +82,7 @@ target_compile_options(rdkafka PRIVATE -fno-sanitize=undefined)
target_include_directories(rdkafka SYSTEM PUBLIC include)
target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) # Because weird logic with "include_next" is used.
target_include_directories(rdkafka SYSTEM PRIVATE ${ZSTD_INCLUDE_DIR}/common) # Because wrong path to "zstd_errors.h" is used.
target_link_libraries(rdkafka PRIVATE ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LZ4_LIBRARY} ${LIBGSASL_LIBRARY})
target_link_libraries(rdkafka PRIVATE lz4 ${ZLIB_LIBRARIES} ${ZSTD_LIBRARY} ${LIBGSASL_LIBRARY})
if(OPENSSL_SSL_LIBRARY AND OPENSSL_CRYPTO_LIBRARY)
target_link_libraries(rdkafka PRIVATE ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
endif()

1
contrib/libuv vendored Submodule

@ -0,0 +1 @@
Subproject commit 84438304f41d8ea6670ee5409f4d6c63ca784f28

View File

@ -1,17 +1,28 @@
SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
option (USE_INTERNAL_LZ4_LIBRARY "Use internal lz4 library" ${NOT_UNBUNDLED})
add_library (lz4
${LIBRARY_DIR}/lz4.c
${LIBRARY_DIR}/lz4hc.c
${LIBRARY_DIR}/lz4frame.c
${LIBRARY_DIR}/lz4frame.h
${LIBRARY_DIR}/xxhash.c
${LIBRARY_DIR}/xxhash.h
if (USE_INTERNAL_LZ4_LIBRARY)
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4)
${LIBRARY_DIR}/lz4.h
${LIBRARY_DIR}/lz4hc.h)
set (SRCS
${LIBRARY_DIR}/lib/lz4.c
${LIBRARY_DIR}/lib/lz4hc.c
${LIBRARY_DIR}/lib/lz4frame.c
${LIBRARY_DIR}/lib/xxhash.c
)
target_compile_definitions(lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1)
target_compile_options(lz4 PRIVATE -fno-sanitize=undefined)
add_library (lz4 ${SRCS})
target_include_directories(lz4 PUBLIC ${LIBRARY_DIR})
target_compile_definitions (lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1 USE_XXHASH=1)
if (SANITIZE STREQUAL "undefined")
target_compile_options (lz4 PRIVATE -fno-sanitize=undefined)
endif ()
target_include_directories(lz4 PUBLIC ${LIBRARY_DIR}/lib)
else ()
find_library (LIBRARY_LZ4 lz4)
find_path (INCLUDE_LZ4 lz4.h)
add_library (lz4 UNKNOWN IMPORTED)
set_property (TARGET lz4 PROPERTY IMPORTED_LOCATION ${LIBRARY_LZ4})
set_property (TARGET lz4 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${INCLUDE_LZ4})
set_property (TARGET lz4 APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_XXHASH=0)
endif ()

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit f1332626639d6492eaf170758642da14fbbda7bf
Subproject commit 2d37daaad24be71e76514a36b0a47120be2f9086

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
RUN apt-get update \

View File

@ -18,8 +18,7 @@ ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
ninja
ccache --show-stats ||:
ninja clickhouse-bundle
mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output
find . -name '*.so' -print -exec mv '{}' /output \;
@ -47,3 +46,4 @@ then
rm -r /output/*
mv "$COMBINED_OUTPUT.tgz" /output
fi
ccache --show-stats ||:

View File

@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \
libboost-system-dev \
libboost-filesystem-dev \
libboost-thread-dev \
libboost-iostreams-dev \
libboost-regex-dev \
zlib1g-dev \
liblz4-dev \
libdouble-conversion-dev \
@ -82,8 +84,8 @@ RUN apt-get --allow-unauthenticated update -y \
libcctz-dev \
libldap2-dev \
libsasl2-dev \
heimdal-multidev
heimdal-multidev \
libhyperscan-dev
# This symlink required by gcc to find lld compiler

View File

@ -120,6 +120,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
result.append("CCACHE_BASEDIR=/build")
result.append("CCACHE_NOHASHDIR=true")
result.append("CCACHE_COMPILERCHECK=content")
result.append("CCACHE_MAXSIZE=15G")
# result.append("CCACHE_UMASK=777")
if distcc_hosts:

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
ARG gosu_ver=1.10

View File

@ -94,7 +94,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries, with 1 sec delay)
if ! wget --spider --quiet --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
if ! wget --spider --quiet --prefer-family=IPv6 --tries=12 --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
"${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do

View File

@ -1,6 +1,6 @@
FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/"
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.*
RUN apt-get update && \

View File

@ -0,0 +1,7 @@
version: '2.3'
services:
cassandra1:
image: cassandra
restart: always
ports:
- 9043:9042

View File

@ -43,7 +43,10 @@ services:
# Empty container to run proxy resolver.
resolver:
image: python:3
build:
context: ../../../docker/test/integration/
dockerfile: resolver/Dockerfile
network: host
ports:
- "4083:8080"
tty: true

View File

@ -0,0 +1,4 @@
# Helper docker container to run python bottle apps
FROM python:3
RUN python -m pip install bottle

View File

@ -104,13 +104,12 @@ function run_tests
# allows the tests to pass even when we add new functions and tests for
# them, that are not supported in the old revision.
test_prefix=left/performance
elif [ "$PR_TO_TEST" != "" ] && [ "$PR_TO_TEST" != "0" ]
then
else
# For PRs, use newer test files so we can test these changes.
test_prefix=right/performance
# If some tests were changed in the PR, we may want to run only these
# ones. The list of changed tests in changed-test.txt is prepared in
# If only the perf tests were changed in the PR, we will run only these
# tests. The list of changed tests in changed-test.txt is prepared in
# entrypoint.sh from git diffs, because it has the cloned repo. Used
# to use rsync for that but it was really ugly and not always correct
# (e.g. when the reference SHA is really old and has some other
@ -199,12 +198,14 @@ function get_profiles
clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: &
wait
@ -348,9 +349,11 @@ create table query_metric_stats engine File(TSVWithNamesAndTypes,
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
-- immediately, so for now we pretend they don't exist. We don't want to
-- remove them altogether because we want to be able to detect regressions,
-- but the right way to do this is not yet clear.
-- immediately (on the same order of time as noise). We compute average
-- run time between old and new version, and if it is below a threshold,
-- we just skip the query. If there is a significant regression, the
-- average will be above threshold, we'll process it normally and will
-- detect the regression.
(left + right) / 2 < 0.02 as short,
not short and abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
@ -410,11 +413,11 @@ create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json')
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
select left, right, diff, stat_threshold, changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
select left, right, diff, stat_threshold, unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
@ -422,9 +425,39 @@ create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
select test, sum(unstable_fail) u, sum(changed_fail) c, u + c s from queries
group by test having s > 0 order by s desc;
create table test_time_changes_tsv engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
(right - left) / right average_time_change
from queries
group by test
order by abs(average_time_change) desc
)
;
create table unstable_tests_tsv engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
from queries
group by test
order by total_unstable + total_changed desc
;
create table test_perf_changes_tsv engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(floor(average_time_change, 3)), '??') average_time_change_str
from test_time_changes_tsv
full join unstable_tests_tsv
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
order by total_bad desc, average_time_change desc
settings join_use_nulls = 1
;
create table query_time engine Memory as select *
from file('analyze/client-times.tsv', TSV,
@ -465,8 +498,8 @@ create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
stat_threshold, test, query_index, query_display_name
from queries order by test, query_index;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
@ -583,7 +616,7 @@ create table metric_devation engine File(TSVWithNamesAndTypes,
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
having d > 0.5 and q[3] > 5
) metrics
left join query_display_names using (test, query_index)
order by test, query_index, d desc

View File

@ -19,6 +19,5 @@
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
<use_uncompressed_cache>0</use_uncompressed_cache>
<uncompressed_cache_size>1000000000</uncompressed_cache_size>
</yandex>

View File

@ -5,6 +5,7 @@
<query_profiler_cpu_time_period_ns>0</query_profiler_cpu_time_period_ns>
<allow_introspection_functions>1</allow_introspection_functions>
<log_queries>1</log_queries>
<metrics_perf_events_enabled>1</metrics_perf_events_enabled>
</default>
</profiles>
</yandex>

View File

@ -83,10 +83,17 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
if [ "$PR_TO_TEST" != "0" ]
then
# Prepare the list of tests changed in the PR for use by compare.sh. Compare to
# merge base, because master might be far in the future and have unrelated test
# changes.
git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt
# If the PR only changes the tests and nothing else, prepare a list of these
# tests for use by compare.sh. Compare to merge base, because master might be
# far in the future and have unrelated test changes.
base=$(git -C ch merge-base "$SHA_TO_TEST" master)
git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
if grep -vq '^tests/performance' changed-tests.txt
then
# Have some other changes besides the tests, so truncate the test list,
# meaning, run all tests.
: > changed-tests.txt
fi
fi
# Set python output encoding so that we can print queries with Russian letters.
@ -124,5 +131,5 @@ done
dmesg -T > dmesg.log
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze benchmark
cp compare.log /output

View File

@ -100,11 +100,20 @@ for c in connections:
report_stage_end('drop1')
# Apply settings
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for c in connections:
for s in settings:
try:
c.execute("set {} = '{}'".format(s.tag, s.text))
except:
print(traceback.format_exc(), file=sys.stderr)
report_stage_end('settings')

View File

@ -207,7 +207,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
'Test', # 5
'Query', # 6
'#', # 6
'Query', # 7
]
print(tableHeader(columns))
@ -248,7 +249,8 @@ if args.report == 'main':
'p&nbsp;<&nbsp;0.001 threshold', #3
# Failed #4
'Test', #5
'Query' #6
'#', #6
'Query' #7
]
print(tableStart('Unstable queries'))
@ -272,9 +274,9 @@ if args.report == 'main':
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
['Test', 'Unstable', 'Changed perf', 'Total not OK'],
tsvRows('report/bad-tests.tsv'))
printSimpleTable('Test performance changes',
['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
tsvRows('report/test-perf-changes.tsv'))
def print_test_times():
global slow_average_tests
@ -357,7 +359,7 @@ if args.report == 'main':
error_tests += slow_average_tests
if error_tests:
status = 'failure'
message_array.append(str(error_tests) + ' errors')
message_array.insert(0, str(error_tests) + ' errors')
if message_array:
message = ', '.join(message_array)
@ -391,7 +393,8 @@ elif args.report == 'all-queries':
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
'Test', #7
'Query', #8
'#', #8
'Query', #9
]
print(tableStart('All query times'))

View File

@ -24,6 +24,8 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \

View File

@ -59,7 +59,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
ln -s /usr/share/clickhouse-test/config/strings_dictionary.xml /etc/clickhouse-server/; \

View File

@ -62,7 +62,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \

View File

@ -50,7 +50,9 @@ ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/con
ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/text_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/metric_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/access_management.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \

View File

@ -31,6 +31,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
dpkg -i package_folder/clickhouse-server_*.deb; \
dpkg -i package_folder/clickhouse-client_*.deb; \
dpkg -i package_folder/clickhouse-test_*.deb; \
ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
ln -s /usr/lib/llvm-9/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \
echo "TSAN_OPTIONS='halt_on_error=1 history_size=7 ignore_noninstrumented_modules=1 verbosity=1'" >> /etc/environment; \

View File

@ -5,9 +5,13 @@ toc_title: How to Build ClickHouse on Linux
# How to Build ClickHouse for Development {#how-to-build-clickhouse-for-development}
The following tutorial is based on the Ubuntu Linux system.
With appropriate changes, it should also work on any other Linux distribution.
Supported platforms: x86\_64 and AArch64. Support for Power9 is experimental.
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
Supported platforms:
- x86\_64
- AArch64
- Power9 (experimental)
## Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
@ -21,8 +25,18 @@ Or cmake3 instead of cmake on older systems.
There are several ways to do this.
### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
```
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
### Install from a PPA Package {#install-from-a-ppa-package}
On older Ubuntu:
``` bash
$ sudo apt-get install software-properties-common
$ sudo apt-add-repository ppa:ubuntu-toolchain-r/test
@ -32,7 +46,7 @@ $ sudo apt-get install gcc-9 g++-9
### Install from Sources {#install-from-sources}
Look at [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
See [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
## Use GCC 9 for Builds {#use-gcc-9-for-builds}
@ -61,7 +75,6 @@ $ mkdir build
$ cd build
$ cmake ..
$ ninja
$ cd ..
```
To create an executable, run `ninja clickhouse`.

View File

@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl
To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm`
@ -245,7 +245,7 @@ The Code Style Guide: https://clickhouse.tech/docs/en/development/style/
Writing tests: https://clickhouse.tech/docs/en/development/tests/
List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md
List of tasks: https://github.com/ClickHouse/ClickHouse/contribute
## Test Data {#test-data}

View File

@ -60,7 +60,7 @@ Engines in the family:
- [Distributed](special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge
- [Merge](special/merge.md#merge)
- [File](special/file.md#file)
- [Null](special/null.md#null)
- [Set](special/set.md#set)

View File

@ -174,5 +174,6 @@ For a list of possible configuration options, see the [librdkafka configuration
**See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -41,8 +41,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1,
INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...]
@ -58,23 +58,27 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md).
- `ORDER BY` — The sorting key.
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined obviously by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you don't need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
- `PARTITION BY` — The [partitioning key](custom-partitioning-key.md). Optional.
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
- `ORDER BY` — The sorting key.
A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key).
- `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.
- `SAMPLE BY` — An expression for sampling.
- `SAMPLE BY` — An expression for sampling. Optional.
If a sampling expression is used, the primary key must contain it. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes).
- `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.
Expression must have one `Date` or `DateTime` column as a result. Example:
`TTL date + INTERVAL 1 DAY`
@ -83,7 +87,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`:
- `SETTINGS` — Additional parameters that control the behavior of the `MergeTree` (optional):
- `index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
@ -198,6 +202,10 @@ The number of columns in the primary key is not explicitly limited. Depending on
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max_insert_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
### Choosing a Primary Key that Differs from the Sorting Key {#choosing-a-primary-key-that-differs-from-the-sorting-key}
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
@ -332,8 +340,8 @@ The `set` index can be used with all functions. Function subsets for other index
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | | ✗ | ✗ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
@ -349,7 +357,8 @@ The `set` index can be used with all functions. Function subsets for other index
Functions with a constant argument that is less than ngram size cant be used by `ngrambf_v1` for query optimization.
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cant be used for optimizing queries where the result of a function is expected to be false, for example:
!!! note "Note"
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes cant be used for optimizing queries where the result of a function is expected to be false, for example:
- Can be optimized:
- `s LIKE '%test%'`
@ -623,6 +632,8 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
In the case of `MergeTree` tables, data is getting to disk in different ways:

View File

@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasnt proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network.
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
@ -215,4 +215,8 @@ After this, you can launch the server, create a `MergeTree` table, move the data
If the data in ZooKeeper was lost or damaged, you can save data by moving it to an unreplicated table as described above.
**See also**
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->

View File

@ -3,7 +3,7 @@ toc_priority: 45
toc_title: Buffer
---
# Buffer {#buffer}
# Buffer Table Engine {#buffer}
Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously.
@ -34,9 +34,9 @@ Example:
CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000)
```
Creating a merge.hits\_buffer table with the same structure as merge.hits and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the merge.hits table. 16 buffers are created. The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner.
When the server is stopped, with DROP TABLE or DETACH TABLE, buffer data is also flushed to the destination table.
When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table.
You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory.
@ -52,11 +52,11 @@ If you need to run ALTER for a subordinate table and the Buffer table, we recomm
If the server is restarted abnormally, the data in the buffer is lost.
FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.
When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set num\_layers to 1.
Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1.
If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable exactly once write to replicated tables.

View File

@ -3,15 +3,17 @@ toc_priority: 35
toc_title: Dictionary
---
# Dictionary {#dictionary}
# Dictionary Table Engine {#dictionary}
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
## Example
As an example, consider a dictionary of `products` with the following configuration:
``` xml
<dictionaries>
<dictionary>
<dictionary>
<name>products</name>
<source>
<odbc>
@ -36,7 +38,7 @@ As an example, consider a dictionary of `products` with the following configurat
<null_value></null_value>
</attribute>
</structure>
</dictionary>
</dictionary>
</dictionaries>
```

View File

@ -3,14 +3,14 @@ toc_priority: 33
toc_title: Distributed
---
# Distributed {#distributed}
# Distributed Table Engine {#distributed}
**Tables with Distributed engine do not store any data by themself**, but allow distributed query processing on multiple servers.
Tables with Distributed engine do not store any data by their own, but allow distributed query processing on multiple servers.
Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any.
The Distributed engine accepts parameters:
- the cluster name in the servers config file
- the cluster name in the server's config file
- the name of a remote database
@ -31,13 +31,13 @@ Example:
Distributed(logs, default, hits[, sharding_key[, policy_name]])
```
Data will be read from all servers in the logs cluster, from the default.hits table located on every server in the cluster.
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.
Data is not only read but is partially processed on the remote servers (to the extent that this is possible).
For example, for a query with GROUP BY, data will be aggregated on remote servers, and the intermediate states of aggregate functions will be sent to the requestor server. Then data will be further aggregated.
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs The cluster name in the servers config file.
logs The cluster name in the server's config file.
Clusters are set like this:
@ -75,15 +75,15 @@ Clusters are set like this:
</remote_servers>
```
Here a cluster is defined with the name logs that consists of two shards, each of which contains two replicas.
Here a cluster is defined with the name `logs` that consists of two shards, each of which contains two replicas.
Shards refer to the servers that contain different parts of the data (in order to read all the data, you must access all the shards).
Replicas are duplicating servers (in order to read all the data, you can access the data on any one of the replicas).
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesnt start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (tcp\_port in the config, usually set to 9000). Do not confuse it with http\_port.
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn't start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` The password for connecting to a remote server (not masked). Default value: empty string.
- `secure` - Use ssl for connection, usually you also should define `port` = 9440. Server should listen on `<tcp_port_secure>9440</tcp_port_secure>` and have correct certificates.
@ -97,44 +97,44 @@ You can specify just one of the shards (in this case, query processing should be
You can specify as many clusters as you wish in the configuration.
To view your clusters, use the system.clusters table.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the clusters servers).
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster's servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you dont need to create a Distributed table use the remote table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don't need to create a Distributed table use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table "looks at". This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesnt mean anything in this case.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn't mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the internal\_replication parameter defined in the config file.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to true, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table "looks at" replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to false (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from prev\_weight to prev\_weights + weight, where prev\_weights is the total weight of the shards with the smallest number, and weight is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression rand() for random distribution of data, or UserID for distribution by the remainder from dividing the users ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user's ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
A simple reminder from the division is a limited solution for sharding and isnt always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
A simple reminder from the division is a limited solution for sharding and isn't always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you dont have to transfer the old data to it. You can write new data with a heavier weight the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don't have to transfer the old data to it. You can write new data with a heavier weight the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as weve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we've done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into "layers", where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the broken subdirectory and no longer used.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
When the max\_parallel\_replicas option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
When the `max_parallel_replicas` option is enabled, query processing is parallelized across all replicas within a single shard. For more information, see the section [max\_parallel\_replicas](../../../operations/settings/settings.md#settings-max_parallel_replicas).
## Virtual Columns {#virtual-columns}
@ -146,5 +146,6 @@ When the max\_parallel\_replicas option is enabled, query processing is parallel
**See Also**
- [Virtual columns](index.md#table_engines-virtual_columns)
- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->

View File

@ -1,11 +1,11 @@
---
toc_priority: 34
toc_title: External data
toc_priority: 45
toc_title: External Data
---
# External Data for Query Processing {#external-data-for-query-processing}
ClickHouse allows sending a server the data that is needed for processing a query, together with a SELECT query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in IN operators).
ClickHouse allows sending a server the data that is needed for processing a query, together with a `SELECT` query. This data is put in a temporary table (see the section “Temporary tables”) and can be used in the query (for example, in `IN` operators).
For example, if you have a text file with important user identifiers, you can upload it to the server along with a query that uses filtration by this list.
@ -46,7 +46,7 @@ $ cat /etc/passwd | sed 's/:/\t/g' | clickhouse-client --query="SELECT shell, co
/bin/sync 1
```
When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The query\_string is passed the parameters name\_format, name\_types, and name\_structure, where name is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.
When using the HTTP interface, external data is passed in the multipart/form-data format. Each table is transmitted as a separate file. The table name is taken from the file name. The `query_string` is passed the parameters `name_format`, `name_types`, and `name_structure`, where `name` is the name of the table that these parameters correspond to. The meaning of the parameters is the same as when using the command-line client.
Example:

View File

@ -3,12 +3,11 @@ toc_priority: 37
toc_title: File
---
# File {#table_engines-file}
# File Table Engine {#table_engines-file}
The File table engine keeps the data in a file in one of the supported [file
formats](../../../interfaces/formats.md#formats) (TabSeparated, Native, etc.).
The File table engine keeps the data in a file in one of the supported [file formats](../../../interfaces/formats.md#formats) (`TabSeparated`, `Native`, etc.).
Usage examples:
Usage scenarios:
- Data export from ClickHouse to file.
- Convert data from one format to another.
@ -34,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
**Example:**
## Example
**1.** Set up the `file_engine_table` table:

View File

@ -3,7 +3,7 @@ toc_priority: 46
toc_title: GenerateRandom
---
# Generaterandom {#table_engines-generate}
# GenerateRandom Table Engine {#table_engines-generate}
The GenerateRandom table engine produces random data for given table schema.
@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
**Example:**
## Example
**1.** Set up the `generate_engine_table` table:

View File

@ -3,9 +3,12 @@ toc_priority: 40
toc_title: Join
---
# Join {#join}
# Join Table Engine {#join}
Prepared data structure for using in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
Optional prepared data structure for usage in [JOIN](../../../sql-reference/statements/select/join.md#select-join) operations.
!!! note "Note"
This is not an article about the [JOIN clause](../../../sql-reference/statements/select/join.md#select-join) itself.
## Creating a Table {#creating-a-table}

View File

@ -3,8 +3,8 @@ toc_priority: 43
toc_title: MaterializedView
---
# Materializedview {#materializedview}
# MaterializedView Table Engine {#materializedview}
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine.
Used for implementing materialized views (for more information, see [CREATE TABLE](../../../sql-reference/statements/create.md)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses that engine.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/materializedview/) <!--hide-->

View File

@ -3,15 +3,16 @@ toc_priority: 44
toc_title: Memory
---
# Memory {#memory}
# Memory Table Engine {#memory}
The Memory engine stores data in RAM, in uncompressed form. Data is stored in exactly the same form as it is received when read. In other words, reading from this table is completely free.
Concurrent data access is synchronized. Locks are short: read and write operations dont block each other.
Indexes are not supported. Reading is parallelized.
Maximal productivity (over 10 GB/sec) is reached on simple queries, because there is no reading from the disk, decompressing, or deserializing data. (We should note that in many cases, the productivity of the MergeTree engine is almost as high.)
When restarting a server, data disappears from the table and the table becomes empty.
Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000).
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing GLOBAL IN (see the section “IN operators”).
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/memory/) <!--hide-->

View File

@ -3,13 +3,17 @@ toc_priority: 36
toc_title: Merge
---
# Merge {#merge}
# Merge Table Engine {#merge}
The `Merge` engine (not to be confused with `MergeTree`) does not store data itself, but allows reading from any number of other tables simultaneously.
Reading is automatically parallelized. Writing to a table is not supported. When reading, the indexes of tables that are actually being read are used, if they exist.
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
Example:
## Examples
Example 1:
``` sql
Merge(hits, '^WatchLog')

View File

@ -3,10 +3,11 @@ toc_priority: 38
toc_title: 'Null'
---
# Null {#null}
# Null Table Engine {#null}
When writing to a Null table, data is ignored. When reading from a Null table, the response is empty.
When writing to a `Null` table, data is ignored. When reading from a `Null` table, the response is empty.
However, you can create a materialized view on a Null table. So the data written to the table will end up in the view.
!!! info "Hint"
However, you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/null/) <!--hide-->

View File

@ -3,14 +3,14 @@ toc_priority: 39
toc_title: Set
---
# Set {#set}
# Set Table Engine {#set}
A data set that is always in RAM. It is intended for use on the right side of the IN operator (see the section “IN operators”).
A data set that is always in RAM. It is intended for use on the right side of the `IN` operator (see the section “IN operators”).
You can use INSERT to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
But you cant perform SELECT from the table. The only way to retrieve data is by using it in the right half of the IN operator.
You can use `INSERT` to insert data in the table. New elements will be added to the data set, while duplicates will be ignored.
But you cant perform `SELECT` from the table. The only way to retrieve data is by using it in the right half of the `IN` operator.
Data is always located in RAM. For INSERT, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.
Data is always located in RAM. For `INSERT`, the blocks of inserted data are also written to the directory of tables on the disk. When starting the server, this data is loaded to RAM. In other words, after restarting, the data remains in place.
For a rough server restart, the block of data on the disk might be lost or damaged. In the latter case, you may need to manually delete the file with damaged data.

View File

@ -3,12 +3,13 @@ toc_priority: 41
toc_title: URL
---
# URL(URL, Format) {#table_engines-url}
# URL Table Engine {#table_engines-url}
Manages data on a remote HTTP/HTTPS server. This engine is similar
to the [File](file.md) engine.
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](file.md) engine.
## Using the Engine in the ClickHouse Server {#using-the-engine-in-the-clickhouse-server}
Syntax: `URL(URL, Format)`
## Usage {#using-the-engine-in-the-clickhouse-server}
The `format` must be one that ClickHouse can use in
`SELECT` queries and, if necessary, in `INSERTs`. For the full list of supported formats, see
@ -24,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
**Example:**
## Example
**1.** Create a `url_engine_table` table on the server :

View File

@ -3,7 +3,7 @@ toc_priority: 42
toc_title: View
---
# View {#table_engines-view}
# View Table Engine {#table_engines-view}
Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query).

View File

@ -64,7 +64,7 @@ You can also download and install packages manually from [here](https://repo.cli
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.yandex.ru/clickhouse/tgz/.
The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/.
After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:
``` bash

View File

@ -5,6 +5,6 @@ toc_title: ODBC Driver
# ODBC Driver {#odbc-driver}
- [Official driver](https://github.com/ClickHouse/clickhouse-odbc).
- [Official driver](https://github.com/ClickHouse/clickhouse-odbc)
[Original article](https://clickhouse.tech/docs/en/interfaces/odbc/) <!--hide-->

View File

@ -57,7 +57,7 @@ toc_title: Adopters
| [S7&nbsp;Airlines](https://www.s7.ru){.favicon} | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| [SEMrush](https://www.semrush.com/){.favicon} | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| [scireum&nbsp;GmbH](https://www.scireum.de/){.favicon} | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| [Sentry](https://sentry.io/){.favicon} | Software developer | Backend for product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| [Sentry](https://sentry.io/){.favicon} | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr){.favicon} | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| [seo.do](https://seo.do/){.favicon} | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| [Sina](http://english.sina.com/index.html){.favicon} | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |

View File

@ -31,6 +31,7 @@ For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tab
## Manipulations with Parts {#manipulations-with-parts}
ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks to the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created copies of files are not handled by ClickHouse server, so you can just leave them there: you will have a simple backup that doesnt require any additional external system, but it will still be prone to hardware issues. For this reason, its better to remotely copy them to another location and then remove the local copies. Distributed filesystems and object stores are still a good options for this, but normal attached file servers with a large enough capacity might work as well (in this case the transfer will occur via the network filesystem or maybe [rsync](https://en.wikipedia.org/wiki/Rsync)).
Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ...`
For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).

View File

@ -7,26 +7,33 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory.
!!! note "Note"
All the configuration files should be in XML format. Also, they should have the same root element, usually `<yandex>`.
All the configuration files should be in XML format. Also, they should have the same root element, usually `<yandex>`.
Some settings specified in the main configuration file can be overridden in other configuration files. The `replace` or `remove` attributes can be specified for the elements of these configuration files.
## Override
If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children.
Some settings specified in the main configuration file can be overridden in other configuration files:
If `replace` is specified, it replaces the entire element with the specified one.
- The `replace` or `remove` attributes can be specified for the elements of these configuration files.
- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children.
- If `replace` is specified, it replaces the entire element with the specified one.
- If `remove` is specified, it deletes the element.
If `remove` is specified, it deletes the element.
## Substitution
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)).
The config can also define "substitutions". If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include\_from](server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](server-configuration-parameters/settings.md)).
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## User Settings
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
Users configuration can be splitted into separate files similar to `config.xml` and `config.d/`.
Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
## Example
For example, you can have separate config file for each user like this:
``` bash
@ -48,8 +55,10 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
</yandex>
```
## Implementation Details
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file.
The server tracks changes in config files, as well as files and ZooKeeper nodes that were used when performing substitutions and overrides, and reloads the settings for users and clusters on the fly. This means that you can modify the cluster, users, and their settings without restarting the server.
[Original article](https://clickhouse.tech/docs/en/operations/configuration_files/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/operations/configuration-files/) <!--hide-->

View File

@ -892,6 +892,9 @@ The update is performed asynchronously, in a separate system thread.
**Default value**: 15.
**See also**
- [background_schedule_pool_size](../settings/settings.md#background_schedule_pool_size)
## access_control_path {#access_control_path}

View File

@ -1336,7 +1336,7 @@ Type: URL
Default value: Empty
## background\_pool\_size {#background_pool_size}
## background_pool_size {#background_pool_size}
Sets the number of threads performing background operations in table engines (for example, merges in [MergeTree engine](../../engines/table-engines/mergetree-family/index.md) tables). This setting is applied from `default` profile at ClickHouse server start and cant be changed in a user session. By adjusting this setting, you manage CPU and disk load. Smaller pool size utilizes less CPU and disk resources, but background processes advance slower which might eventually impact query performance.
@ -1348,6 +1348,46 @@ Possible values:
Default value: 16.
## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
Possible values:
- Any positive integer.
Default value: 16.
## background_move_pool_size {#background_move_pool_size}
Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at ClickHouse server start and cant be changed in a user session.
Possible values:
- Any positive integer.
Default value: 8.
## background_schedule_pool_size {#background_schedule_pool_size}
Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and cant be changed in a user session.
Possible values:
- Any positive integer.
Default value: 16.
## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}
Sets the number of threads performing background tasks for [distributed](../../engines/table-engines/special/distributed.md) sends. This setting is applied at ClickHouse server start and cant be changed in a user session.
Possible values:
- Any positive integer.
Default value: 16.
## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}
Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can't be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method.
@ -1406,5 +1446,4 @@ Possible values:
Default value: 0.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -18,9 +18,11 @@ System tables:
- Available only for reading data.
- Can't be dropped or altered, but can be detached.
Most of system tables store their data in RAM. ClickHouse server creates such system tables at the start.
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
The [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) system tables store data in a storage filesystem. You can alter them or remove from a disk manually. If you remove one of that tables from a disk, the ClickHouse server creates the table again at the time of the next recording. A storage period for these tables is not limited, and ClickHouse server doesn't delete their data automatically. You need to organize removing of outdated logs by yourself. For example, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records.
Unlike other system tables, the system tables [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) are served by [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
By default, table growth is unlimited. To control a size of a table, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. Also you can use the partitioning feature of `MergeTree`-engine tables.
### Sources of System Metrics {#system-tables-sources-of-system-metrics}
@ -81,6 +83,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
- [system.events](#system_tables-events) — Contains a number of events that have occurred.
- [system.metric\_log](#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`.
## system.asynchronous_metric_log {#system-tables-async-log}
Contains the historical values for `system.asynchronous_log` (see [system.asynchronous_metrics](#system_tables-asynchronous_metrics))
## system.clusters {#system-clusters}
Contains information about clusters available in the config file and the servers in them.
@ -176,6 +182,41 @@ This table contains a single String column called name the name of a d
Each database that the server knows about has a corresponding entry in the table.
This system table is used for implementing the `SHOW DATABASES` query.
## system.data_type_families {#system_tables-data_type_families}
Contains information about supported [data types](../sql-reference/data-types/).
Columns:
- `name` ([String](../sql-reference/data-types/string.md)) — Data type name.
- `case_insensitive` ([UInt8](../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid.
- `alias_to` ([String](../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias.
**Example**
``` sql
SELECT * FROM system.data_type_families WHERE alias_to = 'String'
```
``` text
┌─name───────┬─case_insensitive─┬─alias_to─┐
│ LONGBLOB │ 1 │ String │
│ LONGTEXT │ 1 │ String │
│ TINYTEXT │ 1 │ String │
│ TEXT │ 1 │ String │
│ VARCHAR │ 1 │ String │
│ MEDIUMBLOB │ 1 │ String │
│ BLOB │ 1 │ String │
│ TINYBLOB │ 1 │ String │
│ CHAR │ 1 │ String │
│ MEDIUMTEXT │ 1 │ String │
└────────────┴──────────────────┴──────────┘
```
**See Also**
- [Syntax](../sql-reference/syntax.md) — Information about supported syntax.
## system.detached\_parts {#system_tables-detached_parts}
Contains information about detached parts of [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) tables. The `reason` column specifies why the part was detached. For user-detached parts, the reason is empty. Such parts can be attached with [ALTER TABLE ATTACH PARTITION\|PART](../sql-reference/statements/alter.md#alter_attach-partition) command. For the description of other columns, see [system.parts](#system_tables-parts). If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../sql-reference/statements/alter.md#alter_drop-detached).
@ -601,9 +642,9 @@ You can change settings of queries logging in the [query_log](server-configurati
You can disable queries logging by setting [log_queries = 0](settings/settings.md#settings-log-queries). We don't recommend to turn off logging because information in this table is important for solving issues.
The flushing period of logs is set in `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing logs, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesn't delete logs from the table automatically. See [Introduction](#system-tables-introduction) for more details.
ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details.
The `system.query_log` table registers two kinds of queries:
@ -731,68 +772,117 @@ Settings.Values: ['0','random','1','10000000000']
## system.query_thread_log {#system_tables-query_thread_log}
The table contains information about each query execution thread.
Contains information about threads which execute queries, for example, thread name, thread start time, duration of query processing.
ClickHouse creates this table only if the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in.
To start logging:
To enable query logging, set the [log\_query\_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section.
1. Configure parameters in the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) section.
2. Set [log_query_threads](settings/settings.md#settings-log-query-threads) to 1.
The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_thread_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query.
ClickHouse doesn't delete data from the table automatically. See [Introduction](#system-tables-introduction) for more details.
Columns:
- `event_date` (Date) — the date when the thread has finished execution of the query.
- `event_time` (DateTime) — the date and time when the thread has finished execution of the query.
- `query_start_time` (DateTime) — Start time of query execution.
- `query_duration_ms` (UInt64) — Duration of query execution.
- `read_rows` (UInt64) — Number of read rows.
- `read_bytes` (UInt64) — Number of read bytes.
- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` (Int64) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` (Int64) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` (String) — Name of the thread.
- `thread_number` (UInt32) — Internal thread ID.
- `os_thread_id` (Int32) — OS thread ID.
- `master_thread_id` (UInt64) — OS initial ID of initial thread.
- `query` (String) — Query string.
- `is_initial_query` (UInt8) — Query type. Possible values:
- `event_date` ([Date](../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time of query execution.
- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution.
- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows.
- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes.
- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
- `memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread.
- `peak_memory_usage` ([Int64](../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
- `thread_name` ([String](../sql-reference/data-types/string.md)) — Name of the thread.
- `thread_number` ([UInt32](../sql-reference/data-types/int-uint.md)) — Internal thread ID.
- `thread_id` ([Int32](../sql-reference/data-types/int-uint.md)) — thread ID.
- `master_thread_id` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread.
- `query` ([String](../sql-reference/data-types/string.md)) — Query string.
- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values:
- 1 — Query was initiated by the client.
- 0 — Query was initiated by another query for distributed query execution.
- `user` (String) — Name of the user who initiated the current query.
- `query_id` (String) — ID of the query.
- `address` (IPv6) — IP address that was used to make the query.
- `port` (UInt16) — The client port that was used to make the query.
- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` (String) — ID of the initial query (for distributed query execution).
- `initial_address` (IPv6) — IP address that the parent query was launched from.
- `initial_port` (UInt16) — The client port that was used to make the parent query.
- `interface` (UInt8) — Interface that the query was initiated from. Possible values:
- `user` ([String](../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
- `query_id` ([String](../sql-reference/data-types/string.md)) — ID of the query.
- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
- `port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query.
- `initial_user` ([String](../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution).
- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution).
- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from.
- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query.
- `interface` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values:
- 1 — TCP.
- 2 — HTTP.
- `os_user` (String) — OSs username who runs [clickhouse-client](../interfaces/cli.md).
- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
- `http_method` (UInt8) — HTTP method that initiated the query. Possible values:
- `os_user` ([String](../sql-reference/data-types/string.md)) — OSs username who runs [clickhouse-client](../interfaces/cli.md).
- `client_hostname` ([String](../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run.
- `client_name` ([String](../sql-reference/data-types/string.md)) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name.
- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client.
- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version.
- `http_method` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values:
- 0 — The query was launched from the TCP interface.
- 1 — `GET` method was used.
- 2 — `POST` method was used.
- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request.
- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` (UInt32) — ClickHouse revision.
- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events)
- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request.
- `quota_key` ([String](../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`).
- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events).
- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column.
By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query.
**Example**
When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted.
``` sql
SELECT * FROM system.query_thread_log LIMIT 1 FORMAT Vertical
```
!!! note "Note"
The storage period for logs is unlimited. Logs arent automatically deleted from the table. You need to organize the removal of outdated logs yourself.
``` text
Row 1:
──────
event_date: 2020-05-13
event_time: 2020-05-13 14:02:28
query_start_time: 2020-05-13 14:02:28
query_duration_ms: 0
read_rows: 1
read_bytes: 1
written_rows: 0
written_bytes: 0
memory_usage: 0
peak_memory_usage: 0
thread_name: QueryPipelineEx
thread_id: 28952
master_thread_id: 28924
query: SELECT 1
is_initial_query: 1
user: default
query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
address: ::ffff:127.0.0.1
port: 57720
initial_user: default
initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a
initial_address: ::ffff:127.0.0.1
initial_port: 57720
interface: 1
os_user: bayonet
client_hostname: clickhouse.ru-central1.internal
client_name: ClickHouse client
client_revision: 54434
client_version_major: 20
client_version_minor: 4
client_version_patch: 1
http_method: 0
http_user_agent:
quota_key:
revision: 54434
ProfileEvents.Names: ['ContextLock','RealTimeMicroseconds','UserTimeMicroseconds','OSCPUWaitMicroseconds','OSCPUVirtualTimeMicroseconds']
ProfileEvents.Values: [1,97,81,5,81]
...
```
You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `partition_by` parameter).
**See Also**
- [system.query_log](#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution.
## system.trace\_log {#system_tables-trace_log}

View File

@ -29,7 +29,7 @@ To reduce network traffic, we recommend running `clickhouse-copier` on the same
The utility should be run manually:
``` bash
$ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```
Parameters:

View File

@ -316,7 +316,7 @@ Result:
The function takes as arguments a set of conditions from 1 to 32 arguments of type `UInt8` that indicate whether a certain condition was met for the event.
Any condition can be specified as an argument (as in [WHERE](../../sql-reference/statements/select/where.md#select-where)).
The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and fird are true, etc.
The conditions, except the first, apply in pairs: the result of the second will be true if the first and second are true, of the third if the first and third are true, etc.
**Syntax**

View File

@ -1805,7 +1805,7 @@ For more information see [parameters](#agg_functions-stochasticlinearregression-
stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
```
1. Fitting
**1.** Fitting
<!-- -->
@ -1813,7 +1813,7 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
Predicted labels have to be in \[-1, 1\].
1. Predicting
**2.** Predicting
<!-- -->

View File

@ -28,7 +28,7 @@ CREATE TABLE t
) ENGINE = ...
```
[uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference.md) are the aggregate functions supported in ClickHouse.
[uniq](../../sql-reference/aggregate-functions/reference.md#agg_function-uniq), anyIf ([any](../../sql-reference/aggregate-functions/reference.md#agg_function-any)+[If](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-if)) and [quantiles](../../sql-reference/aggregate-functions/reference.md#quantiles) are the aggregate functions supported in ClickHouse.
## Usage {#usage}

View File

@ -10,4 +10,6 @@ ClickHouse can store various kinds of data in table cells.
This section describes the supported data types and special considerations for using and/or implementing them if any.
You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables.md#system_tables-data_type_families) table.
[Original article](https://clickhouse.tech/docs/en/data_types/) <!--hide-->

View File

@ -625,4 +625,43 @@ Setting fields:
- `storage_type` The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`.
- `db_index` The specific numeric index of Redis logical database. May be omitted, default value is 0.
### Cassandra {#dicts-external_dicts_dict_sources-cassandra}
Example of settings:
```xml
<source>
<cassandra>
<host>localhost</host>
<port>9042</port>
<user>username</user>
<password>qwerty123</password>
<keyspase>database_name</keyspase>
<column_family>table_name</column_family>
<allow_filering>1</allow_filering>
<partition_key_prefix>1</partition_key_prefix>
<consistency>One</consistency>
<where>"SomeColumn" = 42</where>
<max_threads>8</max_threads>
</cassandra>
</source>
```
Setting fields:
- `host` The Cassandra host or comma-separated list of hosts.
- `port` The port on the Cassandra servers. If not specified, default port is used.
- `user` Name of the Cassandra user.
- `password` Password of the Cassandra user.
- `keyspace` Name of the keyspace (database).
- `column_family` Name of the column family (table).
- `allow_filering` Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1.
- `partition_key_prefix` Number of partition key columns in primary key of the Cassandra table.
Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra.
Default value is 1 (the first key column is a partition key and other key columns are clustering key).
- `consistency` Consistency level. Possible values: `One`, `Two`, `Three`,
`All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default is `One`.
- `where` Optional selection criteria.
- `max_threads` The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries.
[Original article](https://clickhouse.tech/docs/en/query_language/dicts/external_dicts_dict_sources/) <!--hide-->

View File

@ -22,7 +22,7 @@ Strings are compared by bytes. A shorter string is smaller than all strings that
## equals, a = b and a == b operator {#function-equals}
## notEquals, a ! operator= b and a \<\> b {#function-notequals}
## notEquals, a != b and a \<\> b operator {#function-notequals}
## less, \< operator {#function-less}

View File

@ -7,7 +7,7 @@ toc_title: Working with geographical coordinates
## greatCircleDistance {#greatcircledistance}
Calculate the distance between two points on the Earths surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
Calculates the distance between two points on the Earths surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
@ -40,6 +40,37 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
└───────────────────────────────────────────────────────────────────┘
```
## greatCircleAngle {#greatcircleangle}
Calculates the central angle between two points on the Earths surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).
``` sql
greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)
```
**Input parameters**
- `lon1Deg` — Longitude of the first point in degrees.
- `lat1Deg` — Latitude of the first point in degrees.
- `lon2Deg` — Longitude of the second point in degrees.
- `lat2Deg` — Latitude of the second point in degrees.
**Returned value**
The central angle between two points in degrees.
**Example**
``` sql
SELECT greatCircleAngle(0, 0, 45, 0) AS arc
```
``` text
┌─arc─┐
│ 45 │
└─────┘
```
## pointInEllipses {#pointinellipses}
Checks whether the point belongs to at least one of the ellipses.

View File

@ -60,4 +60,43 @@ Result:
└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘
```
# Random functions for working with strings {#random-functions-for-working-with-strings}
## randomString {#random-string}
## randomFixedString {#random-fixed-string}
## randomPrintableASCII {#random-printable-ascii}
## randomStringUTF8 {#random-string-utf8}
## fuzzBits {#fuzzbits}
**Syntax**
``` sql
fuzzBits([s], [prob])
```
Inverts bits of `s`, each with probability `prob`.
**Parameters**
- `s` - `String` or `FixedString`
- `prob` - constant `Float32/64`
**Returned value**
Fuzzed string with same as s type.
**Example**
``` sql
SELECT fuzzBits(materialize('abacaba'), 0.1)
FROM numbers(3)
```
``` text
┌─fuzzBits(materialize('abacaba'), 0.1)─┐
│ abaaaja │
│ a*cjab+ │
│ aeca2A │
└───────────────────────────────────────┘
[Original article](https://clickhouse.tech/docs/en/query_language/functions/random_functions/) <!--hide-->

View File

@ -271,7 +271,7 @@ Renames one or more tables.
RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster]
```
All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned).
Renaming tables is a light operation. If you indicated another database after `TO`, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). If you rename multiple tables in one query, this is a non-atomic operation, it may be partially executed, queries in other sessions may receive the error `Table ... doesn't exist ..`.
## SET {#query-set}

Some files were not shown because too many files have changed in this diff Show More