Merge branch 'master' into make_copy_instead_of_hardlink

alesapin 2022-09-28 12:10:00 +02:00
commit 9e77d520d4
98 changed files with 2469 additions and 1009 deletions

View File

@ -18,7 +18,7 @@ include (cmake/target.cmake)
include (cmake/tools.cmake)
include (cmake/ccache.cmake)
include (cmake/clang_tidy.cmake)
include (cmake/git_status.cmake)
include (cmake/git.cmake)
# Ignore export() since we don't use it,
# but it gets broken with global targets via link_libraries()

View File

@ -1,6 +1,7 @@
#include <base/ReplxxLineReader.h>
#include <base/errnoToString.h>
#include <stdexcept>
#include <chrono>
#include <cerrno>
#include <cstring>
@ -13,8 +14,10 @@
#include <dlfcn.h>
#include <fcntl.h>
#include <fstream>
#include <filesystem>
#include <fmt/format.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp> /// is_any_of
namespace
{
@ -35,6 +38,166 @@ std::string getEditor()
return editor;
}
std::string getFuzzyFinder()
{
const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe)
if (!env_path || !*env_path)
return {};
std::vector<std::string> paths;
boost::split(paths, env_path, boost::is_any_of(":"));
for (const auto & path_str : paths)
{
std::filesystem::path path(path_str);
std::filesystem::path sk_bin_path = path / "sk";
if (!access(sk_bin_path.c_str(), X_OK))
return sk_bin_path;
std::filesystem::path fzf_bin_path = path / "fzf";
if (!access(fzf_bin_path.c_str(), X_OK))
return fzf_bin_path;
}
return {};
}
/// See comments in ShellCommand::executeImpl()
/// (for the vfork via dlsym())
int executeCommand(char * const argv[])
{
#if !defined(USE_MUSL)
/** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs,
* because of the resolving of symbols in the shared library
* http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html
* Therefore, separate the resolving of the symbol from the call.
*/
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
#else
/// If we use Musl with static linking, there is no dlsym and no issue with vfork.
static void * real_vfork = reinterpret_cast<void *>(&vfork);
#endif
if (!real_vfork)
throw std::runtime_error("Cannot find vfork symbol");
pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
if (-1 == pid)
throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString()));
/// Child
if (0 == pid)
{
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
execvp(argv[0], argv);
_exit(-1);
}
int status = 0;
do
{
int exited_pid = waitpid(pid, &status, 0);
if (exited_pid != -1)
break;
if (errno == EINTR)
continue;
throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString()));
} while (true);
return status;
}
void writeRetry(int fd, const std::string & data)
{
size_t bytes_written = 0;
const char * begin = data.c_str();
size_t offset = data.size();
while (bytes_written != offset)
{
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString()));
bytes_written += res;
}
}
std::string readFile(const std::string & path)
{
std::ifstream t(path);
std::string str;
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
t.seekg(0, std::ios::beg);
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
return str;
}
/// Simple wrapper for temporary files.
class TemporaryFile
{
private:
std::string path;
int fd = -1;
public:
explicit TemporaryFile(const char * pattern)
: path(pattern)
{
size_t dot_pos = path.rfind('.');
if (dot_pos != std::string::npos)
fd = ::mkstemps(path.data(), path.size() - dot_pos);
else
fd = ::mkstemp(path.data());
if (-1 == fd)
throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString()));
}
~TemporaryFile()
{
try
{
close();
unlink();
}
catch (const std::runtime_error & e)
{
fmt::print(stderr, "{}", e.what());
}
}
void close()
{
if (fd == -1)
return;
if (0 != ::close(fd))
throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString()));
fd = -1;
}
void write(const std::string & data)
{
if (fd == -1)
throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path));
writeRetry(fd, data);
}
void unlink()
{
if (0 != ::unlink(path.c_str()))
throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString()));
}
std::string & getPath() { return path; }
};
/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx.
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
@ -142,6 +305,7 @@ ReplxxLineReader::ReplxxLineReader(
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
, editor(getEditor())
, fuzzy_finder(getFuzzyFinder())
{
using namespace std::placeholders;
using Replxx = replxx::Replxx;
@ -249,6 +413,17 @@ ReplxxLineReader::ReplxxLineReader(
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
/// interactive search in history (requires fzf/sk)
if (!fuzzy_finder.empty())
{
auto interactive_history_search = [this](char32_t code)
{
openInteractiveHistorySearch();
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
}
}
ReplxxLineReader::~ReplxxLineReader()
@ -293,116 +468,70 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}
/// See comments in ShellCommand::executeImpl()
/// (for the vfork via dlsym())
int ReplxxLineReader::executeEditor(const std::string & path)
{
std::vector<char> argv0(editor.data(), editor.data() + editor.size() + 1);
std::vector<char> argv1(path.data(), path.data() + path.size() + 1);
char * const argv[] = {argv0.data(), argv1.data(), nullptr};
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
if (!real_vfork)
{
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str());
return -1;
}
pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
if (-1 == pid)
{
rx.print("Cannot vfork: %s\n", errnoToString().c_str());
return -1;
}
/// Child
if (0 == pid)
{
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
execvp(editor.c_str(), argv);
rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str());
_exit(-1);
}
int status = 0;
do
{
int exited_pid = waitpid(pid, &status, 0);
if (exited_pid == -1)
{
if (errno == EINTR)
continue;
rx.print("Cannot waitpid: %s\n", errnoToString().c_str());
return -1;
}
else
break;
} while (true);
return status;
}
void ReplxxLineReader::openEditor()
{
char filename[] = "clickhouse_replxx_XXXXXX.sql";
int fd = ::mkstemps(filename, 4);
if (-1 == fd)
{
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str());
return;
}
TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql");
editor_file.write(rx.get_state().text());
editor_file.close();
replxx::Replxx::State state(rx.get_state());
size_t bytes_written = 0;
const char * begin = state.text();
size_t offset = strlen(state.text());
while (bytes_written != offset)
char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr};
try
{
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
if (executeCommand(argv) == 0)
{
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str());
break;
const std::string & new_query = readFile(editor_file.getPath());
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
}
bytes_written += res;
}
if (0 != ::close(fd))
catch (const std::runtime_error & e)
{
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
if (0 == executeEditor(filename))
{
try
{
std::ifstream t(filename);
std::string str;
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
t.seekg(0, std::ios::beg);
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
rx.set_state(replxx::Replxx::State(str.c_str(), str.size()));
}
catch (...)
{
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
rx.print(e.what());
}
if (bracketed_paste_enabled)
enableBracketedPaste();
}
if (0 != ::unlink(filename))
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str());
void ReplxxLineReader::openInteractiveHistorySearch()
{
assert(!fuzzy_finder.empty());
TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin");
auto hs(rx.history_scan());
while (hs.next())
{
history_file.write(hs.get().text());
history_file.write(std::string(1, '\0'));
}
history_file.close();
TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql");
output_file.close();
char sh[] = "sh";
char sh_c[] = "-c";
/// NOTE: You can use one of the following to configure the behaviour additionally:
/// - SKIM_DEFAULT_OPTIONS
/// - FZF_DEFAULT_OPTS
std::string fuzzy_finder_command = fmt::format(
"{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}",
fuzzy_finder, history_file.getPath(), output_file.getPath());
char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr};
try
{
if (executeCommand(argv) == 0)
{
const std::string & new_query = readFile(output_file.getPath());
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
}
}
catch (const std::runtime_error & e)
{
rx.print(e.what());
}
if (bracketed_paste_enabled)
enableBracketedPaste();
}
void ReplxxLineReader::enableBracketedPaste()

View File

@ -27,6 +27,7 @@ private:
void addToHistory(const String & line) override;
int executeEditor(const std::string & path);
void openEditor();
void openInteractiveHistorySearch();
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
@ -36,4 +37,5 @@ private:
bool bracketed_paste_enabled = false;
std::string editor;
std::string fuzzy_finder;
};

cmake/git.cmake Normal file (42 lines added)
View File

@ -0,0 +1,42 @@
find_package(Git)
# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for the SYSTEM.BUILD_OPTIONS view.
if (Git_FOUND)
# Commit hash + whether the building workspace was dirty or not
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_HASH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Branch name
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Date of the commit
SET(ENV{TZ} "UTC")
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_DATE
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Subject of the commit
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%s
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "Git HEAD commit hash: ${GIT_HASH}")
execute_process(
COMMAND ${GIT_EXECUTABLE} status
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
message(STATUS "Git could not be found.")
endif()

View File

@ -1,22 +0,0 @@
# Print the status of the git repository (if git is available).
# This is useful for troubleshooting build failure reports
find_package(Git)
if (Git_FOUND)
execute_process(
COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_ID
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}")
execute_process(
COMMAND ${GIT_EXECUTABLE} status
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
message(STATUS "Git could not be found.")
endif()

View File

@ -6,7 +6,7 @@ sidebar_label: VIEW
# CREATE VIEW
Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features).
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features).
## Normal View

View File

@ -22,17 +22,17 @@ ClickHouse allows sending data to the server,
There can be several such sections, one for each table being transferred.
**external** - marks the beginning of the section.
**file** - path to the file with the table dump, or -, which means stdin.
Only one table can be read from stdin.
- **--external** - marks the beginning of the section.
- **--file** - path to the file with the table dump, or `-`, which means `stdin`.
Only one table can be read from `stdin`.
The following parameters are optional:
**name** - the table name. If omitted, _data is used.
**format** - the data format in the file. If omitted, TabSeparated is used.
- **--name** - the table name. If omitted, _data is used.
- **--format** - the data format in the file. If omitted, TabSeparated is used.
One of the following parameters must be specified:
**types** - a comma-separated list of column types. For example: `UInt64,String`. The columns will be named _1, _2, …
**structure** - the table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
- **--types** - a comma-separated list of column types. For example: `UInt64,String`. The columns will be named _1, _2, …
- **--structure** - the table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
The files specified in file will be parsed using the format specified in format, with the data types specified in types or structure. The table will be uploaded to the server and be available there as a temporary table with the name given in name.

View File

@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER)
# Sraight list of all required sources
# Straight list of all required sources
set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp
@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp
${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp
Keeper.cpp
TinyContext.cpp

View File

@ -490,8 +490,9 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
void Keeper::logRevision() const
{
Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING}
+ " with revision " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", " + build_id_info
+ "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", git hash: " + (git_hash.empty() ? "<unknown>" : git_hash)
+ ", build id: " + (build_id.empty() ? "<unknown>" : build_id) + ")"
+ ", PID " + std::to_string(getpid()));
}

View File

@ -1114,10 +1114,6 @@
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<!--
Asynchronous metrics are updated once a minute, so there is
no need to flush more often.
-->
<flush_interval_milliseconds>7000</flush_interval_milliseconds>
</asynchronous_metric_log>

View File

@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) {
sync.sub(plots[idx]);
/// Set title
const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] );
const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
chart.querySelector('.title').firstChild.data = title;
}

View File

@ -70,7 +70,7 @@
#include <Client/InternalTextLogs.h>
#include <boost/algorithm/string/replace.hpp>
#include <IO/ForkWriteBuffer.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
namespace fs = std::filesystem;
using namespace std::literals;
@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler()
ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const
{
ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
std::unique_ptr<IParserBase> parser;
ASTPtr res;
const auto & settings = global_context->getSettingsRef();
@ -301,10 +301,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
if (!allow_multi_statements)
max_length = settings.max_query_size;
const Dialect & dialect = settings.dialect;
if (dialect == Dialect::kusto)
parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
else
parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);
if (is_interactive || ignore_error)
{
String message;
res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
if (!res)
{
@ -314,7 +321,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
}
else
{
res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
}
if (is_interactive)

View File

@ -7,15 +7,11 @@
#include <base/unaligned.h>
#include <Core/Field.h>
#include <Common/assert_cast.h>
#include <Common/TargetSpecific.h>
#include <Core/TypeId.h>
#include <base/TypeName.h>
#include "config_core.h"
#if USE_MULTITARGET_CODE
# include <immintrin.h>
#endif
namespace DB
{
@ -395,124 +391,6 @@ protected:
Container data;
};
DECLARE_DEFAULT_CODE(
template <typename Container, typename Type>
inline void vectorIndexImpl(const Container & data, const PaddedPODArray<Type> & indexes, size_t limit, Container & res_data)
{
for (size_t i = 0; i < limit; ++i)
res_data[i] = data[indexes[i]];
}
);
DECLARE_AVX512VBMI_SPECIFIC_CODE(
template <typename Container, typename Type>
inline void vectorIndexImpl(const Container & data, const PaddedPODArray<Type> & indexes, size_t limit, Container & res_data)
{
static constexpr UInt64 MASK64 = 0xffffffffffffffff;
const size_t limit64 = limit & ~63;
size_t pos = 0;
size_t data_size = data.size();
auto data_pos = reinterpret_cast<const UInt8 *>(data.data());
auto indexes_pos = reinterpret_cast<const UInt8 *>(indexes.data());
auto res_pos = reinterpret_cast<UInt8 *>(res_data.data());
if (data_size <= 64)
{
/// one single mask load for table size <= 64
__mmask64 last_mask = MASK64 >> (64 - data_size);
__m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos);
/// 64 bytes table lookup using one single permutexvar_epi8
while (pos < limit64)
{
__m512i vidx = _mm512_loadu_epi8(indexes_pos + pos);
__m512i out = _mm512_permutexvar_epi8(vidx, table1);
_mm512_storeu_epi8(res_pos + pos, out);
pos += 64;
}
/// tail handling
if (limit > limit64)
{
__mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit);
__m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos);
__m512i out = _mm512_permutexvar_epi8(vidx, table1);
_mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out);
}
}
else if (data_size <= 128)
{
/// table size (64, 128] requires 2 zmm load
__mmask64 last_mask = MASK64 >> (128 - data_size);
__m512i table1 = _mm512_loadu_epi8(data_pos);
__m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64);
/// 128 bytes table lookup using one single permute2xvar_epi8
while (pos < limit64)
{
__m512i vidx = _mm512_loadu_epi8(indexes_pos + pos);
__m512i out = _mm512_permutex2var_epi8(table1, vidx, table2);
_mm512_storeu_epi8(res_pos + pos, out);
pos += 64;
}
if (limit > limit64)
{
__mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit);
__m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos);
__m512i out = _mm512_permutex2var_epi8(table1, vidx, table2);
_mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out);
}
}
else
{
if (data_size > 256)
{
/// byte index will not exceed 256 boundary.
data_size = 256;
}
__m512i table1 = _mm512_loadu_epi8(data_pos);
__m512i table2 = _mm512_loadu_epi8(data_pos + 64);
__m512i table3, table4;
if (data_size <= 192)
{
/// only 3 tables need to load if size <= 192
__mmask64 last_mask = MASK64 >> (192 - data_size);
table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128);
table4 = _mm512_setzero_si512();
}
else
{
__mmask64 last_mask = MASK64 >> (256 - data_size);
table3 = _mm512_loadu_epi8(data_pos + 128);
table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192);
}
/// 256 bytes table lookup can use: 2 permute2xvar_epi8 plus 1 blender with MSB
while (pos < limit64)
{
__m512i vidx = _mm512_loadu_epi8(indexes_pos + pos);
__m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2);
__m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4);
__mmask64 msb = _mm512_movepi8_mask(vidx);
__m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2);
_mm512_storeu_epi8(res_pos + pos, out);
pos += 64;
}
if (limit > limit64)
{
__mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit);
__m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos);
__m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2);
__m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4);
__mmask64 msb = _mm512_movepi8_mask(vidx);
__m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2);
_mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out);
}
}
}
);
template <typename T>
template <typename Type>
ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const
@ -521,18 +399,8 @@ ColumnPtr ColumnVector<T>::indexImpl(const PaddedPODArray<Type> & indexes, size_
auto res = this->create(limit);
typename Self::Container & res_data = res->getData();
#if USE_MULTITARGET_CODE
if constexpr (sizeof(T) == 1 && sizeof(Type) == 1)
{
/// VBMI optimization only applicable for (U)Int8 types
if (isArchSupported(TargetArch::AVX512VBMI))
{
TargetSpecific::AVX512VBMI::vectorIndexImpl<Container, Type>(data, indexes, limit, res_data);
return res;
}
}
#endif
TargetSpecific::Default::vectorIndexImpl<Container, Type>(data, indexes, limit, res_data);
for (size_t i = 0; i < limit; ++i)
res_data[i] = data[indexes[i]];
return res;
}

View File

@ -5,7 +5,7 @@
#include <Common/Exception.h>
#include <Common/hex.h>
#include <Core/Settings.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
namespace DB
{
@ -226,6 +226,30 @@ String TracingContext::composeTraceparentHeader() const
static_cast<uint8_t>(trace_flags));
}
void TracingContext::deserialize(ReadBuffer & buf)
{
buf >> this->trace_id
>> "\n"
>> this->span_id
>> "\n"
>> this->tracestate
>> "\n"
>> this->trace_flags
>> "\n";
}
void TracingContext::serialize(WriteBuffer & buf) const
{
buf << this->trace_id
<< "\n"
<< this->span_id
<< "\n"
<< this->tracestate
<< "\n"
<< this->trace_flags
<< "\n";
}
const TracingContextOnThread & CurrentContext()
{
return current_thread_trace_context;

View File

@ -7,6 +7,8 @@ namespace DB
struct Settings;
class OpenTelemetrySpanLog;
class WriteBuffer;
class ReadBuffer;
namespace OpenTelemetry
{
@ -63,6 +65,9 @@ struct TracingContext
{
return trace_id != UUID();
}
void deserialize(ReadBuffer & buf);
void serialize(WriteBuffer & buf) const;
};
/// Tracing context kept on each thread
@ -157,5 +162,16 @@ struct SpanHolder : public Span
}
inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context)
{
context.serialize(buf);
return buf;
}
inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext & context)
{
context.deserialize(buf);
return buf;
}
}
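As an aside, a minimal sketch (not part of this commit) of how the new stream operators could be exercised; it assumes ClickHouse's existing WriteBufferFromOwnString and ReadBufferFromString helpers:
#include <Common/OpenTelemetryTraceContext.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadBufferFromString.h>
void tracingContextRoundTrip()
{
    DB::OpenTelemetry::TracingContext ctx;
    ctx.span_id = 42;
    /// serialize() writes trace_id, span_id, tracestate and trace_flags, each followed by '\n'
    DB::WriteBufferFromOwnString out;
    out << ctx;
    /// deserialize() reads the fields back in the same order
    DB::ReadBufferFromString in(out.str());
    DB::OpenTelemetry::TracingContext restored;
    in >> restored;
}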

View File

@ -32,6 +32,13 @@
v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \
} while(0)
/// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array
/// to ensure correct byte order on different endian machines
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define CURRENT_BYTES_IDX(i) (7 - i)
#else
#define CURRENT_BYTES_IDX(i) (i)
#endif
class SipHash
{
@ -55,7 +62,7 @@ private:
ALWAYS_INLINE void finalize()
{
/// In the last free byte, we write the remainder of the division by 256.
current_bytes[7] = static_cast<UInt8>(cnt);
current_bytes[CURRENT_BYTES_IDX(7)] = static_cast<UInt8>(cnt);
v3 ^= current_word;
SIPROUND;
@ -92,7 +99,7 @@ public:
{
while (cnt & 7 && data < end)
{
current_bytes[cnt & 7] = *data;
current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data;
++data;
++cnt;
}
@ -125,13 +132,13 @@ public:
current_word = 0;
switch (end - data)
{
case 7: current_bytes[6] = data[6]; [[fallthrough]];
case 6: current_bytes[5] = data[5]; [[fallthrough]];
case 5: current_bytes[4] = data[4]; [[fallthrough]];
case 4: current_bytes[3] = data[3]; [[fallthrough]];
case 3: current_bytes[2] = data[2]; [[fallthrough]];
case 2: current_bytes[1] = data[1]; [[fallthrough]];
case 1: current_bytes[0] = data[0]; [[fallthrough]];
case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];
case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]];
case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]];
case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]];
case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]];
case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]];
case 0: break;
}
}
@ -157,8 +164,8 @@ public:
void get128(char * out)
{
finalize();
unalignedStoreLE<UInt64>(out, v0 ^ v1);
unalignedStoreLE<UInt64>(out + 8, v2 ^ v3);
unalignedStore<UInt64>(out, v0 ^ v1);
unalignedStore<UInt64>(out + 8, v2 ^ v3);
}
template <typename T>
@ -225,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s)
{
return sipHash64(s.data(), s.size());
}
#undef CURRENT_BYTES_IDX
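A minimal standalone sketch (not from this commit, with a hypothetical main()) of what CURRENT_BYTES_IDX achieves: on a big-endian host the pending byte is mirrored inside the 8-byte word, so the numeric value of current_word matches what a little-endian host computes from the same input.
#include <cstdint>
#include <cstdio>
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define CURRENT_BYTES_IDX(i) (7 - (i))
#else
#define CURRENT_BYTES_IDX(i) (i)
#endif
int main()
{
    /// Same union layout SipHash itself uses for the pending word.
    union { uint64_t word; uint8_t bytes[8]; } current = {};
    const uint8_t input[3] = {0x01, 0x02, 0x03};
    /// Place each input byte through the endian-aware index.
    for (int i = 0; i < 3; ++i)
        current.bytes[CURRENT_BYTES_IDX(i)] = input[i];
    /// Prints 0000000000030201 on both little- and big-endian hosts,
    /// so the SipHash rounds see identical 64-bit words everywhere.
    std::printf("%016llx\n", static_cast<unsigned long long>(current.word));
    return 0;
}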

View File

@ -705,7 +705,7 @@ void KeeperServer::waitInit()
int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds();
if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); }))
throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout);
}
std::vector<int64_t> KeeperServer::getDeadSessions()

View File

@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
*/
#define COMMON_SETTINGS(M) \
M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\
M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \
M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \
M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \

View File

@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS,
{{"clickhouse", Dialect::clickhouse},
{"kusto", Dialect::kusto}})
}

View File

@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
enum class Dialect
{
clickhouse,
kusto,
kusto_auto,
};
DECLARE_SETTING_ENUM(Dialect)
}

View File

@ -266,8 +266,8 @@ private:
{
size_t pos = message.find('\n');
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos));
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos));
/// Print trace from std::terminate exception line-by-line to make it easy for grep.
while (pos != std::string_view::npos)
@ -315,14 +315,14 @@ private:
if (query_id.empty())
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id,
thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
}
else
{
LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info,
LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
VERSION_STRING, VERSION_OFFICIAL, daemon.build_id,
thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context)
}
@ -838,6 +838,7 @@ static void blockSignals(const std::vector<int> & signals)
throw Poco::Exception("Cannot block signal.");
}
extern String getGitHash();
void BaseDaemon::initializeTerminationAndSignalProcessing()
{
@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
#if defined(__ELF__) && !defined(OS_FREEBSD)
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
if (build_id_hex.empty())
build_id_info = "no build id";
build_id = "";
else
build_id_info = "build id: " + build_id_hex;
build_id = build_id_hex;
#else
build_id_info = "no build id";
build_id = "";
#endif
git_hash = getGitHash();
#if defined(OS_LINUX)
std::string executable_path = getExecutablePath();
@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
void BaseDaemon::logRevision() const
{
Poco::Logger::root().information("Starting " + std::string{VERSION_FULL}
+ " with revision " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", " + build_id_info
+ " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", git hash: " + (git_hash.empty() ? "<unknown>" : git_hash)
+ ", build id: " + (build_id.empty() ? "<unknown>" : build_id) + ")"
+ ", PID " + std::to_string(getpid()));
}

View File

@ -172,7 +172,8 @@ protected:
DB::ConfigProcessor::LoadedConfig loaded_config;
Poco::Util::AbstractConfiguration * last_configuration = nullptr;
String build_id_info;
String build_id;
String git_hash;
String stored_binary_hash;
std::vector<int> handled_signals;

View File

@ -1,7 +1,10 @@
configure_file(GitHash.cpp.in GitHash.generated.cpp)
add_library (daemon
BaseDaemon.cpp
GraphiteWriter.cpp
SentryWriter.cpp
GitHash.generated.cpp
)
if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES)

View File

@ -0,0 +1,8 @@
// File was generated by CMake
#include <base/types.h>
String getGitHash()
{
return "@GIT_HASH@";
}

View File

@ -8,6 +8,7 @@
#include <Interpreters/executeQuery.h>
#include <Parsers/queryToString.h>
#include <Common/Exception.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h>
@ -642,6 +643,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex
entry.query = queryToString(query);
entry.initiator = ddl_worker->getCommonHostID();
entry.setSettingsIfRequired(query_context);
entry.tracing_context = OpenTelemetry::CurrentContext();
String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context);
Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas");

View File

@ -221,6 +221,10 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
/// NOTE Possibly it would be better to execute initial query on the most up-to-date node,
/// but it requires more complex logic around /try node.
OpenTelemetry::SpanHolder span(__FUNCTION__);
span.addAttribute("clickhouse.cluster", database->getDatabaseName());
entry.tracing_context = OpenTelemetry::CurrentContext();
auto zookeeper = getAndSetZooKeeper();
UInt32 our_log_ptr = getLogPointer();
UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));

View File

@ -50,21 +50,26 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const
void DDLLogEntry::assertVersion() const
{
constexpr UInt64 max_version = 2;
if (version == 0 || max_version < version)
if (version == 0
/// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses version 2, so there shouldn't be such a version
|| version == NORMALIZE_CREATE_ON_INITIATOR_VERSION
|| version > DDL_ENTRY_FORMAT_MAX_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}."
"Maximum supported version is {}", version, max_version);
"Maximum supported version is {}", version, DDL_ENTRY_FORMAT_MAX_VERSION);
}
void DDLLogEntry::setSettingsIfRequired(ContextPtr context)
{
version = context->getSettingsRef().distributed_ddl_entry_format_version;
if (version <= 0 || version > DDL_ENTRY_FORMAT_MAX_VERSION)
throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}."
"Maximum supported version is {}.", version, DDL_ENTRY_FORMAT_MAX_VERSION);
/// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper
if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION)
version = SETTINGS_IN_ZK_VERSION;
if (version == SETTINGS_IN_ZK_VERSION)
if (version >= SETTINGS_IN_ZK_VERSION)
settings.emplace(context->getSettingsRef().changes());
}
@ -94,6 +99,9 @@ String DDLLogEntry::toString() const
wb << "settings: " << serializeAST(ast) << "\n";
}
if (version >= OPENTELEMETRY_ENABLED_VERSION)
wb << "tracing: " << this->tracing_context;
return wb.str();
}
@ -106,7 +114,7 @@ void DDLLogEntry::parse(const String & data)
Strings host_id_strings;
rb >> "query: " >> escape >> query >> "\n";
if (version == 1)
if (version == OLDEST_VERSION)
{
rb >> "hosts: " >> host_id_strings >> "\n";
@ -115,9 +123,8 @@ void DDLLogEntry::parse(const String & data)
else
initiator.clear();
}
else if (version == 2)
else if (version >= SETTINGS_IN_ZK_VERSION)
{
if (!rb.eof() && *rb.position() == 'h')
rb >> "hosts: " >> host_id_strings >> "\n";
if (!rb.eof() && *rb.position() == 'i')
@ -134,6 +141,12 @@ void DDLLogEntry::parse(const String & data)
}
}
if (version >= OPENTELEMETRY_ENABLED_VERSION)
{
if (!rb.eof() && *rb.position() == 't')
rb >> "tracing: " >> this->tracing_context;
}
assertEOF(rb);
if (!host_id_strings.empty())

View File

@ -2,6 +2,7 @@
#include <Core/Types.h>
#include <Interpreters/Cluster.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/ZooKeeper/Types.h>
#include <filesystem>
@ -69,12 +70,18 @@ struct DDLLogEntry
static constexpr const UInt64 OLDEST_VERSION = 1;
static constexpr const UInt64 SETTINGS_IN_ZK_VERSION = 2;
static constexpr const UInt64 NORMALIZE_CREATE_ON_INITIATOR_VERSION = 3;
static constexpr const UInt64 OPENTELEMETRY_ENABLED_VERSION = 4;
/// Add new version here
/// Remember to update the value below once new version is added
static constexpr const UInt64 DDL_ENTRY_FORMAT_MAX_VERSION = 4;
UInt64 version = 1;
String query;
std::vector<HostID> hosts;
String initiator; // optional
std::optional<SettingsChanges> settings;
OpenTelemetry::TracingContext tracing_context;
void setSettingsIfRequired(ContextPtr context);
String toString() const;

View File

@ -19,6 +19,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Common/OpenTelemetryTraceContext.h>
#include <Common/setThreadName.h>
#include <Common/randomSeed.h>
#include <Common/ZooKeeper/ZooKeeper.h>
@ -515,6 +516,11 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query);
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ ,
task.entry.tracing_context,
this->context->getOpenTelemetrySpanLog());
String active_node_path = task.getActiveNodePath();
String finished_node_path = task.getFinishedNodePath();

View File

@ -55,6 +55,8 @@ bool isSupportedAlterType(int type)
BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params)
{
OpenTelemetry::SpanHolder span(__FUNCTION__);
if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported");
@ -88,6 +90,8 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context,
cluster = context->getCluster(query->cluster);
}
span.addAttribute("clickhouse.cluster", query->cluster);
/// TODO: support per-cluster grant
context->checkAccess(AccessType::CLUSTER);
@ -164,6 +168,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context,
entry.query = queryToString(query_ptr);
entry.initiator = ddl_worker.getCommonHostID();
entry.setSettingsIfRequired(context);
entry.tracing_context = OpenTelemetry::CurrentContext();
String node_path = ddl_worker.enqueueQuery(entry);
return getDistributedDDLStatus(node_path, entry, context);

View File

@ -73,6 +73,7 @@
#include <memory>
#include <random>
#include <Parsers/Kusto/ParserKQLStatement.h>
namespace ProfileEvents
{
@ -392,10 +393,20 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
String query_table;
try
{
ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
if (settings.dialect == Dialect::kusto && !internal)
{
ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}
else
{
ParserQuery parser(end, settings.allow_settings_after_format_in_insert);
/// TODO: parser should fail early when max_query_size limit is reached.
ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth);
}
if (auto txn = context->getCurrentTransaction())
{

View File

@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_parsers .)
add_headers_and_sources(clickhouse_parsers ./Access)
add_headers_and_sources(clickhouse_parsers ./MySQL)
add_headers_and_sources(clickhouse_parsers ./Kusto)
add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources})
target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils)

View File

@ -0,0 +1,26 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/IParserBase.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLFilter.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
namespace DB
{
bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
String expr = getExprFromToken(pos);
ASTPtr where_expression;
Tokens token_filter(expr.c_str(), expr.c_str()+expr.size());
IParser::Pos pos_filter(token_filter, pos.max_depth);
if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
return true;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLFilter : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL where"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,29 @@
#include <Parsers/IParserBase.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLLimit.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <cstdlib>
#include <format>
namespace DB
{
bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr limit_length;
auto expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
IParser::Pos new_pos(tokens, pos.max_depth);
if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length));
return true;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLLimit : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL limit"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,359 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/CommonParsers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
}
String KQLOperators::genHasAnyAllOpExpr(std::vector<String> &tokens, IParser::Pos &token_pos,String kql_op, String ch_op)
{
String new_expr;
Expected expected;
ParserToken s_lparen(TokenType::OpeningRoundBracket);
++token_pos;
if (!s_lparen.ignore(token_pos, expected))
throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
auto haystack = tokens.back();
String logic_op = (kql_op == "has_all") ? " and " : " or ";
while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon)
{
auto tmp_arg = String(token_pos->begin, token_pos->end);
if (token_pos->type == TokenType::Comma)
new_expr = new_expr + logic_op;
else
new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")";
++token_pos;
if (token_pos->type == TokenType::ClosingRoundBracket)
break;
}
tokens.pop_back();
return new_expr;
}
String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op)
{
String new_expr;
ParserToken s_lparen(TokenType::OpeningRoundBracket);
ASTPtr select;
Expected expected;
++token_pos;
if (!s_lparen.ignore(token_pos, expected))
throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
--token_pos;
--token_pos;
return ch_op;
}
String KQLOperators::genHaystackOpExpr(std::vector<String> &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos)
{
String new_expr, left_wildcards, right_wildcards, left_space, right_space;
switch (wildcards_pos)
{
case WildcardsPos::none:
break;
case WildcardsPos::left:
left_wildcards ="%";
break;
case WildcardsPos::right:
right_wildcards = "%";
break;
case WildcardsPos::both:
left_wildcards ="%";
right_wildcards = "%";
break;
}
switch (space_pos)
{
case WildcardsPos::none:
break;
case WildcardsPos::left:
left_space =" ";
break;
case WildcardsPos::right:
right_space = " ";
break;
case WildcardsPos::both:
left_space =" ";
right_space = " ";
break;
}
++token_pos;
if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier))
new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')";
else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord))
{
auto tmp_arg = String(token_pos->begin, token_pos->end);
new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))";
}
else
throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
tokens.pop_back();
return new_expr;
}
bool KQLOperators::convert(std::vector<String> &tokens,IParser::Pos &pos)
{
auto begin = pos;
if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
KQLOperatorValue op_value = KQLOperatorValue::none;
auto token = String(pos->begin,pos->end);
String op = token;
if (token == "!")
{
++pos;
if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR);
op ="!"+String(pos->begin,pos->end);
}
else if (token == "matches")
{
++pos;
if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin,pos->end) == "regex")
op +=" regex";
else
--pos;
}
}
else
{
op = token;
}
++pos;
if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin,pos->end) == "~")
op +="~";
else
--pos;
}
else
--pos;
if (KQLOperator.find(op) == KQLOperator.end())
{
pos = begin;
return false;
}
op_value = KQLOperator[op];
String new_expr;
if (op_value == KQLOperatorValue::none)
tokens.push_back(op);
else
{
auto last_op = tokens.back();
auto last_pos = pos;
switch (op_value)
{
case KQLOperatorValue::contains:
new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both);
break;
case KQLOperatorValue::not_contains:
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both);
break;
case KQLOperatorValue::contains_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both);
break;
case KQLOperatorValue::not_contains_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both);
break;
case KQLOperatorValue::endswith:
new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left);
break;
case KQLOperatorValue::not_endswith:
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left);
break;
case KQLOperatorValue::endswith_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none);
break;
case KQLOperatorValue::not_endswith_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none);
break;
case KQLOperatorValue::equal:
break;
case KQLOperatorValue::not_equal:
break;
case KQLOperatorValue::equal_cs:
new_expr = "==";
break;
case KQLOperatorValue::not_equal_cs:
new_expr = "!=";
break;
case KQLOperatorValue::has:
new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none);
break;
case KQLOperatorValue::not_has:
new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none);
break;
case KQLOperatorValue::has_all:
new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive");
break;
case KQLOperatorValue::has_any:
new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive");
break;
case KQLOperatorValue::has_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none);
break;
case KQLOperatorValue::not_has_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none);
break;
case KQLOperatorValue::hasprefix:
new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right);
new_expr += " or ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left);
break;
case KQLOperatorValue::not_hasprefix:
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right);
new_expr += " and ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left);
break;
case KQLOperatorValue::hasprefix_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none);
new_expr += " or ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left);
break;
case KQLOperatorValue::not_hasprefix_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none);
new_expr += " and ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left);
break;
case KQLOperatorValue::hassuffix:
new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left);
new_expr += " or ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right);
break;
case KQLOperatorValue::not_hassuffix:
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left);
new_expr += " and ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right);
break;
case KQLOperatorValue::hassuffix_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none);
new_expr += " or ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right);
break;
case KQLOperatorValue::not_hassuffix_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none);
new_expr += " and ";
tokens.push_back(last_op);
new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right);
break;
case KQLOperatorValue::in_cs:
new_expr = genInOpExpr(pos,op,"in");
break;
case KQLOperatorValue::not_in_cs:
new_expr = genInOpExpr(pos,op,"not in");
break;
case KQLOperatorValue::in:
break;
case KQLOperatorValue::not_in:
break;
case KQLOperatorValue::matches_regex:
new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none);
break;
case KQLOperatorValue::startswith:
new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right);
break;
case KQLOperatorValue::not_startswith:
new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right);
break;
case KQLOperatorValue::startswith_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none);
break;
case KQLOperatorValue::not_startswith_cs:
new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none);
break;
default:
break;
}
tokens.push_back(new_expr);
}
return true;
}
pos = begin;
return false;
}
}

View File

@ -0,0 +1,106 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <unordered_map>
namespace DB
{
class KQLOperators
{
public:
bool convert(std::vector<String> &tokens,IParser::Pos &pos);
protected:
enum class WildcardsPos:uint8_t
{
none,
left,
right,
both
};
enum class KQLOperatorValue : uint16_t
{
none,
contains,
not_contains,
contains_cs,
not_contains_cs,
endswith,
not_endswith,
endswith_cs,
not_endswith_cs,
equal, //=~
not_equal,//!~
equal_cs, //=
not_equal_cs,//!=
has,
not_has,
has_all,
has_any,
has_cs,
not_has_cs,
hasprefix,
not_hasprefix,
hasprefix_cs,
not_hasprefix_cs,
hassuffix,
not_hassuffix,
hassuffix_cs,
not_hassuffix_cs,
in_cs, //in
not_in_cs, //!in
in, //in~
not_in ,//!in~
matches_regex,
startswith,
not_startswith,
startswith_cs,
not_startswith_cs,
};
std::unordered_map <String,KQLOperatorValue> KQLOperator =
{
{"contains" , KQLOperatorValue::contains},
{"!contains" , KQLOperatorValue::not_contains},
{"contains_cs" , KQLOperatorValue::contains_cs},
{"!contains_cs" , KQLOperatorValue::not_contains_cs},
{"endswith" , KQLOperatorValue::endswith},
{"!endswith" , KQLOperatorValue::not_endswith},
{"endswith_cs" , KQLOperatorValue::endswith_cs},
{"!endswith_cs" , KQLOperatorValue::not_endswith_cs},
{"=~" , KQLOperatorValue::equal},
{"!~" , KQLOperatorValue::not_equal},
{"==" , KQLOperatorValue::equal_cs},
{"!=" , KQLOperatorValue::not_equal_cs},
{"has" , KQLOperatorValue::has},
{"!has" , KQLOperatorValue::not_has},
{"has_all" , KQLOperatorValue::has_all},
{"has_any" , KQLOperatorValue::has_any},
{"has_cs" , KQLOperatorValue::has_cs},
{"!has_cs" , KQLOperatorValue::not_has_cs},
{"hasprefix" , KQLOperatorValue::hasprefix},
{"!hasprefix" , KQLOperatorValue::not_hasprefix},
{"hasprefix_cs" , KQLOperatorValue::hasprefix_cs},
{"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs},
{"hassuffix" , KQLOperatorValue::hassuffix},
{"!hassuffix" , KQLOperatorValue::not_hassuffix},
{"hassuffix_cs" , KQLOperatorValue::hassuffix_cs},
{"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs},
{"in" , KQLOperatorValue::in_cs},
{"!in" , KQLOperatorValue::not_in_cs},
{"in~" , KQLOperatorValue::in},
{"!in~" , KQLOperatorValue::not_in},
{"matches regex" , KQLOperatorValue::matches_regex},
{"startswith" , KQLOperatorValue::startswith},
{"!startswith" , KQLOperatorValue::not_startswith},
{"startswith_cs" , KQLOperatorValue::startswith_cs},
{"!startswith_cs" , KQLOperatorValue::not_startswith_cs},
};
static String genHaystackOpExpr(std::vector<String> &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none);
static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op);
static String genHasAnyAllOpExpr(std::vector<String> &tokens,IParser::Pos &token_pos,String kql_op, String ch_op);
};
}

View File

@ -0,0 +1,25 @@
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLProject.h>
namespace DB
{
bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr select_expression_list;
String expr;
expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str()+expr.size());
IParser::Pos new_pos(tokens, pos.max_depth);
if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
return true;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLProject : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL project"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,342 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLTable.h>
#include <Parsers/Kusto/ParserKQLProject.h>
#include <Parsers/Kusto/ParserKQLFilter.h>
#include <Parsers/Kusto/ParserKQLSort.h>
#include <Parsers/Kusto/ParserKQLSummarize.h>
#include <Parsers/Kusto/ParserKQLLimit.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/CommonParsers.h>
#include <format>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/Kusto/ParserKQLOperators.h>
namespace DB
{
String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth)
{
Tokens tokens(text.c_str(), text.c_str() + text.size());
IParser::Pos pos(tokens, max_depth);
return getExprFromToken(pos);
}
String ParserKQLBase :: getExprFromPipe(Pos & pos)
{
uint16_t bracket_count = 0;
auto begin = pos;
auto end = pos;
while (!end->isEnd() && end->type != TokenType::Semicolon)
{
if (end->type == TokenType::OpeningRoundBracket)
++bracket_count;
if (end->type == TokenType::ClosingRoundBracket)
--bracket_count;
if (end->type == TokenType::PipeMark && bracket_count == 0)
break;
++end;
}
--end;
return String(begin->begin, end->end);
}
String ParserKQLBase :: getExprFromToken(Pos & pos)
{
String res;
std::vector<String> tokens;
String alias;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
String token = String(pos->begin,pos->end);
if (token == "=")
{
++pos;
if (String(pos->begin,pos->end) != "~")
{
alias = tokens.back();
tokens.pop_back();
}
--pos;
}
else if (!KQLOperators().convert(tokens,pos))
{
tokens.push_back(token);
}
if (pos->type == TokenType::Comma && !alias.empty())
{
tokens.pop_back();
tokens.push_back("AS");
tokens.push_back(alias);
tokens.push_back(",");
alias.clear();
}
++pos;
}
if (!alias.empty())
{
tokens.push_back("AS");
tokens.push_back(alias);
}
for (auto const &token : tokens)
res = res.empty() ? token : res + " " + token;
return res;
}
std::unique_ptr<IParserBase> ParserKQLQuery::getOperator(String & op_name)
{
if (op_name == "filter" || op_name == "where")
return std::make_unique<ParserKQLFilter>();
else if (op_name == "limit" || op_name == "take")
return std::make_unique<ParserKQLLimit>();
else if (op_name == "project")
return std::make_unique<ParserKQLProject>();
else if (op_name == "sort by" || op_name == "order by")
return std::make_unique<ParserKQLSort>();
else if (op_name == "summarize")
return std::make_unique<ParserKQLSummarize>();
else if (op_name == "table")
return std::make_unique<ParserKQLTable>();
else
return nullptr;
}
bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
struct KQLOperatorDataFlowState
{
String operator_name;
bool need_input;
bool gen_output;
int8_t backspace_steps; // how many steps back to the last token of the previous pipe
};
auto select_query = std::make_shared<ASTSelectQuery>();
node = select_query;
ASTPtr tables;
std::unordered_map<std::string, KQLOperatorDataFlowState> kql_parser =
{
{ "filter", {"filter", false, false, 3}},
{ "where", {"filter", false, false, 3}},
{ "limit", {"limit", false, true, 3}},
{ "take", {"limit", false, true, 3}},
{ "project", {"project", false, false, 3}},
{ "sort by", {"order by", false, false, 4}},
{ "order by", {"order by", false, false, 4}},
{ "table", {"table", false, false, 3}},
{ "summarize", {"summarize", true, true, 3}}
};
std::vector<std::pair<String, Pos>> operation_pos;
String table_name(pos->begin, pos->end);
operation_pos.push_back(std::make_pair("table", pos));
++pos;
uint16_t bracket_count = 0;
while (!pos->isEnd() && pos->type != TokenType::Semicolon)
{
if (pos->type == TokenType::OpeningRoundBracket)
++bracket_count;
if (pos->type == TokenType::ClosingRoundBracket)
--bracket_count;
if (pos->type == TokenType::PipeMark && bracket_count == 0)
{
++pos;
String kql_operator(pos->begin, pos->end);
if (kql_operator == "order" || kql_operator == "sort")
{
++pos;
ParserKeyword s_by("by");
if (s_by.ignore(pos,expected))
{
kql_operator = "order by";
--pos;
}
}
if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end())
return false;
++pos;
operation_pos.push_back(std::make_pair(kql_operator, pos));
}
else
++pos;
}
auto kql_operator_str = operation_pos.back().first;
auto npos = operation_pos.back().second;
if (!npos.isValid())
return false;
auto kql_operator_p = getOperator(kql_operator_str);
if (!kql_operator_p)
return false;
if (operation_pos.size() == 1)
{
if (!kql_operator_p->parse(npos, node, expected))
return false;
}
else if (operation_pos.size() == 2 && operation_pos.front().first == "table")
{
if (!kql_operator_p->parse(npos, node, expected))
return false;
npos = operation_pos.front().second;
if (!ParserKQLTable().parse(npos, node, expected))
return false;
}
else
{
String project_clause, order_clause, where_clause, limit_clause;
auto last_pos = operation_pos.back().second;
auto last_op = operation_pos.back().first;
auto set_main_query_clause = [&](String & op, Pos & op_pos)
{
auto op_str = ParserKQLBase::getExprFromPipe(op_pos);
if (op == "project")
project_clause = op_str;
else if (op == "where" || op == "filter")
where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format(" AND ({})", op_str);
else if (op == "limit" || op == "take")
limit_clause = op_str;
else if (op == "order by" || op == "sort by")
order_clause = order_clause.empty() ? op_str : order_clause + "," + op_str;
};
set_main_query_clause(last_op, last_pos);
operation_pos.pop_back();
if (kql_parser[last_op].need_input)
{
if (!kql_operator_p->parse(npos, node, expected))
return false;
}
else
{
while (!operation_pos.empty())
{
auto prev_op = operation_pos.back().first;
auto prev_pos = operation_pos.back().second;
if (kql_parser[prev_op].gen_output)
break;
if (!project_clause.empty() && prev_op == "project")
break;
set_main_query_clause(prev_op, prev_pos);
operation_pos.pop_back();
last_op = prev_op;
last_pos = prev_pos;
}
}
if (!operation_pos.empty())
{
for (auto i = 0; i < kql_parser[last_op].backspace_steps; ++i)
--last_pos;
String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end));
Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size());
IParser::Pos pos_subquery(token_subquery, pos.max_depth);
if (!ParserKQLSubquery().parse(pos_subquery, tables, expected))
return false;
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables));
}
else
{
if (!ParserKQLTable().parse(last_pos, node, expected))
return false;
}
auto set_query_clause = [&](String op_str, String op_clause)
{
auto op_parser = getOperator(op_str);
if (op_parser)
{
Tokens token_clause(op_clause.c_str(), op_clause.c_str() + op_clause.size());
IParser::Pos pos_clause(token_clause, pos.max_depth);
if (!op_parser->parse(pos_clause, node, expected))
return false;
}
return true;
};
if (!select_query->select())
{
if (project_clause.empty())
project_clause = "*";
if (!set_query_clause("project", project_clause))
return false;
}
if (!order_clause.empty())
if (!set_query_clause("order by", order_clause))
return false;
if (!where_clause.empty())
if (!set_query_clause("where", where_clause))
return false;
if (!limit_clause.empty())
if (!set_query_clause("limit", limit_clause))
return false;
return true;
}
if (!select_query->select())
{
auto expr = String("*");
Tokens tokens(expr.c_str(), expr.c_str()+expr.size());
IParser::Pos new_pos(tokens, pos.max_depth);
if (!std::make_unique<ParserKQLProject>()->parse(new_pos, node, expected))
return false;
}
return true;
}
bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr select_node;
if (!ParserKQLTaleFunction().parse(pos, select_node, expected))
return false;
ASTPtr node_subquery = std::make_shared<ASTSubquery>();
node_subquery->children.push_back(select_node);
ASTPtr node_table_expr = std::make_shared<ASTTableExpression>();
node_table_expr->as<ASTTableExpression>()->subquery = node_subquery;
node_table_expr->children.emplace_back(node_subquery);
ASTPtr node_table_in_select_query_element = std::make_shared<ASTTablesInSelectQueryElement>();
node_table_in_select_query_element->as<ASTTablesInSelectQueryElement>()->table_expression = node_table_expr;
ASTPtr res = std::make_shared<ASTTablesInSelectQuery>();
res->children.emplace_back(node_table_in_select_query_element);
node = res;
return true;
}
}
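As a usage sketch (illustrative, not part of the patch; it assumes the parseQuery and serializeAST helpers from src/Parsers keep their usual signatures), the new entry point can be driven directly, the same way the unit tests below instantiate ParserKQLQuery:

#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <iostream>
#include <string>

int main()
{
    /// Translate one KQL pipe query into a ClickHouse SELECT and print it.
    std::string kql = "Customers | project FirstName, LastName | take 3";
    DB::ParserKQLQuery parser;
    DB::ASTPtr ast = DB::parseQuery(parser, kql.data(), kql.data() + kql.size(), "KQL query", 0, 1000);
    std::cout << DB::serializeAST(*ast, /*one_line=*/ true) << std::endl;
}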

View File

@ -0,0 +1,32 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/ASTSelectQuery.h>
namespace DB
{
class ParserKQLBase : public IParserBase
{
public:
static String getExprFromToken(Pos & pos);
static String getExprFromPipe(Pos & pos);
static String getExprFromToken(const String & text, const uint32_t & max_depth);
};
class ParserKQLQuery : public IParserBase
{
protected:
static std::unique_ptr<IParserBase> getOperator(String &op_name);
const char * getName() const override { return "KQL query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserKQLSubquery : public IParserBase
{
protected:
const char * getName() const override { return "KQL subquery"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,60 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/IParserBase.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLSort.h>
namespace DB
{
bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
bool has_dir = false;
std::vector <bool> has_directions;
ParserOrderByExpressionList order_list;
ASTPtr order_expression_list;
auto expr = getExprFromToken(pos);
Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
IParser::Pos new_pos(tokens, pos.max_depth);
auto pos_backup = new_pos;
if (!order_list.parse(pos_backup, order_expression_list, expected))
return false;
while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon)
{
String tmp(new_pos->begin, new_pos->end);
if (tmp == "desc" || tmp == "asc")
has_dir = true;
if (new_pos->type == TokenType::Comma)
{
has_directions.push_back(has_dir);
has_dir = false;
}
++new_pos;
}
has_directions.push_back(has_dir);
for (uint64_t i = 0; i < order_expression_list->children.size(); ++i)
{
if (!has_directions[i])
{
auto *order_expr = order_expression_list->children[i]->as<ASTOrderByElement>();
order_expr->direction = -1; // default desc
if (!order_expr->nulls_direction_was_explicitly_specified)
order_expr->nulls_direction = -1;
else
order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1;
}
}
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list));
return true;
}
}

View File

@ -0,0 +1,16 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLSort : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL order by"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,102 @@
#include <Parsers/IParserBase.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLStatement.h>
#include <Parsers/CommonParsers.h>
namespace DB
{
bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert);
ParserSetQuery set_p;
bool res = query_with_output_p.parse(pos, node, expected)
|| set_p.parse(pos, node, expected);
return res;
}
bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKQLWithUnionQuery kql_p;
ASTPtr query;
bool parsed = kql_p.parse(pos, query, expected);
if (!parsed)
return false;
node = std::move(query);
return true;
}
bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr kql_query;
if (!ParserKQLQuery().parse(pos, kql_query, expected))
return false;
if (kql_query->as<ASTSelectWithUnionQuery>())
{
node = std::move(kql_query);
return true;
}
auto list_node = std::make_shared<ASTExpressionList>();
list_node->children.push_back(kql_query);
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
node = select_with_union_query;
select_with_union_query->list_of_selects = list_node;
select_with_union_query->children.push_back(select_with_union_query->list_of_selects);
return true;
}
bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ParserKQLWithUnionQuery kql_p;
ASTPtr select;
ParserToken s_lparen(TokenType::OpeningRoundBracket);
auto begin = pos;
auto paren_count = 0;
String kql_statement;
if (s_lparen.ignore(pos, expected))
{
++paren_count;
while (!pos->isEnd())
{
if (pos->type == TokenType::ClosingRoundBracket)
--paren_count;
if (pos->type == TokenType::OpeningRoundBracket)
++paren_count;
if (paren_count == 0)
break;
kql_statement = kql_statement + " " + String(pos->begin,pos->end);
++pos;
}
Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size());
IParser::Pos pos_kql(token_kql, pos.max_depth);
if (kql_p.parse(pos_kql, select, expected))
{
node = select;
++pos;
return true;
}
}
pos = begin;
return false;
}
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <Parsers/IParserBase.h>
namespace DB
{
class ParserKQLStatement : public IParserBase
{
private:
const char * end;
bool allow_settings_after_format_in_insert;
const char * getName() const override { return "KQL Statement"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false)
: end(end_)
, allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_)
{}
};
class ParserKQLWithOutput : public IParserBase
{
protected:
const char * end;
bool allow_settings_after_format_in_insert;
const char * getName() const override { return "KQL with output"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false)
: end(end_)
, allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_)
{}
};
class ParserKQLWithUnionQuery : public IParserBase
{
protected:
const char * getName() const override { return "KQL query, possibly with UNION"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
class ParserKQLTaleFunction : public IParserBase
{
protected:
const char * getName() const override { return "KQL() function"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,81 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInterpolateElement.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/CommonParsers.h>
#include <Parsers/ExpressionElementParsers.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLSummarize.h>
#include <Parsers/ParserSelectQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ParserWithElement.h>
namespace DB
{
bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
ASTPtr select_expression_list;
ASTPtr group_expression_list;
String expr_aggregation;
String expr_groupby;
String expr_columns;
bool groupby = false;
auto begin = pos;
auto pos_groupby = pos;
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
{
if (String(pos->begin, pos->end) == "by")
{
groupby = true;
auto end = pos;
--end;
expr_aggregation = begin <= end ? String(begin->begin, end->end) : "";
pos_groupby = pos;
++pos_groupby;
}
++pos;
}
--pos;
if (groupby)
expr_groupby = String(pos_groupby->begin, pos->end);
else
expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : "";
auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +",";
expr_columns = groupby ? expr_aggregation_str + expr_groupby : expr_aggregation_str;
String converted_columns = getExprFromToken(expr_columns, pos.max_depth);
Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size());
IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth);
if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
if (groupby)
{
String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth);
Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size());
IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth);
if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list));
}
return true;
}
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLSummarize : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL summarize"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -0,0 +1,55 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/IParserBase.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <Parsers/Kusto/ParserKQLTable.h>
#include <unordered_set>
namespace DB
{
bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
std::unordered_set<String> sql_keywords
({
"SELECT",
"INSERT",
"CREATE",
"ALTER",
"SYSTEM",
"SHOW",
"GRANT",
"REVOKE",
"ATTACH",
"CHECK",
"DESCRIBE",
"DESC",
"DETACH",
"DROP",
"EXISTS",
"KILL",
"OPTIMIZE",
"RENAME",
"SET",
"TRUNCATE",
"USE",
"EXPLAIN"
});
ASTPtr tables;
String table_name(pos->begin,pos->end);
String table_name_upcase(table_name);
std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper);
if (sql_keywords.find(table_name_upcase) != sql_keywords.end())
return false;
if (!ParserTablesInSelectQuery().parse(pos, tables, expected))
return false;
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables));
return true;
}
}

View File

@ -0,0 +1,17 @@
#pragma once
#include <Parsers/IParserBase.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
namespace DB
{
class ParserKQLTable : public ParserKQLBase
{
protected:
const char * getName() const override { return "KQL Table"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
}

View File

@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl()
++pos;
if (pos < end && *pos == '|')
return Token(TokenType::Concatenation, token_begin, ++pos);
return Token(TokenType::ErrorSinglePipeMark, token_begin, pos);
return Token(TokenType::PipeMark, token_begin, pos);
}
case '@':
{

View File

@ -51,6 +51,7 @@ namespace DB
M(Greater) \
M(LessOrEquals) \
M(GreaterOrEquals) \
M(PipeMark) \
M(Concatenation) /** String concatenation operator: || */ \
\
M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \

View File

@ -12,6 +12,7 @@
#include <Parsers/ParserAttachAccessEntity.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Parsers/Kusto/ParserKQLQuery.h>
#include <string_view>
#include <regex>
#include <gtest/gtest.h>
@ -292,3 +293,185 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest,
"^$"
}
})));
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
::testing::Combine(
::testing::Values(std::make_shared<ParserKQLQuery>()),
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
{
"Customers",
"SELECT *\nFROM Customers"
},
{
"Customers | project FirstName,LastName,Occupation",
"SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers"
},
{
"Customers | project FirstName,LastName,Occupation | take 3",
"SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3"
},
{
"Customers | project FirstName,LastName,Occupation | limit 3",
"SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3"
},
{
"Customers | project FirstName,LastName,Occupation | take 1 | take 3",
"SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3"
},
{
"Customers | project FirstName,LastName,Occupation | take 3 | take 1",
"SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1"
},
{
"Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName",
"SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)"
},
{
"Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education",
"SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)"
},
{
"Customers | sort by FirstName desc",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
},
{
"Customers | take 3 | order by FirstName desc",
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC"
},
{
"Customers | sort by FirstName asc",
"SELECT *\nFROM Customers\nORDER BY FirstName ASC"
},
{
"Customers | sort by FirstName",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
},
{
"Customers | order by LastName",
"SELECT *\nFROM Customers\nORDER BY LastName DESC"
},
{
"Customers | order by Age desc , FirstName asc ",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC"
},
{
"Customers | order by Age asc , FirstName desc",
"SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC"
},
{
"Customers | sort by FirstName | order by Age ",
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC"
},
{
"Customers | sort by FirstName nulls first",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST"
},
{
"Customers | sort by FirstName nulls last",
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
},
{
"Customers | where Occupation == 'Skilled Manual'",
"SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'"
},
{
"Customers | where Occupation != 'Skilled Manual'",
"SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'"
},
{
"Customers |where Education in ('Bachelors','High School')",
"SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')"
},
{
"Customers | where Education !in ('Bachelors','High School')",
"SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')"
},
{
"Customers |where Education contains_cs 'Degree'",
"SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'"
},
{
"Customers | where Occupation startswith_cs 'Skil'",
"SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')"
},
{
"Customers | where FirstName endswith_cs 'le'",
"SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')"
},
{
"Customers | where Age == 26",
"SELECT *\nFROM Customers\nWHERE Age = 26"
},
{
"Customers | where Age > 20 and Age < 30",
"SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)"
},
{
"Customers | where Age > 30 | where Education == 'Bachelors'",
"SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)"
},
{
"Customers |summarize count() by Occupation",
"SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation"
},
{
"Customers|summarize sum(Age) by Occupation",
"SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation"
},
{
"Customers|summarize avg(Age) by Occupation",
"SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation"
},
{
"Customers|summarize min(Age) by Occupation",
"SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation"
},
{
"Customers |summarize max(Age) by Occupation",
"SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation"
},
{
"Customers | where FirstName contains 'pet'",
"SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'"
},
{
"Customers | where FirstName !contains 'pet'",
"SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')"
},
{
"Customers | where FirstName endswith 'er'",
"SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'"
},
{
"Customers | where FirstName !endswith 'er'",
"SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')"
},
{
"Customers | where Education has 'School'",
"SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')"
},
{
"Customers | where Education !has 'School'",
"SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')"
},
{
"Customers | where Education has_cs 'School'",
"SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')"
},
{
"Customers | where Education !has_cs 'School'",
"SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')"
},
{
"Customers | where FirstName matches regex 'P.*r'",
"SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')"
},
{
"Customers | where FirstName startswith 'pet'",
"SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'"
},
{
"Customers | where FirstName !startswith 'pet'",
"SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')"
}
})));

View File

@ -8,6 +8,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SUPPORT_IS_DISABLED;
extern const int REPLICA_STATUS_CHANGED;
}
ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_)
@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run()
{
if (const auto * coordination_exception = dynamic_cast<const Coordination::Exception *>(&e))
needs_retry = Coordination::isHardwareError(coordination_exception->code);
else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED)
needs_retry = true;
if (needs_retry)
{
@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run()
void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path)
{
/// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on replica startup.
/// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER.
/// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica),
/// but it did not work correctly, because "/metadata" node was re-created on server startup.
/// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped.
String replica_metadata;
const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata);
if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version"))
if (!replica_metadata_exists || replica_metadata.empty())
{
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer "
"should be done through an intermediate version (failed to get metadata or metadata_version for {},"
@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl()
checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path);
String replica_metadata_version;
const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version);
if (replica_metadata_version_exists)
{
storage.metadata_version = parse<int>(replica_metadata_version);
}
else
{
/// Table was created before 20.4 and was never altered,
/// let's initialize replica metadata version from global metadata version.
Coordination::Stat table_metadata_version_stat;
zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat);
Coordination::Requests ops;
ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version));
ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent));
Coordination::Responses res;
auto code = zookeeper->tryMulti(ops, res);
if (code == Coordination::Error::ZBADVERSION)
throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version "
"because table was concurrently altered, will retry");
zkutil::KeeperMultiException::check(code, ops, res);
}
storage.checkTableStructure(replica_path, metadata_snapshot);
storage.checkParts(skip_sanity_checks);
storage.metadata_version = parse<int>(zookeeper->get(replica_path + "/metadata_version"));
/// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart),
/// don't allow to reinitialize them, delete each of them immediately.
storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"});

View File

@ -683,8 +683,6 @@ static StoragePtr create(const StorageFactory::Arguments & args)
if (replicated)
{
auto storage_policy = args.getContext()->getStoragePolicy(storage_settings->storage_policy);
return std::make_shared<StorageReplicatedMergeTree>(
zookeeper_path,
replica_name,

View File

@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap(
for (size_t i = 0; i < 1000; ++i)
{
std::string stored_metadata_string;
auto exists = client->tryGet(metadata_path, stored_metadata_string);
if (exists)
{
// this requires same name for columns
// maybe we can do a smarter comparison for columns and primary key expression
if (stored_metadata_string != metadata_string)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Path {} is already used but the stored table definition doesn't match. Stored metadata: {}",
root_path,
stored_metadata_string);
auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent);
// tables_path was removed with drop
if (code == Coordination::Error::ZNONODE)
{
LOG_INFO(log, "Metadata nodes were removed by another server, will retry");
continue;
}
else if (code != Coordination::Error::ZOK)
{
throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path);
}
return;
}
if (client->exists(dropped_path))
{
LOG_INFO(log, "Removing leftover nodes");
@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap(
}
}
std::string stored_metadata_string;
auto exists = client->tryGet(metadata_path, stored_metadata_string);
Coordination::Requests create_requests
{
zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent),
zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent),
};
if (exists)
Coordination::Responses create_responses;
auto code = client->tryMulti(create_requests, create_responses);
if (code == Coordination::Error::ZNODEEXISTS)
{
// this requires same name for columns
// maybe we can do a smarter comparison for columns and primary key expression
if (stored_metadata_string != metadata_string)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Path {} is already used but the stored table definition doesn't match. Stored metadata: {}",
root_path,
stored_metadata_string);
LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path);
continue;
}
else
else if (code != Coordination::Error::ZOK)
{
auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZNODEEXISTS)
continue;
else if (code != Coordination::Error::ZOK)
throw Coordination::Exception(code, metadata_path);
zkutil::KeeperMultiException::check(code, create_requests, create_responses);
}
client->createIfNotExists(tables_path, "");
auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent);
if (code == Coordination::Error::ZOK)
{
// metadata now should be guaranteed to exist because we added our UUID to the tables_path
client->createIfNotExists(data_path, "");
table_is_valid = true;
return;
}
if (code == Coordination::Error::ZNONODE)
LOG_INFO(log, "Metadata nodes were deleted in background, will retry");
else
throw Coordination::Exception(code, table_path);
table_is_valid = true;
return;
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path);

View File

@ -7450,8 +7450,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const
/// can be called only during table initialization
std::lock_guard lock(table_shared_id_mutex);
bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper;
/// Can happen if table was partially initialized before drop by DatabaseCatalog
if (table_shared_id == UUIDHelpers::Nil)
if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil)
createTableSharedID();
return toString(table_shared_id);

View File

@ -2,54 +2,21 @@
# You can also regenerate it manually this way:
# execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh")
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp")
get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS)
get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP)
find_package(Git)
if(Git_FOUND)
# The commit's git hash, and whether the building workspace was dirty or not
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse HEAD
WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}"
OUTPUT_VARIABLE GIT_HASH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Git branch name
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}"
OUTPUT_VARIABLE GIT_BRANCH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# The date of the commit
SET(ENV{TZ} "UTC")
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}"
OUTPUT_VARIABLE GIT_DATE
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# The subject of the commit
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%s
WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}"
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
function(generate_system_build_options)
include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake)
include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake)
include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake)
configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD})
configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp)
endfunction()
generate_system_build_options()
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(storages_system .)
list (APPEND storages_system_sources ${CONFIG_BUILD})
list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp)
add_custom_target(generate-contributors
./StorageSystemContributors.sh
@ -78,6 +45,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
# Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
TARGET information_schema_metadata
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"

View File

@ -1,4 +1,4 @@
// .cpp autogenerated by cmake
// File was generated by CMake
const char * auto_config_build[]
{

View File

@ -865,6 +865,12 @@ class TestCase:
stdout=PIPE,
universal_newlines=True,
).communicate()[0]
if diff.startswith("Binary files "):
diff += "Content of stdout:\n===================\n"
file = open(self.stdout_file, "r")
diff += str(file.read())
file.close()
diff += "==================="
description += f"\n{diff}\n"
if debug_log:
description += "\n"

View File

@ -8,6 +8,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk>
<s3_disk_2>
<type>s3</type>
@ -15,6 +16,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_2>
<s3_disk_3>
<type>s3</type>
@ -22,6 +24,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_3>
<s3_disk_4>
<type>s3</type>
@ -29,6 +32,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_4>
<s3_disk_5>
<type>s3</type>
@ -36,6 +40,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_5>
<s3_disk_6>
<type>s3</type>
@ -43,6 +48,7 @@
<endpoint>http://localhost:11111/test/00170_test/</endpoint>
<access_key_id>clickhouse</access_key_id>
<secret_access_key>clickhouse</secret_access_key>
<request_timeout_ms>20000</request_timeout_ms>
</s3_disk_6>
<!-- cache for s3 disks -->
<s3_cache>

View File

@ -0,0 +1,41 @@
import socket
import time
def get_keeper_socket(cluster, node, port=9181):
hosts = cluster.get_instance_ip(node.name)
client = socket.socket()
client.settimeout(10)
client.connect((hosts, port))
return client
def send_4lw_cmd(cluster, node, cmd="ruok", port=9181):
client = None
try:
client = get_keeper_socket(cluster, node, port)
client.send(cmd.encode())
data = client.recv(100_000)
data = data.decode()
return data
finally:
if client is not None:
client.close()
NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests"
def wait_until_connected(cluster, node, port=9181):
while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.1)
def wait_until_quorum_lost(cluster, node, port=9181):
while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.1)
def wait_nodes(cluster, nodes):
for node in nodes:
wait_until_connected(cluster, node)

View File

@ -15,6 +15,7 @@ node1 = cluster.add_instance(
def started_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()

View File

@ -1,22 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,8 +0,0 @@
<clickhouse>
<zookeeper>
<node index="1">
<host>node1</host>
<port>9181</port>
</node>
</zookeeper>
</clickhouse>

View File

@ -2,6 +2,7 @@ import os
import pytest
import socket
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import time
@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0):
return _fake_zk_instance
def get_keeper_socket(node_name):
hosts = cluster.get_instance_ip(node_name)
client = socket.socket()
client.settimeout(10)
client.connect((hosts, 9181))
return client
def send_4lw_cmd(node_name, cmd="ruok"):
client = None
try:
client = get_keeper_socket(node_name)
client.send(cmd.encode())
data = client.recv(100_000)
data = data.decode()
return data
finally:
if client is not None:
client.close()
def wait_until_connected(node_name):
while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.1)
def wait_nodes(nodes):
for node in nodes:
wait_until_connected(node.name)
def wait_and_assert_data(zk, path, data):
while zk.retry(zk.exists, path) is None:
time.sleep(0.1)
@ -104,9 +74,6 @@ def close_zk(zk):
zk.close()
NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests"
def test_cluster_recovery(started_cluster):
node_zks = []
try:
@ -114,7 +81,7 @@ def test_cluster_recovery(started_cluster):
for node in nodes[CLUSTER_SIZE:]:
node.stop_clickhouse()
wait_nodes(nodes[:CLUSTER_SIZE])
keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE])
node_zks = [get_fake_zk(node.name) for node in nodes[:CLUSTER_SIZE]]
@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster):
wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra")
nodes[0].start_clickhouse()
wait_until_connected(nodes[0].name)
keeper_utils.wait_until_connected(cluster, nodes[0])
node_zks[0] = get_fake_zk(nodes[0].name)
wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra")
@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster):
node.stop_clickhouse()
# wait for node1 to lose quorum
while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.2)
keeper_utils.wait_until_quorum_lost(cluster, nodes[0])
nodes[0].copy_file_to_container(
os.path.join(CONFIG_DIR, "recovered_keeper1.xml"),
@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster):
nodes[0].query("SYSTEM RELOAD CONFIG")
assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG
send_4lw_cmd(nodes[0].name, "rcvr")
assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG
assert (
keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr")
== keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG
)
keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr")
assert (
keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr")
== keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG
)
# add one node to restore the quorum
nodes[CLUSTER_SIZE].copy_file_to_container(
@ -191,10 +163,10 @@ def test_cluster_recovery(started_cluster):
)
nodes[CLUSTER_SIZE].start_clickhouse()
wait_until_connected(nodes[CLUSTER_SIZE].name)
keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE])
# node1 should have quorum now and accept requests
wait_until_connected(nodes[0].name)
keeper_utils.wait_until_connected(cluster, nodes[0])
node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name))
@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster):
f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml",
)
node.start_clickhouse()
wait_until_connected(node.name)
keeper_utils.wait_until_connected(cluster, node)
node_zks.append(get_fake_zk(node.name))
# refresh old zk sessions
@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster):
wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast")
nodes[0].start_clickhouse()
wait_until_connected(nodes[0].name)
keeper_utils.wait_until_connected(cluster, nodes[0])
node_zks[0] = get_fake_zk(nodes[0].name)
for zk in node_zks[:nodes_left]:
assert_all_data(zk)

View File

@ -2,10 +2,11 @@ import os
import pytest
import socket
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import time
from kazoo.client import KazooClient
from kazoo.client import KazooClient, KazooRetry
CLUSTER_SIZE = 3
@ -45,47 +46,19 @@ def started_cluster():
def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(
hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout
hosts=cluster.get_instance_ip(nodename) + ":9181",
timeout=timeout,
connection_retry=KazooRetry(max_tries=10),
command_retry=KazooRetry(max_tries=10),
)
_fake_zk_instance.start()
return _fake_zk_instance
def get_keeper_socket(node_name):
hosts = cluster.get_instance_ip(node_name)
client = socket.socket()
client.settimeout(10)
client.connect((hosts, 9181))
return client
def send_4lw_cmd(node_name, cmd="ruok"):
client = None
try:
client = get_keeper_socket(node_name)
client.send(cmd.encode())
data = client.recv(100_000)
data = data.decode()
return data
finally:
if client is not None:
client.close()
def wait_until_connected(node_name):
while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.1)
def wait_nodes(nodes):
for node in nodes:
wait_until_connected(node.name)
def wait_and_assert_data(zk, path, data):
while zk.exists(path) is None:
while zk.retry(zk.exists, path) is None:
time.sleep(0.1)
assert zk.get(path)[0] == data.encode()
assert zk.retry(zk.get, path)[0] == data.encode()
def close_zk(zk):
@ -93,20 +66,17 @@ def close_zk(zk):
zk.close()
NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests"
def test_cluster_recovery(started_cluster):
node_zks = []
try:
wait_nodes(nodes)
keeper_utils.wait_nodes(cluster, nodes)
node_zks = [get_fake_zk(node.name) for node in nodes]
data_in_cluster = []
def add_data(zk, path, data):
zk.create(path, data.encode())
zk.retry(zk.create, path, data.encode())
data_in_cluster.append((path, data))
def assert_all_data(zk):
@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster):
wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra")
nodes[0].start_clickhouse()
wait_until_connected(nodes[0].name)
keeper_utils.wait_until_connected(cluster, nodes[0])
node_zks[0] = get_fake_zk(nodes[0].name)
wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra")
@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster):
)
nodes[0].start_clickhouse()
wait_until_connected(nodes[0].name)
keeper_utils.wait_until_connected(cluster, nodes[0])
assert_all_data(get_fake_zk(nodes[0].name))
finally:

View File

@ -1,6 +1,7 @@
import socket
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -25,6 +26,10 @@ node3 = cluster.add_instance(
from kazoo.client import KazooClient, KazooState
def wait_nodes():
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
@pytest.fixture(scope="module")
def started_cluster():
try:
@ -56,28 +61,6 @@ def clear_znodes():
destroy_zk_client(zk)
def wait_node(node):
for _ in range(100):
zk = None
try:
zk = get_fake_zk(node.name, timeout=30.0)
# zk.create("/test", sequence=True)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
destroy_zk_client(zk)
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for n in [node1, node2, node3]:
wait_node(n)
def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(
hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout
@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0):
return _fake_zk_instance
def get_keeper_socket(node_name):
hosts = cluster.get_instance_ip(node_name)
client = socket.socket()
client.settimeout(10)
client.connect((hosts, 9181))
return client
def close_keeper_socket(cli):
if cli is not None:
cli.close()
def reset_node_stats(node_name=node1.name):
def reset_node_stats(node=node1):
client = None
try:
client = get_keeper_socket(node_name)
client = keeper_utils.get_keeper_socket(cluster, node)
client.send(b"srst")
client.recv(10)
finally:
@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name):
client.close()
def send_4lw_cmd(node_name=node1.name, cmd="ruok"):
def reset_conn_stats(node=node1):
client = None
try:
client = get_keeper_socket(node_name)
client.send(cmd.encode())
data = client.recv(100_000)
data = data.decode()
return data
finally:
if client is not None:
client.close()
def reset_conn_stats(node_name=node1.name):
client = None
try:
client = get_keeper_socket(node_name)
client = keeper_utils.get_keeper_socket(cluster, node)
client.send(b"crst")
client.recv(10_000)
finally:
@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster):
client = None
try:
wait_nodes()
data = send_4lw_cmd(cmd="ruok")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok")
assert data == "imok"
finally:
close_keeper_socket(client)
@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster):
clear_znodes()
# reset stat first
reset_node_stats(node1.name)
reset_node_stats(node1)
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(
@ -200,7 +162,7 @@ def test_cmd_mntr(started_cluster):
delete_cnt=2,
)
data = send_4lw_cmd(cmd="mntr")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr")
# print(data.decode())
reader = csv.reader(data.split("\n"), delimiter="\t")
@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster):
wait_nodes()
clear_znodes()
data = send_4lw_cmd(cmd="srst")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst")
assert data.strip() == "Server stats reset."
data = send_4lw_cmd(cmd="mntr")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr")
assert len(data) != 0
# print(data)
@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster):
wait_nodes()
clear_znodes()
data = send_4lw_cmd(cmd="conf")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf")
reader = csv.reader(data.split("\n"), delimiter="=")
result = {}
@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster):
def test_cmd_isro(started_cluster):
wait_nodes()
assert send_4lw_cmd(node1.name, "isro") == "rw"
assert send_4lw_cmd(node2.name, "isro") == "ro"
assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw"
assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro"
def test_cmd_srvr(started_cluster):
@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster):
wait_nodes()
clear_znodes()
reset_node_stats(node1.name)
reset_node_stats(node1)
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = send_4lw_cmd(cmd="srvr")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr")
print("srvr output -------------------------------------")
print(data)
@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster):
try:
wait_nodes()
clear_znodes()
reset_node_stats(node1.name)
reset_conn_stats(node1.name)
reset_node_stats(node1)
reset_conn_stats(node1)
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = send_4lw_cmd(cmd="stat")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat")
print("stat output -------------------------------------")
print(data)
@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = send_4lw_cmd(cmd="cons")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons")
print("cons output -------------------------------------")
print(data)
@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=10)
data = send_4lw_cmd(cmd="crst")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst")
print("crst output -------------------------------------")
print(data)
data = send_4lw_cmd(cmd="cons")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons")
print("cons output(after crst) -------------------------------------")
print(data)
@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, ephemeral_cnt=2)
data = send_4lw_cmd(cmd="dump")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump")
print("dump output -------------------------------------")
print(data)
@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=2, watch_cnt=2)
data = send_4lw_cmd(cmd="wchs")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs")
print("wchs output -------------------------------------")
print(data)
@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=2, watch_cnt=2)
data = send_4lw_cmd(cmd="wchc")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc")
print("wchc output -------------------------------------")
print(data)
@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster):
zk = get_fake_zk(node1.name, timeout=30.0)
do_some_action(zk, create_cnt=2, watch_cnt=2)
data = send_4lw_cmd(cmd="wchp")
data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp")
print("wchp output -------------------------------------")
print(data)

View File

@ -204,7 +204,7 @@ JUST_WRONG_CONFIG = """
"""
def test_duplicate_endpoint(started_cluster):
def test_invalid_configs(started_cluster):
node1.stop_clickhouse()
def assert_config_fails(config):

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import pytest
import random
import string
@ -37,40 +38,22 @@ def started_cluster():
cluster.shutdown()
def get_keeper_socket(node_name):
hosts = cluster.get_instance_ip(node_name)
client = socket.socket()
client.settimeout(10)
client.connect((hosts, 9181))
return client
def close_keeper_socket(cli):
if cli is not None:
cli.close()
def send_4lw_cmd(node_name, cmd="ruok"):
client = None
try:
client = get_keeper_socket(node_name)
client.send(cmd.encode())
data = client.recv(100_000)
data = data.decode()
return data
finally:
if client is not None:
client.close()
def test_aggressive_mntr(started_cluster):
def go_mntr(node_name):
for _ in range(100000):
print(node_name, send_4lw_cmd(node_name, "mntr"))
def go_mntr(node):
for _ in range(10000):
try:
print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr"))
except ConnectionRefusedError:
pass
node1_thread = threading.Thread(target=lambda: go_mntr(node1.name))
node2_thread = threading.Thread(target=lambda: go_mntr(node2.name))
node3_thread = threading.Thread(target=lambda: go_mntr(node3.name))
node1_thread = threading.Thread(target=lambda: go_mntr(node1))
node2_thread = threading.Thread(target=lambda: go_mntr(node2))
node3_thread = threading.Thread(target=lambda: go_mntr(node3))
node1_thread.start()
node2_thread.start()
node3_thread.start()
@ -78,8 +61,7 @@ def test_aggressive_mntr(started_cluster):
node2.stop_clickhouse()
node3.stop_clickhouse()
while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG:
time.sleep(0.2)
keeper_utils.wait_until_quorum_lost(cluster, node1)
node1.stop_clickhouse()
starters = []

View File

@ -1,5 +1,6 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -55,31 +56,6 @@ def smaller_exception(ex):
return "\n".join(str(ex).split("\n")[0:2])
def wait_node(node):
for _ in range(100):
zk = None
try:
node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
zk = get_fake_zk(node.name, timeout=30.0)
zk.create("/test", sequence=True)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
if zk:
zk.stop()
zk.close()
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for node in [node1, node2, node3]:
wait_node(node)
def get_fake_zk(nodename, timeout=30.0):
_fake_zk_instance = KazooClient(
hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout
@ -88,6 +64,10 @@ def get_fake_zk(nodename, timeout=30.0):
return _fake_zk_instance
def wait_nodes():
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
# in extremely rare case it can take more than 5 minutes in debug build with sanitizer
@pytest.mark.timeout(600)
def test_blocade_leader(started_cluster):

View File

@ -1,5 +1,6 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -43,29 +44,8 @@ def smaller_exception(ex):
return "\n".join(str(ex).split("\n")[0:2])
def wait_node(node):
for _ in range(100):
zk = None
try:
node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
zk = get_fake_zk(node.name, timeout=30.0)
zk.create("/test", sequence=True)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
if zk:
zk.stop()
zk.close()
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for node in [node1, node2, node3]:
wait_node(node)
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
def get_fake_zk(nodename, timeout=30.0):

View File

@ -2,6 +2,7 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -41,9 +42,11 @@ def started_cluster():
def start(node):
node.start_clickhouse()
keeper_utils.wait_until_connected(cluster, node)
def test_nodes_add(started_cluster):
keeper_utils.wait_until_connected(cluster, node1)
zk_conn = get_fake_zk(node1)
for i in range(100):
@ -62,6 +65,7 @@ def test_nodes_add(started_cluster):
)
node1.query("SYSTEM RELOAD CONFIG")
waiter.wait()
keeper_utils.wait_until_connected(cluster, node2)
zk_conn2 = get_fake_zk(node2)
@ -93,6 +97,7 @@ def test_nodes_add(started_cluster):
node2.query("SYSTEM RELOAD CONFIG")
waiter.wait()
keeper_utils.wait_until_connected(cluster, node3)
zk_conn3 = get_fake_zk(node3)
for i in range(100):

View File

@ -11,6 +11,7 @@ import os
import time
from multiprocessing.dummy import Pool
from helpers.test_tools import assert_eq_with_retry
import helpers.keeper_utils as keeper_utils
from kazoo.client import KazooClient, KazooState
cluster = ClickHouseCluster(__file__)
@ -41,6 +42,7 @@ def started_cluster():
def start(node):
node.start_clickhouse()
keeper_utils.wait_until_connected(cluster, node)
def get_fake_zk(node, timeout=30.0):

View File

@ -2,6 +2,7 @@
import pytest
from helpers.cluster import ClickHouseCluster
import time
import os
from kazoo.client import KazooClient, KazooState
@ -79,9 +80,12 @@ def test_nodes_remove(started_cluster):
assert zk_conn.exists("test_two_" + str(i)) is not None
assert zk_conn.exists("test_two_" + str(100 + i)) is not None
with pytest.raises(Exception):
try:
zk_conn3 = get_fake_zk(node3)
zk_conn3.sync("/test_two_0")
time.sleep(0.1)
except Exception:
pass
node3.stop_clickhouse()
@ -91,6 +95,7 @@ def test_nodes_remove(started_cluster):
)
node1.query("SYSTEM RELOAD CONFIG")
zk_conn = get_fake_zk(node1)
zk_conn.sync("/test_two_0")
@ -98,8 +103,11 @@ def test_nodes_remove(started_cluster):
assert zk_conn.exists("test_two_" + str(i)) is not None
assert zk_conn.exists("test_two_" + str(100 + i)) is not None
with pytest.raises(Exception):
try:
zk_conn2 = get_fake_zk(node2)
zk_conn2.sync("/test_two_0")
time.sleep(0.1)
except Exception:
pass
node2.stop_clickhouse()

View File

@ -46,6 +46,10 @@ def get_connection_zk(nodename, timeout=30.0):
return _fake_zk_instance
def restart_clickhouse():
node.restart_clickhouse(kill=True)
def test_state_after_restart(started_cluster):
try:
node_zk = None
@ -62,7 +66,7 @@ def test_state_after_restart(started_cluster):
if i % 7 == 0:
node_zk.delete("/test_state_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk2 = get_connection_zk("node")
@ -111,7 +115,7 @@ def test_state_duplicate_restart(started_cluster):
if i % 7 == 0:
node_zk.delete("/test_state_duplicated_restart/node" + str(i))
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk2 = get_connection_zk("node")
@ -119,7 +123,7 @@ def test_state_duplicate_restart(started_cluster):
node_zk2.create("/test_state_duplicated_restart/just_test2")
node_zk2.create("/test_state_duplicated_restart/just_test3")
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk3 = get_connection_zk("node")
@ -159,6 +163,7 @@ def test_state_duplicate_restart(started_cluster):
# http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html
def test_ephemeral_after_restart(started_cluster):
try:
node_zk = None
node_zk2 = None
@ -176,7 +181,7 @@ def test_ephemeral_after_restart(started_cluster):
if i % 7 == 0:
node_zk.delete("/test_ephemeral_after_restart/node" + str(i))
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk2 = get_connection_zk("node")

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -26,10 +27,15 @@ node3 = cluster.add_instance(
from kazoo.client import KazooClient, KazooState
def wait_nodes():
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
wait_nodes()
yield cluster
@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster):
node1.restart_clickhouse(kill=True)
node2.restart_clickhouse(kill=True)
node3.restart_clickhouse(kill=True)
wait_nodes()
for i in range(100):
try:
node1_zk = get_fake_zk("node1")

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,28 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,28 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,28 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -84,6 +85,7 @@ def test_recover_from_snapshot(started_cluster):
# stale node should recover from leader's snapshot
# with some sanitizers can start longer than 5 seconds
node3.start_clickhouse(20)
keeper_utils.wait_until_connected(cluster, node3)
print("Restarted")
try:

View File

@ -40,4 +40,4 @@ def started_cluster():
def test_connection(started_cluster):
# just nothrow
node2.query("SELECT * FROM system.zookeeper WHERE path = '/'")
node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'")
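
query_with_retry (already provided by the test helpers' node objects) re-runs the query a few times before failing, which makes this smoke check tolerant of the short window in which Keeper is still assembling its quorum. As a hedged illustration only, and not the helper's actual signature, the retry shape looks roughly like this:

import time

def query_with_retry(query_fn, sql, retry_count=5, sleep_time=1.0):
    # Re-run the query until it succeeds or the attempts are exhausted.
    last_exception = None
    for _ in range(retry_count):
        try:
            return query_fn(sql)
        except Exception as ex:
            last_exception = ex
            time.sleep(sleep_time)
    raise last_exception

# Roughly equivalent to node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'"):
# query_with_retry(node2.query, "SELECT * FROM system.zookeeper WHERE path = '/'")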

View File

@ -1,5 +1,6 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import time
import socket
import struct
@ -52,25 +53,8 @@ def destroy_zk_client(zk):
pass
def wait_node(node):
for _ in range(100):
zk = None
try:
zk = get_fake_zk(node.name, timeout=30.0)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
destroy_zk_client(zk)
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for n in [node1, node2, node3]:
wait_node(n)
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
def get_fake_zk(nodename, timeout=30.0):

View File

@ -2,6 +2,7 @@
##!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
from multiprocessing.dummy import Pool
from kazoo.client import KazooClient, KazooState
import random
@ -22,7 +23,7 @@ node3 = cluster.add_instance(
def start_zookeeper(node):
node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"])
node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"])
def stop_zookeeper(node):
@ -66,6 +67,7 @@ def stop_clickhouse(node):
def start_clickhouse(node):
node.start_clickhouse()
keeper_utils.wait_until_connected(cluster, node)
def copy_zookeeper_data(make_zk_snapshots, node):

View File

@ -3,6 +3,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -50,6 +51,11 @@ def get_connection_zk(nodename, timeout=30.0):
return _fake_zk_instance
def restart_clickhouse():
node.restart_clickhouse(kill=True)
keeper_utils.wait_until_connected(cluster, node)
def test_state_after_restart(started_cluster):
try:
node_zk = None
@ -69,7 +75,7 @@ def test_state_after_restart(started_cluster):
else:
existing_children.append("node" + str(i))
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk2 = get_connection_zk("node")
@ -123,7 +129,7 @@ def test_ephemeral_after_restart(started_cluster):
else:
existing_children.append("node" + str(i))
node.restart_clickhouse(kill=True)
restart_clickhouse()
node_zk2 = get_connection_zk("node")

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -20,6 +21,10 @@ node3 = cluster.add_instance(
from kazoo.client import KazooClient, KazooState
def wait_nodes():
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
@pytest.fixture(scope="module")
def started_cluster():
try:
@ -94,6 +99,8 @@ def test_restart_multinode(started_cluster):
node1.restart_clickhouse(kill=True)
node2.restart_clickhouse(kill=True)
node3.restart_clickhouse(kill=True)
wait_nodes()
for i in range(100):
try:
node1_zk = get_fake_zk("node1")

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>2</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,34 +0,0 @@
<clickhouse>
<keeper_server>
<tcp_port>9181</tcp_port>
<server_id>3</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<operation_timeout_ms>5000</operation_timeout_ms>
<session_timeout_ms>10000</session_timeout_ms>
<raft_logs_level>trace</raft_logs_level>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>node1</hostname>
<port>9234</port>
</server>
<server>
<id>2</id>
<hostname>node2</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
<server>
<id>3</id>
<hostname>node3</hostname>
<port>9234</port>
<start_as_follower>true</start_as_follower>
</server>
</raft_configuration>
</keeper_server>
</clickhouse>

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -48,6 +49,7 @@ def started_cluster():
def start(node):
node.start_clickhouse()
keeper_utils.wait_until_connected(cluster, node)
def delete_with_retry(node_name, path):
@ -138,6 +140,7 @@ def test_restart_third_node(started_cluster):
node1_zk.create("/test_restart", b"aaaa")
node3.restart_clickhouse()
keeper_utils.wait_until_connected(cluster, node3)
assert node3.contains_in_log(
"Connected to ZooKeeper (or Keeper) before internal Keeper start"

View File

@ -2,6 +2,7 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -40,29 +41,8 @@ def smaller_exception(ex):
return "\n".join(str(ex).split("\n")[0:2])
def wait_node(node):
for _ in range(100):
zk = None
try:
node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
zk = get_fake_zk(node.name, timeout=30.0)
zk.create("/test", sequence=True)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
if zk:
zk.stop()
zk.close()
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for node in [node1, node2]:
wait_node(node)
keeper_utils.wait_nodes(cluster, [node1, node2])
def get_fake_zk(nodename, timeout=30.0):

View File

@ -1,5 +1,6 @@
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
import random
import string
import os
@ -42,29 +43,8 @@ def smaller_exception(ex):
return "\n".join(str(ex).split("\n")[0:2])
def wait_node(node):
for _ in range(100):
zk = None
try:
node.query("SELECT * FROM system.zookeeper WHERE path = '/'")
zk = get_fake_zk(node.name, timeout=30.0)
zk.create("/test", sequence=True)
print("node", node.name, "ready")
break
except Exception as ex:
time.sleep(0.2)
print("Waiting until", node.name, "will be ready, exception", ex)
finally:
if zk:
zk.stop()
zk.close()
else:
raise Exception("Can't wait node", node.name, "to become ready")
def wait_nodes():
for node in [node1, node2, node3]:
wait_node(node)
keeper_utils.wait_nodes(cluster, [node1, node2, node3])
def get_fake_zk(nodename, timeout=30.0):
@ -129,6 +109,7 @@ def test_server_restart(started_cluster):
node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue")
node3.restart_clickhouse(kill=True)
keeper_utils.wait_until_connected(cluster, node3)
node2_zk = get_fake_zk("node2")
node3_zk = get_fake_zk("node3")

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python3
import pytest
from helpers.cluster import ClickHouseCluster
import helpers.keeper_utils as keeper_utils
from kazoo.client import KazooClient, KazooState
from kazoo.security import ACL, make_digest_acl, make_acl
from kazoo.exceptions import (
@ -60,6 +61,7 @@ def stop_clickhouse():
def start_clickhouse():
node.start_clickhouse()
keeper_utils.wait_until_connected(cluster, node)
def copy_zookeeper_data(make_zk_snapshots):

View File

@ -0,0 +1,15 @@
===ddl_format_version 3====
1
1
1
1
===ddl_format_version 4====
1
1
1
1
===exception====
1
1
1
1

View File

@ -0,0 +1,159 @@
#!/usr/bin/env bash
# Tags: zookeeper
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# The test cases in this file cover DDLs running on both the Replicated database engine and the non-Replicated database engine.
# Since their processing flows differ slightly, we compare the expected and actual results ourselves
# in order to share the same reference file. See the check_span method below for more detail.
# This function takes the following arguments:
# $1 - OpenTelemetry Trace Id
# $2 - Query
# $3 - Query Settings
function execute_query()
{
# Some queries are supposed to fail; use -f to suppress the error messages
echo $2 | ${CLICKHOUSE_CURL_COMMAND} -q -s --max-time 180 \
-X POST \
-H "traceparent: 00-$1-5150000000000515-01" \
-H "tracestate: a\nb cd" \
"${CLICKHOUSE_URL}&${3}" \
--data @-
}
# This function takes the following arguments:
# $1 - expected
# $2 - OpenTelemetry Trace Id
# $3 - operation_name pattern
# $4 - extra condition
function check_span()
{
if [ -n "$4" ]; then
extra_condition=" AND ${4}"
else
extra_condition=""
fi
ret=$(${CLICKHOUSE_CLIENT} -nq "
SYSTEM FLUSH LOGS;
SELECT count()
FROM system.opentelemetry_span_log
WHERE finish_date >= yesterday()
AND lower(hex(trace_id)) = '${2}'
AND operation_name like '${3}'
${extra_condition};")
if [ $ret = $1 ]; then
echo 1
else
echo "[operation_name like '${3}' ${extra_condition}]=$ret, expected: ${1}"
# echo the span logs to help analyze
${CLICKHOUSE_CLIENT} -q "
SELECT operation_name, attribute
FROM system.opentelemetry_span_log
WHERE finish_date >= yesterday()
AND lower(hex(trace_id)) ='${2}'
ORDER BY start_time_us
Format PrettyCompact
"
fi
}
#
# Set up
#
${CLICKHOUSE_CLIENT} -q "
DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry;
"
# Support Replicated database engine
cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'")
#
# Only format_version 4 enables the tracing
#
for ddl_version in 3 4; do
# Echo a separator so that the reference file is easier to read
echo "===ddl_format_version ${ddl_version}===="
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
execute_query $trace_id "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}"
check_span 1 $trace_id "HTTPHandler"
if [ $cluster_name = "test_shard_localhost" ]; then
check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
else
check_span 1 $trace_id "%tryEnqueueAndExecuteEntry%" "attribute['clickhouse.cluster']='${cluster_name}'"
fi
if [ $cluster_name = "test_shard_localhost" ]; then
# Tracing is only enabled when the entry format version is 4
if [ $ddl_version = "4" ]; then
expected=1
else
expected=0
fi
else
# For the Replicated database engine, tracing is always enabled because it calls DDLWorker::processTask directly
expected=1
fi
check_span $expected $trace_id "%DDLWorker::processTask%"
# For queries where tracing is enabled (format version 4 or the Replicated database engine), there should be two 'query' spans:
# one for the HTTPHandler and one for the DDL executed in DDLWorker.
#
# For other formats, there should be only one 'query' span
if [ $cluster_name = "test_shard_localhost" ]; then
if [ $ddl_version = "4" ]; then
expected=2
else
expected=1
fi
else
expected=2
fi
check_span $expected $trace_id "query"
# Remove table
# Under the Replicated database engine, the DDL is executed as an ON CLUSTER DDL, so distributed_ddl_output_mode is needed to suppress the output
${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode none -q "
DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry;
"
done
#
# An exceptional case: DROP a non-existent table
#
# Echo a separator so that the reference file is easier to read
echo "===exception===="
trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" 2>&1| grep -Fv "UNKNOWN_TABLE"
check_span 1 $trace_id "HTTPHandler"
if [ $cluster_name = "test_shard_localhost" ]; then
expected=1
else
# For the Replicated database engine it will fail on the initiator before enqueueing the distributed DDL
expected=0
fi
check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
check_span $expected $trace_id "%DDLWorker::processTask%"
if [ $cluster_name = "test_shard_localhost" ]; then
# There should be two 'query' spans: one for the HTTPHandler and one for the DDL executed in DDLWorker.
# Both of these spans contain the exception
expected=2
else
# For the Replicated database engine, there should be only one 'query' span
expected=1
fi
# We don't care about the exact value of exception_code; we just check that it's there.
check_span $expected $trace_id "query" "attribute['clickhouse.exception_code']<>''"
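
For readers following the headers in execute_query above: the traceparent value is the W3C Trace Context format version-trace_id-parent_span_id-flags, with a 32-hex-character trace id, which is why check_span can locate the spans simply by matching lower(hex(trace_id)) against the id generated with generateUUIDv4(). A small illustrative Python sketch (not part of the test) that builds an equivalent header:

import uuid

def make_traceparent(parent_span_id="5150000000000515", sampled=True):
    # W3C Trace Context: <version>-<trace-id>-<parent-span-id>-<trace-flags>,
    # with a 32-hex-char trace id and a 16-hex-char parent span id.
    trace_id = uuid.uuid4().hex  # same shape as lower(hex(generateUUIDv4()))
    flags = "01" if sampled else "00"
    return trace_id, f"00-{trace_id}-{parent_span_id}-{flags}"

trace_id, header = make_traceparent()
print(header)
# The test then filters spans with: lower(hex(trace_id)) = '<trace_id>'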