diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..c737046a5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) -include (cmake/git_status.cmake) +include (cmake/git.cmake) # Ignore export() since we don't use it, # but it gets broken with a global targets via link_libraries() diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 75c48f690f8..916d4f9a74d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -13,8 +14,10 @@ #include #include #include +#include #include - +#include +#include /// is_any_of namespace { @@ -35,6 +38,166 @@ std::string getEditor() return editor; } +std::string getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector paths; + boost::split(paths, env_path, boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return sk_bin_path; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return fzf_bin_path; + } + + return {}; +} + +/// See comments in ShellCommand::executeImpl() +/// (for the vfork via dlsym()) +int executeCommand(char * const argv[]) +{ +#if !defined(USE_MUSL) + /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, + * because of the resolving of symbols in the shared library + * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html + * Therefore, separate the resolving of the symbol from the call. + */ + static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. 
+ static void * real_vfork = reinterpret_cast(&vfork); +#endif + if (!real_vfork) + throw std::runtime_error("Cannot find vfork symbol"); + + pid_t pid = reinterpret_cast(real_vfork)(); + + if (-1 == pid) + throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString())); + + /// Child + if (0 == pid) + { + sigset_t mask; + sigemptyset(&mask); + sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + + execvp(argv[0], argv); + _exit(-1); + } + + int status = 0; + do + { + int exited_pid = waitpid(pid, &status, 0); + if (exited_pid != -1) + break; + + if (errno == EINTR) + continue; + + throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString())); + } while (true); + + return status; +} + +void writeRetry(int fd, const std::string & data) +{ + size_t bytes_written = 0; + const char * begin = data.c_str(); + size_t offset = data.size(); + + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString())); + bytes_written += res; + } +} +std::string readFile(const std::string & path) +{ + std::ifstream t(path); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + return str; +} + +/// Simple wrapper for temporary files. +class TemporaryFile +{ +private: + std::string path; + int fd = -1; + +public: + explicit TemporaryFile(const char * pattern) + : path(pattern) + { + size_t dot_pos = path.rfind('.'); + if (dot_pos != std::string::npos) + fd = ::mkstemps(path.data(), path.size() - dot_pos); + else + fd = ::mkstemp(path.data()); + + if (-1 == fd) + throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString())); + } + ~TemporaryFile() + { + try + { + close(); + unlink(); + } + catch (const std::runtime_error & e) + { + fmt::print(stderr, "{}", e.what()); + } + } + + void close() + { + if (fd == -1) + return; + + if (0 != ::close(fd)) + throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString())); + fd = -1; + } + + void write(const std::string & data) + { + if (fd == -1) + throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path)); + + writeRetry(fd, data); + } + + void unlink() + { + if (0 != ::unlink(path.c_str())) + throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString())); + } + + std::string & getPath() { return path; } +}; + /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. 
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) @@ -142,6 +305,7 @@ ReplxxLineReader::ReplxxLineReader( replxx::Replxx::highlighter_callback_t highlighter_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) , editor(getEditor()) + , fuzzy_finder(getFuzzyFinder()) { using namespace std::placeholders; using Replxx = replxx::Replxx; @@ -249,6 +413,17 @@ ReplxxLineReader::ReplxxLineReader( return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); + + /// interactive search in history (requires fzf/sk) + if (!fuzzy_finder.empty()) + { + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -293,116 +468,70 @@ void ReplxxLineReader::addToHistory(const String & line) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } -/// See comments in ShellCommand::executeImpl() -/// (for the vfork via dlsym()) -int ReplxxLineReader::executeEditor(const std::string & path) -{ - std::vector argv0(editor.data(), editor.data() + editor.size() + 1); - std::vector argv1(path.data(), path.data() + path.size() + 1); - char * const argv[] = {argv0.data(), argv1.data(), nullptr}; - - static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); - if (!real_vfork) - { - rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str()); - return -1; - } - - pid_t pid = reinterpret_cast(real_vfork)(); - - if (-1 == pid) - { - rx.print("Cannot vfork: %s\n", errnoToString().c_str()); - return -1; - } - - /// Child - if (0 == pid) - { - sigset_t mask; - sigemptyset(&mask); - sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - - execvp(editor.c_str(), argv); - rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str()); - _exit(-1); - } - - int status = 0; - do - { - int exited_pid = waitpid(pid, &status, 0); - if (exited_pid == -1) - { - if (errno == EINTR) - continue; - - rx.print("Cannot waitpid: %s\n", errnoToString().c_str()); - return -1; - } - else - break; - } while (true); - return status; -} - void ReplxxLineReader::openEditor() { - char filename[] = "clickhouse_replxx_XXXXXX.sql"; - int fd = ::mkstemps(filename, 4); - if (-1 == fd) - { - rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str()); - return; - } + TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql"); + editor_file.write(rx.get_state().text()); + editor_file.close(); - replxx::Replxx::State state(rx.get_state()); - - size_t bytes_written = 0; - const char * begin = state.text(); - size_t offset = strlen(state.text()); - while (bytes_written != offset) + char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr}; + try { - ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); - if ((-1 == res || 0 == res) && errno != EINTR) + if (executeCommand(argv) == 0) { - rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str()); - break; + const std::string & new_query = readFile(editor_file.getPath()); + 
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); } - bytes_written += res; } - - if (0 != ::close(fd)) + catch (const std::runtime_error & e) { - rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } - - if (0 == executeEditor(filename)) - { - try - { - std::ifstream t(filename); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); - rx.set_state(replxx::Replxx::State(str.c_str(), str.size())); - } - catch (...) - { - rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } + rx.print(e.what()); } if (bracketed_paste_enabled) enableBracketedPaste(); +} - if (0 != ::unlink(filename)) - rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str()); +void ReplxxLineReader::openInteractiveHistorySearch() +{ + assert(!fuzzy_finder.empty()); + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + std::string fuzzy_finder_command = fmt::format( + "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", + fuzzy_finder, history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + const std::string & new_query = readFile(output_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); } void ReplxxLineReader::enableBracketedPaste() diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index b9ec214d02c..fea1405a208 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -27,6 +27,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; @@ -36,4 +37,5 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; }; diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index f9b2f103f49..6707d703372 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -45,6 +45,8 @@ elseif (ARCH_AARCH64) # dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits # but it has been activated since always, so why not enable it. It's not 100% clear in which revision this flag was # introduced as optional, either in v8.2 [7] or in v8.4 [8]. + # ldapr: Load-Acquire RCpc Register. Better support of release/acquire of atomics. Good for allocators and high contention code. + # Optional in v8.2, mandatory in v8.3 [9]. Supported in Graviton 2+, Azure and GCP instances. Generated from clang 15. 
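Aside on the ldapr note above: LDAPR is an acquire load with RCpc semantics, so enabling the feature lets the compiler lower std::memory_order_acquire loads to LDAPR instead of the stronger LDAR. A minimal sketch of the kind of code that benefits, compiled with the flags added below; the names here are hypothetical, not part of the patch:

    #include <atomic>

    std::atomic<int> ready{0};
    int payload = 0;

    // With the +ldapr target feature the acquire load below may be emitted as
    // LDAPR rather than LDAR, which is cheaper under contention (e.g. in
    // allocators and spin-wait loops, as the comment above notes).
    int consume()
    {
        while (ready.load(std::memory_order_acquire) == 0)
            ;
        return payload;
    }
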
# # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 @@ -54,7 +56,8 @@ elseif (ARCH_AARCH64) # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs") + # [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument") endif () elseif (ARCH_PPC64LE) diff --git a/cmake/git.cmake b/cmake/git.cmake new file mode 100644 index 00000000000..397ec3cd081 --- /dev/null +++ b/cmake/git.cmake @@ -0,0 +1,42 @@ +find_package(Git) + +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for view SYSTEM.BUILD_OPTIONS. +if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "Git HEAD commit hash: ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() + diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake deleted file mode 100644 index c1047c0ccbf..00000000000 --- a/cmake/git_status.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Print the status of the git repository (if git is available). 
-# This is useful for troubleshooting build failure reports - -find_package(Git) - -if (Git_FOUND) - - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_ID - OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) - -else() - message(STATUS "Git could not be found.") -endif() diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 877179a66a6..b80d75e3611 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1818,11 +1818,6 @@ Result: └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, …) - -Evaluate external model. -Accepts a model name and model arguments. Returns Float64. - ## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learing. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..14c06ee0336 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). +Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). ## Normal View diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md index 95ae1aa9059..b98039f768a 100644 --- a/docs/ru/engines/table-engines/special/external-data.md +++ b/docs/ru/engines/table-engines/special/external-data.md @@ -22,17 +22,17 @@ ClickHouse позволяет отправить на сервер данные, Таких секций может быть несколько - по числу передаваемых таблиц. -**–external** - маркер начала секции. -**–file** - путь к файлу с дампом таблицы, или -, что обозначает stdin. -Из stdin может быть считана только одна таблица. +- **--external** - маркер начала секции. +- **--file** - путь к файлу с дампом таблицы, или `-`, что обозначает `stdin`. +Из `stdin` может быть считана только одна таблица. Следующие параметры не обязательные: -**–name** - имя таблицы. Если не указано - используется _data. -**–format** - формат данных в файле. Если не указано - используется TabSeparated. +- **--name** - имя таблицы. Если не указано - используется _data. +- **--format** - формат данных в файле. Если не указано - используется TabSeparated. Должен быть указан один из следующих параметров: -**–types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … -**–structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. +- **--types** - список типов столбцов через запятую. Например, `UInt64,String`. Столбцы будут названы _1, _2, … +- **--structure** - структура таблицы, в форме `UserID UInt64`, `URL String`. Определяет имена и типы столбцов. 
Файлы, указанные в file, будут разобраны форматом, указанным в format, с использованием типов данных, указанных в types или structure. Таблица будет загружена на сервер, и доступна там в качестве временной таблицы с именем name. diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 5e89a4f1236..5c8584cd2a0 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -1722,12 +1722,6 @@ SELECT joinGet(db_test.id_val,'val',toUInt32(number)) from numbers(4) SETTINGS j └──────────────────────────────────────────────────┘ ``` -## modelEvaluate(model_name, …) {#function-modelevaluate} - -Оценивает внешнюю модель. - -Принимает на вход имя и аргументы модели. Возвращает Float64. - ## throwIf(x\[, message\[, error_code\]\]) {#throwifx-custom-message} Бросает исключение, если аргумент не равен нулю. diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index fde55ec884f..a475420ba64 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -625,11 +625,6 @@ ORDER BY k ASC 使用指定的连接键从Join类型引擎的表中获取数据。 -## modelEvaluate(model_name, …) {#function-modelevaluate} - -使用外部模型计算。 -接受模型的名称以及模型的参数。返回Float64类型的值。 - ## throwIf(x) {#throwifx} 如果参数不为零则抛出异常。 diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index a5ad506abe6..ce176ccade5 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) - # Sraight list of all required sources + # Straight list of all required sources set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp @@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp + ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp Keeper.cpp TinyContext.cpp diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6d487a68111..fdfe0cef2b3 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -490,8 +490,9 @@ int Keeper::main(const std::vector & /*args*/) void Keeper::logRevision() const { Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? 
"" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/programs/server/config.d/legacy_geobase.xml b/programs/server/config.d/legacy_geobase.xml new file mode 100644 index 00000000000..3973c55cb86 --- /dev/null +++ b/programs/server/config.d/legacy_geobase.xml @@ -0,0 +1,4 @@ + + config.d/regions_hierarchy.txt + config.d/ + diff --git a/programs/server/config.d/regions_hierarchy.txt b/programs/server/config.d/regions_hierarchy.txt new file mode 120000 index 00000000000..7b48f46aa94 --- /dev/null +++ b/programs/server/config.d/regions_hierarchy.txt @@ -0,0 +1 @@ +../../../tests/config/regions_hierarchy.txt \ No newline at end of file diff --git a/programs/server/config.d/regions_names_en.txt b/programs/server/config.d/regions_names_en.txt new file mode 120000 index 00000000000..523a1077d9d --- /dev/null +++ b/programs/server/config.d/regions_names_en.txt @@ -0,0 +1 @@ +../../../tests/config/regions_names_en.txt \ No newline at end of file diff --git a/programs/server/config.xml b/programs/server/config.xml index ab79e7a2e4c..dcb8ac0804c 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1114,10 +1114,6 @@ system asynchronous_metric_log
- 7000
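Aside on the revision logging above: the git hash that Keeper::logRevision prints is baked in at configure time. cmake/git.cmake captures GIT_HASH, and configure_file() renders it into src/Daemon/GitHash.generated.cpp (see the GitHash.cpp.in template later in this diff). A minimal sketch of that generate-and-consume pattern; the include path is an assumption, the rest mirrors the patch:

    // GitHash.cpp.in -- template rendered by configure_file() into
    // GitHash.generated.cpp; "@GIT_HASH@" is replaced with the commit hash
    // captured by cmake/git.cmake.
    #include <base/types.h>   // assumed location of the String alias

    String getGitHash()
    {
        return "@GIT_HASH@";
    }

    // The consumer (BaseDaemon.cpp in this patch) only declares the symbol:
    extern String getGitHash();
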
diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html index e63a277497a..f013e3ac064 100644 --- a/programs/server/dashboard.html +++ b/programs/server/dashboard.html @@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) { sync.sub(plots[idx]); /// Set title - const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ); + const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : ''; chart.querySelector('.title').firstChild.data = title; } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7d05cbb0681..f407fab68f1 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -70,7 +70,7 @@ #include #include #include - +#include namespace fs = std::filesystem; using namespace std::literals; @@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler() ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const { - ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert); + std::unique_ptr parser; ASTPtr res; const auto & settings = global_context->getSettingsRef(); @@ -301,10 +301,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (!allow_multi_statements) max_length = settings.max_query_size; + const Dialect & dialect = settings.dialect; + + if (dialect == Dialect::kusto) + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + else + parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + if (is_interactive || ignore_error) { String message; - res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); if (!res) { @@ -314,7 +321,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu } else { - res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); } if (is_interactive) diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6162de48143..281a65ca36a 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,6 +32,13 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) +/// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array +/// to ensure correct byte order on different endian machines +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define CURRENT_BYTES_IDX(i) (7 - i) +#else +#define CURRENT_BYTES_IDX(i) (i) +#endif class SipHash { @@ -55,7 +62,7 @@ private: ALWAYS_INLINE void finalize() { /// In the last free byte, we write the remainder of the division by 256. 
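Aside on CURRENT_BYTES_IDX, added just above: it keeps SipHash's byte addressing portable. current_bytes aliases the 64-bit current_word, so the i-th least significant byte of the word sits at array index i on little-endian machines but at 7 - i on big-endian ones. A self-contained illustration of the mapping; the helper is hypothetical, not part of the patch:

    #include <cstdint>
    #include <cstring>

    #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    #define CURRENT_BYTES_IDX(i) (7 - i)
    #else
    #define CURRENT_BYTES_IDX(i) (i)
    #endif

    // Set the least significant ("logical byte 0") byte of a 64-bit word through
    // a byte alias, the same way SipHash fills current_bytes: the macro selects
    // the array slot that maps to that byte on either endianness.
    uint64_t set_low_byte(uint64_t word, uint8_t value)
    {
        uint8_t bytes[8];
        std::memcpy(bytes, &word, sizeof(word));
        bytes[CURRENT_BYTES_IDX(0)] = value;
        std::memcpy(&word, bytes, sizeof(word));
        return word;
    }
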
- current_bytes[7] = static_cast(cnt); + current_bytes[CURRENT_BYTES_IDX(7)] = static_cast(cnt); v3 ^= current_word; SIPROUND; @@ -92,7 +99,7 @@ public: { while (cnt & 7 && data < end) { - current_bytes[cnt & 7] = *data; + current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data; ++data; ++cnt; } @@ -125,13 +132,13 @@ public: current_word = 0; switch (end - data) { - case 7: current_bytes[6] = data[6]; [[fallthrough]]; - case 6: current_bytes[5] = data[5]; [[fallthrough]]; - case 5: current_bytes[4] = data[4]; [[fallthrough]]; - case 4: current_bytes[3] = data[3]; [[fallthrough]]; - case 3: current_bytes[2] = data[2]; [[fallthrough]]; - case 2: current_bytes[1] = data[1]; [[fallthrough]]; - case 1: current_bytes[0] = data[0]; [[fallthrough]]; + case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; + case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; + case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]]; + case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]]; + case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]]; + case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]]; + case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]]; case 0: break; } } @@ -157,8 +164,8 @@ public: void get128(char * out) { finalize(); - unalignedStoreLE(out, v0 ^ v1); - unalignedStoreLE(out + 8, v2 ^ v3); + unalignedStore(out, v0 ^ v1); + unalignedStore(out + 8, v2 ^ v3); } template @@ -225,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s) { return sipHash64(s.data(), s.size()); } + +#undef CURRENT_BYTES_IDX diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 42d7d967b1f..08092cf68f1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -705,7 +705,7 @@ void KeeperServer::waitInit() int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) - throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); + LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } std::vector KeeperServer::getDeadSessions() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9dd87904a56..5dedc6117aa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index daa678c0141..2a564ebe6d3 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) - +IMPLEMENT_SETTING_ENUM(Dialect, 
ErrorCodes::BAD_ARGUMENTS, + {{"clickhouse", Dialect::clickhouse}, + {"kusto", Dialect::kusto}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index b5e908defc7..97c4275c4d2 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +enum class Dialect +{ + clickhouse, + kusto, + kusto_auto, +}; + +DECLARE_SETTING_ENUM(Dialect) } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d449768935a..157255bba12 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -266,8 +266,8 @@ private: { size_t pos = message.find('\n'); - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos)); + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. while (pos != std::string_view::npos) @@ -315,14 +315,14 @@ private: if (query_id.empty()) { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context) } @@ -838,6 +838,7 @@ static void blockSignals(const std::vector & signals) throw Poco::Exception("Cannot block signal."); } +extern String getGitHash(); void BaseDaemon::initializeTerminationAndSignalProcessing() { @@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #if defined(__ELF__) && !defined(OS_FREEBSD) String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); if (build_id_hex.empty()) - build_id_info = "no build id"; + build_id = ""; else - build_id_info = "build id: " + build_id_hex; + build_id = build_id_hex; #else - build_id_info = "no build id"; + build_id = ""; #endif + git_hash = getGitHash(); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} - + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? 
"" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 1b67ca986a8..d248ad9cec9 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -172,7 +172,8 @@ protected: DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - String build_id_info; + String build_id; + String git_hash; String stored_binary_hash; std::vector handled_signals; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 78c133d9893..f02fd69aa79 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,7 +1,10 @@ +configure_file(GitHash.cpp.in GitHash.generated.cpp) + add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp + GitHash.generated.cpp ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in new file mode 100644 index 00000000000..4a2da793fc2 --- /dev/null +++ b/src/Daemon/GitHash.cpp.in @@ -0,0 +1,8 @@ +// File was generated by CMake + +#include + +String getGitHash() +{ + return "@GIT_HASH@"; +} diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 507320fffde..c51864740f5 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -367,7 +367,7 @@ bool DatabaseReplicated::looksLikeReplicatedDatabasePath(const ZooKeeperPtr & cu return false; if (maybe_database_mark.starts_with(REPLICATED_DATABASE_MARK)) return true; - if (maybe_database_mark.empty()) + if (!maybe_database_mark.empty()) return false; /// Old versions did not have REPLICATED_DATABASE_MARK. Check specific nodes exist and add mark. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 5a327a2f31b..d7679416fd7 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -100,6 +100,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; + format_settings.json.try_infer_objects = context->getSettingsRef().allow_experimental_object_type; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b6efb0bd391..66888df7e43 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -153,6 +153,7 @@ struct FormatSettings bool try_infer_numbers_from_strings = false; bool validate_types_from_metadata = true; bool validate_utf8 = false; + bool try_infer_objects = false; } json; struct diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index e295ad44ed3..6327ffaebd4 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -225,7 +225,7 @@ namespace JSONUtils if (!type) continue; - if (isObject(type)) + if (settings.json.try_infer_objects && isObject(type)) return std::make_shared("json", true); value_types.push_back(type); @@ -240,7 +240,11 @@ namespace JSONUtils are_types_equal &= value_types[i]->equals(*value_types[0]); if 
(!are_types_equal) + { + if (!settings.json.try_infer_objects) + return nullptr; return std::make_shared("json", true); + } return std::make_shared(std::make_shared(), value_types[0]); } diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index 4b9f36ab72d..c95f5209760 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -70,6 +70,12 @@ public: if (!pos) return false; + /// It is possible that tables list is empty. + /// IdentifierSemantic get the position from AST, and it can be not valid to use it. + /// Example is re-analysing a part of AST for storage Merge, see 02147_order_by_optimizations.sql + if (*pos >= tables.size()) + return false; + if (auto data_type_and_name = tables[*pos].columns.tryGetByName(identifier->shortName())) { arg_data_type = data_type_and_name->type; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index abecb24852b..7a633242904 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -73,6 +73,7 @@ #include #include +#include namespace ProfileEvents { @@ -392,10 +393,20 @@ static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + if (settings.dialect == Dialect::kusto && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) 
add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp new file mode 100644 index 00000000000..3a399bdccdb --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -0,0 +1,26 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + String expr = getExprFromToken(pos); + ASTPtr where_expression; + + Tokens token_filter(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos pos_filter(token_filter, pos.max_depth); + if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h new file mode 100644 index 00000000000..19bb38a7fda --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLFilter.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLFilter : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL where"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp new file mode 100644 index 00000000000..bb8e08fd378 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr limit_length; + + auto expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h new file mode 100644 index 00000000000..1585805f0fc --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLLimit.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLLimit : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL limit"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp new file mode 100644 index 00000000000..f8e4f9eaab0 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -0,0 +1,359 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + +String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Pos &token_pos,String kql_op, String ch_op) +{ + String new_expr; + Expected expected; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw 
Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + auto haystack = tokens.back(); + + String logic_op = (kql_op == "has_all") ? " and " : " or "; + + while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + if (token_pos->type == TokenType::Comma) + new_expr = new_expr + logic_op; + else + new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")"; + + ++token_pos; + if (token_pos->type == TokenType::ClosingRoundBracket) + break; + + } + + tokens.pop_back(); + return new_expr; +} + +String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ch_op) +{ + String new_expr; + + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + ASTPtr select; + Expected expected; + + ++token_pos; + if (!s_lparen.ignore(token_pos, expected)) + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + + --token_pos; + --token_pos; + return ch_op; + +} + +String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos) +{ + String new_expr, left_wildcards, right_wildcards, left_space, right_space; + + switch (wildcards_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_wildcards ="%"; + break; + + case WildcardsPos::right: + right_wildcards = "%"; + break; + + case WildcardsPos::both: + left_wildcards ="%"; + right_wildcards = "%"; + break; + } + + switch (space_pos) + { + case WildcardsPos::none: + break; + + case WildcardsPos::left: + left_space =" "; + break; + + case WildcardsPos::right: + right_space = " "; + break; + + case WildcardsPos::both: + left_space =" "; + right_space = " "; + break; + } + + ++token_pos; + + if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier)) + new_expr = ch_op +"(" + tokens.back() +", '"+left_wildcards + left_space + String(token_pos->begin + 1,token_pos->end - 1) + right_space + right_wildcards + "')"; + else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord)) + { + auto tmp_arg = String(token_pos->begin, token_pos->end); + new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; + } + else + throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + tokens.pop_back(); + return new_expr; +} + +bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) +{ + auto begin = pos; + + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + KQLOperatorValue op_value = KQLOperatorValue::none; + + auto token = String(pos->begin,pos->end); + + String op = token; + if (token == "!") + { + ++pos; + if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) + throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + op ="!"+String(pos->begin,pos->end); + } + else if (token == "matches") + { + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "regex") + op +=" regex"; + else + --pos; + } + } + else + { + op = token; + } + + ++pos; + if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin,pos->end) == "~") + op +="~"; + else + --pos; + } + 
else + --pos; + + if (KQLOperator.find(op) == KQLOperator.end()) + { + pos = begin; + return false; + } + + op_value = KQLOperator[op]; + + String new_expr; + + if (op_value == KQLOperatorValue::none) + tokens.push_back(op); + else + { + auto last_op = tokens.back(); + auto last_pos = pos; + + switch (op_value) + { + case KQLOperatorValue::contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both); + break; + + case KQLOperatorValue::contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both); + break; + + case KQLOperatorValue::not_contains_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both); + break; + + case KQLOperatorValue::endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::not_endswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + break; + + case KQLOperatorValue::endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_endswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::equal: + break; + + case KQLOperatorValue::not_equal: + break; + + case KQLOperatorValue::equal_cs: + new_expr = "=="; + break; + + case KQLOperatorValue::not_equal_cs: + new_expr = "!="; + break; + case KQLOperatorValue::has: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none); + break; + + case KQLOperatorValue::has_all: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive"); + break; + + case KQLOperatorValue::has_any: + new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive"); + break; + + case KQLOperatorValue::has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::not_has_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none); + break; + + case KQLOperatorValue::hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left); + break; + + case KQLOperatorValue::not_hasprefix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left); + break; + + case 
KQLOperatorValue::hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none); + new_expr += " or "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::not_hassuffix_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none); + new_expr += " and "; + tokens.push_back(last_op); + new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right); + break; + + case KQLOperatorValue::in_cs: + new_expr = genInOpExpr(pos,op,"in"); + break; + + case KQLOperatorValue::not_in_cs: + new_expr = genInOpExpr(pos,op,"not in"); + break; + + case KQLOperatorValue::in: + break; + + case KQLOperatorValue::not_in: + break; + + case KQLOperatorValue::matches_regex: + new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none); + break; + + case KQLOperatorValue::startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::not_startswith: + new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right); + break; + + case KQLOperatorValue::startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none); + break; + + case KQLOperatorValue::not_startswith_cs: + new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none); + break; + + default: + break; + } + + tokens.push_back(new_expr); + } + return true; + } + pos = begin; + return false; +} + +} + diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h new file mode 100644 index 00000000000..9796ae10c07 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -0,0 +1,106 @@ +#pragma once + +#include +#include +#include +namespace DB +{ + +class KQLOperators +{ +public: + bool convert(std::vector &tokens,IParser::Pos &pos); +protected: + + enum class WildcardsPos:uint8_t + { + none, + left, + right, + both + }; + + enum class KQLOperatorValue : uint16_t + { + none, + contains, + not_contains, + contains_cs, + not_contains_cs, + endswith, + not_endswith, + endswith_cs, + not_endswith_cs, + equal, //=~ + not_equal,//!~ + equal_cs, //= + not_equal_cs,//!= + has, + not_has, + has_all, + has_any, + has_cs, + not_has_cs, + hasprefix, + not_hasprefix, + hasprefix_cs, + not_hasprefix_cs, + hassuffix, + not_hassuffix, + hassuffix_cs, + not_hassuffix_cs, + in_cs, //in + not_in_cs, //!in + in, //in~ + not_in ,//!in~ + matches_regex, + startswith, + not_startswith, + startswith_cs, + not_startswith_cs, + }; + + std::unordered_map KQLOperator = + { + {"contains" , KQLOperatorValue::contains}, + {"!contains" , KQLOperatorValue::not_contains}, + {"contains_cs" , KQLOperatorValue::contains_cs}, + {"!contains_cs" , KQLOperatorValue::not_contains_cs}, + {"endswith" , KQLOperatorValue::endswith}, + 
{"!endswith" , KQLOperatorValue::not_endswith}, + {"endswith_cs" , KQLOperatorValue::endswith_cs}, + {"!endswith_cs" , KQLOperatorValue::not_endswith_cs}, + {"=~" , KQLOperatorValue::equal}, + {"!~" , KQLOperatorValue::not_equal}, + {"==" , KQLOperatorValue::equal_cs}, + {"!=" , KQLOperatorValue::not_equal_cs}, + {"has" , KQLOperatorValue::has}, + {"!has" , KQLOperatorValue::not_has}, + {"has_all" , KQLOperatorValue::has_all}, + {"has_any" , KQLOperatorValue::has_any}, + {"has_cs" , KQLOperatorValue::has_cs}, + {"!has_cs" , KQLOperatorValue::not_has_cs}, + {"hasprefix" , KQLOperatorValue::hasprefix}, + {"!hasprefix" , KQLOperatorValue::not_hasprefix}, + {"hasprefix_cs" , KQLOperatorValue::hasprefix_cs}, + {"!hasprefix_cs" , KQLOperatorValue::not_hasprefix_cs}, + {"hassuffix" , KQLOperatorValue::hassuffix}, + {"!hassuffix" , KQLOperatorValue::not_hassuffix}, + {"hassuffix_cs" , KQLOperatorValue::hassuffix_cs}, + {"!hassuffix_cs" , KQLOperatorValue::not_hassuffix_cs}, + {"in" , KQLOperatorValue::in_cs}, + {"!in" , KQLOperatorValue::not_in_cs}, + {"in~" , KQLOperatorValue::in}, + {"!in~" , KQLOperatorValue::not_in}, + {"matches regex" , KQLOperatorValue::matches_regex}, + {"startswith" , KQLOperatorValue::startswith}, + {"!startswith" , KQLOperatorValue::not_startswith}, + {"startswith_cs" , KQLOperatorValue::startswith_cs}, + {"!startswith_cs" , KQLOperatorValue::not_startswith_cs}, + }; + static String genHaystackOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none); + static String genInOpExpr(IParser::Pos &token_pos,String kql_op, String ch_op); + static String genHasAnyAllOpExpr(std::vector &tokens,IParser::Pos &token_pos,String kql_op, String ch_op); +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp new file mode 100644 index 00000000000..e978323d821 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -0,0 +1,25 @@ +#include +#include +#include +namespace DB +{ + +bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_expression_list; + String expr; + + expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h new file mode 100644 index 00000000000..b64675beed0 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLProject.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLProject : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL project"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..04ee36705a9 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,342 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +{ + Tokens 
tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + +String ParserKQLBase :: getExprFromPipe(Pos & pos) +{ + uint16_t bracket_count = 0; + auto begin = pos; + auto end = pos; + while (!end->isEnd() && end->type != TokenType::Semicolon) + { + if (end->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (end->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (end->type == TokenType::PipeMark && bracket_count == 0) + break; + + ++end; + } + --end; + return String(begin->begin, end->end); +} + +String ParserKQLBase :: getExprFromToken(Pos & pos) +{ + String res; + std::vector tokens; + String alias; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String token = String(pos->begin,pos->end); + + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~") + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) + { + tokens.push_back(token); + } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } + ++pos; + } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + + for (auto const &token : tokens) + res = res.empty()? token : res +" " + token; + return res; +} + +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +{ + if (op_name == "filter" || op_name == "where") + return std::make_unique(); + else if (op_name == "limit" || op_name == "take") + return std::make_unique(); + else if (op_name == "project") + return std::make_unique(); + else if (op_name == "sort by" || op_name == "order by") + return std::make_unique(); + else if (op_name == "summarize") + return std::make_unique(); + else if (op_name == "table") + return std::make_unique(); + else + return nullptr; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + + auto select_query = std::make_shared(); + node = select_query; + ASTPtr tables; + + std::unordered_map kql_parser = + { + { "filter", {"filter", false, false, 3}}, + { "where", {"filter", false, false, 3}}, + { "limit", {"limit", false, true, 3}}, + { "take", {"limit", false, true, 3}}, + { "project", {"project", false, false, 3}}, + { "sort by", {"order by", false, false, 4}}, + { "order by", {"order by", false, false, 4}}, + { "table", {"table", false, false, 3}}, + { "summarize", {"summarize", true, true, 3}} + }; + + std::vector> operation_pos; + + String table_name(pos->begin, pos->end); + + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + if (pos->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (pos->type == TokenType::PipeMark && bracket_count == 0) + { + ++pos; + String kql_operator(pos->begin, pos->end); + if (kql_operator == "order" || kql_operator == "sort") + { + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos,expected)) + { + kql_operator = "order by"; + --pos; + } + } + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == 
kql_parser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(kql_operator, pos)); + } + else + ++pos; + } + + auto kql_operator_str = operation_pos.back().first; + auto npos = operation_pos.back().second; + if (!npos.isValid()) + return false; + + auto kql_operator_p = getOperator(kql_operator_str); + + if (!kql_operator_p) + return false; + + if (operation_pos.size() == 1) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + npos = operation_pos.front().second; + if (!ParserKQLTable().parse(npos, node, expected)) + return false; + } + else + { + String project_clause, order_clause, where_clause, limit_clause; + auto last_pos = operation_pos.back().second; + auto last_op = operation_pos.back().first; + + auto set_main_query_clause = [&](String & op, Pos & op_pos) + { + auto op_str = ParserKQLBase::getExprFromPipe(op_pos); + if (op == "project") + project_clause = op_str; + else if (op == "where" || op == "filter") + where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format(" AND ({})", op_str); + else if (op == "limit" || op == "take") + limit_clause = op_str; + else if (op == "order by" || op == "sort by") + order_clause = order_clause.empty() ? op_str : order_clause + "," + op_str; + }; + + set_main_query_clause(last_op, last_pos); + + operation_pos.pop_back(); + + if (kql_parser[last_op].need_input) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else + { + while (!operation_pos.empty()) + { + auto prev_op = operation_pos.back().first; + auto prev_pos = operation_pos.back().second; + + if (kql_parser[prev_op].gen_output) + break; + if (!project_clause.empty() && prev_op == "project") + break; + set_main_query_clause(prev_op, prev_pos); + operation_pos.pop_back(); + last_op = prev_op; + last_pos = prev_pos; + } + } + + if (!operation_pos.empty()) + { + for (auto i = 0; i < kql_parser[last_op].backspace_steps; ++i) + --last_pos; + + String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + return false; + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + } + else + { + if (!ParserKQLTable().parse(last_pos, node, expected)) + return false; + } + + auto set_query_clause = [&](String op_str, String op_clause) + { + auto op_parser = getOperator(op_str); + if (op_parser) + { + Tokens token_clause(op_clause.c_str(), op_clause.c_str() + op_clause.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + if (!op_parser->parse(pos_clause, node, expected)) + return false; + } + return true; + }; + + if (!select_query->select()) + { + if (project_clause.empty()) + project_clause = "*"; + if (!set_query_clause("project", project_clause)) + return false; + } + + if (!order_clause.empty()) + if (!set_query_clause("order by", order_clause)) + return false; + + if (!where_clause.empty()) + if (!set_query_clause("where", where_clause)) + return false; + + if (!limit_clause.empty()) + if (!set_query_clause("limit", limit_clause)) + return false; + return true; + } + + if (!select_query->select()) + { + auto expr = String("*"); + Tokens tokens(expr.c_str(),
expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!std::make_unique()->parse(new_pos, node, expected)) + return false; + } + + return true; +} + +bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_node; + + if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) + return false; + + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(select_node); + + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; + + node_table_expr->children.emplace_back(node_subquery); + + ASTPtr node_table_in_select_query_emlement = std::make_shared(); + node_table_in_select_query_emlement->as()->table_expression = node_table_expr; + + ASTPtr res = std::make_shared(); + + res->children.emplace_back(node_table_in_select_query_emlement); + + node = res; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..42f5f84f031 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + static String getExprFromToken(Pos & pos); + static String getExprFromPipe(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t & max_depth); +}; + +class ParserKQLQuery : public IParserBase +{ + +protected: + static std::unique_ptr getOperator(String &op_name); + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserKQLSubquery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..f7540d729fd --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + auto expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + auto pos_backup = new_pos; + if (!order_list.parse(pos_backup, order_expression_list, expected)) + return false; + + while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) + { + String tmp(new_pos->begin, new_pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (new_pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + ++new_pos; + } + has_directions.push_back(has_dir); + + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto *order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
-1 : 1; + } + } + + node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..21e480234d3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + + ASTPtr query; + bool parsed = kql_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr kql_query; + + if (!ParserKQLQuery().parse(pos, kql_query, expected)) + return false; + + if (kql_query->as()) + { + node = std::move(kql_query); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(kql_query); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..ef44b2d6c8a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) 
override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..75eacb1adbd --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_expression_list; + ASTPtr group_expression_list; + + String expr_aggregation; + String expr_groupby; + String expr_columns; + bool groupby = false; + + auto begin = pos; + auto pos_groupby = pos; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + { + groupby = true; + auto end = pos; + --end; + expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; + pos_groupby = pos; + ++pos_groupby; + } + ++pos; + } + --pos; + if (groupby) + expr_groupby = String(pos_groupby->begin, pos->end); + else + expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; + expr_columns = groupby ? 
expr_aggregation_str + expr_groupby : expr_aggregation_str; + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + if (groupby) + { + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + } + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..1aad02705df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..6356ad688b6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +namespace DB +{ + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ({ + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + }); + + ASTPtr tables; + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..c67dcb15156 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return 
Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..b452bd27642 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,185 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST" 
+ }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + } +}))); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ce4f1dc884d..96328bb9e89 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ 
b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6144,6 +6144,9 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (format_version != src_data->format_version) throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS); + if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) + throw Exception("Tables have different primary key", ErrorCodes::BAD_ARGUMENTS); + return *src_data; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 5c5fc0cd8f4..9298e841072 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -110,18 +111,26 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( size_t marks_file_size = data_part_storage_->getFileSize(marks_file_path); - auto buffer = data_part_storage_->readFile(marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); - while (!buffer->eof()) + std::unique_ptr buffer = data_part_storage_->readFile( + marks_file_path, ReadSettings().adjustBufferSize(marks_file_size), marks_file_size, std::nullopt); + + std::unique_ptr marks_reader; + bool marks_compressed = index_granularity_info_.mark_type.compressed; + if (marks_compressed) + marks_reader = std::make_unique(std::move(buffer)); + else + marks_reader = std::move(buffer); + + while (!marks_reader->eof()) { - /// Skip offsets for columns - buffer->seek(columns_count * sizeof(MarkInCompressedFile), SEEK_CUR); + marks_reader->ignore(columns_count * sizeof(MarkInCompressedFile)); size_t granularity; - readIntBinary(granularity, *buffer); + readIntBinary(granularity, *marks_reader); index_granularity_.appendMark(granularity); } - if (index_granularity_.getMarksCount() * index_granularity_info_.getMarkSizeInBytes(columns_count) != marks_file_size) - throw Exception("Cannot read all marks from file " + marks_file_path, ErrorCodes::CANNOT_READ_ALL_DATA); + if (!marks_compressed && index_granularity_.getMarksCount() * index_granularity_info_.getMarkSizeInBytes(columns_count) != marks_file_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all marks from file {}", marks_file_path); index_granularity_.setInitialized(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index ba4979e57f2..7f91ffee1fe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int REPLICA_STATUS_CHANGED; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run() { if (const auto * coordination_exception = dynamic_cast(&e)) needs_retry = Coordination::isHardwareError(coordination_exception->code); + else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + needs_retry = true; if (needs_retry) { @@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run() void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path) { - /// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on 
replica startup. + /// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER. /// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica), /// but it did not work correctly, because "/metadata" node was re-created on server startup. /// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped. String replica_metadata; const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata); - if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version")) + if (!replica_metadata_exists || replica_metadata.empty()) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer " "should be done through an intermediate version (failed to get metadata or metadata_version for {}," @@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + String replica_metadata_version; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + if (replica_metadata_version_exists) + { + storage.metadata_version = parse(replica_metadata_version); + } + else + { + /// Table was created before 20.4 and was never altered, + /// let's initialize replica metadata version from global metadata version. + Coordination::Stat table_metadata_version_stat; + zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent)); + + Coordination::Responses res; + auto code = zookeeper->tryMulti(ops, res); + + if (code == Coordination::Error::ZBADVERSION) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version " + "because table was concurrently altered, will retry"); + + zkutil::KeeperMultiException::check(code, ops, res); + } + storage.checkTableStructure(replica_path, metadata_snapshot); storage.checkParts(skip_sanity_checks); - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); - /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4274386e393..6982521f76a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -683,8 +683,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (replicated) { - auto storage_policy = args.getContext()->getStoragePolicy(storage_settings->storage_policy); - return std::make_shared( zookeeper_path, replica_name, diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f6b110bbad0..28061aaaf48 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap( for (size_t i = 0; i < 1000; ++i) { + std::string stored_metadata_string; + auto exists = client->tryGet(metadata_path, stored_metadata_string); + + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", + root_path, + stored_metadata_string); + + auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); + + // tables_path was removed with drop + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); + continue; + } + else if (code != Coordination::Error::ZOK) + { + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path); + } + + return; + } + if (client->exists(dropped_path)) { LOG_INFO(log, "Removing leftover nodes"); @@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap( } } - std::string stored_metadata_string; - auto exists = client->tryGet(metadata_path, stored_metadata_string); + Coordination::Requests create_requests + { + zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent), + }; - if (exists) + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", - root_path, - stored_metadata_string); + LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + continue; } - else + else if (code != Coordination::Error::ZOK) { - auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent); - if (code == Coordination::Error::ZNODEEXISTS) - continue; - else if (code != Coordination::Error::ZOK) - throw Coordination::Exception(code, metadata_path); + zkutil::KeeperMultiException::check(code, create_requests, create_responses); } - client->createIfNotExists(tables_path, ""); - auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); - - if (code == Coordination::Error::ZOK) - { - // metadata now should be guaranteed to exist because we added our UUID to the tables_path - client->createIfNotExists(data_path, ""); - table_is_valid = true; - return; - } - - if (code == Coordination::Error::ZNONODE) - LOG_INFO(log, "Metadata nodes were deleted in background, will retry"); - else - throw Coordination::Exception(code, table_path); + table_is_valid = true; + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc0ace576ce..beeb19fa6f9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1408,31 +1408,23 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd ops = std::move(new_ops); } - try - { - Coordination::Responses responses; - Coordination::Error e = zookeeper->tryMulti(ops, responses); - if (e == Coordination::Error::ZOK) - return transaction.commit(); + Coordination::Responses responses; + Coordination::Error e = zookeeper->tryMulti(ops, responses); + if (e == Coordination::Error::ZOK) + return transaction.commit(); - if (e == Coordination::Error::ZNODEEXISTS) + if (e == Coordination::Error::ZNODEEXISTS) + { + size_t num_check_ops = 2 * absent_part_paths_on_replicas.size(); + size_t failed_op_index = zkutil::getFailedOpIndex(e, responses); + if (failed_op_index < num_check_ops) { - size_t num_check_ops = 2 * absent_part_paths_on_replicas.size(); - size_t failed_op_index = zkutil::getFailedOpIndex(e, responses); - if (failed_op_index < num_check_ops) - { - LOG_INFO(log, "The part {} on a replica suddenly appeared, will recheck checksums", ops[failed_op_index]->getPath()); - continue; - } + LOG_INFO(log, "The part {} on a replica suddenly appeared, will recheck checksums", ops[failed_op_index]->getPath()); + continue; } + } - throw zkutil::KeeperException(e); - } - catch (const std::exception &) - { - unlockSharedData(*part); - throw; - } + throw zkutil::KeeperException(e); } } @@ -7451,8 +7443,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); + bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) createTableSharedID(); return toString(table_shared_id); @@ -8152,7 +8145,6 @@ bool 
StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } catch (const Exception & ex) { - unlockSharedData(*new_data_part); LOG_WARNING(log, "Cannot commit empty part {} with error {}", lost_part_name, ex.displayText()); return false; } diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index efc4c0ed37b..6bc080045f8 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,54 +2,21 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) - get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) - -find_package(Git) -if(Git_FOUND) - # The commit's git hash, and whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Git branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) - configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) + configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() + generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) 
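+# Note: configure_file() with a relative output path writes into CMAKE_CURRENT_BINARY_DIR,
+# so the generated StorageSystemBuildOptions.generated.cpp can be appended to the source list by name below.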
-list (APPEND storages_system_sources ${CONFIG_BUILD}) +list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp) add_custom_target(generate-contributors ./StorageSystemContributors.sh @@ -78,6 +45,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) clickhouse_embed_binaries( TARGET information_schema_metadata RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in similarity index 98% rename from src/Storages/System/StorageSystemBuildOptions.generated.cpp.in rename to src/Storages/System/StorageSystemBuildOptions.cpp.in index dde90ce459a..117d97d2cfd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// .cpp autogenerated by cmake +// File was generated by CMake const char * auto_config_build[] { diff --git a/tests/config/config.d/legacy_geobase.xml b/tests/config/config.d/legacy_geobase.xml new file mode 100644 index 00000000000..352825afd04 --- /dev/null +++ b/tests/config/config.d/legacy_geobase.xml @@ -0,0 +1,4 @@ + + /etc/clickhouse-server/config.d/regions_hierarchy.txt + /etc/clickhouse-server/config.d/ + diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index dcf4d8e9100..a2a7f5cc750 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -8,6 +8,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -15,6 +16,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -22,6 +24,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -29,6 +32,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -36,6 +40,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 s3 @@ -43,6 +48,7 @@ http://localhost:11111/test/00170_test/ clickhouse clickhouse + 20000 diff --git a/tests/config/install.sh b/tests/config/install.sh index d4c71212423..edcc1dcb313 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -52,6 +52,12 @@ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/con ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ +# Not supported with fasttest. 
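+# (the legacy geobase config below is only linked when installing into the default server config dir)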
+if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] +then + ln -sf $SRC_PATH/config.d/legacy_geobase.xml $DEST_SERVER_PATH/config.d/ +fi + ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ @@ -78,6 +84,8 @@ ln -sf $SRC_PATH/executable_pool_dictionary.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/test_function.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/top_level_domains $DEST_SERVER_PATH/ +ln -sf $SRC_PATH/regions_hierarchy.txt $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/regions_names_en.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/ext-en.txt $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/ext-ru.txt $DEST_SERVER_PATH/config.d/ diff --git a/tests/config/regions_hierarchy.txt b/tests/config/regions_hierarchy.txt new file mode 100644 index 00000000000..a111da4a825 --- /dev/null +++ b/tests/config/regions_hierarchy.txt @@ -0,0 +1,12 @@ +1 0 0 7000000000 +2 10 3 330000000 +3 2 4 5700000 +4 3 5 330000 +5 4 6 100000 +6 12 3 1500000000 +7 6 4 83000000 +8 7 6 20000000 +9 1 1 1000000000 +10 9 1 600000000 +11 1 1 5300000000 +12 11 1 4700000000 diff --git a/tests/config/regions_names_en.txt b/tests/config/regions_names_en.txt new file mode 100644 index 00000000000..ccd23678289 --- /dev/null +++ b/tests/config/regions_names_en.txt @@ -0,0 +1,12 @@ +1 World +2 USA +3 Colorado +4 Boulder County +5 Boulder +6 China +7 Sichuan +8 Chengdu +9 America +10 North America +11 Eurasia +12 Asia diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py new file mode 100644 index 00000000000..681407e5e8c --- /dev/null +++ b/tests/integration/helpers/keeper_utils.py @@ -0,0 +1,41 @@ +import socket +import time + + +def get_keeper_socket(cluster, node, port=9181): + hosts = cluster.get_instance_ip(node.name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, port)) + return client + + +def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): + client = None + try: + client = get_keeper_socket(cluster, node, port) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def wait_until_connected(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_until_quorum_lost(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(cluster, nodes): + for node in nodes: + wait_until_connected(cluster, node) diff --git a/tests/integration/test_compressed_marks_restart/__init__.py b/tests/integration/test_compressed_marks_restart/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_compressed_marks_restart/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_compressed_marks_restart/test.py b/tests/integration/test_compressed_marks_restart/test.py new file mode 100644 index 00000000000..90e09d62792 --- /dev/null +++ b/tests/integration/test_compressed_marks_restart/test.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", stay_alive=True) + + 
+@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_compressed_marks_restart_compact(): + node.query( + "create table test_02381_compact (a UInt64, b String) ENGINE = MergeTree order by (a, b)" + ) + node.query("insert into test_02381_compact values (1, 'Hello')") + node.query( + "alter table test_02381_compact modify setting compress_marks=true, compress_primary_key=true" + ) + node.query("insert into test_02381_compact values (2, 'World')") + node.query("optimize table test_02381_compact final") + + assert ( + node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") + == "2\n" + ) + node.restart_clickhouse() + assert ( + node.query("SELECT count() FROM test_02381_compact WHERE not ignore(*)") + == "2\n" + ) + + +def test_compressed_marks_restart_wide(): + node.query( + "create table test_02381_wide (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS min_bytes_for_wide_part=0" + ) + node.query("insert into test_02381_wide values (1, 'Hello')") + node.query( + "alter table test_02381_wide modify setting compress_marks=true, compress_primary_key=true" + ) + node.query("insert into test_02381_wide values (2, 'World')") + node.query("optimize table test_02381_wide final") + + assert ( + node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + ) + node.restart_clickhouse() + assert ( + node.query("SELECT count() FROM test_02381_wide WHERE not ignore(*)") == "2\n" + ) diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index ae6b0085094..6ec307f7082 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -15,6 +15,7 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() + yield cluster finally: cluster.shutdown() diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml deleted file mode 100644 index c1d38a1de52..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - - diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml deleted file mode 100644 index ebb0d98ddf4..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - node1 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f3bb0ca56e3..f7c3787b4d8 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -2,6 +2,7 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time @@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - 
-def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): while zk.retry(zk.exists, path) is None: time.sleep(0.1) @@ -104,9 +74,6 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: @@ -114,7 +81,7 @@ def test_cluster_recovery(started_cluster): for node in nodes[CLUSTER_SIZE:]: node.stop_clickhouse() - wait_nodes(nodes[:CLUSTER_SIZE]) + keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE]) node_zks = [get_fake_zk(node.name) for node in nodes[:CLUSTER_SIZE]] @@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster): node.stop_clickhouse() # wait for node1 to lose quorum - while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, nodes[0]) nodes[0].copy_file_to_container( os.path.join(CONFIG_DIR, "recovered_keeper1.xml"), @@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster): nodes[0].query("SYSTEM RELOAD CONFIG") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG - send_4lw_cmd(nodes[0].name, "rcvr") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) + keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr") + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) # add one node to restore the quorum nodes[CLUSTER_SIZE].copy_file_to_container( @@ -191,10 +163,10 @@ def test_cluster_recovery(started_cluster): ) nodes[CLUSTER_SIZE].start_clickhouse() - wait_until_connected(nodes[CLUSTER_SIZE].name) + keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE]) # node1 should have quorum now and accept requests - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name)) @@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster): f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml", ) node.start_clickhouse() - wait_until_connected(node.name) + keeper_utils.wait_until_connected(cluster, node) node_zks.append(get_fake_zk(node.name)) # refresh old zk sessions @@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) for zk in node_zks[:nodes_left]: assert_all_data(zk) diff 
--git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 0a554e33119..1c0d5e9a306 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -2,10 +2,11 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooRetry CLUSTER_SIZE = 3 @@ -45,47 +46,19 @@ def started_cluster(): def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + hosts=cluster.get_instance_ip(nodename) + ":9181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=10), + command_retry=KazooRetry(max_tries=10), ) _fake_zk_instance.start() return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): - while zk.exists(path) is None: + while zk.retry(zk.exists, path) is None: time.sleep(0.1) - assert zk.get(path)[0] == data.encode() + assert zk.retry(zk.get, path)[0] == data.encode() def close_zk(zk): @@ -93,20 +66,17 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: - wait_nodes(nodes) + keeper_utils.wait_nodes(cluster, nodes) node_zks = [get_fake_zk(node.name) for node in nodes] data_in_cluster = [] def add_data(zk, path, data): - zk.create(path, data.encode()) + zk.retry(zk.create, path, data.encode()) data_in_cluster.append((path, data)) def assert_all_data(zk): @@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster): ) nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) assert_all_data(get_fake_zk(nodes[0].name)) finally: diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..30abc7422c4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -1,6 +1,7 @@ import socket import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -25,6 +26,10 @@ node3 = cluster.add_instance( from kazoo.client import 
KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -56,28 +61,6 @@ def clear_znodes(): destroy_zk_client(zk) -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - # zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def reset_node_stats(node_name=node1.name): +def reset_node_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"srst") client.recv(10) finally: @@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name): client.close() -def send_4lw_cmd(node_name=node1.name, cmd="ruok"): +def reset_conn_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def reset_conn_stats(node_name=node1.name): - client = None - try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"crst") client.recv(10_000) finally: @@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster): client = None try: wait_nodes() - data = send_4lw_cmd(cmd="ruok") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") assert data == "imok" finally: close_keeper_socket(client) @@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster): clear_znodes() # reset stat first - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action( @@ -200,7 +162,7 @@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="srst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst") assert data.strip() == "Server stats reset." 
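+    # after "srst" resets the server stats, "mntr" should still return a non-empty report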
- data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") assert len(data) != 0 # print(data) @@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="conf") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf") reader = csv.reader(data.split("\n"), delimiter="=") result = {} @@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster): def test_cmd_isro(started_cluster): wait_nodes() - assert send_4lw_cmd(node1.name, "isro") == "rw" - assert send_4lw_cmd(node2.name, "isro") == "ro" + assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw" + assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro" def test_cmd_srvr(started_cluster): @@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1.name) - reset_conn_stats(node1.name) + reset_node_stats(node1) + reset_conn_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") print("stat output -------------------------------------") print(data) @@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output -------------------------------------") print(data) @@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="crst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst") print("crst output -------------------------------------") print(data) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output(after crst) -------------------------------------") print(data) @@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, ephemeral_cnt=2) - data = send_4lw_cmd(cmd="dump") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump") print("dump output -------------------------------------") print(data) @@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchs") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs") print("wchs output -------------------------------------") print(data) @@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchc") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc") print("wchc output -------------------------------------") print(data) @@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchp") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp") 
print("wchp output -------------------------------------") print(data) diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index cedb195a6e0..95482745b31 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -204,7 +204,7 @@ JUST_WRONG_CONFIG = """ """ -def test_duplicate_endpoint(started_cluster): +def test_invalid_configs(started_cluster): node1.stop_clickhouse() def assert_config_fails(config): diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 471767210d6..d351b238ead 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import pytest import random import string @@ -37,40 +38,22 @@ def started_cluster(): cluster.shutdown() -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - def test_aggressive_mntr(started_cluster): - def go_mntr(node_name): - for _ in range(100000): - print(node_name, send_4lw_cmd(node_name, "mntr")) + def go_mntr(node): + for _ in range(10000): + try: + print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr")) + except ConnectionRefusedError: + pass - node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) - node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) - node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread = threading.Thread(target=lambda: go_mntr(node1)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3)) node1_thread.start() node2_thread.start() node3_thread.start() @@ -78,8 +61,7 @@ def test_aggressive_mntr(started_cluster): node2.stop_clickhouse() node3.stop_clickhouse() - while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, node1) node1.stop_clickhouse() starters = [] diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index d6d01a5d0a6..a7a80d90a58 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -55,31 +56,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, 
exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -88,6 +64,10 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 694600acc67..1dcbb290fa8 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -43,29 +44,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index c3449534e87..aad674332ac 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -41,9 +42,11 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def test_nodes_add(started_cluster): + keeper_utils.wait_until_connected(cluster, node1) zk_conn = get_fake_zk(node1) for i in range(100): @@ -62,6 +65,7 @@ def test_nodes_add(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) zk_conn2 = get_fake_zk(node2) @@ -93,6 +97,7 @@ def test_nodes_add(started_cluster): node2.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) zk_conn3 = get_fake_zk(node3) for i in range(100): diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 31082846fb8..1e3bd95c5e7 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -11,6 +11,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) @@ -41,6 +42,7 @@ def started_cluster(): def start(node): 
node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def get_fake_zk(node, timeout=30.0): diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 13303d320eb..59bdaadf2e2 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import time import os from kazoo.client import KazooClient, KazooState @@ -79,9 +80,12 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn3 = get_fake_zk(node3) zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node3.stop_clickhouse() @@ -91,6 +95,7 @@ def test_nodes_remove(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") + zk_conn = get_fake_zk(node1) zk_conn.sync("/test_two_0") @@ -98,8 +103,11 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node2.stop_clickhouse() diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 377fa436a87..70cc14fe26d 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -46,6 +46,10 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -62,7 +66,7 @@ def test_state_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -111,7 +115,7 @@ def test_state_duplicate_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_duplicated_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -119,7 +123,7 @@ def test_state_duplicate_restart(started_cluster): node_zk2.create("/test_state_duplicated_restart/just_test2") node_zk2.create("/test_state_duplicated_restart/just_test3") - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk3 = get_connection_zk("node") @@ -159,6 +163,7 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): + try: node_zk = None node_zk2 = None @@ -176,7 +181,7 @@ def test_ephemeral_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index f15e772fd5f..1552abd32e9 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster 
import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -26,10 +27,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml deleted file mode 100644 index d51e420f733..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml deleted file mode 100644 index 3f1ee1e01a8..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml deleted file mode 
100644 index a99bd5d5296..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7270c84bdda..bc33689dd20 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -84,6 +85,7 @@ def test_recover_from_snapshot(started_cluster): # stale node should recover from leader's snapshot # with some sanitizers can start longer than 5 seconds node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) print("Restarted") try: diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 55e00880da0..2a17afac75b 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -40,4 +40,4 @@ def started_cluster(): def test_connection(started_cluster): # just nothrow - node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") + node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 4b3aa7e3fdf..72a162c1765 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time import socket import struct @@ -52,25 +53,8 @@ def destroy_zk_client(zk): pass -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 4351c5ac96f..6a64cf0ac92 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -2,6 +2,7 @@ ##!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool from kazoo.client import KazooClient, KazooState import random @@ -22,7 +23,7 @@ node3 = cluster.add_instance( def start_zookeeper(node): - node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) + node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) def stop_zookeeper(node): @@ -66,6 +67,7 @@ def stop_clickhouse(node): def start_clickhouse(node): node.start_clickhouse() + 
keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots, node): diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 08f60e538a4..ce57a852dca 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -50,6 +51,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -69,7 +75,7 @@ def test_state_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -123,7 +129,7 @@ def test_ephemeral_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 1461f35e6a4..a68a34dae2e 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -20,6 +21,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -94,6 +99,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml 
b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index f1de469c5a1..bd29ded357f 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -48,6 +49,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def delete_with_retry(node_name, path): @@ -138,6 +140,7 @@ def test_restart_third_node(started_cluster): node1_zk.create("/test_restart", b"aaaa") node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) assert node3.contains_in_log( "Connected to ZooKeeper (or Keeper) before internal Keeper start" diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 8c0276f7d77..c6bc0ebd33a 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -40,29 +41,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index bff3d52014e..f2076acc4d2 100644 --- a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -42,29 +43,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise 
Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): @@ -129,6 +109,7 @@ def test_server_restart(started_cluster): node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue") node3.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node3) node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 50a9ee6a4a7..af8d1ca4bf9 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -60,6 +61,7 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots): diff --git a/tests/queries/0_stateless/01825_type_json_schema_inference.sh b/tests/queries/0_stateless/01825_type_json_schema_inference.sh index 447ebdf65cb..36991bd8069 100755 --- a/tests/queries/0_stateless/01825_type_json_schema_inference.sh +++ b/tests/queries/0_stateless/01825_type_json_schema_inference.sh @@ -19,7 +19,8 @@ filename="${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json" echo '{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}' > $filename echo '{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}' >> $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1" @@ -30,7 +31,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (id UInt64, obj String, s echo '{"obj": "aaa", "id": 1, "s": "foo"}' > $filename echo '{"id": 2, "obj": "bbb", "s": "bar"}' >> $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" @@ -38,14 +39,14 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_inference" echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": {"k3": 2}}}' > $filename ${CLICKHOUSE_CLIENT} -q "SELECT map, obj, toTypeName(map) AS map_type, toTypeName(obj) AS obj_type \ - FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow') FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 + FROM 
file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow') FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_inference (obj JSON, map Map(String, UInt64)) \ ENGINE = MergeTree ORDER BY tuple()" --allow_experimental_object_type 1 echo '{"map": {"k1": 1, "k2": 2}, "obj": {"k1": 1, "k2": 2}}' > $filename -${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_inference SELECT * FROM file('${CLICKHOUSE_TEST_UNIQUE_NAME}/data.json', 'JSONEachRow')" --allow_experimental_object_type 1 ${CLICKHOUSE_CLIENT} -q "SELECT * FROM t_json_inference FORMAT JSONEachRow" --output_format_json_named_tuples_as_objects 1 ${CLICKHOUSE_CLIENT} -q "SELECT toTypeName(obj) FROM t_json_inference LIMIT 1" diff --git a/tests/queries/0_stateless/02147_order_by_optimizations.sql b/tests/queries/0_stateless/02147_order_by_optimizations.sql index 7aa631ff432..3925e92bffc 100644 --- a/tests/queries/0_stateless/02147_order_by_optimizations.sql +++ b/tests/queries/0_stateless/02147_order_by_optimizations.sql @@ -13,3 +13,7 @@ SET optimize_monotonous_functions_in_order_by = 1; EXPLAIN SYNTAX SELECT * FROM t_02147 ORDER BY toStartOfHour(date), v; EXPLAIN SYNTAX SELECT * FROM t_02147_dist ORDER BY toStartOfHour(date), v; EXPLAIN SYNTAX SELECT * FROM t_02147_merge ORDER BY toStartOfHour(date), v; + +drop table t_02147; +CREATE TABLE t_02147 (date DateTime, v UInt32) ENGINE = MergeTree ORDER BY date; +select *, toString(t.v) as s from t_02147_merge as t order by date, s; diff --git a/tests/queries/0_stateless/02268_json_maps_and_objects.sql b/tests/queries/0_stateless/02268_json_maps_and_objects.sql index 8a9ede6876c..3e63227ba66 100644 --- a/tests/queries/0_stateless/02268_json_maps_and_objects.sql +++ b/tests/queries/0_stateless/02268_json_maps_and_objects.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3]}}, {"x" : {"c" : {"d" : 10}}}'); desc format(JSONEachRow, '{"x" : {"a" : "Some string"}}, {"x" : {"b" : [1, 2, 3], "c" : {"42" : 42}}}'); desc format(JSONEachRow, '{"x" : [{"a" : "Some string"}]}, {"x" : [{"b" : [1, 2, 3]}]}'); diff --git a/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql b/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql index 2012a53c09d..2f8bb3a0331 100644 --- a/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql +++ b/tests/queries/0_stateless/02326_numbers_from_json_strings_schema_inference.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest set input_format_json_try_infer_numbers_from_strings=1; +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"x" : "123"}'); desc format(JSONEachRow, '{"x" : ["123", 123, 12.3]}'); diff --git a/tests/queries/0_stateless/02411_legacy_geobase.reference b/tests/queries/0_stateless/02411_legacy_geobase.reference new file mode 100644 index 00000000000..4fc360d876c --- /dev/null +++ b/tests/queries/0_stateless/02411_legacy_geobase.reference @@ -0,0 +1,286 @@ + +World +USA +Colorado +Boulder County +Boulder +China +Sichuan +Chengdu +America +North America +Eurasia +Asia + 0 +World 0 +USA 0 +Colorado 0 +Boulder County 0 +Boulder 5 Boulder +China 0 +Sichuan 0 +Chengdu 8 Chengdu +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 0 +Colorado 0 
+Boulder County 4 Boulder County +Boulder 4 Boulder County +China 0 +Sichuan 0 +Chengdu 0 +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 0 +Colorado 3 Colorado +Boulder County 3 Colorado +Boulder 3 Colorado +China 0 +Sichuan 7 Sichuan +Chengdu 7 Sichuan +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 2 USA +Colorado 2 USA +Boulder County 2 USA +Boulder 2 USA +China 6 China +Sichuan 6 China +Chengdu 6 China +America 0 +North America 0 +Eurasia 0 +Asia 0 + 0 +World 0 +USA 10 North America +Colorado 10 North America +Boulder County 10 North America +Boulder 10 North America +China 12 Asia +Sichuan 12 Asia +Chengdu 12 Asia +America 9 America +North America 10 North America +Eurasia 11 Eurasia +Asia 12 Asia + 0 +World 0 +USA 9 America +Colorado 9 America +Boulder County 9 America +Boulder 9 America +China 11 Eurasia +Sichuan 11 Eurasia +Chengdu 11 Eurasia +America 9 America +North America 9 America +Eurasia 11 Eurasia +Asia 11 Eurasia + 0 +World 4294967295 +USA 330000000 +Colorado 5700000 +Boulder County 330000 +Boulder 100000 +China 1500000000 +Sichuan 83000000 +Chengdu 20000000 +America 1000000000 +North America 600000000 +Eurasia 4294967295 +Asia 4294967295 + is not in + is not in World + is not in USA + is not in Colorado + is not in Boulder County + is not in Boulder + is not in China + is not in Sichuan + is not in Chengdu + is not in America + is not in North America + is not in Eurasia + is not in Asia +World is not in +World is in World +World is not in USA +World is not in Colorado +World is not in Boulder County +World is not in Boulder +World is not in China +World is not in Sichuan +World is not in Chengdu +World is not in America +World is not in North America +World is not in Eurasia +World is not in Asia +USA is not in +USA is in World +USA is in USA +USA is not in Colorado +USA is not in Boulder County +USA is not in Boulder +USA is not in China +USA is not in Sichuan +USA is not in Chengdu +USA is in America +USA is in North America +USA is not in Eurasia +USA is not in Asia +Colorado is not in +Colorado is in World +Colorado is in USA +Colorado is in Colorado +Colorado is not in Boulder County +Colorado is not in Boulder +Colorado is not in China +Colorado is not in Sichuan +Colorado is not in Chengdu +Colorado is in America +Colorado is in North America +Colorado is not in Eurasia +Colorado is not in Asia +Boulder County is not in +Boulder County is in World +Boulder County is in USA +Boulder County is in Colorado +Boulder County is in Boulder County +Boulder County is not in Boulder +Boulder County is not in China +Boulder County is not in Sichuan +Boulder County is not in Chengdu +Boulder County is in America +Boulder County is in North America +Boulder County is not in Eurasia +Boulder County is not in Asia +Boulder is not in +Boulder is in World +Boulder is in USA +Boulder is in Colorado +Boulder is in Boulder County +Boulder is in Boulder +Boulder is not in China +Boulder is not in Sichuan +Boulder is not in Chengdu +Boulder is in America +Boulder is in North America +Boulder is not in Eurasia +Boulder is not in Asia +China is not in +China is in World +China is not in USA +China is not in Colorado +China is not in Boulder County +China is not in Boulder +China is in China +China is not in Sichuan +China is not in Chengdu +China is not in America +China is not in North America +China is in Eurasia +China is in Asia +Sichuan is not in +Sichuan is in World +Sichuan is not in USA +Sichuan is not in Colorado +Sichuan is not in Boulder 
County +Sichuan is not in Boulder +Sichuan is in China +Sichuan is in Sichuan +Sichuan is not in Chengdu +Sichuan is not in America +Sichuan is not in North America +Sichuan is in Eurasia +Sichuan is in Asia +Chengdu is not in +Chengdu is in World +Chengdu is not in USA +Chengdu is not in Colorado +Chengdu is not in Boulder County +Chengdu is not in Boulder +Chengdu is in China +Chengdu is in Sichuan +Chengdu is in Chengdu +Chengdu is not in America +Chengdu is not in North America +Chengdu is in Eurasia +Chengdu is in Asia +America is not in +America is in World +America is not in USA +America is not in Colorado +America is not in Boulder County +America is not in Boulder +America is not in China +America is not in Sichuan +America is not in Chengdu +America is in America +America is not in North America +America is not in Eurasia +America is not in Asia +North America is not in +North America is in World +North America is not in USA +North America is not in Colorado +North America is not in Boulder County +North America is not in Boulder +North America is not in China +North America is not in Sichuan +North America is not in Chengdu +North America is in America +North America is in North America +North America is not in Eurasia +North America is not in Asia +Eurasia is not in +Eurasia is in World +Eurasia is not in USA +Eurasia is not in Colorado +Eurasia is not in Boulder County +Eurasia is not in Boulder +Eurasia is not in China +Eurasia is not in Sichuan +Eurasia is not in Chengdu +Eurasia is not in America +Eurasia is not in North America +Eurasia is in Eurasia +Eurasia is not in Asia +Asia is not in +Asia is in World +Asia is not in USA +Asia is not in Colorado +Asia is not in Boulder County +Asia is not in Boulder +Asia is not in China +Asia is not in Sichuan +Asia is not in Chengdu +Asia is not in America +Asia is not in North America +Asia is in Eurasia +Asia is in Asia +[] [] +[1] ['World'] +[2,10,9,1] ['USA','North America','America','World'] +[3,2,10,9,1] ['Colorado','USA','North America','America','World'] +[4,3,2,10,9,1] ['Boulder County','Colorado','USA','North America','America','World'] +[5,4,3,2,10,9,1] ['Boulder','Boulder County','Colorado','USA','North America','America','World'] +[6,12,11,1] ['China','Asia','Eurasia','World'] +[7,6,12,11,1] ['Sichuan','China','Asia','Eurasia','World'] +[8,7,6,12,11,1] ['Chengdu','Sichuan','China','Asia','Eurasia','World'] +[9,1] ['America','World'] +[10,9,1] ['North America','America','World'] +[11,1] ['Eurasia','World'] +[12,11,1] ['Asia','Eurasia','World'] diff --git a/tests/queries/0_stateless/02411_legacy_geobase.sql b/tests/queries/0_stateless/02411_legacy_geobase.sql new file mode 100644 index 00000000000..a7d82f3beb9 --- /dev/null +++ b/tests/queries/0_stateless/02411_legacy_geobase.sql @@ -0,0 +1,14 @@ +-- Tags: no-fasttest + +SELECT regionToName(number::UInt32, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'xy') FROM numbers(13); -- { serverError 1000 } + +SELECT regionToName(number::UInt32, 'en'), regionToCity(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToArea(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToDistrict(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToCountry(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), 
regionToContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToTopContinent(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(number::UInt32, 'en'), regionToPopulation(number::UInt32) AS id, regionToName(id, 'en') FROM numbers(13); +SELECT regionToName(n1.number::UInt32, 'en') || (regionIn(n1.number::UInt32, n2.number::UInt32) ? ' is in ' : ' is not in ') || regionToName(n2.number::UInt32, 'en') FROM numbers(13) AS n1 CROSS JOIN numbers(13) AS n2; +SELECT regionHierarchy(number::UInt32) AS arr, arrayMap(id -> regionToName(id, 'en'), arr) FROM numbers(13); diff --git a/tests/queries/0_stateless/02416_json_object_inference.sql b/tests/queries/0_stateless/02416_json_object_inference.sql index b861468a08a..24f50930a68 100644 --- a/tests/queries/0_stateless/02416_json_object_inference.sql +++ b/tests/queries/0_stateless/02416_json_object_inference.sql @@ -1,2 +1,6 @@ -- Tags: no-fasttest +set allow_experimental_object_type=1; desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); +set allow_experimental_object_type=0; +desc format(JSONEachRow, '{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); -- {serverError 652} + diff --git a/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference new file mode 100644 index 00000000000..bade13b252d --- /dev/null +++ b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.reference @@ -0,0 +1 @@ +2 World diff --git a/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql new file mode 100644 index 00000000000..332b1c05d6c --- /dev/null +++ b/tests/queries/0_stateless/02454_compressed_marks_in_compact_part.sql @@ -0,0 +1,6 @@ +drop table if exists cc sync; +create table cc (a UInt64, b String) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = true; +insert into cc values (2, 'World'); +alter table cc detach part 'all_1_1_0'; +alter table cc attach part 'all_1_1_0'; +select * from cc; diff --git a/tests/queries/0_stateless/test_ugtxj2/tuples b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference similarity index 100% rename from tests/queries/0_stateless/test_ugtxj2/tuples rename to tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.reference diff --git a/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql new file mode 100644 index 00000000000..d000fb4479c --- /dev/null +++ b/tests/queries/0_stateless/02455_improve_feedback_when_replacing_partition_with_different_primary_key.sql @@ -0,0 +1,4 @@ +CREATE TABLE test_a (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company PRIMARY KEY (id) ORDER BY (id, company); +INSERT INTO test_a SELECT number%10 as id, number%2 as company, count() as total FROM numbers(100) GROUP BY id,company; +CREATE TABLE test_b (id UInt32, company UInt32, total UInt64) ENGINE=SummingMergeTree() PARTITION BY company ORDER BY (id, company); +ALTER TABLE test_b REPLACE PARTITION '0' FROM test_a; -- {serverError BAD_ARGUMENTS} diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference 
b/tests/queries/1_stateful/00175_partition_by_ignore.reference new file mode 100644 index 00000000000..39dffcae642 --- /dev/null +++ b/tests/queries/1_stateful/00175_partition_by_ignore.reference @@ -0,0 +1,3 @@ +-- check that partition key with ignore works correctly +"default","partition_by_ignore",1,29,1 +"default","partition_by_ignore",1,29,1 diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/1_stateful/00175_partition_by_ignore.sql new file mode 100644 index 00000000000..737d1b59fe3 --- /dev/null +++ b/tests/queries/1_stateful/00175_partition_by_ignore.sql @@ -0,0 +1,11 @@ +SELECT '-- check that partition key with ignore works correctly'; + +DROP TABLE IF EXISTS partition_by_ignore SYNC; + +CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); +INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); + +EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; +EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts_2 BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; + +DROP TABLE IF EXISTS partition_by_ignore SYNC;
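Note on the tests above: they now import helpers.keeper_utils, whose addition is not shown in these hunks. Based on the per-test helpers this change removes (get_keeper_socket, send_4lw_cmd, wait_until_connected, wait_nodes, and the quorum-lost polling loop), a minimal sketch of the interface those calls assume is given below; this is an approximation for reference only, and the actual tests/integration/helpers/keeper_utils.py may differ in details.

import socket
import time

# Message returned by the "mntr" four-letter command while a Keeper
# instance is up but has no quorum / is not yet ready.
NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests"


def get_keeper_socket(cluster, node):
    # Open a plain TCP connection to the Keeper port of the given instance.
    ip = cluster.get_instance_ip(node.name)
    client = socket.socket()
    client.settimeout(10)
    client.connect((ip, 9181))
    return client


def send_4lw_cmd(cluster, node, cmd="ruok"):
    # Send a four-letter-word command and return the decoded response.
    client = None
    try:
        client = get_keeper_socket(cluster, node)
        client.send(cmd.encode())
        data = client.recv(100_000)
        return data.decode()
    finally:
        if client is not None:
            client.close()


def wait_until_connected(cluster, node):
    # Poll "mntr" until the instance reports that it is serving requests.
    while send_4lw_cmd(cluster, node, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG:
        time.sleep(0.1)


def wait_nodes(cluster, nodes):
    # Wait for every node of the ensemble to start serving requests.
    for node in nodes:
        wait_until_connected(cluster, node)


def wait_until_quorum_lost(cluster, node):
    # Poll "mntr" until the instance stops serving requests (quorum lost).
    while send_4lw_cmd(cluster, node, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG:
        time.sleep(0.2)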